Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finalize validations for custom granularities #370

Merged
merged 8 commits into from
Dec 2, 2024
50 changes: 26 additions & 24 deletions dbt_semantic_interfaces/implementations/metric.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import Dict, List, Optional, Sequence, Set
from copy import deepcopy
from typing import Any, Dict, List, Optional, Sequence, Set

from typing_extensions import override

Expand Down Expand Up @@ -83,7 +84,7 @@ def _from_yaml_value(cls, input: PydanticParseableValueType) -> PydanticMetricTi
The MetricTimeWindow is always expected to be provided as a string in user-defined YAML configs.
"""
if isinstance(input, str):
return PydanticMetricTimeWindow.parse(window=input.lower(), custom_granularity_names=(), strict=False)
return PydanticMetricTimeWindow.parse(window=input.lower())
else:
raise ValueError(
f"MetricTimeWindow inputs from model configs are expected to always be of type string, but got "
Expand All @@ -101,12 +102,8 @@ def window_string(self) -> str:
return f"{self.count} {self.granularity}"

@staticmethod
def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = True) -> PydanticMetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise.

If strict=True, then the granularity in the window must exist as a valid granularity.
Use strict=True for when you have all valid granularities, otherwise use strict=False.
"""
def parse(window: str) -> PydanticMetricTimeWindow:
"""Returns window values if parsing succeeds, None otherwise."""
parts = window.lower().split(" ")
if len(parts) != 2:
raise ParsingException(
Expand All @@ -115,22 +112,6 @@ def parse(window: str, custom_granularity_names: Sequence[str], strict: bool = T
)

granularity = parts[1]

valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set(
c.lower() for c in custom_granularity_names
)

# if we switched to python 3.9 this could just be `granularity = parts[0].removesuffix('s')
if granularity.endswith("s") and granularity[:-1] in valid_time_granularities:
# months -> month
granularity = granularity[:-1]

if strict and granularity not in valid_time_granularities:
raise ParsingException(
f"Invalid time granularity {granularity} in metric window string: ({window})",
)
# If not strict and not standard granularity, it may be a custom grain, so validations happens later

count = parts[0]
if not count.isdigit():
raise ParsingException(f"Invalid count ({count}) in cumulative metric window string: ({window})")
Expand Down Expand Up @@ -222,6 +203,27 @@ def _implements_protocol(self) -> Metric: # noqa: D
config: Optional[PydanticSemanticLayerElementConfig]
time_granularity: Optional[str] = None

@classmethod
def parse_obj(cls, input: Any) -> PydanticMetric:
"""Adds custom parsing to the default method."""
data = deepcopy(input)

# Ensure grain_to_date is lowercased
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we lowercase window here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could but I think the way it's implemented now is less hacky! This one feels less optimal because we have to go through these dict keys that are not typed. But I couldn't use the same strategy as we did for window because grain is just a string, not an object.

type_params = data.get("type_params", {})
grain_to_date = type_params.get("cumulative_type_params", {}).get("grain_to_date")
if isinstance(grain_to_date, str):
data["type_params"]["cumulative_type_params"]["grain_to_date"] = grain_to_date.lower()

# Ensure offset_to_grain is lowercased
input_metrics = type_params.get("metrics", [])
if input_metrics:
for input_metric in input_metrics:
offset_to_grain = input_metric.get("offset_to_grain")
if offset_to_grain and isinstance(offset_to_grain, str):
input_metric["offset_to_grain"] = offset_to_grain.lower()

return super(HashableBaseModel, cls).parse_obj(data)

@property
def input_measures(self) -> Sequence[PydanticMetricInputMeasure]:
"""Return the complete list of input measure configurations for this metric."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dbt_semantic_interfaces.transformations.transform_rule import (
SemanticManifestTransformRule,
)
from dbt_semantic_interfaces.type_enums import MetricType
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity


class RemovePluralFromWindowGranularityRule(ProtocolHint[SemanticManifestTransformRule[PydanticSemanticManifest]]):
Expand All @@ -33,12 +33,18 @@ def _update_metric(
semantic_manifest: PydanticSemanticManifest, metric_name: str, custom_granularity_names: Sequence[str]
) -> None:
"""Mutates all the MetricTimeWindow by reparsing to remove the trailing 's'."""
valid_time_granularities = {item.value.lower() for item in TimeGranularity} | set(
c.lower() for c in custom_granularity_names
)

def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow:
def trim_trailing_s(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow:
"""Reparse the window to remove the trailing 's'."""
return PydanticMetricTimeWindow.parse(
window=window.window_string, custom_granularity_names=custom_granularity_names
)
granularity = window.granularity
if granularity.endswith("s") and granularity[:-1] in valid_time_granularities:
# months -> month
granularity = granularity[:-1]
window.granularity = granularity
return window

matched_metric = next(
iter((metric for metric in semantic_manifest.metrics if metric.name == metric_name)), None
Expand All @@ -49,22 +55,23 @@ def reparse_window(window: PydanticMetricTimeWindow) -> PydanticMetricTimeWindow
matched_metric.type_params.cumulative_type_params
and matched_metric.type_params.cumulative_type_params.window
):
matched_metric.type_params.cumulative_type_params.window = reparse_window(
matched_metric.type_params.cumulative_type_params.window = trim_trailing_s(
matched_metric.type_params.cumulative_type_params.window
)

elif matched_metric.type is MetricType.CONVERSION:
if (
matched_metric.type_params.conversion_type_params
and matched_metric.type_params.conversion_type_params.window
):
matched_metric.type_params.conversion_type_params.window = reparse_window(
matched_metric.type_params.conversion_type_params.window = trim_trailing_s(
matched_metric.type_params.conversion_type_params.window
)

elif matched_metric.type is MetricType.DERIVED or matched_metric.type is MetricType.RATIO:
for input_metric in matched_metric.input_metrics:
if input_metric.offset_window:
input_metric.offset_window = reparse_window(input_metric.offset_window)
input_metric.offset_window = trim_trailing_s(input_metric.offset_window)
elif matched_metric.type is MetricType.SIMPLE:
pass
else:
Expand Down
141 changes: 100 additions & 41 deletions dbt_semantic_interfaces/validations/metrics.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import traceback
from typing import Dict, Generic, List, Optional, Sequence

from dbt_semantic_interfaces.errors import ParsingException
from dbt_semantic_interfaces.implementations.metric import (
PydanticMetric,
PydanticMetricTimeWindow,
)
from dbt_semantic_interfaces.implementations.metric import PydanticMetric
from dbt_semantic_interfaces.protocols import (
ConversionTypeParams,
Dimension,
Metric,
MetricInputMeasure,
MetricTimeWindow,
SemanticManifest,
SemanticManifestT,
SemanticModel,
Expand Down Expand Up @@ -42,20 +38,23 @@
WhereFiltersAreParseable, # noQa
)

TEMP_CUSTOM_GRAIN_MSG = "Custom granularities are not supported for this field yet."


class CumulativeMetricRule(SemanticManifestValidationRule[SemanticManifestT], Generic[SemanticManifestT]):
"""Checks that cumulative metrics are configured properly."""

@staticmethod
@classmethod
@validate_safely(whats_being_done="running model validation ensuring cumulative metrics are valid")
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
def validate_manifest(cls, semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

custom_granularity_names = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit / premature optimization - why not make this a set here, rather than making it a set inside validate_metric_time_window, which is called every iteration of the for loop in this function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea!

granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]
standard_granularities = {item.value.lower() for item in TimeGranularity}

for metric in semantic_manifest.metrics or []:
if metric.type != MetricType.CUMULATIVE:
Expand Down Expand Up @@ -100,6 +99,18 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
grain_to_date = metric.type_params.grain_to_date.value if metric.type_params.grain_to_date else None
if metric.type_params.cumulative_type_params and metric.type_params.cumulative_type_params.grain_to_date:
grain_to_date = metric.type_params.cumulative_type_params.grain_to_date

if grain_to_date and grain_to_date not in standard_granularities:
issues.append(
ValidationError(
context=metric_context,
message=(
f"Invalid time granularity found in `grain_to_date`: '{grain_to_date}'. "
f"{TEMP_CUSTOM_GRAIN_MSG}"
),
)
)

if window and grain_to_date:
issues.append(
ValidationError(
Expand All @@ -109,19 +120,44 @@ def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[Validati
)

if window:
try:
# TODO: Should not call an implementation class.
PydanticMetricTimeWindow.parse(
window=window.window_string, custom_granularity_names=custom_granularity_names
)
except ParsingException as e:
issues.append(
ValidationError(
context=metric_context,
message="".join(traceback.format_exception_only(type(e), value=e)),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
issues += cls.validate_metric_time_window(
metric_context=metric_context, window=window, custom_granularities=custom_granularity_names
)

return issues

@classmethod
def validate_metric_time_window( # noqa: D
cls,
metric_context: MetricContext,
window: MetricTimeWindow,
custom_granularities: Sequence[str],
allow_custom: bool = False,
) -> Sequence[ValidationIssue]:
issues: List[ValidationIssue] = []

standard_granularities = {item.value.lower() for item in TimeGranularity}
valid_granularities = set(custom_granularities) | standard_granularities
window_granularity = window.granularity
if window_granularity.endswith("s") and window_granularity[:-1] in valid_granularities:
# months -> month
window_granularity = window_granularity[:-1]

msg = f"Invalid time granularity '{window_granularity}' in window: '{window.window_string}'"
if window_granularity not in valid_granularities:
issues.append(
ValidationError(
context=metric_context,
message=msg,
)
)
elif not allow_custom and (window_granularity not in standard_granularities):
issues.append(
ValidationError(
context=metric_context,
message=msg + " " + TEMP_CUSTOM_GRAIN_MSG,
)
)

return issues

Expand Down Expand Up @@ -188,17 +224,37 @@ def _validate_input_metrics_exist(semantic_manifest: SemanticManifest) -> List[V

@staticmethod
@validate_safely(whats_being_done="checking that input metric time offset params are valid")
def _validate_time_offset_params(metric: Metric) -> List[ValidationIssue]:
def _validate_time_offset_params(metric: Metric, custom_granularities: Sequence[str]) -> List[ValidationIssue]:
issues: List[ValidationIssue] = []

standard_granularities = {item.value.lower() for item in TimeGranularity}

metric_context = MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
)
for input_metric in metric.type_params.metrics or []:
if input_metric.offset_window and input_metric.offset_to_grain:
if input_metric.offset_window:
issues += CumulativeMetricRule.validate_metric_time_window(
metric_context=metric_context,
window=input_metric.offset_window,
custom_granularities=custom_granularities,
allow_custom=True,
)
if input_metric.offset_to_grain and input_metric.offset_to_grain not in standard_granularities:
issues.append(
ValidationError(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
context=metric_context,
message=(
f"Invalid time granularity found in `offset_to_grain`: '{input_metric.offset_to_grain}'. "
f"{TEMP_CUSTOM_GRAIN_MSG}"
),
)
)
if input_metric.offset_window and input_metric.offset_to_grain:
issues.append(
ValidationError(
context=metric_context,
message=f"Both offset_window and offset_to_grain set for derived metric '{metric.name}' on "
f"input metric '{input_metric.name}'. Please set one or the other.",
)
Expand Down Expand Up @@ -247,10 +303,18 @@ def _validate_expr(metric: Metric) -> List[ValidationIssue]:
def validate_manifest(semantic_manifest: SemanticManifestT) -> Sequence[ValidationIssue]: # noqa: D
issues: List[ValidationIssue] = []

custom_granularity_names = [
granularity.name
for time_spine in semantic_manifest.project_configuration.time_spines
for granularity in time_spine.custom_granularities
]

issues += DerivedMetricRule._validate_input_metrics_exist(semantic_manifest=semantic_manifest)
for metric in semantic_manifest.metrics or []:
issues += DerivedMetricRule._validate_alias_collision(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(metric=metric)
issues += DerivedMetricRule._validate_time_offset_params(
metric=metric, custom_granularities=custom_granularity_names
)
issues += DerivedMetricRule._validate_expr(metric=metric)
return issues

Expand All @@ -267,20 +331,15 @@ def _validate_type_params(

window = conversion_type_params.window
if window:
try:
window_str = f"{window.count} {window.granularity}"
PydanticMetricTimeWindow.parse(window=window_str, custom_granularity_names=custom_granularity_names)
except ParsingException as e:
issues.append(
ValidationError(
context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
message="".join(traceback.format_exception_only(type(e), value=e)),
extra_detail="".join(traceback.format_tb(e.__traceback__)),
)
)
issues += CumulativeMetricRule.validate_metric_time_window(
metric_context=MetricContext(
file_context=FileContext.from_metadata(metadata=metric.metadata),
metric=MetricModelReference(metric_name=metric.name),
),
window=window,
custom_granularities=custom_granularity_names,
)

return issues

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dbt-semantic-interfaces"
version = "0.8.1"
version = "0.8.2"
description = 'The shared semantic layer definitions that dbt-core and MetricFlow use'
readme = "README.md"
requires-python = ">=3.8"
Expand Down
Loading