Skip to content

Commit

Permalink
Enable custom granularity inputs for filters
Browse files Browse the repository at this point in the history
We are enabling support for custom granularity, which means
users will need to be able to reference them in where filters and
other contexts where their input representations of their manifest
elements will need to be parsed.

Historically, we have restricted time granularities to an enumerated
set of standard supported grains. With the addition of custom
granularities we must now admit the possibility of arbitrary string
names representing valid time granularities.

This change updates the interfaces for parsing time dimensions out
of filter expressions to allow for arbitrary string inputs representing
custom granularity names.

For various reasons - mainly having to do with the filter parser
not having access to the semantic manifest - we will NOT be able
to support custom granularity references via dundered string names. So
something like `metric_time__martian_year` will not work, but
the explicit syntax of `Dimension('metric_time').grain('martian_year')`
will be accepted and processed.
  • Loading branch information
tlento committed Sep 9, 2024
1 parent 47f831e commit 88f3873
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 31 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240908-221757.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Allow filter parameters to accept custom granularities
time: 2024-09-08T22:17:57.865557-07:00
custom:
Author: tlento
Issue: "345"
3 changes: 1 addition & 2 deletions dbt_semantic_interfaces/call_parameter_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
MetricReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


Expand All @@ -29,7 +28,7 @@ class TimeDimensionCallParameterSet:

entity_path: Tuple[EntityReference, ...]
time_dimension_reference: TimeDimensionReference
time_granularity: Optional[TimeGranularity] = None
time_granularity_name: Optional[str] = None
date_part: Optional[DatePart] = None


Expand Down
6 changes: 5 additions & 1 deletion dbt_semantic_interfaces/naming/dundered.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ class StructuredDunderedName:
element_name: "ds"
granularity: TimeGranularity.WEEK
The time granularity is part of legacy query syntax and there are plans to migrate away from this format.
The time granularity is part of legacy query syntax and there are plans to migrate away from this format. As such,
this will not be updated to allow for custom granularity values. This implies that any query paths that push named
parameters through this class will not support a custom grain reference of the form `metric_time__martian_year`,
and users wishing to use their martian year grain will have to explicitly reference it via a separate parameter
instead of gluing it onto the end of the name.
"""

entity_links: Tuple[EntityReference, ...]
Expand Down
13 changes: 2 additions & 11 deletions dbt_semantic_interfaces/parsing/text_input/ti_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.errors import InvalidQuerySyntax
from dbt_semantic_interfaces.naming.dundered import StructuredDunderedName
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


Expand Down Expand Up @@ -65,21 +64,13 @@ def __post_init__(self) -> None: # noqa: D105
raise InvalidQuerySyntax("The entity path should not be specified for a metric.")
if len(structured_item_name.entity_links) > 0:
raise InvalidQuerySyntax("The name of the metric should not have entity links.")
# Check that dimensions / time dimensions have a valid time granularity / date part.
# Check that dimensions / time dimensions have a valid date part.
elif item_type is QueryItemType.DIMENSION or item_type is QueryItemType.TIME_DIMENSION:
if self.time_granularity_name is not None:
valid_time_granularity_names = set(time_granularity.value for time_granularity in TimeGranularity)
if self.time_granularity_name.lower() not in valid_time_granularity_names:
raise InvalidQuerySyntax(
f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are"
f" {valid_time_granularity_names}"
)

if self.date_part_name is not None:
valid_date_part_names = set(date_part.value for date_part in DatePart)
if self.date_part_name.lower() not in set(date_part.value for date_part in DatePart):
raise InvalidQuerySyntax(
f"{self.time_granularity_name!r} is not a valid time granularity. Valid values are"
f"{self.date_part_name!r} is not a valid date part. Valid values are"
f" {valid_date_part_names}"
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,19 @@
MetricReference,
TimeDimensionReference,
)
from dbt_semantic_interfaces.type_enums import TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart


class ParameterSetFactory:
"""Creates parameter sets for use in the Jinja sandbox."""
"""Creates parameter sets for use in the Jinja sandbox.
This is ONLY to be used for parsing where filters, and then only for the purposes of extracting
some basic information about which elements are being accessed in the filter expression in the
small number of contexts where a more complete semantic layer implementation is not available.
In practice, today, this is used by the dbt core parser, which cannot take on a MetricFlow
dependency, in order to provide some DAG annotations around elements referenced in where filters.
"""

@staticmethod
def _exception_message_for_incorrect_format(element_name: str) -> str:
Expand All @@ -37,26 +44,46 @@ def create_time_dimension(
entity_path: Sequence[str] = (),
date_part_name: Optional[str] = None,
) -> TimeDimensionCallParameterSet:
"""Gets called by Jinja when rendering {{ TimeDimension(...) }}."""
"""Gets called by Jinja when rendering {{ TimeDimension(...) }}.
There is a lot of strangeness around the time granularity specification here. Historically,
we accepted time dimension names of the form `metric_time__week` or `customer__registration_date__month`
in this interface. We have not yet fully deprecated this, and it's unclear if we ever will.
Key points to note:
1. The time dimension name parsing only accepts standard time granularities. This will not change.
2. The time granularity parameter is what we want everybody to use because it's more explicit.
3. The time granularity parameter will support custom granularities, so that's nice
While this all may seem pretty bad it's not as terrible as all that - this class is only used
for parsing where filters. When we solve the problems with our current where filter spec this will
persist as a backwards compatibility model, but nothing more.
"""
group_by_item_name = DunderedNameFormatter.parse_name(time_dimension_name)
if len(group_by_item_name.entity_links) != 1 and not is_metric_time_name(group_by_item_name.element_name):
raise ParseWhereFilterException(
ParameterSetFactory._exception_message_for_incorrect_format(time_dimension_name)
)
grain_parsed_from_name = group_by_item_name.time_granularity
grain_from_param = TimeGranularity(time_granularity_name) if time_granularity_name else None
grain_parsed_from_name = (
group_by_item_name.time_granularity.value if group_by_item_name.time_granularity else None
)
grain_from_param = time_granularity_name
if grain_parsed_from_name and grain_from_param and grain_from_param != grain_parsed_from_name:
raise ParseWhereFilterException(
f"Received different grains in `time_dimension_name` parameter ('{time_dimension_name}') "
f"and `time_granularity_name` parameter ('{time_granularity_name}')."
f"and `time_granularity_name` parameter ('{time_granularity_name}'). Remove the grain suffix "
f"(`{grain_parsed_from_name}`) from the time dimension name and use the `time_granularity_name` "
"parameter to specify the intendend grain."
)

time_granularity_name = grain_parsed_from_name or grain_from_param

return TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name=group_by_item_name.element_name),
entity_path=(
tuple(EntityReference(element_name=arg) for arg in entity_path) + group_by_item_name.entity_links
),
time_granularity=grain_parsed_from_name or grain_from_param,
time_granularity_name=time_granularity_name.lower() if time_granularity_name else None,
date_part=DatePart(date_part_name.lower()) if date_part_name else None,
)

Expand Down
12 changes: 6 additions & 6 deletions tests/implementations/where_filter/test_parse_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_extract_dimension_with_grain_call_parameter_sets() -> None: # noqa: D
TimeDimensionCallParameterSet(
entity_path=(),
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
entity_call_parameter_sets=(),
Expand All @@ -91,7 +91,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D
EntityReference("listing"),
EntityReference("user"),
),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand All @@ -110,7 +110,7 @@ def test_extract_time_dimension_call_parameter_sets() -> None: # noqa: D
EntityReference("listing"),
EntityReference("user"),
),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand All @@ -126,7 +126,7 @@ def test_extract_metric_time_dimension_call_parameter_sets() -> None: # noqa: D
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
entity_path=(),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand Down Expand Up @@ -216,7 +216,7 @@ def test_where_filter_interesection_extract_call_parameter_sets() -> None:
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference(element_name="metric_time"),
entity_path=(),
time_granularity=TimeGranularity.MONTH,
time_granularity_name=TimeGranularity.MONTH.value,
),
)
)
Expand Down Expand Up @@ -269,7 +269,7 @@ def test_time_dimension_without_granularity() -> None: # noqa: D
TimeDimensionCallParameterSet(
entity_path=(EntityReference("booking"),),
time_dimension_reference=TimeDimensionReference(element_name="created_at"),
time_granularity=None,
time_granularity_name=None,
),
),
entity_call_parameter_sets=(),
Expand Down
4 changes: 2 additions & 2 deletions tests/parsing/test_object_builder_item_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ def test_valid_object_builder_items() -> None: # noqa: D
valid_items = (
"Dimension('listing__created_at', entity_path=['host'])",
"Dimension('listing__created_at', entity_path=['host']).grain('day').date_part('day')",
"Dimension('metric_time').grain('martian_year')",
"TimeDimension('listing__created_at', time_granularity_name='day', entity_path=['host'], date_part_name='day')",
"Entity('listing__created_at', entity_path=['host'])",
"Metric('bookings', group_by=['listing__created_at'])",
"TimeDimension('metric_time', time_granularity_name='martian_year')",
)
for valid_item in valid_items:
logger.info(f"Checking {valid_item=}")
Expand All @@ -32,9 +34,7 @@ def test_invalid_object_builder_items() -> None: # noqa: D
text_processor = ObjectBuilderTextProcessor()

invalid_items = (
"Dimension('listing__created_at').grain('invalid')",
"Dimension('listing__created_at').date_part('invalid')",
"TimeDimension('listing__created_at', 'invalid', 'day')",
"TimeDimension('listing__created_at', 'day', date_part_name='invalid')",
"TimeDimension('listing__created_at', 'day', date_part_name='month').grain('month')",
"TimeDimension('listing__created_at', 'day', date_part_name='month').date_part('month')",
Expand Down
4 changes: 2 additions & 2 deletions tests/parsing/test_where_filter_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,15 @@ def test_dimension_date_part() -> None: # noqa
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference("metric_time"),
entity_path=(),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
(
"{{ TimeDimension('metric_time', time_granularity_name='week') }} > '2023-01-01'",
TimeDimensionCallParameterSet(
time_dimension_reference=TimeDimensionReference("metric_time"),
entity_path=(),
time_granularity=TimeGranularity.WEEK,
time_granularity_name=TimeGranularity.WEEK.value,
),
),
],
Expand Down

0 comments on commit 88f3873

Please sign in to comment.