From 7f64a70c27a66ddc5923dec8dea629ebf8f39fe7 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Tue, 12 Sep 2023 18:17:23 -0700 Subject: [PATCH] Separate validation for date part, esp for cumulative metrics --- metricflow/query/query_parser.py | 21 ++++++++++++-- metricflow/test/query/test_query_parser.py | 13 +++++++-- .../test/time/test_time_granularity_solver.py | 16 +++++------ metricflow/time/time_granularity_solver.py | 28 ++++++++----------- 4 files changed, 47 insertions(+), 31 deletions(-) diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 329bb3bdc3..82cc913303 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -433,7 +433,8 @@ def _parse_and_validate_query( if len(time_dimension_specs) == 0: self._validate_no_time_dimension_query(metric_references=metric_references) - self._time_granularity_solver.validate_time_granularity_and_date_part(metric_references, time_dimension_specs) + self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) + self._validate_date_part(metric_references, time_dimension_specs) for time_dimension_spec in time_dimension_specs: if ( time_dimension_spec.date_part @@ -508,7 +509,7 @@ def _parse_and_validate_query( raise InvalidQueryException(f"Limit was specified as {limit}, which is < 0.") if where_filter_spec: - self._time_granularity_solver.validate_time_granularity_and_date_part( + self._time_granularity_solver.validate_time_granularity( metric_references=metric_references, time_dimension_specs=where_filter_spec.linkable_spec_set.time_dimension_specs, ) @@ -546,6 +547,20 @@ def _validate_order_by_specs( ): raise InvalidQueryException(f"Order by item {order_by_spec} not in the query") + def _validate_date_part( + self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec] + ) -> None: + """Validate that date parts can be used for metrics.""" + date_part_requested = False + for time_dimension_spec in time_dimension_specs: + if time_dimension_spec.date_part: + date_part_requested = True + break + + for metric_reference in metric_references: + if self._metric_lookup.get_metric(metric_reference).type == MetricType.CUMULATIVE and date_part_requested: + raise UnableToSatisfyQueryError("Cannot extract date part for cumulative metrics.") + def _adjust_time_range_constraint( self, metric_references: Sequence[MetricReference], @@ -553,7 +568,7 @@ def _adjust_time_range_constraint( time_range_constraint: TimeRangeConstraint, ) -> TimeRangeConstraint: """Adjust the time range constraint so that it matches the boundaries of the granularity of the result.""" - self._time_granularity_solver.validate_time_granularity_and_date_part(metric_references, time_dimension_specs) + self._time_granularity_solver.validate_time_granularity(metric_references, time_dimension_specs) smallest_primary_time_granularity_in_query = self._find_smallest_metric_time_dimension_spec_granularity( time_dimension_specs diff --git a/metricflow/test/query/test_query_parser.py b/metricflow/test/query/test_query_parser.py index 41f205ed05..2bba217baf 100644 --- a/metricflow/test/query/test_query_parser.py +++ b/metricflow/test/query/test_query_parser.py @@ -399,19 +399,26 @@ def test_date_part_parsing() -> None: ) # Date part is incompatible with metric's defined time granularity - with pytest.raises(RequestTimeGranularityException, match="is not valid for querying"): + with pytest.raises(RequestTimeGranularityException): query_parser.parse_and_validate_query( metric_names=["revenue"], group_by=[MockQueryParameter(name="metric_time", date_part=DatePart.DOW)], ) - # Date part is compatible with the requested time granularity for the same time dimension - with pytest.raises(RequestTimeGranularityException, match="is not compatible with time granularity"): + # Date part is incompatible with the requested time granularity for the same time dimension + with pytest.raises(RequestTimeGranularityException): query_parser.parse_and_validate_query( metric_names=["revenue"], group_by=[MockQueryParameter(name="metric_time", grain=TimeGranularity.YEAR, date_part=DatePart.MONTH)], ) + # Can't query date part for cumulative metrics + with pytest.raises(UnableToSatisfyQueryError): + query_parser.parse_and_validate_query( + metric_names=["revenue_cumulative"], + group_by=[MockQueryParameter(name="metric_time", date_part=DatePart.DOY)], + ) + # Date part is compatible query_parser.parse_and_validate_query( metric_names=["revenue"], diff --git a/metricflow/test/time/test_time_granularity_solver.py b/metricflow/test/time/test_time_granularity_solver.py index e354881588..025778df10 100644 --- a/metricflow/test/time/test_time_granularity_solver.py +++ b/metricflow/test/time/test_time_granularity_solver.py @@ -27,21 +27,21 @@ def time_granularity_solver( # noqa: D def test_validate_day_granuarity_for_day_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.DAY)], ) def test_validate_month_granuarity_for_day_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.MONTH)], ) def test_validate_month_granuarity_for_month_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings_monthly")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.MONTH)], ) @@ -50,7 +50,7 @@ def test_validate_month_granuarity_for_month_metric(time_granularity_solver: Tim def test_validate_month_granuarity_for_day_and_month_metrics( # noqa: D time_granularity_solver: TimeGranularitySolver, ) -> None: - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="bookings_monthly")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.MONTH)], ) @@ -59,7 +59,7 @@ def test_validate_month_granuarity_for_day_and_month_metrics( # noqa: D def test_validate_year_granularity_for_day_and_month_metrics( # noqa: D time_granularity_solver: TimeGranularitySolver, ) -> None: - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings"), MetricReference(element_name="bookings_monthly")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.YEAR)], ) @@ -67,7 +67,7 @@ def test_validate_year_granularity_for_day_and_month_metrics( # noqa: D def test_validate_day_granuarity_for_month_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D with pytest.raises(RequestTimeGranularityException): - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[MetricReference(element_name="bookings_monthly")], time_dimension_specs=[DataSet.metric_time_dimension_spec(TimeGranularity.DAY)], ) @@ -77,7 +77,7 @@ def test_validate_day_granularity_for_day_and_month_metric( # noqa: D time_granularity_solver: TimeGranularitySolver, ) -> None: with pytest.raises(RequestTimeGranularityException): - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[ MetricReference(element_name="bookings"), MetricReference(element_name="bookings_monthly"), @@ -120,7 +120,7 @@ def test_granularity_error_for_cumulative_metric( # noqa: D time_granularity_solver: TimeGranularitySolver, ) -> None: with pytest.raises(RequestTimeGranularityException): - time_granularity_solver.validate_time_granularity_and_date_part( + time_granularity_solver.validate_time_granularity( metric_references=[ MetricReference(element_name="weekly_bookers"), MetricReference(element_name="bookings_monthly"), diff --git a/metricflow/time/time_granularity_solver.py b/metricflow/time/time_granularity_solver.py index 367fc832a5..ecf6c5d13c 100644 --- a/metricflow/time/time_granularity_solver.py +++ b/metricflow/time/time_granularity_solver.py @@ -67,34 +67,28 @@ def __init__( # noqa: D ) -> None: self._semantic_manifest_lookup = semantic_manifest_lookup - def validate_time_granularity_and_date_part( + def validate_time_granularity( self, metric_references: Sequence[MetricReference], time_dimension_specs: Sequence[TimeDimensionSpec] ) -> None: - """Check that the granularity & date_part specified for time dimensions is valid with respect to the metrics. + """Check that the granularity specified for time dimensions is valid with respect to the metrics. - e.g. throw an error if "ds__week" or "extract week" is specified for a metric with a time granularity of MONTH. + e.g. throw an error if "ds__week" is specified for a metric with a time granularity of MONTH. """ valid_group_by_elements = self._semantic_manifest_lookup.metric_lookup.linkable_set_for_metrics( metric_references=metric_references, ) for time_dimension_spec in time_dimension_specs: - match_found_with_granularity = False - match_found_for_date_part = False + match_found = False for path_key in valid_group_by_elements.path_key_to_linkable_dimensions: - if path_key.element_name == time_dimension_spec.element_name and ( - path_key.entity_links == time_dimension_spec.entity_links + if ( + path_key.element_name == time_dimension_spec.element_name + and (path_key.entity_links == time_dimension_spec.entity_links) + and path_key.time_granularity == time_dimension_spec.time_granularity ): - if path_key.time_granularity == time_dimension_spec.time_granularity: - match_found_with_granularity = True - if not time_dimension_spec.date_part or ( - path_key.time_granularity - and path_key.time_granularity.to_int() <= time_dimension_spec.date_part.to_int() - ): - match_found_for_date_part = True - if match_found_with_granularity and match_found_for_date_part: - break - if not (match_found_with_granularity and match_found_for_date_part): + match_found = True + break + if not match_found: raise RequestTimeGranularityException( f"{time_dimension_spec} is not valid for querying {metric_references}" )