From cda9a5230a073217378df061cde3d80dc95a1c21 Mon Sep 17 00:00:00 2001 From: Courtney Holcomb Date: Mon, 9 Oct 2023 19:29:42 -0700 Subject: [PATCH] Cleanup --- metricflow/plan_conversion/dataflow_to_sql.py | 2 +- metricflow/query/query_parser.py | 9 ++-- .../builder/test_dataflow_plan_builder.py | 48 +++++++++++++++++++ .../test_cases/itest_dimensions.yaml | 7 +-- .../test/integration/test_configured_cases.py | 1 - 5 files changed, 58 insertions(+), 9 deletions(-) diff --git a/metricflow/plan_conversion/dataflow_to_sql.py b/metricflow/plan_conversion/dataflow_to_sql.py index 179bc14f51..d79c30a678 100644 --- a/metricflow/plan_conversion/dataflow_to_sql.py +++ b/metricflow/plan_conversion/dataflow_to_sql.py @@ -800,7 +800,7 @@ def visit_pass_elements_filter_node(self, node: FilterElementsNode) -> SqlDataSe CreateSelectColumnsForInstances(from_data_set_alias, self._column_association_resolver) ).as_tuple() - # If no measures are passed, group by all columns. + # If distinct values requested, group by all select columns. group_bys = select_columns if node.distinct else () return SqlDataSet( instance_set=output_instance_set, diff --git a/metricflow/query/query_parser.py b/metricflow/query/query_parser.py index 17b4139eb1..4ae7b16822 100644 --- a/metricflow/query/query_parser.py +++ b/metricflow/query/query_parser.py @@ -228,7 +228,8 @@ def _validate_no_time_dimension_query(self, metric_references: Sequence[MetricRe "dimension 'metric_time'." ) - def _validate_linkable_specs( + # TODO: write tests for invalid linkable specs - should error + def _validate_linkable_specs_for_metrics( self, metric_references: Tuple[MetricReference, ...], all_linkable_specs: QueryTimeLinkableSpecSet, @@ -423,7 +424,7 @@ def _parse_and_validate_query( # For each metric, verify that it's possible to retrieve all group by elements, including the ones as required # by the filters. - # TODO: Consider moving this logic into _validate_linkable_specs(). + # TODO: Consider moving this logic into _validate_linkable_specs_for_metrics(). for metric_reference in metric_references: metric = self._metric_lookup.get_metric(metric_reference) if metric.filter is not None: @@ -435,7 +436,7 @@ def _parse_and_validate_query( # Combine the group by elements from the query with the group by elements that are required by the # metric filter to see if that's a valid set that could be queried. - self._validate_linkable_specs( + self._validate_linkable_specs_for_metrics( metric_references=(metric_reference,), all_linkable_specs=QueryTimeLinkableSpecSet.combine( ( @@ -453,7 +454,7 @@ def _parse_and_validate_query( ) # Validate all of them together. - self._validate_linkable_specs( + self._validate_linkable_specs_for_metrics( metric_references=metric_references, all_linkable_specs=requested_linkable_specs_with_requested_filter_specs, time_dimension_specs=time_dimension_specs, diff --git a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py index f063bf9fd3..1a9f1b5726 100644 --- a/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py +++ b/metricflow/test/dataflow/builder/test_dataflow_plan_builder.py @@ -605,6 +605,54 @@ def test_distinct_values_plan( # noqa: D ) +def test_distinct_values_plan_with_join( # noqa: D + request: FixtureRequest, + mf_test_session_state: MetricFlowTestSessionState, + dataflow_plan_builder: DataflowPlanBuilder, + column_association_resolver: ColumnAssociationResolver, +) -> None: + """Tests a plan to get distinct values of 2 dimensions, where a join is required.""" + dataflow_plan = dataflow_plan_builder.build_plan_for_distinct_values( + query_spec=MetricFlowQuerySpec( + dimension_specs=( + DimensionSpec(element_name="home_state_latest", entity_links=(EntityReference(element_name="user"),)), + DimensionSpec(element_name="is_lux_latest", entity_links=(EntityReference(element_name="listing"),)), + ), + where_constraint=( + WhereSpecFactory( + column_association_resolver=column_association_resolver, + ).create_from_where_filter( + PydanticWhereFilter( + where_sql_template="{{ Dimension('listing__country_latest') }} = 'us'", + ) + ) + ), + order_by_specs=( + OrderBySpec( + dimension_spec=DimensionSpec( + element_name="country_latest", entity_links=(EntityReference(element_name="listing"),) + ), + descending=True, + ), + ), + limit=100, + ) + ) + + assert_plan_snapshot_text_equal( + request=request, + mf_test_session_state=mf_test_session_state, + plan=dataflow_plan, + plan_snapshot_text=dataflow_plan_as_text(dataflow_plan), + ) + + display_graph_if_requested( + request=request, + mf_test_session_state=mf_test_session_state, + dag_graph=dataflow_plan, + ) + + def test_measure_constraint_plan( request: FixtureRequest, mf_test_session_state: MetricFlowTestSessionState, diff --git a/metricflow/test/integration/test_cases/itest_dimensions.yaml b/metricflow/test/integration/test_cases/itest_dimensions.yaml index a4b42d42db..33ea65c039 100644 --- a/metricflow/test/integration/test_cases/itest_dimensions.yaml +++ b/metricflow/test/integration/test_cases/itest_dimensions.yaml @@ -145,17 +145,18 @@ integration_test: u.home_state_latest , l.is_lux --- +# TODO: test for dimension with non-day granularity integration_test: name: query_time_dimension_without_granularity - description: Query just a time dimension, no granularity specified + description: Query just a time dimension, no granularity specified. Should assume default granularity for dimension. model: SIMPLE_MODEL group_bys: [ "verification__ds"] check_query: | SELECT - v.ds + v.ds__day FROM {{ source_schema }}.fct_id_verifications v GROUP BY - v.ds + v.ds__day --- integration_test: name: query_dimension_only_with_constraint diff --git a/metricflow/test/integration/test_configured_cases.py b/metricflow/test/integration/test_configured_cases.py index d6d8e1e729..eec106d555 100644 --- a/metricflow/test/integration/test_configured_cases.py +++ b/metricflow/test/integration/test_configured_cases.py @@ -320,6 +320,5 @@ def test_case( double_data_type_name=check_query_helpers.double_data_type_name, ) ) - # If we sort, it's effectively not checking the order whatever order that the output was would be overwritten. assert_dataframes_equal(actual, expected, sort_columns=not case.check_order, allow_empty=case.allow_empty)