Skip to content

Commit

Permalink
Resolve granularity for partial time dimensions
Browse files Browse the repository at this point in the history
  • Loading branch information
courtneyholcomb committed Oct 10, 2023
1 parent cda9a52 commit 5f331f1
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 49 deletions.
2 changes: 1 addition & 1 deletion metricflow/engine/metricflow_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def __init__(
self._query_parser = MetricFlowQueryParser(
column_association_resolver=self._column_association_resolver,
model=self._semantic_manifest_lookup,
source_nodes=source_nodes,
read_nodes=read_nodes,
node_output_resolver=node_output_resolver,
)

Expand Down
13 changes: 11 additions & 2 deletions metricflow/query/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,15 @@ def __init__( # noqa: D
self,
column_association_resolver: ColumnAssociationResolver,
model: SemanticManifestLookup,
source_nodes: Sequence[BaseOutput],
read_nodes: Sequence[BaseOutput],
node_output_resolver: DataflowPlanNodeOutputDataSetResolver,
) -> None:
self._column_association_resolver = column_association_resolver
self._model = model
self._metric_lookup = model.metric_lookup
self._semantic_model_lookup = model.semantic_model_lookup
self._node_output_resolver = node_output_resolver
self._read_nodes = read_nodes

# Set up containers for known element names
self._known_entity_element_references = self._semantic_model_lookup.get_entity_references()
Expand Down Expand Up @@ -404,6 +406,8 @@ def _parse_and_validate_query(
self._time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs(
metric_references=metric_references,
partial_time_dimension_specs=requested_linkable_specs.partial_time_dimension_specs,
read_nodes=self._read_nodes,
node_output_resolver=self._node_output_resolver,
)
)

Expand Down Expand Up @@ -575,6 +579,8 @@ def _adjust_time_range_constraint(
self._time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs(
metric_references=metric_references,
partial_time_dimension_specs=(partial_metric_time_spec,),
read_nodes=self._read_nodes,
node_output_resolver=self._node_output_resolver,
)
)
adjust_to_granularity = partial_time_dimension_spec_to_time_dimension_spec[
Expand Down Expand Up @@ -773,7 +779,10 @@ def _verify_resolved_granularity_for_date_part(
ensure that the correct value was passed in.
"""
resolved_granularity = self._time_granularity_solver.find_minimum_granularity_for_partial_time_dimension_spec(
partial_time_dimension_spec=partial_time_dimension_spec, metric_references=metric_references
partial_time_dimension_spec=partial_time_dimension_spec,
metric_references=metric_references,
read_nodes=self._read_nodes,
node_output_resolver=self._node_output_resolver,
)
if resolved_granularity != requested_dimension_structured_name.time_granularity:
raise RequestTimeGranularityException(
Expand Down
13 changes: 11 additions & 2 deletions metricflow/test/fixtures/model_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,10 @@ def query_parser_from_yaml(yaml_contents: List[YamlConfigFile]) -> MetricFlowQue
).semantic_manifest
)
SemanticManifestValidator[SemanticManifest]().checked_validations(semantic_manifest_lookup.semantic_manifest)
source_nodes = _data_set_to_source_nodes(semantic_manifest_lookup, create_data_sets(semantic_manifest_lookup))
return MetricFlowQueryParser(
model=semantic_manifest_lookup,
column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup),
source_nodes=source_nodes,
read_nodes=list(_data_set_to_read_nodes(create_data_sets(semantic_manifest_lookup)).values()),
node_output_resolver=DataflowPlanNodeOutputDataSetResolver(
column_association_resolver=DunderColumnAssociationResolver(semantic_manifest_lookup),
semantic_manifest_lookup=semantic_manifest_lookup,
Expand Down Expand Up @@ -241,3 +240,13 @@ def cyclic_join_semantic_manifest_lookup(template_mapping: Dict[str, str]) -> Se
"""Manifest that contains a potential cycle in the join graph (if not handled properly)."""
build_result = load_semantic_manifest("cyclic_join_manifest", template_mapping)
return SemanticManifestLookup(build_result.semantic_manifest)


@pytest.fixture(scope="session")
def node_output_resolver(  # noqa:D
    simple_semantic_manifest_lookup: SemanticManifestLookup,
) -> DataflowPlanNodeOutputDataSetResolver:
    """Provide a session-scoped output data set resolver backed by the simple semantic manifest lookup."""
    # Build the column association resolver first so the constructor call below reads as plain wiring.
    association_resolver = DunderColumnAssociationResolver(simple_semantic_manifest_lookup)
    resolver = DataflowPlanNodeOutputDataSetResolver(
        column_association_resolver=association_resolver,
        semantic_manifest_lookup=simple_semantic_manifest_lookup,
    )
    return resolver
17 changes: 14 additions & 3 deletions metricflow/test/integration/test_cases/itest_dimensions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,29 @@ integration_test:
u.home_state_latest
, l.is_lux
---
# TODO: test for dimension with non-day granularity
integration_test:
name: query_time_dimension_without_granularity
description: Query just a time dimension, no granularity specified. Should assume default granularity for dimension.
model: SIMPLE_MODEL
group_bys: [ "verification__ds"]
check_query: |
SELECT
v.ds__day
v.ds as verification__ds__day
FROM {{ source_schema }}.fct_id_verifications v
GROUP BY
v.ds__day
v.ds
---
integration_test:
name: query_non_default_time_dimension_without_granularity
description: Query just a time dimension whose default granularity is not day, no granularity specified. Should assume default granularity for dimension (month).
model: EXTENDED_DATE_MODEL
group_bys: [ "monthly_ds"]
check_query: |
SELECT
ds AS monthly_ds__month
FROM {{ source_schema }}.fct_bookings_extended_monthly
GROUP BY
ds
---
integration_test:
name: query_dimension_only_with_constraint
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<DataflowPlan>
<WriteToResultDataframeNode>
<!-- description = Write to Dataframe -->
<!-- node_id = wrd_0 -->
<OrderByLimitNode>
<!-- description = Order By ['listing__country_latest'] Limit 100 -->
<!-- node_id = obl_0 -->
<!-- order_by_spec = -->
<!-- {'class': 'OrderBySpec', -->
<!-- 'descending': True, -->
<!-- 'metric_spec': None, -->
<!-- 'dimension_spec': {'class': 'DimensionSpec', -->
<!-- 'element_name': 'country_latest', -->
<!-- 'entity_links': ({'class': 'EntityReference', -->
<!-- 'element_name': 'listing'},)}, -->
<!-- 'time_dimension_spec': None, -->
<!-- 'entity_spec': None} -->
<!-- limit = 100 -->
<WhereConstraintNode>
<!-- description = Constrain Output with WHERE -->
<!-- node_id = wcc_0 -->
<!-- where_condition = -->
<!-- {'class': 'WhereFilterSpec', -->
<!-- 'where_sql': "listing__country_latest = 'us'", -->
<!-- 'bind_parameters': {'class': 'SqlBindParameters', 'param_items': ()}, -->
<!-- 'linkable_spec_set': {'class': 'LinkableSpecSet', -->
<!-- 'dimension_specs': ({'class': 'DimensionSpec', -->
<!-- 'element_name': 'country_latest', -->
<!-- 'entity_links': ({'class': 'EntityReference', -->
<!-- 'element_name': 'listing'},)},), -->
<!-- 'time_dimension_specs': (), -->
<!-- 'entity_specs': ()}} -->
<FilterElementsNode>
<!-- description = -->
<!-- Pass Only Elements: -->
<!-- ['user__home_state_latest', 'listing__is_lux_latest'] -->
<!-- node_id = pfe_1 -->
<!-- include_spec = -->
<!-- {'class': 'DimensionSpec', -->
<!-- 'element_name': 'home_state_latest', -->
<!-- 'entity_links': ({'class': 'EntityReference', 'element_name': 'user'},)} -->
<!-- include_spec = -->
<!-- {'class': 'DimensionSpec', -->
<!-- 'element_name': 'is_lux_latest', -->
<!-- 'entity_links': ({'class': 'EntityReference', 'element_name': 'listing'},)} -->
<!-- distinct = True -->
<JoinToBaseOutputNode>
<!-- description = Join Standard Outputs -->
<!-- node_id = jso_0 -->
<!-- join0_for_node_id_pfe_0 = -->
<!-- {'class': 'JoinDescription', -->
<!-- 'join_node': FilterElementsNode(node_id=pfe_0), -->
<!-- 'join_on_entity': {'class': 'LinklessEntitySpec', -->
<!-- 'element_name': 'user', -->
<!-- 'entity_links': ()}, -->
<!-- 'join_on_partition_dimensions': (), -->
<!-- 'join_on_partition_time_dimensions': (), -->
<!-- 'validity_window': None} -->
<ReadSqlSourceNode>
<!-- description = -->
<!-- Read From SemanticModelDataSet(SemanticModelReference(semantic_model_name='listings_latest')) -->
<!-- node_id = rss_10004 -->
<!-- data_set = -->
<!-- SemanticModelDataSet(SemanticModelReference(semantic_model_name='listings_latest')) -->
</ReadSqlSourceNode>
<FilterElementsNode>
<!-- description = -->
<!-- Pass Only Elements: -->
<!-- ['home_state_latest', 'user'] -->
<!-- node_id = pfe_0 -->
<!-- include_spec = -->
<!-- {'class': 'DimensionSpec', -->
<!-- 'element_name': 'home_state_latest', -->
<!-- 'entity_links': ()} -->
<!-- include_spec = LinklessEntitySpec(element_name='user', entity_links=()) -->
<!-- distinct = False -->
<ReadSqlSourceNode>
<!-- description = -->
<!-- Read From SemanticModelDataSet(SemanticModelReference(semantic_model_name='users_latest')) -->
<!-- node_id = rss_10008 -->
<!-- data_set = -->
<!-- SemanticModelDataSet(SemanticModelReference(semantic_model_name='users_latest')) -->
</ReadSqlSourceNode>
</FilterElementsNode>
</JoinToBaseOutputNode>
</FilterElementsNode>
</WhereConstraintNode>
</OrderByLimitNode>
</WriteToResultDataframeNode>
</DataflowPlan>
22 changes: 20 additions & 2 deletions metricflow/test/time/test_time_granularity_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
from dbt_semantic_interfaces.references import MetricReference
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity

from metricflow.dataflow.builder.node_data_set import DataflowPlanNodeOutputDataSetResolver
from metricflow.dataset.dataset import DataSet
from metricflow.filters.time_constraint import TimeRangeConstraint
from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow.test.fixtures.model_fixtures import ConsistentIdObjectRepository
from metricflow.test.time.metric_time_dimension import MTD_SPEC_DAY, MTD_SPEC_MONTH
from metricflow.time.time_granularity_solver import (
PartialTimeDimensionSpec,
Expand Down Expand Up @@ -89,30 +91,46 @@ def test_validate_day_granularity_for_day_and_month_metric( # noqa: D
PARTIAL_PTD_SPEC = PartialTimeDimensionSpec(element_name=DataSet.metric_time_dimension_name(), entity_links=())


def test_granularity_solution_for_day_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D
def test_granularity_solution_for_day_metric(  # noqa: D
    time_granularity_solver: TimeGranularitySolver,
    node_output_resolver: DataflowPlanNodeOutputDataSetResolver,
    consistent_id_object_repository: ConsistentIdObjectRepository,
) -> None:
    """Check that the partial metric-time spec resolves to MTD_SPEC_DAY when only `bookings` is queried."""
    # Read nodes come from the simple model held by the shared object repository fixture.
    simple_read_nodes = list(consistent_id_object_repository.simple_model_read_nodes.values())
    resolved_specs = time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs(
        metric_references=[MetricReference(element_name="bookings")],
        partial_time_dimension_specs=[PARTIAL_PTD_SPEC],
        node_output_resolver=node_output_resolver,
        read_nodes=simple_read_nodes,
    )
    expected_specs = {PARTIAL_PTD_SPEC: MTD_SPEC_DAY}
    assert resolved_specs == expected_specs


def test_granularity_solution_for_month_metric(time_granularity_solver: TimeGranularitySolver) -> None: # noqa: D
def test_granularity_solution_for_month_metric(  # noqa: D
    time_granularity_solver: TimeGranularitySolver,
    node_output_resolver: DataflowPlanNodeOutputDataSetResolver,
    consistent_id_object_repository: ConsistentIdObjectRepository,
) -> None:
    """Check that the partial metric-time spec resolves to MTD_SPEC_MONTH when only `bookings_monthly` is queried."""
    # Read nodes come from the simple model held by the shared object repository fixture.
    simple_read_nodes = list(consistent_id_object_repository.simple_model_read_nodes.values())
    resolved_specs = time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs(
        metric_references=[MetricReference(element_name="bookings_monthly")],
        partial_time_dimension_specs=[PARTIAL_PTD_SPEC],
        node_output_resolver=node_output_resolver,
        read_nodes=simple_read_nodes,
    )
    expected_specs = {PARTIAL_PTD_SPEC: MTD_SPEC_MONTH}
    assert resolved_specs == expected_specs


def test_granularity_solution_for_day_and_month_metrics(  # noqa: D
    time_granularity_solver: TimeGranularitySolver,
    node_output_resolver: DataflowPlanNodeOutputDataSetResolver,
    consistent_id_object_repository: ConsistentIdObjectRepository,
) -> None:
    """Check that querying `bookings` and `bookings_monthly` together resolves the partial spec to MTD_SPEC_MONTH."""
    # Both metrics are requested at once; the expected resolution is the month spec.
    requested_metrics = [
        MetricReference(element_name="bookings"),
        MetricReference(element_name="bookings_monthly"),
    ]
    simple_read_nodes = list(consistent_id_object_repository.simple_model_read_nodes.values())
    resolved_specs = time_granularity_solver.resolve_granularity_for_partial_time_dimension_specs(
        metric_references=requested_metrics,
        partial_time_dimension_specs=[PARTIAL_PTD_SPEC],
        node_output_resolver=node_output_resolver,
        read_nodes=simple_read_nodes,
    )
    expected_specs = {PARTIAL_PTD_SPEC: MTD_SPEC_MONTH}
    assert resolved_specs == expected_specs


Expand Down
Loading

0 comments on commit 5f331f1

Please sign in to comment.