
Merge main
courtneyholcomb committed Oct 11, 2023
2 parents 42d4e27 + df0ae70 commit b054f2f
Showing 696 changed files with 7,735 additions and 7,701 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231004-102255.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Support for the Dimension(...).grain(...) syntax for the where parameter
+time: 2023-10-04T10:22:55.730467-05:00
+custom:
+  Author: DevonFulcher
+  Issue: None
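
For reference, a where filter using this new syntax might look like the following sketch (illustrative values only; the `booking__ds` dimension name is an assumption, not taken from this commit):

    # Hypothetical where filter string using the new Dimension(...).grain(...) syntax.
    where_filter = "{{ Dimension('booking__ds').grain('month') }} >= '2023-01-01'"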
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20231005-124722.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Coerce time granularity to configured value to prevent finer-grained timestamps
+  from causing unexpected query behavior
+time: 2023-10-05T12:47:22.662371-07:00
+custom:
+  Author: tlento
+  Issue: "714"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231009-195312.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Prioritize source nodes based on correct cost
+time: 2023-10-09T19:53:12.491719-07:00
+custom:
+  Author: courtneyholcomb
+  Issue: "801"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231010-144137.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Enables case insensitivity for various query params.
+time: 2023-10-10T14:41:37.181704-07:00
+custom:
+  Author: courtneyholcomb
+  Issue: "802"
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20231006-095540.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Expose underlying where clause error message
+time: 2023-10-06T09:55:40.737735-05:00
+custom:
+  Author: DevonFulcher
+  Issue: None
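
A minimal sketch of the idea (hypothetical names; `parse_where_filter` is a stand-in, not this commit's API): surface the parser's own message instead of a generic error.

    def parse_where_filter(where_filter: str) -> None:
        # Stand-in for the real parser; fails with a specific, useful message.
        raise ValueError(f"unknown dimension in where filter: {where_filter!r}")

    try:
        parse_where_filter("{{ Dimension('bookin__ds') }} IS NOT NULL")
    except ValueError as e:
        # Expose the underlying message rather than swallowing it.
        print(f"Error parsing where filter: {e}")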
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20231006-170905.yaml
@@ -0,0 +1,6 @@
+kind: Under the Hood
+body: Remove query interface and depend on DSI protocol instead
+time: 2023-10-06T17:09:05.593372-05:00
+custom:
+  Author: DevonFulcher
+  Issue: None
1 change: 1 addition & 0 deletions metricflow/dag/id_generation.py
@@ -35,6 +35,7 @@
 SQL_EXPR_IS_NULL_PREFIX = "isn"
 SQL_EXPR_CAST_TO_TIMESTAMP_PREFIX = "ctt"
 SQL_EXPR_DATE_TRUNC = "dt"
+SQL_EXPR_SUBTRACT_TIME_INTERVAL_PREFIX = "sti"
 SQL_EXPR_EXTRACT = "ex"
 SQL_EXPR_RATIO_COMPUTATION = "rc"
 SQL_EXPR_BETWEEN_PREFIX = "betw"
18 changes: 9 additions & 9 deletions metricflow/dataflow/builder/dataflow_plan_builder.py
@@ -578,17 +578,17 @@ def _find_dataflow_recipe(
logger.info(f"Found {len(node_to_evaluation)} candidate source nodes.")

if len(node_to_evaluation) > 0:
# All source nodes cost 0. Get evaluation with lowest cost.
node_with_lowest_evaluation_cost = min(
node_to_evaluation, key=lambda x: len(node_to_evaluation[x].join_recipes)
# All source nodes cost the same. Find evaluation with lowest number of joins.
node_with_lowest_cost_plan = min(
node_to_evaluation, key=lambda node: len(node_to_evaluation[node].join_recipes)
)
evaluation = node_to_evaluation[node_with_lowest_evaluation_cost]
evaluation = node_to_evaluation[node_with_lowest_cost_plan]
logger.info(
"Lowest cost node is:\n"
"Lowest cost plan is:\n"
+ pformat_big_objects(
lowest_cost_node=dataflow_dag_as_text(node_with_lowest_evaluation_cost),
node=dataflow_dag_as_text(node_with_lowest_cost_plan),
evaluation=evaluation,
joins=len(node_to_evaluation[node_with_lowest_evaluation_cost].join_recipes),
joins=len(node_to_evaluation[node_with_lowest_cost_plan].join_recipes),
)
)

@@ -606,14 +606,14 @@
             )
 
             return DataflowRecipe(
-                source_node=node_with_lowest_evaluation_cost,
+                source_node=node_with_lowest_cost_plan,
                 required_local_linkable_specs=(
                     evaluation.local_linkable_specs
                     + required_local_entity_specs
                     + required_local_dimension_specs
                     + required_local_time_dimension_specs
                 ),
-                join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_evaluation_cost].join_recipes,
+                join_linkable_instances_recipes=node_to_evaluation[node_with_lowest_cost_plan].join_recipes,
             )
 
         logger.error("No recipe could be constructed.")
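
The selection logic above is a min-by-join-count over candidate evaluations; a standalone sketch using plain dicts instead of MetricFlow's node and evaluation types:

    # Candidates all cost the same, so prefer the plan requiring the fewest joins.
    node_to_evaluation = {"source_a": ["join_1", "join_2"], "source_b": ["join_1"]}

    node_with_lowest_cost_plan = min(
        node_to_evaluation, key=lambda node: len(node_to_evaluation[node])
    )
    assert node_with_lowest_cost_plan == "source_b"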
2 changes: 1 addition & 1 deletion metricflow/dataflow/dataflow_plan.py
@@ -887,7 +887,7 @@ def accept(self, visitor: DataflowPlanNodeVisitor[VisitorOutputT]) -> VisitorOutputT:

     @property
     def description(self) -> str:  # noqa: D
-        return f"Order By {[x.item.qualified_name for x in self._order_by_specs]}" + (
+        return f"Order By {[order_by_spec.instance_spec.qualified_name for order_by_spec in self._order_by_specs]}" + (
             f" Limit {self._limit}" if self.limit else ""
         )

136 changes: 85 additions & 51 deletions metricflow/dataset/convert_semantic_model.py
@@ -241,73 +241,107 @@ def _convert_dimensions(
                         column_alias=dimension_instance.associated_column.column_name,
                     )
                 )
 
             elif dimension.type == DimensionType.TIME:
-                defined_time_granularity = TimeGranularity.DAY
-                if dimension.type_params and dimension.type_params.time_granularity:
-                    defined_time_granularity = dimension.type_params.time_granularity
+                derived_time_dimension_instances, time_select_columns = self._convert_time_dimension(
+                    dimension_select_expr=dimension_select_expr,
+                    dimension=dimension,
+                    semantic_model_name=semantic_model_name,
+                    entity_links=entity_links,
+                )
+                time_dimension_instances += derived_time_dimension_instances
+                select_columns += time_select_columns
+            else:
+                assert False, f"Unhandled dimension type: {dimension.type}"
+
+        return DimensionConversionResult(
+            dimension_instances=dimension_instances,
+            time_dimension_instances=time_dimension_instances,
+            select_columns=select_columns,
+        )
+
+    def _convert_time_dimension(
+        self,
+        dimension_select_expr: SqlExpressionNode,
+        dimension: Dimension,
+        semantic_model_name: str,
+        entity_links: Tuple[EntityReference, ...],
+    ) -> Tuple[List[TimeDimensionInstance], List[SqlSelectColumn]]:
+        """Converts Dimension objects with type TIME into the relevant DataSet columns.
+
+        Time dimensions require special handling that includes adding additional instances
+        and select column statements for each granularity and date part.
+        """
+        time_dimension_instances: List[TimeDimensionInstance] = []
+        select_columns: List[SqlSelectColumn] = []
+
+        defined_time_granularity = TimeGranularity.DAY
+        if dimension.type_params and dimension.type_params.time_granularity:
+            defined_time_granularity = dimension.type_params.time_granularity
 
         time_dimension_instance = self._create_time_dimension_instance(
             semantic_model_name=semantic_model_name,
             time_dimension=dimension,
             entity_links=entity_links,
             time_granularity=defined_time_granularity,
         )
         time_dimension_instances.append(time_dimension_instance)
 
+        # Until we support minimal granularities, we cannot truncate for
+        # any time dimension used as part of a validity window, since a validity window might
+        # be stored in seconds while we would truncate to daily.
+        if dimension.validity_params:
             select_columns.append(
                 SqlSelectColumn(
                     expr=dimension_select_expr,
                     column_alias=time_dimension_instance.associated_column.column_name,
                 )
             )
+        else:
+            select_columns.append(
+                SqlSelectColumn(
+                    expr=SqlDateTruncExpression(time_granularity=defined_time_granularity, arg=dimension_select_expr),
+                    column_alias=time_dimension_instance.associated_column.column_name,
+                )
+            )
 
         # Add time dimensions with a smaller granularity for ease in query resolution
         for time_granularity in TimeGranularity:
             if time_granularity.to_int() > defined_time_granularity.to_int():
                 time_dimension_instance = self._create_time_dimension_instance(
                     semantic_model_name=semantic_model_name,
                     time_dimension=dimension,
                     entity_links=entity_links,
-                    time_granularity=defined_time_granularity,
+                    time_granularity=time_granularity,
                 )
                 time_dimension_instances.append(time_dimension_instance)
 
                 select_columns.append(
                     SqlSelectColumn(
-                        expr=dimension_select_expr,
+                        expr=SqlDateTruncExpression(time_granularity=time_granularity, arg=dimension_select_expr),
                         column_alias=time_dimension_instance.associated_column.column_name,
                     )
                 )
 
         # Add all date part options for easy query resolution
         for date_part in DatePart:
             if date_part.to_int() >= defined_time_granularity.to_int():
                 time_dimension_instance = self._create_time_dimension_instance(
                     semantic_model_name=semantic_model_name,
                     time_dimension=dimension,
                     entity_links=entity_links,
                     time_granularity=defined_time_granularity,
                     date_part=date_part,
                 )
                 time_dimension_instances.append(time_dimension_instance)
 
                 select_columns.append(
                     SqlSelectColumn(
                         expr=SqlExtractExpression(date_part=date_part, arg=dimension_select_expr),
                         column_alias=time_dimension_instance.associated_column.column_name,
                     )
                 )
 
-        else:
-            assert False, f"Unhandled dimension type: {dimension.type}"
-
-        return DimensionConversionResult(
-            dimension_instances=dimension_instances,
-            time_dimension_instances=time_dimension_instances,
-            select_columns=select_columns,
-        )
+        return (time_dimension_instances, select_columns)

def _create_entity_instances(
self,
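
The net effect of `_convert_time_dimension` above: the configured grain is applied via DATE_TRUNC, and one extra column is emitted per coarser granularity and per date part. A standalone sketch of the granularity expansion (illustrative names and ordering, not MetricFlow's types):

    GRANULARITIES = ["day", "week", "month", "quarter", "year"]

    def coarser_than(defined: str) -> list:
        # Granularities larger than the configured one, mirroring the to_int() ordering.
        return GRANULARITIES[GRANULARITIES.index(defined) + 1 :]

    # One truncated column per coarser grain for a day-grained 'ds' column.
    select_exprs = [f"DATE_TRUNC('{g}', ds) AS ds__{g}" for g in coarser_than("day")]
    assert select_exprs[0] == "DATE_TRUNC('week', ds) AS ds__week"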
4 changes: 3 additions & 1 deletion metricflow/plan_conversion/dataflow_to_sql.py
@@ -749,7 +749,9 @@ def visit_order_by_limit_node(self, node: OrderByLimitNode) -> SqlDataSet:  # noqa: D
                 expr=SqlColumnReferenceExpression(
                     col_ref=SqlColumnReference(
                         table_alias=from_data_set_alias,
-                        column_name=self._column_association_resolver.resolve_spec(order_by_spec.item).column_name,
+                        column_name=self._column_association_resolver.resolve_spec(
+                            order_by_spec.instance_spec
+                        ).column_name,
                     )
                 ),
                 desc=order_by_spec.descending,
6 changes: 3 additions & 3 deletions metricflow/plan_conversion/sql_join_builder.py
@@ -15,7 +15,7 @@
     SqlIsNullExpression,
     SqlLogicalExpression,
     SqlLogicalOperator,
-    SqlTimeDeltaExpression,
+    SqlSubtractTimeIntervalExpression,
 )
 from metricflow.sql.sql_plan import SqlExpressionNode, SqlJoinDescription, SqlJoinType, SqlSelectStatementNode

@@ -441,7 +441,7 @@ def make_cumulative_metric_time_range_join_description(
         start_of_range_comparison_expr = SqlComparisonExpression(
             left_expr=metric_time_column_expr,
             comparison=SqlComparison.GREATER_THAN,
-            right_expr=SqlTimeDeltaExpression(
+            right_expr=SqlSubtractTimeIntervalExpression(
                 arg=time_spine_column_expr,
                 count=node.window.count,
                 granularity=node.window.granularity,
@@ -481,7 +481,7 @@ def make_join_to_time_spine_join_description(
             col_ref=SqlColumnReference(table_alias=time_spine_alias, column_name=metric_time_dimension_column_name)
         )
         if node.offset_window:
-            left_expr = SqlTimeDeltaExpression(
+            left_expr = SqlSubtractTimeIntervalExpression(
                 arg=left_expr, count=node.offset_window.count, granularity=node.offset_window.granularity
             )
         elif node.offset_to_grain:
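
For intuition, the renamed `SqlSubtractTimeIntervalExpression` renders a "timestamp minus N grains" expression used as the start bound of the cumulative window. The rendered SQL is engine-specific; a rough sketch of its shape (illustrative aliases, not MetricFlow's renderer output):

    # Rough shape of the rendered comparison (engine-specific SQL varies).
    count, granularity = 7, "day"
    start_of_range_comparison = (
        f"subq.metric_time > DATEADD({granularity}, -{count}, time_spine.metric_time)"
    )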
87 changes: 0 additions & 87 deletions metricflow/protocols/query_interface.py

This file was deleted.
