Skip to content

Commit

Permalink
Normalize BigQuery and Redshift day of week extraction to ISO
Browse files Browse the repository at this point in the history
This completes the ISO normalization of day of week results. Now
all engines should return the ISO standard 1 (Monday) - 7 (Sunday)
values for day of week extraction.
  • Loading branch information
tlento committed Oct 16, 2023
1 parent 0f36dd1 commit 5ca6a61
Show file tree
Hide file tree
Showing 138 changed files with 1,077 additions and 1,026 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20231015-170649.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Ensure extract calls return consistent results across engines
time: 2023-10-15T17:06:49.646146-07:00
custom:
Author: tlento
Issue: "792"
22 changes: 22 additions & 0 deletions metricflow/sql/render/big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from metricflow.sql.sql_exprs import (
SqlCastToTimestampExpression,
SqlDateTruncExpression,
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
Expand Down Expand Up @@ -139,6 +140,27 @@ def render_date_part(self, date_part: DatePart) -> str:

return super().render_date_part(date_part)

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
    """Render an extract expression, normalizing BigQuery day of week output to the ISO standard.

    BigQuery's day of week extraction returns 1 (Sunday) - 7 (Saturday) rather than the ISO standard
    1 (Monday) - 7 (Sunday). For day of week extraction, the parent's rendered EXTRACT call is wrapped
    in an IF expression that maps 1 (Sunday) to 7 and shifts every other value down by one. All other
    date parts are returned exactly as rendered by the parent class.
    """
    base_result = super().visit_extract_expr(node)

    if node.date_part is DatePart.DOW:
        base_sql = base_result.sql
        return SqlExpressionRenderResult(
            sql=f"IF({base_sql} = 1, 7, {base_sql} - 1)",
            bind_parameters=base_result.bind_parameters,
        )

    # Only day of week needs conversion; everything else passes through untouched.
    return base_result

@override
def visit_time_delta_expr(self, node: SqlSubtractTimeIntervalExpression) -> SqlExpressionRenderResult:
"""Render time delta for BigQuery, which requires ISO prefixing for the WEEK granularity value."""
Expand Down
36 changes: 31 additions & 5 deletions metricflow/sql/render/redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
)
from metricflow.sql.render.sql_plan_renderer import DefaultSqlQueryPlanRenderer
from metricflow.sql.sql_bind_parameters import SqlBindParameters
from metricflow.sql.sql_exprs import SqlGenerateUuidExpression, SqlPercentileExpression, SqlPercentileFunctionType
from metricflow.sql.sql_exprs import (
SqlExtractExpression,
SqlGenerateUuidExpression,
SqlPercentileExpression,
SqlPercentileFunctionType,
)
from metricflow.time.date_part import DatePart


Expand All @@ -26,10 +31,6 @@ def double_data_type(self) -> str:
"""Custom double data type for the Redshift engine."""
return "DOUBLE PRECISION"

@override
def render_date_part(self, date_part: DatePart) -> str:
    """Return the given date part's enum value unchanged as its rendered form."""
    rendered_part = date_part.value
    return rendered_part

@property
@override
def supported_percentile_function_types(self) -> Collection[SqlPercentileFunctionType]:
Expand Down Expand Up @@ -64,6 +65,31 @@ def visit_percentile_expr(self, node: SqlPercentileExpression) -> SqlExpressionR
bind_parameters=params,
)

@override
def render_date_part(self, date_part: DatePart) -> str:
    """Return the given date part's enum value unchanged as its rendered form."""
    rendered_part = date_part.value
    return rendered_part

@override
def visit_extract_expr(self, node: SqlExtractExpression) -> SqlExpressionRenderResult:
    """Render an extract expression, normalizing Redshift day of week output to the ISO standard.

    Redshift's day of week extraction returns 0 (Sunday) - 6 (Saturday) rather than the ISO standard
    1 (Monday) - 7 (Sunday). Monday through Saturday already line up with ISO values 1 - 6, so for day
    of week extraction the parent's rendered EXTRACT call is wrapped in a CASE expression that only
    remaps 0 (Sunday) to 7. All other date parts are returned exactly as rendered by the parent class.
    """
    base_result = super().visit_extract_expr(node)

    if node.date_part is DatePart.DOW:
        base_sql = base_result.sql
        iso_dow_sql = f"CASE WHEN {base_sql} = 0 THEN {base_sql} + 7 ELSE {base_sql} END"
        return SqlExpressionRenderResult(
            sql=iso_dow_sql,
            bind_parameters=base_result.bind_parameters,
        )

    # Only day of week needs conversion; everything else passes through untouched.
    return base_result

@override
def visit_generate_uuid_expr(self, node: SqlGenerateUuidExpression) -> SqlExpressionRenderResult:
"""Generates a "good enough" random key to simulate a UUID.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ SELECT
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC(revenue_src_10006.created_at, day) AS company__ds__day
, DATE_TRUNC(revenue_src_10006.created_at, isoweek) AS company__ds__week
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dayofweek FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, IF(EXTRACT(dayofweek FROM revenue_src_10006.created_at) = 1, 7, EXTRACT(dayofweek FROM revenue_src_10006.created_at) - 1) AS company__ds__extract_dow
, EXTRACT(dayofyear FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS ds__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds) AS ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS ds_partitioned__week
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type
, DATE_TRUNC(id_verifications_src_10003.ds, day) AS verification__ds__day
Expand All @@ -33,7 +33,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS verification__ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS verification__ds__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds) AS verification__ds__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) AS verification__ds__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds) - 1) AS verification__ds__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds) AS verification__ds__extract_doy
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, day) AS verification__ds_partitioned__day
, DATE_TRUNC(id_verifications_src_10003.ds_partitioned, isoweek) AS verification__ds_partitioned__week
Expand All @@ -44,7 +44,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_day
, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_dow
, IF(EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) = 1, 7, EXTRACT(dayofweek FROM id_verifications_src_10003.ds_partitioned) - 1) AS verification__ds_partitioned__extract_dow
, EXTRACT(dayofyear FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type AS verification__verification_type
, id_verifications_src_10003.verification_id AS verification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ SELECT
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS ds_latest__extract_month
, EXTRACT(day FROM users_latest_src_10008.ds) AS ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS ds_latest__extract_doy
, users_latest_src_10008.home_state_latest
, DATE_TRUNC(users_latest_src_10008.ds, day) AS user__ds_latest__day
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS user__ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS user__ds_latest__extract_month
, EXTRACT(day FROM users_latest_src_10008.ds) AS user__ds_latest__extract_day
, EXTRACT(dayofweek FROM users_latest_src_10008.ds) AS user__ds_latest__extract_dow
, IF(EXTRACT(dayofweek FROM users_latest_src_10008.ds) = 1, 7, EXTRACT(dayofweek FROM users_latest_src_10008.ds) - 1) AS user__ds_latest__extract_dow
, EXTRACT(dayofyear FROM users_latest_src_10008.ds) AS user__ds_latest__extract_doy
, users_latest_src_10008.home_state_latest AS user__home_state_latest
, users_latest_src_10008.user_id AS user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ SELECT
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS ds__extract_month
, EXTRACT(day FROM revenue_src_10006.created_at) AS ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS ds__extract_dow
, CASE WHEN EXTRACT(dow FROM revenue_src_10006.created_at) = 0 THEN EXTRACT(dow FROM revenue_src_10006.created_at) + 7 ELSE EXTRACT(dow FROM revenue_src_10006.created_at) END AS ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS ds__extract_doy
, DATE_TRUNC('day', revenue_src_10006.created_at) AS company__ds__day
, DATE_TRUNC('week', revenue_src_10006.created_at) AS company__ds__week
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM revenue_src_10006.created_at) AS company__ds__extract_quarter
, EXTRACT(month FROM revenue_src_10006.created_at) AS company__ds__extract_month
, EXTRACT(day FROM revenue_src_10006.created_at) AS company__ds__extract_day
, EXTRACT(dow FROM revenue_src_10006.created_at) AS company__ds__extract_dow
, CASE WHEN EXTRACT(dow FROM revenue_src_10006.created_at) = 0 THEN EXTRACT(dow FROM revenue_src_10006.created_at) + 7 ELSE EXTRACT(dow FROM revenue_src_10006.created_at) END AS company__ds__extract_dow
, EXTRACT(doy FROM revenue_src_10006.created_at) AS company__ds__extract_doy
, revenue_src_10006.user_id AS user
, revenue_src_10006.user_id AS company__user
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS ds__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds) AS ds__extract_day
, EXTRACT(dow FROM id_verifications_src_10003.ds) AS ds__extract_dow
, CASE WHEN EXTRACT(dow FROM id_verifications_src_10003.ds) = 0 THEN EXTRACT(dow FROM id_verifications_src_10003.ds) + 7 ELSE EXTRACT(dow FROM id_verifications_src_10003.ds) END AS ds__extract_dow
, EXTRACT(doy FROM id_verifications_src_10003.ds) AS ds__extract_doy
, DATE_TRUNC('day', id_verifications_src_10003.ds_partitioned) AS ds_partitioned__day
, DATE_TRUNC('week', id_verifications_src_10003.ds_partitioned) AS ds_partitioned__week
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_day
, EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_dow
, CASE WHEN EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) = 0 THEN EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) + 7 ELSE EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) END AS ds_partitioned__extract_dow
, EXTRACT(doy FROM id_verifications_src_10003.ds_partitioned) AS ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type
, DATE_TRUNC('day', id_verifications_src_10003.ds) AS verification__ds__day
Expand All @@ -33,7 +33,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds) AS verification__ds__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds) AS verification__ds__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds) AS verification__ds__extract_day
, EXTRACT(dow FROM id_verifications_src_10003.ds) AS verification__ds__extract_dow
, CASE WHEN EXTRACT(dow FROM id_verifications_src_10003.ds) = 0 THEN EXTRACT(dow FROM id_verifications_src_10003.ds) + 7 ELSE EXTRACT(dow FROM id_verifications_src_10003.ds) END AS verification__ds__extract_dow
, EXTRACT(doy FROM id_verifications_src_10003.ds) AS verification__ds__extract_doy
, DATE_TRUNC('day', id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__day
, DATE_TRUNC('week', id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__week
Expand All @@ -44,7 +44,7 @@ SELECT
, EXTRACT(quarter FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_quarter
, EXTRACT(month FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_month
, EXTRACT(day FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_day
, EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_dow
, CASE WHEN EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) = 0 THEN EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) + 7 ELSE EXTRACT(dow FROM id_verifications_src_10003.ds_partitioned) END AS verification__ds_partitioned__extract_dow
, EXTRACT(doy FROM id_verifications_src_10003.ds_partitioned) AS verification__ds_partitioned__extract_doy
, id_verifications_src_10003.verification_type AS verification__verification_type
, id_verifications_src_10003.verification_id AS verification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ SELECT
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS ds_latest__extract_month
, EXTRACT(day FROM users_latest_src_10008.ds) AS ds_latest__extract_day
, EXTRACT(dow FROM users_latest_src_10008.ds) AS ds_latest__extract_dow
, CASE WHEN EXTRACT(dow FROM users_latest_src_10008.ds) = 0 THEN EXTRACT(dow FROM users_latest_src_10008.ds) + 7 ELSE EXTRACT(dow FROM users_latest_src_10008.ds) END AS ds_latest__extract_dow
, EXTRACT(doy FROM users_latest_src_10008.ds) AS ds_latest__extract_doy
, users_latest_src_10008.home_state_latest
, DATE_TRUNC('day', users_latest_src_10008.ds) AS user__ds_latest__day
Expand All @@ -21,7 +21,7 @@ SELECT
, EXTRACT(quarter FROM users_latest_src_10008.ds) AS user__ds_latest__extract_quarter
, EXTRACT(month FROM users_latest_src_10008.ds) AS user__ds_latest__extract_month
, EXTRACT(day FROM users_latest_src_10008.ds) AS user__ds_latest__extract_day
, EXTRACT(dow FROM users_latest_src_10008.ds) AS user__ds_latest__extract_dow
, CASE WHEN EXTRACT(dow FROM users_latest_src_10008.ds) = 0 THEN EXTRACT(dow FROM users_latest_src_10008.ds) + 7 ELSE EXTRACT(dow FROM users_latest_src_10008.ds) END AS user__ds_latest__extract_dow
, EXTRACT(doy FROM users_latest_src_10008.ds) AS user__ds_latest__extract_doy
, users_latest_src_10008.home_state_latest AS user__home_state_latest
, users_latest_src_10008.user_id AS user
Expand Down
Loading

0 comments on commit 5ca6a61

Please sign in to comment.