Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use FULL OUTER JOIN when combining derived metrics #841

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Breaking Changes-20231102-182815.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Breaking Changes
body: Use FULL OUTER JOIN instead of INNER JOIN to combine input metrics for derived metrics. Query output may change because rows that exist for only one input metric are now kept rather than dropped.
time: 2023-11-02T18:28:15.181064-07:00
custom:
Author: courtneyholcomb
Issue: "841"
2 changes: 1 addition & 1 deletion metricflow/dataflow/builder/dataflow_plan_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def _build_metrics_output_node(
queried_linkable_specs=queried_linkable_specs,
where_constraint=where_constraint,
time_range_constraint=time_range_constraint,
combine_metrics_join_type=SqlJoinType.INNER,
combine_metrics_join_type=SqlJoinType.FULL_OUTER,
),
metric_specs=[metric_spec],
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
<CombineMetricsNode>
<!-- description = Combine Metrics -->
<!-- node_id = cbm_0 -->
<!-- join type = SqlJoinType.INNER -->
<!-- join type = SqlJoinType.FULL_OUTER -->
<!-- de-duplication method = post-join aggregation across all dimensions -->
<ComputeMetricsNode>
<!-- description = Compute Metrics via Expressions -->
<!-- node_id = cm_0 -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ FROM (
SELECT
COALESCE(subq_9.ds__day, subq_19.ds__day) AS ds__day
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest) AS listing__country_latest
, subq_9.bookings AS bookings
, subq_19.views AS views
, MAX(subq_9.bookings) AS bookings
, MAX(subq_19.views) AS views
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -387,7 +387,7 @@ FROM (
, subq_7.listing__country_latest
) subq_8
) subq_9
INNER JOIN (
FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_18.ds__day
Expand Down Expand Up @@ -689,20 +689,11 @@ FROM (
) subq_19
ON
(
(
subq_9.listing__country_latest = subq_19.listing__country_latest
) OR (
(
subq_9.listing__country_latest IS NULL
) AND (
subq_19.listing__country_latest IS NULL
)
)
subq_9.listing__country_latest = subq_19.listing__country_latest
) AND (
(
subq_9.ds__day = subq_19.ds__day
) OR (
(subq_9.ds__day IS NULL) AND (subq_19.ds__day IS NULL)
)
subq_9.ds__day = subq_19.ds__day
)
GROUP BY
COALESCE(subq_9.ds__day, subq_19.ds__day)
, COALESCE(subq_9.listing__country_latest, subq_19.listing__country_latest)
) subq_20
Original file line number Diff line number Diff line change
@@ -1,82 +1,80 @@
-- Combine Metrics
-- Compute Metrics via Expressions
SELECT
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, CAST(subq_30.bookings AS DOUBLE) / CAST(NULLIF(subq_40.views, 0) AS DOUBLE) AS bookings_per_view
ds__day
, listing__country_latest
, CAST(bookings AS DOUBLE) / CAST(NULLIF(views, 0) AS DOUBLE) AS bookings_per_view
FROM (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
-- Combine Metrics
SELECT
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
COALESCE(subq_30.ds__day, subq_40.ds__day) AS ds__day
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest) AS listing__country_latest
, MAX(subq_30.bookings) AS bookings
, MAX(subq_40.views) AS views
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Join Standard Outputs
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
-- ['bookings', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_23.ds__day
, listings_latest_src_10004.country
) subq_30
INNER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
subq_23.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_23.bookings) AS bookings
FROM (
-- Read Elements From Semantic Model 'bookings_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['bookings', 'ds__day', 'listing']
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS bookings
FROM ***************************.fct_bookings bookings_source_src_10001
) subq_23
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_23.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_23.ds__day
, listings_latest_src_10004.country
) subq_30
FULL OUTER JOIN (
-- Join Standard Outputs
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
-- ['views', 'listing__country_latest', 'ds__day']
-- Aggregate Measures
-- Compute Metrics via Expressions
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
subq_33.ds__day AS ds__day
, listings_latest_src_10004.country AS listing__country_latest
, SUM(subq_33.views) AS views
FROM (
-- Read Elements From Semantic Model 'views_source'
-- Metric Time Dimension 'ds'
-- Pass Only Elements:
-- ['views', 'ds__day', 'listing']
SELECT
DATE_TRUNC('day', ds) AS ds__day
, listing_id AS listing
, 1 AS views
FROM ***************************.fct_views views_source_src_10009
) subq_33
LEFT OUTER JOIN
***************************.dim_listings_latest listings_latest_src_10004
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_33.ds__day
, listings_latest_src_10004.country
) subq_40
ON
subq_33.listing = listings_latest_src_10004.listing_id
GROUP BY
subq_33.ds__day
, listings_latest_src_10004.country
) subq_40
ON
(
(
subq_30.listing__country_latest = subq_40.listing__country_latest
) OR (
(
subq_30.listing__country_latest IS NULL
) AND (
subq_40.listing__country_latest IS NULL
)
)
) AND (
(
) AND (
subq_30.ds__day = subq_40.ds__day
) OR (
(subq_30.ds__day IS NULL) AND (subq_40.ds__day IS NULL)
)
)
GROUP BY
COALESCE(subq_30.ds__day, subq_40.ds__day)
, COALESCE(subq_30.listing__country_latest, subq_40.listing__country_latest)
) subq_41
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_7.metric_time__day, subq_15.metric_time__day) AS metric_time__day
, subq_7.bookings_fill_nulls_with_0 AS bookings_fill_nulls_with_0
, subq_15.bookings_2_weeks_ago AS bookings_2_weeks_ago
, MAX(subq_7.bookings_fill_nulls_with_0) AS bookings_fill_nulls_with_0
, MAX(subq_15.bookings_2_weeks_ago) AS bookings_2_weeks_ago
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -238,7 +238,7 @@ FROM (
subq_4.metric_time__day = subq_3.metric_time__day
) subq_6
) subq_7
INNER JOIN (
FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_14.metric_time__day
Expand Down Expand Up @@ -555,13 +555,7 @@ FROM (
) subq_14
) subq_15
ON
(
subq_7.metric_time__day = subq_15.metric_time__day
) OR (
(
subq_7.metric_time__day IS NULL
) AND (
subq_15.metric_time__day IS NULL
)
)
subq_7.metric_time__day = subq_15.metric_time__day
GROUP BY
COALESCE(subq_7.metric_time__day, subq_15.metric_time__day)
) subq_16
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_24.metric_time__day, subq_32.metric_time__day) AS metric_time__day
, subq_24.bookings_fill_nulls_with_0 AS bookings_fill_nulls_with_0
, subq_32.bookings_2_weeks_ago AS bookings_2_weeks_ago
, MAX(subq_24.bookings_fill_nulls_with_0) AS bookings_fill_nulls_with_0
, MAX(subq_32.bookings_2_weeks_ago) AS bookings_2_weeks_ago
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -41,7 +41,7 @@ FROM (
subq_22.ds = subq_20.metric_time__day
) subq_23
) subq_24
INNER JOIN (
FULL OUTER JOIN (
-- Join to Time Spine Dataset
-- Pass Only Elements:
-- ['bookings', 'metric_time__day']
Expand All @@ -65,13 +65,7 @@ FROM (
subq_28.ds
) subq_32
ON
(
subq_24.metric_time__day = subq_32.metric_time__day
) OR (
(
subq_24.metric_time__day IS NULL
) AND (
subq_32.metric_time__day IS NULL
)
)
subq_24.metric_time__day = subq_32.metric_time__day
GROUP BY
COALESCE(subq_24.metric_time__day, subq_32.metric_time__day)
) subq_33
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ FROM (
-- Combine Metrics
SELECT
COALESCE(subq_4.metric_time__day, subq_9.metric_time__day) AS metric_time__day
, subq_4.ref_bookings AS ref_bookings
, subq_9.bookings AS bookings
, MAX(subq_4.ref_bookings) AS ref_bookings
, MAX(subq_9.bookings) AS bookings
FROM (
-- Compute Metrics via Expressions
SELECT
Expand Down Expand Up @@ -224,7 +224,7 @@ FROM (
subq_2.metric_time__day
) subq_3
) subq_4
INNER JOIN (
FULL OUTER JOIN (
-- Compute Metrics via Expressions
SELECT
subq_8.metric_time__day
Expand Down Expand Up @@ -441,13 +441,7 @@ FROM (
) subq_8
) subq_9
ON
(
subq_4.metric_time__day = subq_9.metric_time__day
) OR (
(
subq_4.metric_time__day IS NULL
) AND (
subq_9.metric_time__day IS NULL
)
)
subq_4.metric_time__day = subq_9.metric_time__day
GROUP BY
COALESCE(subq_4.metric_time__day, subq_9.metric_time__day)
) subq_10
Loading
Loading