From 203df5ab0759ac9a0f119ae2753c3debcab04d94 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 30 Sep 2024 19:17:16 +0000 Subject: [PATCH 01/15] Change the name to bottleneck_delay for better description. Provide the delay metrics into the final table --- .../intermediate/performance/_performance.yml | 2 +- ...performance__bottleneck_delay_metrics.sql} | 45 ++++++++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) rename transform/models/intermediate/performance/{int_performance__bottlenecks.sql => int_performance__bottleneck_delay_metrics.sql} (81%) diff --git a/transform/models/intermediate/performance/_performance.yml b/transform/models/intermediate/performance/_performance.yml index 82d75daa..c7d6abb6 100644 --- a/transform/models/intermediate/performance/_performance.yml +++ b/transform/models/intermediate/performance/_performance.yml @@ -430,7 +430,7 @@ models: description: 50th highest hour volume in the precceding year. - name: K_100 description: 100th highest hour volume in the precceding year. - - name: int_performance__bottlenecks + - name: int_performance__bottleneck_delay_metrics description: | The PeMS system runs a bottleneck identification algorithm every day. 
The original algorithm was presented in, "Systematic Identification of Freeway Bottlenecks," by Chen, C., Skabardonis, A., diff --git a/transform/models/intermediate/performance/int_performance__bottlenecks.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql similarity index 81% rename from transform/models/intermediate/performance/int_performance__bottlenecks.sql rename to transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql index bea95d20..9b247a5c 100644 --- a/transform/models/intermediate/performance/int_performance__bottlenecks.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql @@ -19,7 +19,13 @@ station_five_minute as ( station_type, absolute_postmile, length, - volume_sum + volume_sum, + delay_35_mph, + delay_40_mph, + delay_45_mph, + delay_50_mph, + delay_55_mph, + delay_60_mph from {{ ref ("int_performance__station_metrics_agg_five_minutes") }} where {{ make_model_incremental('sample_date') }} @@ -161,6 +167,41 @@ congestion_length as ( from congestion_events qualify is_bottleneck = true -- TODO: also filter if upstream is a bottleneck start? 
+), + +shift as ( + select + *, + case + when extract(hour from sample_timestamp) >= 5 and extract(hour from sample_timestamp) <= 10 + then 'AM' + when extract(hour from sample_timestamp) >= 10 and extract(hour from sample_timestamp) <= 15 + then 'NOON' + when extract(hour from sample_timestamp) >= 15 and extract(hour from sample_timestamp) <= 20 + then 'PM' + end as time_shift + from congestion_length +), + +bottleneck_delay as ( + select + * exclude ( + congestion_sequence, + congestion_status_change, + is_bottleneck, + speed_delta_ne, + speed_delta_sw, + distance_delta_sw, + distance_delta_ne, + volume_sum, + length, + upstream_is_congested, + is_congested, + speed_five_mins, + congestion_length + ) + from shift + ) -select * from congestion_length +select * from bottleneck_delay From 033ddfd7126c6e9d16c9892883c7b59dd6f75c0d Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Tue, 1 Oct 2024 22:08:26 +0000 Subject: [PATCH 02/15] Change the time values to global variables defined in dbt_project.yml --- .../int_performance__bottleneck_delay_metrics.sql | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql index 9b247a5c..4645db8e 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql @@ -173,11 +173,17 @@ shift as ( select *, case - when extract(hour from sample_timestamp) >= 5 and extract(hour from sample_timestamp) <= 10 + when + cast(sample_timestamp as time) >= {{ var('am_shift_start') }} + and cast(sample_timestamp as time) <= {{ var('am_shift_end') }} then 'AM' - when extract(hour from sample_timestamp) >= 10 and extract(hour from sample_timestamp) <= 15 + when + cast(sample_timestamp as time) >= {{ var('noon_shift_start') }} + 
and cast(sample_timestamp as time) <= {{ var('noon_shift_end') }} then 'NOON' - when extract(hour from sample_timestamp) >= 15 and extract(hour from sample_timestamp) <= 20 + when + cast(sample_timestamp as time) >= {{ var('pm_shift_start') }} + and cast(sample_timestamp as time) <= {{ var('pm_shift_end') }} then 'PM' end as time_shift from congestion_length From 6f712ad36ad4fae5a9a88587ed0b81c59f52647c Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 7 Oct 2024 21:29:26 +0000 Subject: [PATCH 03/15] Fix calcs CTE to resolve the issue in #260 --- ..._performance__bottleneck_delay_metrics.sql | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql index 4645db8e..2d67d11f 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql @@ -40,22 +40,44 @@ calcs as ( station is north or east, the "upstream" station has a smaller postmile, and we need to lag to get the speed there. When the direction is west or south, the "upstream" station has a larger postmile, and we need to lead to get the speed there. */ + /*There are five routes (NS: Route 71; EW: Route 282, 580, 780) in California which do not + follow this rule. 
We need to specify them in the speed difference and distance difference + calculation*/ speed_five_mins - lead(speed_five_mins) over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) as speed_delta_ne, - speed_five_mins - lag(speed_five_mins) - over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) - as speed_delta_sw, - absolute_postmile - lead(absolute_postmile) over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) as distance_delta_ne, + case + when + (freeway = 71 and direction = 'S') + or (freeway = 282 and direction = 'W') + or (freeway = 580 and direction = 'W') + or (freeway = 780 and direction = 'W') + then speed_five_mins - lag(speed_five_mins) + over ( + partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc + ) + else speed_five_mins - lag(speed_five_mins) + over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) + end as speed_delta_sw, - absolute_postmile - lag(absolute_postmile) - over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) - as distance_delta_sw + case + when + (freeway = 71 and direction = 'S') + or (freeway = 282 and direction = 'W') + or (freeway = 580 and direction = 'W') + or (freeway = 780 and direction = 'W') + then absolute_postmile - lag(absolute_postmile) + over ( + partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc + ) + else absolute_postmile - lag(absolute_postmile) + over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) + end as distance_delta_sw from station_five_minute ), From 7fea1a6c6771195a7e20404c9d98c22c3b340568 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Wed, 9 Oct 2024 00:46:41 +0000 Subject: [PATCH 04/15] 1. 
Considered the special routes into all metrics' calculation 2. Aggregated delay by bottleneck extent --- transform/dbt_project.yml | 1 + ..._performance__bottleneck_delay_metrics.sql | 136 ++++++++++++++---- 2 files changed, 110 insertions(+), 27 deletions(-) diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 1532d4dc..2a849d7f 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -37,6 +37,7 @@ vars: high_occupancy_threshold: 0.7 incremental_model_look_back: -2 dev_model_look_back: -7 + special_routes: (71, 153, 282, 580, 780) day_start: "'05:00:00'" day_end: "'21:59:59'" V_t: [35, 40, 45, 50, 55, 60] diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql index 2d67d11f..43fd0a4a 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql @@ -5,6 +5,8 @@ on_schema_change='sync_all_columns', snowflake_warehouse = get_snowflake_refresh_warehouse(small="XS") ) }} +{% set delay_metrics = ['delay_35_mph', 'delay_40_mph', 'delay_45_mph', 'delay_50_mph', 'delay_55_mph', +'delay_60_mph'] %} with @@ -40,23 +42,36 @@ calcs as ( station is north or east, the "upstream" station has a smaller postmile, and we need to lag to get the speed there. When the direction is west or south, the "upstream" station has a larger postmile, and we need to lead to get the speed there. */ - /*There are five routes (NS: Route 71; EW: Route 282, 580, 780) in California which do not + /*There are five routes (NS: Route 71; EW: Route 153, 282, 580, 780) in California which do not follow this rule. 
We need to specify them in the speed difference and distance difference calculation*/ + /*Need to check all calculations ordered by absolute_postmile and fix the logic for 5 routes + specifically*/ + case + when + freeway in {{ var("special_routes") }} + then speed_five_mins - lead(speed_five_mins) + over ( + partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc + ) + else speed_five_mins - lead(speed_five_mins) + over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) + end as speed_delta_ne, - speed_five_mins - lead(speed_five_mins) - over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) - as speed_delta_ne, + case + when + freeway in {{ var("special_routes") }} + then absolute_postmile - lead(absolute_postmile) + over ( + partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc + ) + else absolute_postmile - lead(absolute_postmile) + over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) + end as distance_delta_ne, - absolute_postmile - lead(absolute_postmile) - over (partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc) - as distance_delta_ne, case when - (freeway = 71 and direction = 'S') - or (freeway = 282 and direction = 'W') - or (freeway = 580 and direction = 'W') - or (freeway = 780 and direction = 'W') + freeway in {{ var("special_routes") }} then speed_five_mins - lag(speed_five_mins) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -67,10 +82,7 @@ calcs as ( case when - (freeway = 71 and direction = 'S') - or (freeway = 282 and direction = 'W') - or (freeway = 580 and direction = 'W') - or (freeway = 780 and direction = 'W') + freeway in {{ var("special_routes") }} then absolute_postmile - lag(absolute_postmile) over ( partition by 
sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -135,15 +147,32 @@ congestion as ( station is north or east, the "upstream" station has a smaller postmile, and we need to lag to get the speed there. When the direction is west or south, the "upstream" station has a larger postmile, and we need to lead to get the speed there. */ + /*There are five routes (NS: Route 71; EW: Route 153, 282, 580, 780) in California which do not + follow this rule. We need to specify them in the speed difference and distance difference + calculation*/ case - when direction in ('N', 'E') + when (freeway in {{ var("special_routes") }} and direction in ('N', 'E')) + then + lag(is_congested) + over ( + partition by sample_timestamp, freeway, direction, station_type + order by absolute_postmile desc + ) + when (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) then lag(is_congested) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc ) - when direction in ('S', 'W') + when (freeway in {{ var("special_routes") }} and direction in ('S', 'W')) + then + lead(is_congested) + over ( + partition by sample_timestamp, freeway, direction, station_type + order by absolute_postmile desc + ) + when (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) then lead(is_congested) over ( @@ -158,12 +187,23 @@ congestion_events as ( select *, - sum(congestion_status_change) - over ( - partition by sample_timestamp, freeway, direction, station_type - order by absolute_postmile asc - rows between unbounded preceding and current row - ) as congestion_sequence + case + when freeway in {{ var("special_routes") }} + then + sum(congestion_status_change) + over ( + partition by sample_timestamp, freeway, direction, station_type + order by absolute_postmile desc + rows between unbounded preceding and current row + ) + else + sum(congestion_status_change) + over ( + partition by 
sample_timestamp, freeway, direction, station_type + order by absolute_postmile asc + rows between unbounded preceding and current row + ) + end as congestion_sequence from congestion ), @@ -171,14 +211,18 @@ congestion_length as ( select *, case - when direction in ('N', 'E') + when + (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) + or (direction in ('S', 'W') and freeway in {{ var("special_routes") }}) then sum(congestion_length) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence order by absolute_postmile asc rows between unbounded preceding and current row ) - when direction in ('S', 'W') + when + (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) + or (direction in ('N', 'E') and freeway in {{ var("special_routes") }}) then sum(congestion_length) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence @@ -191,6 +235,38 @@ congestion_length as ( ), +agg_spatial_delay as ( + select + *, + {% for delay in delay_metrics %} + case + when + (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) + or (direction in ('S', 'W') and freeway in {{ var("special_routes") }}) + then + sum({{ delay }}) over ( + partition by sample_timestamp, freeway, direction, station_type, congestion_sequence + order by absolute_postmile asc + rows between unbounded preceding and current row + ) + when + (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) + or (direction in ('N', 'E') and freeway in {{ var("special_routes") }}) + then + sum({{ delay }}) over ( + partition by sample_timestamp, freeway, direction, station_type, congestion_sequence + order by absolute_postmile asc + rows between current row and unbounded following + ) + end as spatial_{{ delay }} + {% if not loop.last %} + , + {% endif %} + + {% endfor %} + from congestion_length +), + shift as ( select *, @@ -208,7 +284,7 @@ shift as ( and cast(sample_timestamp as 
time) <= {{ var('pm_shift_end') }} then 'PM' end as time_shift - from congestion_length + from agg_spatial_delay ), bottleneck_delay as ( @@ -226,7 +302,13 @@ bottleneck_delay as ( upstream_is_congested, is_congested, speed_five_mins, - congestion_length + congestion_length, + delay_35_mph, + delay_40_mph, + delay_45_mph, + delay_50_mph, + delay_55_mph, + delay_60_mph ) from shift From 483017c2c443b201f9651501f249c23ca133975f Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Wed, 9 Oct 2024 00:56:29 +0000 Subject: [PATCH 05/15] Trim Trailing Whitespace --- .../performance/int_performance__bottleneck_delay_metrics.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql index 43fd0a4a..bd593954 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql @@ -5,7 +5,7 @@ on_schema_change='sync_all_columns', snowflake_warehouse = get_snowflake_refresh_warehouse(small="XS") ) }} -{% set delay_metrics = ['delay_35_mph', 'delay_40_mph', 'delay_45_mph', 'delay_50_mph', 'delay_55_mph', +{% set delay_metrics = ['delay_35_mph', 'delay_40_mph', 'delay_45_mph', 'delay_50_mph', 'delay_55_mph', 'delay_60_mph'] %} with From 28457b6e716896b6a70e30c7d52ecd2ef3444c09 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Thu, 10 Oct 2024 23:07:22 +0000 Subject: [PATCH 06/15] 1. Modified the code and variable name based on James suggestions 2. 
Provided hourly, daily, monthly summary table for bottleneck_delay --- transform/dbt_project.yml | 2 +- .../intermediate/performance/_performance.yml | 2 +- ...ce__bottleneck_delay_metrics_agg_daily.sql | 34 +++++++++++++ ...leneck_delay_metrics_agg_five_minutes.sql} | 49 ++++++++++--------- ...e__bottleneck_delay_metrics_agg_hourly.sql | 41 ++++++++++++++++ ...__bottleneck_delay_metrics_agg_monthly.sql | 37 ++++++++++++++ 6 files changed, 139 insertions(+), 26 deletions(-) create mode 100644 transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql rename transform/models/intermediate/performance/{int_performance__bottleneck_delay_metrics.sql => int_performance__bottleneck_delay_metrics_agg_five_minutes.sql} (84%) create mode 100644 transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql create mode 100644 transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql diff --git a/transform/dbt_project.yml b/transform/dbt_project.yml index 2a849d7f..7408146b 100644 --- a/transform/dbt_project.yml +++ b/transform/dbt_project.yml @@ -37,7 +37,7 @@ vars: high_occupancy_threshold: 0.7 incremental_model_look_back: -2 dev_model_look_back: -7 - special_routes: (71, 153, 282, 580, 780) + backward_routes: (71, 153, 282, 580, 780) day_start: "'05:00:00'" day_end: "'21:59:59'" V_t: [35, 40, 45, 50, 55, 60] diff --git a/transform/models/intermediate/performance/_performance.yml b/transform/models/intermediate/performance/_performance.yml index c7d6abb6..009e2600 100644 --- a/transform/models/intermediate/performance/_performance.yml +++ b/transform/models/intermediate/performance/_performance.yml @@ -430,7 +430,7 @@ models: description: 50th highest hour volume in the precceding year. - name: K_100 description: 100th highest hour volume in the precceding year. 
- - name: int_performance__bottleneck_delay_metrics + - name: int_performance__bottleneck_delay_metrics_agg_five_minutes description: | The PeMS system runs a bottleneck identification algorithm every day. The original algorithm was presented in, "Systematic Identification of Freeway Bottlenecks," by Chen, C., Skabardonis, A., diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql new file mode 100644 index 00000000..fe7e0092 --- /dev/null +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -0,0 +1,34 @@ +{{ config(materialized='table') }} + +with hourly_spatial_bottleneck_delay_metrics as ( + select * + from {{ ref('int_performance__bottleneck_delay_metrics_agg_hourly') }} +), + +/*aggregate hourly delay and bottleneck extent in a daily level. Since one day has +3 time shifts, the aggregation would be in a time shift level*/ + +daily_time_shift_spatial_bottleneck_delay_metrics as ( + select + station_id, + sample_date, + time_shift, + any_value(station_type) as station_type, + any_value(freeway) as freeway, + any_value(direction) as direction, + any_value(absolute_postmile) as absolute_postmile, + sum(hourly_duration) as daily_time_shift_duration, + avg(hourly_bottleneck_extent) as daily_time_shift_bottleneck_extent, + -- spatial delay aggregation in daily level, decomposed into time shift + {% for value in var("V_t") %} + sum(hourly_spatial_delay_{{ value }}_mph) + as daily_time_shift_spatial_delay_{{ value }}_mph + {% if not loop.last %} + , + {% endif %} + {% endfor %} + from hourly_spatial_bottleneck_delay_metrics + group by station_id, sample_date, time_shift +), + +select * from daily_time_shift_spatial_bottleneck_delay_metrics diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql 
b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql similarity index 84% rename from transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql rename to transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql index bd593954..62f4be90 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql @@ -5,8 +5,6 @@ on_schema_change='sync_all_columns', snowflake_warehouse = get_snowflake_refresh_warehouse(small="XS") ) }} -{% set delay_metrics = ['delay_35_mph', 'delay_40_mph', 'delay_45_mph', 'delay_50_mph', 'delay_55_mph', -'delay_60_mph'] %} with @@ -17,6 +15,7 @@ station_five_minute as ( sample_timestamp, nullifzero(speed_five_mins) as speed_five_mins, freeway, + freeway in {{ var("backward_routes") }} as is_backward_routes, direction, station_type, absolute_postmile, @@ -49,7 +48,7 @@ calcs as ( specifically*/ case when - freeway in {{ var("special_routes") }} + is_backward_routes then speed_five_mins - lead(speed_five_mins) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -60,7 +59,7 @@ calcs as ( case when - freeway in {{ var("special_routes") }} + is_backward_routes then absolute_postmile - lead(absolute_postmile) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -71,7 +70,7 @@ calcs as ( case when - freeway in {{ var("special_routes") }} + is_backward_routes then speed_five_mins - lag(speed_five_mins) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -82,7 +81,7 @@ calcs as ( case when - freeway in {{ var("special_routes") }} + is_backward_routes then absolute_postmile - lag(absolute_postmile) over ( partition by 
sample_timestamp, freeway, direction, station_type order by absolute_postmile desc @@ -151,28 +150,28 @@ congestion as ( follow this rule. We need to specify them in the speed difference and distance difference calculation*/ case - when (freeway in {{ var("special_routes") }} and direction in ('N', 'E')) + when (is_backward_routes and direction in ('N', 'E')) then lag(is_congested) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc ) - when (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) + when (direction in ('N', 'E') and is_backward_routes = false) then lag(is_congested) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile asc ) - when (freeway in {{ var("special_routes") }} and direction in ('S', 'W')) + when (is_backward_routes and direction in ('S', 'W')) then lead(is_congested) over ( partition by sample_timestamp, freeway, direction, station_type order by absolute_postmile desc ) - when (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) + when (direction in ('S', 'W') and is_backward_routes = false) then lead(is_congested) over ( @@ -188,7 +187,7 @@ congestion_events as ( select *, case - when freeway in {{ var("special_routes") }} + when is_backward_routes then sum(congestion_status_change) over ( @@ -212,8 +211,8 @@ congestion_length as ( *, case when - (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) - or (direction in ('S', 'W') and freeway in {{ var("special_routes") }}) + (direction in ('N', 'E') and is_backward_routes = false) + or (direction in ('S', 'W') and is_backward_routes) then sum(congestion_length) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence @@ -221,8 +220,8 @@ congestion_length as ( rows between unbounded preceding and current row ) when - (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) - or (direction in 
('N', 'E') and freeway in {{ var("special_routes") }}) + (direction in ('S', 'W') and is_backward_routes = false) + or (direction in ('N', 'E') and is_backward_routes) then sum(congestion_length) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence @@ -238,27 +237,27 @@ congestion_length as ( agg_spatial_delay as ( select *, - {% for delay in delay_metrics %} + {% for value in var("V_t") %} case when - (direction in ('N', 'E') and freeway not in {{ var("special_routes") }}) - or (direction in ('S', 'W') and freeway in {{ var("special_routes") }}) + (direction in ('N', 'E') and is_backward_routes = false) + or (direction in ('S', 'W') and is_backward_routes) then - sum({{ delay }}) over ( + sum(delay_{{ value }}_mph) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence order by absolute_postmile asc rows between unbounded preceding and current row ) when - (direction in ('S', 'W') and freeway not in {{ var("special_routes") }}) - or (direction in ('N', 'E') and freeway in {{ var("special_routes") }}) + (direction in ('S', 'W') and is_backward_routes = false) + or (direction in ('N', 'E') and is_backward_routes) then - sum({{ delay }}) over ( + sum(delay_{{ value }}_mph) over ( partition by sample_timestamp, freeway, direction, station_type, congestion_sequence order by absolute_postmile asc rows between current row and unbounded following ) - end as spatial_{{ delay }} + end as spatial_delay_{{ value }}_mph {% if not loop.last %} , {% endif %} @@ -292,12 +291,14 @@ bottleneck_delay as ( * exclude ( congestion_sequence, congestion_status_change, - is_bottleneck, + bottleneck_check, + bottleneck_check_summed, speed_delta_ne, speed_delta_sw, distance_delta_sw, distance_delta_ne, volume_sum, + is_backward_routes, length, upstream_is_congested, is_congested, diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql 
b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql new file mode 100644 index 00000000..fadd50b5 --- /dev/null +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql @@ -0,0 +1,41 @@ +{{ config( + materialized="incremental", + unique_key=['station_id','sample_date', 'sample_hour'], + snowflake_warehouse = get_snowflake_refresh_warehouse(small="XL") +) }} + +-- read the five-minute bottleneck delay metrics and truncate each timestamp to its hour +with station_five_mins_data as ( + select + *, + date_trunc('hour', sample_timestamp) as sample_timestamp_trunc + from {{ ref('int_performance__bottleneck_delay_metrics_agg_five_minutes') }} + where {{ make_model_incremental('sample_date') }} +), + +-- aggregate five-minute delay and calculate the average bottleneck extent on an hourly basis +hourly_spatial_bottleneck_delay_metrics as ( + select + station_id, + sample_date, + sample_timestamp_trunc as sample_hour, + any_value(station_type) as station_type, + any_value(freeway) as freeway, + any_value(direction) as direction, + any_value(absolute_postmile) as absolute_postmile, + any_value(time_shift) as time_shift, + sum(case when is_bottleneck = true then 1 else 0 end) * 5 as hourly_duration, + avg(bottleneck_extent) as hourly_bottleneck_extent, + -- spatial delay aggregation at the hourly level, one column per V_t threshold + {% for value in var("V_t") %} + sum(spatial_delay_{{ value }}_mph) + as hourly_spatial_delay_{{ value }}_mph + {% if not loop.last %} + , + {% endif %} + {% endfor %} + from station_five_mins_data + group by station_id, sample_date, sample_hour +) + +select * from hourly_spatial_bottleneck_delay_metrics diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql new file mode 100644 index 00000000..525000d3 --- /dev/null +++ 
b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql @@ -0,0 +1,37 @@ +{{ config(materialized='table') }} + +with station_daily_data as ( + select + *, + -- Extracting first day of each month + -- reference: https://docs.snowflake.com/en/sql-reference/functions/year + date_trunc(month, sample_date) as sample_month + from {{ ref('int_performance__bottleneck_delay_metrics_agg_daily') }} + where date_trunc(month, sample_date) != date_trunc(month, current_date) +), + +monthly_spatial_bottleneck_delay_metrics as ( + select + station_id, + sample_month, + time_shift, + any_value(station_type) as station_type, + any_value(freeway) as freeway, + any_value(direction) as direction, + any_value(absolute_postmile) as absolute_postmile, + avg(daily_time_shift_duration) as monthly_time_shift_duration, + sum(case when daily_time_shift_duration > 0 then 1 else 0 end) as monthly_active_days, + avg(daily_time_shift_bottleneck_extent) as monthly_time_shift_extent, + -- spatial delay aggregation in monthly level, decomposed into time shift + {% for value in var("V_t") %} + sum(daily_time_shift_spatial_delay_{{ value }}_mph) + as monthly_time_shift_spatial_delay_{{ value }}_mph + {% if not loop.last %} + , + {% endif %} + {% endfor %} + from station_daily_data + group by station_id, sample_month, time_shift +) + +select * from monthly_spatial_bottleneck_delay_metrics From 8cf58380364e2126b63c021cf13465e6a804ea9b Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Fri, 18 Oct 2024 03:04:55 +0000 Subject: [PATCH 07/15] Fixed two bugs --- .../int_performance__bottleneck_delay_metrics_agg_daily.sql | 2 +- ...t_performance__bottleneck_delay_metrics_agg_five_minutes.sql | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql index 
fe7e0092..213707a5 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -29,6 +29,6 @@ daily_time_shift_spatial_bottleneck_delay_metrics as ( {% endfor %} from hourly_spatial_bottleneck_delay_metrics group by station_id, sample_date, time_shift -), +) select * from daily_time_shift_spatial_bottleneck_delay_metrics diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql index 62f4be90..70bbc66c 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql @@ -291,8 +291,6 @@ bottleneck_delay as ( * exclude ( congestion_sequence, congestion_status_change, - bottleneck_check, - bottleneck_check_summed, speed_delta_ne, speed_delta_sw, distance_delta_sw, From 94e1d70dc411f407271cc80ea839a5b7d25c1788 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 21 Oct 2024 17:53:03 +0000 Subject: [PATCH 08/15] Include district and county as additional metadata --- .../int_performance__bottleneck_delay_metrics_agg_daily.sql | 2 ++ ..._performance__bottleneck_delay_metrics_agg_five_minutes.sql | 2 ++ .../int_performance__bottleneck_delay_metrics_agg_hourly.sql | 3 +++ .../int_performance__bottleneck_delay_metrics_agg_monthly.sql | 2 ++ 4 files changed, 9 insertions(+) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql index 213707a5..1fc4d5a0 100644 --- 
a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -13,6 +13,8 @@ daily_time_shift_spatial_bottleneck_delay_metrics as ( station_id, sample_date, time_shift, + any_value(district) as district, + any_value(county) as county, any_value(station_type) as station_type, any_value(freeway) as freeway, any_value(direction) as direction, diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql index 70bbc66c..ed6ed71b 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_five_minutes.sql @@ -14,6 +14,8 @@ station_five_minute as ( sample_date, sample_timestamp, nullifzero(speed_five_mins) as speed_five_mins, + district, + county, freeway, freeway in {{ var("backward_routes") }} as is_backward_routes, direction, diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql index fadd50b5..48b1200f 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql @@ -19,6 +19,9 @@ hourly_spatial_bottleneck_delay_metrics as ( station_id, sample_date, sample_timestamp_trunc as sample_hour, + any_value(district) as district, + any_value(county) as county, + any_value(absolute_postmile) as absolute_postmile, any_value(station_type) as station_type, any_value(freeway) as freeway, any_value(direction) as 
direction, diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql index 525000d3..99df2e2e 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_monthly.sql @@ -15,6 +15,8 @@ monthly_spatial_bottleneck_delay_metrics as ( station_id, sample_month, time_shift, + any_value(district) as district, + any_value(county) as county, any_value(station_type) as station_type, any_value(freeway) as freeway, any_value(direction) as direction, From 97d791bf8ec7de0521c13263b5e1b778ddf7223b Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 21 Oct 2024 17:57:56 +0000 Subject: [PATCH 09/15] Fix bugs --- .../int_performance__bottleneck_delay_metrics_agg_hourly.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql index 48b1200f..ef2f01ad 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql @@ -21,7 +21,6 @@ hourly_spatial_bottleneck_delay_metrics as ( sample_timestamp_trunc as sample_hour, any_value(district) as district, any_value(county) as county, - any_value(absolute_postmile) as absolute_postmile, any_value(station_type) as station_type, any_value(freeway) as freeway, any_value(direction) as direction, From 29de7b06095ff0e7ddd361a04efd9774400f2d30 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 21 Oct 2024 23:51:30 +0000 Subject: [PATCH 10/15] Provide descriptions of hourly, daily, and monthly tables in yml file 
--- .../intermediate/performance/_performance.yml | 134 ++++++++++++++++++ ...e__bottleneck_delay_metrics_agg_hourly.sql | 2 +- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/_performance.yml b/transform/models/intermediate/performance/_performance.yml index 009e2600..a2ef4a61 100644 --- a/transform/models/intermediate/performance/_performance.yml +++ b/transform/models/intermediate/performance/_performance.yml @@ -476,12 +476,18 @@ models: Actual reported speed if available otherwise the preliminary speed calculation in miles/hour based on the simplified version of the speed formula located at https://pems.dot.ca.gov/Papers/vanzwet_gfactor.pdf + - name: DISTRICT + description: The district in which the VDS is located. Values are 1-12. + - name: COUNTY + description: The unique number that identifies the county that contains a specific VDS within PeMS. - name: FREEWAY description: The freeway where the VDS is located. - name: DIRECTION description: A string indicating the freeway direction of a specific VDS. Directions are N, E, S or W. - name: STATION_TYPE description: Two character string identify the VDS type. + - name: ABSOLUTE_POSTMILE + description: The absolute postmile location of the station. - name: LENGTH description: length of the station. - name: DISTANCE_DELTA_NE @@ -518,6 +524,134 @@ models: description: | Calculates the congestion region starting from the bottleneck location towards upstream adjacent congested stations, which is the summation of congestion lengths with same congestion sequence values. + - name: SPATIAL_DELAY MPH + description: | + Delay for different threshold of congestion speed such as 35, 40, 45, 50, 55, and 60 for each station lane. + The delay performance metric is the amount of additional time spent by the vehicles on a section of road + due to congestion. 
+ - name: SHIFT + description: | + - AM shift (5am - 10am) + - Noon shift (10am - 3pm) + - PM shift (3pm - 8pm) + - name: int_performance__bottleneck_delay_metrics_agg_hourly + description: | + Hourly aggregation of bottleneck performance by each station. It will measure the hourly performance + of the state highway system at the station level. This can be used for hourly aggregation of bottleneck + performance metrics. + columns: + - name: STATION_ID + description: | + An integer value that uniquely identifies a station. + Use this value to 'join' other files or tables that contain the Station ID value. + - name: SAMPLE_DATE + description: The date associated with hourly aggregated data samples. + - name: SAMPLE_HOUR + description: The hour associated with hourly aggregated data samples. + - name: DISTRICT + description: The district in which the VDS is located. Values are 1-12. + - name: COUNTY + description: The unique number that identifies the county that contains a specific VDS within PeMS. + - name: FREEWAY + description: The freeway where the VDS is located. + - name: DIRECTION + description: A string indicating the freeway direction of a specific VDS. Directions are N, E, S or W. + - name: STATION_TYPE + description: Two character string identify the VDS type. + - name: ABSOLUTE_POSTMILE + description: The absolute postmile location of the station. + - name: TIME_SHIFT + description: | + - AM shift (5am - 10am) + - Noon shift (10am - 3pm) + - PM shift (3pm - 8pm) + - name: HOURLY_DURATION + description: This is how long the bottleneck was active within an hour during that particular shift on that day. + - name: HOURLY_BOTTLENECK_EXTENT + description: | + Calculates the hourly congestion region starting from the bottleneck location towards upstream adjacent + congested stations, which is the summation of congestion lengths with same congestion sequence values.
+ - name: HOURLY_SPATIAL_DELAY_MPH + description: | + Hourly delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. + The delay performance metric is the amount of additional time spent by the vehicles on a section of road + due to congestion. + - name: int_performance__bottleneck_delay_metrics_agg_daily + description: | + Daily aggregation of bottleneck performance by each station. It will measure the daily performance + of the state highway system at the station level. This can be used for daily aggregation of bottleneck + performance metrics. + columns: + - name: STATION_ID + description: | + An integer value that uniquely identifies a station. + Use this value to 'join' other files or tables that contain the Station ID value. + - name: SAMPLE_DATE + description: The date associated with daily aggregated data samples. + - name: TIME_SHIFT + description: The time shift (AM, Noon, or PM) associated with daily aggregated data samples. + - name: DISTRICT + description: The district in which the VDS is located. Values are 1-12. + - name: COUNTY + description: The unique number that identifies the county that contains a specific VDS within PeMS. + - name: FREEWAY + description: The freeway where the VDS is located. + - name: DIRECTION + description: A string indicating the freeway direction of a specific VDS. Directions are N, E, S or W. + - name: STATION_TYPE + description: Two character string identify the VDS type. + - name: ABSOLUTE_POSTMILE + description: The absolute postmile location of the station. + - name: DAILY_TIME_SHIFT_DURATION + description: How long the bottleneck was active within a time shift during that particular shift on that day. + - name: DAILY_TIME_SHIFT_BOTTLENECK_EXTENT + description: | + Calculates the time shift congestion region starting from the bottleneck location towards upstream adjacent + congested stations, which is the summation of congestion lengths with same congestion sequence values.
+ - name: DAILY_TIME_SHIFT_SPATIAL_DELAY_MPH + description: | + Daily time shift delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. + The delay performance metric is the amount of additional time spent by the vehicles on a section of road + due to congestion. + - name: int_performance__bottleneck_delay_metrics_agg_monthly + description: | + Monthly aggregation of bottleneck performance by each station. It will measure the monthly performance + of the state highway system at the station level. This can be used for monthly aggregation of bottleneck + performance metrics. + columns: + - name: STATION_ID + description: | + An integer value that uniquely identifies a station. + Use this value to 'join' other files or tables that contain the Station ID value. + - name: SAMPLE_MONTH + description: The first day of the month associated with monthly aggregated data samples. + - name: TIME_SHIFT + description: The time shift (AM, Noon, or PM) associated with monthly aggregated data samples. + - name: DISTRICT + description: The district in which the VDS is located. Values are 1-12. + - name: COUNTY + description: The unique number that identifies the county that contains a specific VDS within PeMS. + - name: FREEWAY + description: The freeway where the VDS is located. + - name: DIRECTION + description: A string indicating the freeway direction of a specific VDS. Directions are N, E, S or W. + - name: STATION_TYPE + description: Two character string identify the VDS type. + - name: ABSOLUTE_POSTMILE + description: The absolute postmile location of the station. + - name: MONTHLY_TIME_SHIFT_DURATION + description: Average daily duration the bottleneck was active during that particular shift within a month. + - name: MONTHLY_ACTIVE_DAYS + description: How many days the bottleneck was active within a month.
+ - name: MONTHLY_TIME_SHIFT_EXTENT + description: | + Calculates the monthly congestion region starting from the bottleneck location towards upstream adjacent + congested stations, which is the summation of congestion lengths with same congestion sequence values. + - name: MONTHLY_TIME_SHIFT_SPATIAL_DELAY_MPH + description: | + Monthly delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. + The delay performance metric is the amount of additional time spent by the vehicles on a section of road + due to congestion. - name: int_performance__detector_metrics_agg_hourly description: | hourly aggregation of volume, occupancy and speed along with delays and lost productivity by diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql index ef2f01ad..0c8033e4 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_hourly.sql @@ -1,6 +1,6 @@ {{ config( materialized="incremental", - unique_key=['detector_id','sample_date', 'sample_hour'], + unique_key=['station_id','sample_date', 'sample_hour'], snowflake_warehouse = get_snowflake_refresh_warehouse(small="XL") ) }} From 9e411ef4f04e7ccfb1349a451fc2bde155f89095 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Mon, 21 Oct 2024 23:59:45 +0000 Subject: [PATCH 11/15] Whitespaces Fixed --- .../intermediate/performance/_performance.yml | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/transform/models/intermediate/performance/_performance.yml b/transform/models/intermediate/performance/_performance.yml index a2ef4a61..4ecf3bf2 100644 --- a/transform/models/intermediate/performance/_performance.yml +++ b/transform/models/intermediate/performance/_performance.yml 
@@ -527,13 +527,15 @@ models: - name: SPATIAL_DELAY MPH description: | Delay for different threshold of congestion speed such as 35, 40, 45, 50, 55, and 60 for each station lane. - The delay performance metric is the amount of additional time spent by the vehicles on a section of road + The delay performance metric is the amount of additional time spent by the vehicles on a section of road due to congestion. - name: SHIFT description: | - - AM shift (5am - 10am) - - Noon shift (10am - 3pm) - - PM shift (3pm - 8pm) + - AM shift (5am - 10am) + + - Noon shift (10am - 3pm) + + - PM shift (3pm - 8pm) - name: int_performance__bottleneck_delay_metrics_agg_hourly description: | Hourly aggregation of bottleneck performance by each station. It will measure the hourly performance @@ -562,9 +564,11 @@ models: description: The absolute postmile location of the station. - name: TIME_SHIFT description: | - - AM shift (5am - 10am) - - Noon shift (10am - 3pm) - - PM shift (3pm - 8pm) + - AM shift (5am - 10am) + + - Noon shift (10am - 3pm) + + - PM shift (3pm - 8pm) - name: HOURLY_DURATION description: This is how long the bottleneck was active within an hour during that particular shift on that day. - name: HOURLY_BOTTLENECK_EXTENT @@ -574,7 +578,7 @@ models: - name: HOURLY_SPATIAL_DELAY_MPH description: | Hourly delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. - The delay performance metric is the amount of additional time spent by the vehicles on a section of road + The delay performance metric is the amount of additional time spent by the vehicles on a section of road due to congestion. - name: int_performance__bottleneck_delay_metrics_agg_daily description: | @@ -611,7 +615,7 @@ models: - name: DAILY_TIME_SHIFT_SPATIAL_DELAY_MPH description: | Daily time shift delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. 
- The delay performance metric is the amount of additional time spent by the vehicles on a section of road + The delay performance metric is the amount of additional time spent by the vehicles on a section of road due to congestion. - name: int_performance__bottleneck_delay_metrics_agg_monthly description: | From 6314ff8a289be9b4899abf5d6e4c311cc0ebe2a1 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Tue, 22 Oct 2024 00:05:46 +0000 Subject: [PATCH 12/15] Trime whitespaces --- transform/models/intermediate/performance/_performance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/_performance.yml b/transform/models/intermediate/performance/_performance.yml index 4ecf3bf2..09368a3d 100644 --- a/transform/models/intermediate/performance/_performance.yml +++ b/transform/models/intermediate/performance/_performance.yml @@ -654,7 +654,7 @@ models: - name: MONTHLY_TIME_SHIFT_SPATIAL_DELAY_MPH description: | Monthly delay for different thresholds such as 35, 40, 45, 50, 55, and 60 for each station lane. - The delay performance metric is the amount of additional time spent by the vehicles on a section of road + The delay performance metric is the amount of additional time spent by the vehicles on a section of road due to congestion. 
- name: int_performance__detector_metrics_agg_hourly description: | From 38a0855ce4bce781270a76a02548291848b41223 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Tue, 22 Oct 2024 18:13:51 +0000 Subject: [PATCH 13/15] change daily table to incremental model --- ...int_performance__bottleneck_delay_metrics_agg_daily.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql index 1fc4d5a0..1477a265 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -1,8 +1,13 @@ -{{ config(materialized='table') }} +{{ config( + materialized="incremental", + unique_key=['station_id','sample_date'], + snowflake_warehouse = get_snowflake_refresh_warehouse(small="XL") +) }} with hourly_spatial_bottleneck_delay_metrics as ( select * from {{ ref('int_performance__bottleneck_delay_metrics_agg_hourly') }} + where {{ make_model_incremental('sample_date') }} ), /*aggregate hourly delay and bottleneck extent in a daily level. 
Since one day has From b05ec35978a7d96e9a2d2a567bbf967e60dd6afa Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Tue, 22 Oct 2024 19:45:44 +0000 Subject: [PATCH 14/15] Avoid NULL values in Timeshift --- .../int_performance__bottleneck_delay_metrics_agg_daily.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql index 1477a265..288e1ca4 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -7,7 +7,7 @@ with hourly_spatial_bottleneck_delay_metrics as ( select * from {{ ref('int_performance__bottleneck_delay_metrics_agg_hourly') }} - where {{ make_model_incremental('sample_date') }} + where {{ make_model_incremental('sample_date') }} ), /*aggregate hourly delay and bottleneck extent in a daily level. 
Since one day has @@ -35,6 +35,7 @@ daily_time_shift_spatial_bottleneck_delay_metrics as ( {% endif %} {% endfor %} from hourly_spatial_bottleneck_delay_metrics + where time_shift is not NULL group by station_id, sample_date, time_shift ) From 10b122903e4890c1046b26f47fd20e36d35e7aa1 Mon Sep 17 00:00:00 2001 From: Hang Gao Date: Tue, 22 Oct 2024 19:51:46 +0000 Subject: [PATCH 15/15] whitespace --- .../int_performance__bottleneck_delay_metrics_agg_daily.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql index 288e1ca4..1b0dad54 100644 --- a/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql +++ b/transform/models/intermediate/performance/int_performance__bottleneck_delay_metrics_agg_daily.sql @@ -7,7 +7,7 @@ with hourly_spatial_bottleneck_delay_metrics as ( select * from {{ ref('int_performance__bottleneck_delay_metrics_agg_hourly') }} - where {{ make_model_incremental('sample_date') }} + where {{ make_model_incremental('sample_date') }} ), /*aggregate hourly delay and bottleneck extent in a daily level. Since one day has