Skip to content

Commit

Permalink
Created a model that performs various checks if data is missing that …
Browse files Browse the repository at this point in the history
…is used in the detector status diagnostics tests. This results in more detailed statuses when data is not being relayed from detectors. Also, reorganized joins in the detector_status model to pass PR checks.
  • Loading branch information
kengodleskidot committed Oct 2, 2024
1 parent 1c972b5 commit ac7d895
Show file tree
Hide file tree
Showing 2 changed files with 237 additions and 51 deletions.
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
{{ config(
materialized="incremental",
cluster_by=['sample_date'],
unique_key=['station_id', 'lane', 'sample_date'],
on_schema_change='sync_all_columns',
cluster_by=["sample_date"],
unique_key=["station_id", "lane", "sample_date"],
on_schema_change="sync_all_columns",
snowflake_warehouse=get_snowflake_refresh_warehouse(small="XL")
) }}

with

source as (
select * from {{ ref('int_diagnostics__samples_per_detector') }}
where {{ make_model_incremental('sample_date') }}
Expand All @@ -17,26 +16,9 @@ detector_meta as (
select * from {{ ref('int_vds__detector_config') }}
),

district_feed_check as (
select
source.district,
case
when (count_if(source.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as district_feed_working
from source
inner join {{ ref('districts') }} as d
on source.district = d.district_id
group by source.district
),

detector_status as (
assignment_with_meta as (
select
set_assgnmt.active_date,
set_assgnmt.station_id,
set_assgnmt.district,
set_assgnmt.station_type,
set_assgnmt.active_date as sample_date,
set_assgnmt.*,
dm.detector_id,
dm.detector_type,
dm.lane,
Expand All @@ -49,12 +31,48 @@ detector_status as (
dm.city,
dm.freeway,
dm.direction,
dm.length,
dm.length
from {{ ref('int_diagnostics__det_diag_set_assignment') }} as set_assgnmt
inner join detector_meta as dm
on
set_assgnmt.station_id = dm.station_id
and {{ get_scd_2_data('set_assgnmt.active_date','dm._valid_from','dm._valid_to') }}
),

detector_status as (
select
awm.active_date,
awm.station_id,
awm.district,
awm.station_type,
awm.station_diagnostic_method_id,
awm.active_date as sample_date,
awm.detector_id,
awm.detector_type,
awm.lane,
awm.state_postmile,
awm.absolute_postmile,
awm.latitude,
awm.longitude,
awm.physical_lanes,
awm.county,
awm.city,
awm.freeway,
awm.direction,
awm.length,
sps.* exclude (district, station_id, lane, detector_id, sample_date),
dfc.district_feed_working,
nds.district_feed_working,
nds.line_num_working,
nds.controller_feed_working,
nds.station_feed_working,
nds.detector_feed_working,
co.min_occupancy_delta,
case
when dfc.district_feed_working = 'No' then 'District Feed Down'
when nds.district_feed_working = 'No' then 'District Feed Down'
when nds.line_num_working = 'No' then 'Line Down'
when nds.controller_feed_working = 'No' then 'Controller Down'
when nds.station_feed_working = 'No' then 'Station Down'
when nds.detector_feed_working = 'No' then 'No Data'
when sps.sample_ct = 0 or sps.sample_ct is null
then 'Down/No Data'
/* # of samples < 60% of the max collected during the test period
Expand All @@ -63,63 +81,61 @@ detector_status as (
when sps.sample_ct between 1 and (0.6 * ({{ var("detector_status_max_sample_value") }}))
then 'Insufficient Data'
when
set_assgnmt.station_diagnostic_method_id = 'ramp'
awm.station_diagnostic_method_id = 'ramp'
and sps.zero_vol_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.zero_flow_percent / 100)
> (awm.zero_flow_percent / 100)
then 'Card Off'
when
set_assgnmt.station_diagnostic_method_id = 'mainline'
awm.station_diagnostic_method_id = 'mainline'
and sps.zero_occ_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.zero_occupancy_percent / 100)
> (awm.zero_occupancy_percent / 100)
then 'Card Off'
when
set_assgnmt.station_diagnostic_method_id = 'ramp'
awm.station_diagnostic_method_id = 'ramp'
and sps.high_volume_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.high_flow_percent / 100)
> (awm.high_flow_percent / 100)
then 'High Val'
when
set_assgnmt.station_diagnostic_method_id = 'mainline'
awm.station_diagnostic_method_id = 'mainline'
and sps.high_occupancy_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.high_occupancy_percent / 100)
> (awm.high_occupancy_percent / 100)
then 'High Val'
when
set_assgnmt.station_diagnostic_method_id = 'mainline'
awm.station_diagnostic_method_id = 'mainline'
and sps.zero_vol_pos_occ_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.flow_occupancy_percent / 100)
> (awm.flow_occupancy_percent / 100)
then 'Intermittent'
when
set_assgnmt.station_diagnostic_method_id = 'mainline'
awm.station_diagnostic_method_id = 'mainline'
and sps.zero_occ_pos_vol_ct / ({{ var("detector_status_max_sample_value") }})
> (set_assgnmt.occupancy_flow_percent / 100)
> (awm.occupancy_flow_percent / 100)
then 'Intermittent'
when
coalesce(co.min_occupancy_delta = 0, false)
and set_assgnmt.station_diagnostic_method_id = 'mainline'
and awm.station_diagnostic_method_id = 'mainline'
then 'Constant'
--Feed unstable case needed
else 'Good'
end as status

from {{ ref('int_diagnostics__det_diag_set_assignment') }} as set_assgnmt
inner join detector_meta as dm
on
set_assgnmt.station_id = dm.station_id
and {{ get_scd_2_data('set_assgnmt.active_date','dm._valid_from','dm._valid_to') }}
from assignment_with_meta as awm

left join source as sps
on
set_assgnmt.station_id = sps.station_id
and dm.lane = sps.lane
and set_assgnmt.active_date = sps.sample_date
awm.station_id = sps.station_id
and awm.lane = sps.lane
and awm.active_date = sps.sample_date

left join {{ ref('int_diagnostics__constant_occupancy') }} as co
on
set_assgnmt.station_id = co.station_id
awm.station_id = co.station_id
and sps.lane = co.lane
and set_assgnmt.active_date = co.sample_date
and awm.active_date = co.sample_date

left join district_feed_check as dfc
on set_assgnmt.district = dfc.district
left join {{ ref('int_diagnostics__no_data_status') }} as nds
on
awm.active_date = nds.active_date
and awm.detector_id = nds.detector_id
)

select * from detector_status
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
{{ config(
materialized="incremental",
cluster_by=["active_date"],
unique_key=["detector_id", "active_date"],
on_schema_change="sync_all_columns",
snowflake_warehouse=get_snowflake_refresh_warehouse(small="XL")
) }}

with
detector_meta as (
select * from {{ ref('int_vds__active_detectors') }}
where {{ make_model_incremental('active_date') }}
),

station_meta as (
select * from {{ ref('int_vds__station_config') }}
),

controller_meta as (
select * from {{ ref('int_vds__controller_config') }}
),

equipment_meta as (
select
dm.*,
sm.controller_id,
sm.name,
sm.angle
from detector_meta as dm
inner join station_meta as sm
on
dm.station_id = sm.station_id
and {{ get_scd_2_data('dm.active_date','sm._valid_from','sm._valid_to') }}
),

equipment_all_meta as (
select
em.*,
cm.line_num,
cm.stn_address,
cm.controller_type
from equipment_meta as em
inner join controller_meta as cm
on
em.controller_id = cm.controller_id
and {{ get_scd_2_data('em.active_date','cm._valid_from','cm._valid_to') }}
),

equipment_with_samples as (
select
eam.*,
source.sample_ct
from equipment_all_meta as eam
left join {{ ref('int_diagnostics__samples_per_detector') }} as source
on
eam.detector_id = source.detector_id
and eam.active_date = source.sample_date
),

district_feed_check as (
select
ews.active_date,
ews.district,
case
when (count_if(ews.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as district_feed_working
from equipment_with_samples as ews
inner join {{ ref('districts') }} as d
on ews.district = d.district_id
group by ews.active_date, ews.district
),

line_feed_check as (
select
ews.active_date,
ews.district,
ews.line_num,
case
when ews.line_num is null then 'Yes'
when (count_if(ews.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as line_num_working
from equipment_with_samples as ews
group by ews.active_date, ews.district, ews.line_num
),

controller_feed_check as (
select
ews.active_date,
ews.district,
ews.controller_id,
case
when ews.controller_id is null then 'Yes'
when (count_if(ews.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as controller_feed_working
from equipment_with_samples as ews
group by ews.active_date, ews.district, ews.controller_id
),

station_feed_check as (
select
ews.active_date,
ews.district,
ews.station_id,
case
when ews.station_id is null then 'Yes'
when (count_if(ews.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as station_feed_working
from equipment_with_samples as ews
group by ews.active_date, ews.district, ews.station_id
),

detector_feed_check as (
select
ews.active_date,
ews.detector_id,
case
when ews.detector_id is null then 'Yes'
when (count_if(ews.sample_ct > 0)) > 0 then 'Yes'
else 'No'
end as detector_feed_working
from equipment_with_samples as ews
group by ews.active_date, ews.detector_id
),

data_feed_check as (
select
ews.active_date,
ews.district,
ews.line_num,
ews.controller_id,
ews.station_id,
ews.detector_id,
ews.sample_ct,
dfc.district_feed_working,
lfc.line_num_working,
cfc.controller_feed_working,
sfc.station_feed_working,
detfc.detector_feed_working
from equipment_with_samples as ews
left join district_feed_check as dfc
on
ews.active_date = dfc.active_date
and ews.district = dfc.district
left join line_feed_check as lfc
on
ews.active_date = lfc.active_date
and ews.district = lfc.district
and ews.line_num = lfc.line_num
left join controller_feed_check as cfc
on
ews.active_date = cfc.active_date
and ews.district = cfc.district
and ews.controller_id = cfc.controller_id
left join station_feed_check as sfc
on
ews.active_date = sfc.active_date
and ews.district = sfc.district
and ews.station_id = sfc.station_id
left join detector_feed_check as detfc
on
ews.active_date = detfc.active_date
and ews.detector_id = detfc.detector_id
)

select * from data_feed_check
order by active_date, district, line_num, controller_id, station_id, detector_id

0 comments on commit ac7d895

Please sign in to comment.