Skip to content

Commit

Permalink
#905 split in to table definitions, functions
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielwol committed Mar 22, 2024
1 parent 917a298 commit d76d8a6
Show file tree
Hide file tree
Showing 6 changed files with 429 additions and 249 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
--Review decisions:
--Classification Grouping and naming
--Include/Exclude bicycles?
--Include/Exclude buses/streetcars?
--Decision to not include manual anomalous_range 'valid_caveat' notes: SELECT
--Including entry/exit information to satisfy ATR related DRs.
--> providing exit leg and direction as extra columns rather
-- than extra rows to reduce potential for double counting.

--DROP FUNCTION gwolofs.insert_miovision_15min_open_data;

CREATE OR REPLACE FUNCTION gwolofs.insert_miovision_15min_open_data(
_date date,
intersections integer[] DEFAULT ARRAY[]::integer[])
RETURNS void
LANGUAGE 'plpgsql'
COST 100
VOLATILE PARALLEL UNSAFE
AS $BODY$

DECLARE
target_intersections integer [] = miovision_api.get_intersections_uids(intersections);
n_deleted int;
n_inserted int;
_month date = date_trunc('month', _date);

BEGIN

WITH deleted AS (
DELETE FROM gwolofs.miovision_15min_open_data
WHERE
datetime_15min >= _month
AND datetime_15min < _month + interval '1 month'
AND intersection_uid = ANY(target_intersections)
RETURNING *
)

SELECT COUNT(*) INTO n_deleted
FROM deleted;

RAISE NOTICE 'Deleted % rows from gwolofs.miovision_15min_open_data for month %.', n_deleted, _month;

WITH inserted AS (
INSERT INTO gwolofs.miovision_15min_open_data (
intersection_uid, intersection_long_name, datetime_15min, classification_type,
entry_leg, entry_dir, movement, exit_leg, exit_dir, volume_15min
)
SELECT
v15.intersection_uid,
i.api_name AS intersection_long_name,
v15.datetime_bin AS datetime_15min,
CASE
WHEN cl.classification = 'Light' THEN 'Light Auto'
WHEN cl.classification IN (
'SingleUnitTruck', 'ArticulatedTruck', 'MotorizedVehicle', 'Bus'
) THEN 'Truck/Bus'
ELSE cl.classification -- 'Bicycle', 'Pedestrian'
END AS classification_type,
v15.leg AS entry_leg,
entries.dir AS entry_dir,
mov.movement_name AS movement,
--assign exits for peds, bike entry only movements
COALESCE(exits.leg_new, v15.leg) AS exit_leg,
COALESCE(exits.dir, entries.dir) AS exit_dir,
SUM(v15.volume) AS volume_15min
--exclude notes (manual text field)
--array_agg(ar.notes ORDER BY ar.range_start, ar.uid) FILTER (WHERE ar.uid IS NOT NULL) AS anomalous_range_caveats
FROM miovision_api.volumes_15min_mvt AS v15
JOIN miovision_api.intersections AS i USING (intersection_uid)
JOIN miovision_api.classifications AS cl USING (classification_uid)
JOIN miovision_api.movements AS mov USING (movement_uid)
-- TMC to ATR crossover table for e
LEFT JOIN miovision_api.movement_map AS entries ON
entries.leg_old = v15.leg
AND entries.movement_uid = v15.movement_uid
AND entries.leg_new <> substr(entries.dir, 1, 1) --eg. E leg going West is an entry
-- TMC to ATR crossover table
LEFT JOIN miovision_api.movement_map AS exits ON
exits.leg_old = v15.leg
AND exits.movement_uid = v15.movement_uid
AND exits.leg_new = substr(exits.dir, 1, 1) --eg. E leg going East is an exit
--anti-join anomalous_ranges. See HAVING clause.
LEFT JOIN miovision_api.anomalous_ranges AS ar ON
(
ar.intersection_uid = v15.intersection_uid
OR ar.intersection_uid IS NULL
) AND (
ar.classification_uid = v15.classification_uid
OR ar.classification_uid IS NULL
)
AND v15.datetime_bin >= ar.range_start
AND (
v15.datetime_bin < ar.range_end
OR ar.range_end IS NULL
)
WHERE
v15.datetime_bin >= _month
AND v15.datetime_bin < _month + interval '1 month'
AND v15.intersection_uid = ANY(target_intersections)
GROUP BY
v15.intersection_uid,
i.api_name,
v15.datetime_bin,
classification_type,
movement,
entry_leg,
entry_dir,
exit_dir,
exit_leg
HAVING
NOT array_agg(ar.problem_level) && ARRAY['do-not-use'::text, 'questionable'::text]
AND SUM(v15.volume) > 0 --confirm
ORDER BY
v15.intersection_uid,
classification_type,
v15.datetime_bin,
v15.leg
--fail on conflict
RETURNING *
)

SELECT COUNT(*) INTO n_inserted
FROM inserted;

RAISE NOTICE 'Inserted % rows into gwolofs.miovision_15min_open_data for month %.', n_inserted, _month;

END;
$BODY$;

ALTER FUNCTION gwolofs.insert_miovision_15min_open_data(date, integer[])
OWNER TO gwolofs;

GRANT EXECUTE ON FUNCTION gwolofs.insert_miovision_15min_open_data(date, integer[])
TO miovision_admins;

GRANT EXECUTE ON FUNCTION gwolofs.insert_miovision_15min_open_data(date, integer[])
TO miovision_api_bot;

REVOKE ALL ON FUNCTION gwolofs.insert_miovision_15min_open_data(date, integer[])
FROM PUBLIC;

COMMENT ON FUNCTION gwolofs.insert_miovision_15min_open_data(date, integer[])
IS 'Function for first deleting then inserting monthly 15
minute open data volumes into gwolofs.miovision_15min_open_data.
Contains an optional intersection parameter in case one just one
intersection needs to be refreshed.';

--testing, indexes work
--~50s for 1 day, ~40 minutes for 1 month (5M rows)
SELECT gwolofs.insert_miovision_15min_open_data('2024-02-01'::date);
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
--DROP FUNCTION gwolofs.insert_miovision_open_data_monthly_summary;

CREATE OR REPLACE FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(
_date date,
intersections integer[] DEFAULT ARRAY[]::integer[])
RETURNS void
LANGUAGE 'plpgsql'
COST 100
VOLATILE PARALLEL UNSAFE
AS $BODY$

DECLARE
target_intersections integer [] = miovision_api.get_intersections_uids(intersections);
n_deleted int;
n_inserted int;
_month date = date_trunc('month', _date);

BEGIN

WITH deleted AS (
DELETE FROM gwolofs.miovision_open_data_monthly_summary
WHERE
mnth = _month
AND intersection_uid = ANY(target_intersections)
RETURNING *
)

SELECT COUNT(*) INTO n_deleted
FROM deleted;

RAISE NOTICE 'Deleted % rows from gwolofs.miovision_15min_open_data for month %.', n_deleted, _month;

WITH daily_volumes AS (
SELECT
vd.dt,
vd.intersection_uid,
CASE
WHEN cl.classification = 'Light' THEN 'Light Auto'
WHEN cl.classification IN ('SingleUnitTruck', 'ArticulatedTruck', 'MotorizedVehicle', 'Bus') THEN 'Truck/Bus'
ELSE cl.classification -- 'Bicycle', 'Pedestrian'
END AS classification_type,
--daily volume with long gaps imputted
SUM(coalesce(daily_volume,0) + coalesce(avg_historical_gap_vol,0)) AS total_vol
--omits anomalous_ranges
FROM miovision_api.volumes_daily AS vd
JOIN miovision_api.classifications AS cl USING (classification_uid)
WHERE
vd.isodow <= 5
AND vd.holiday IS false
AND vd.dt >= _month
AND vd.dt < _month + interval '1 month'
AND vd.intersection_uid = ANY(target_intersections)
--AND classification_uid NOT IN (2,7,10) --exclude bikes due to reliability?
GROUP BY
vd.dt,
vd.intersection_uid,
vd.intersection_uid,
classification_type
),

v15 AS (
--15 minute volumes grouped by classification_type
SELECT
v.intersection_uid,
v.datetime_bin,
CASE
WHEN cl.classification = 'Light' THEN 'Light Auto'
WHEN cl.classification IN ('SingleUnitTruck', 'ArticulatedTruck', 'MotorizedVehicle', 'Bus') THEN 'Truck/Bus'
ELSE cl.classification -- 'Bicycle', 'Pedestrian'
END AS classification_type,
SUM(v.volume) AS vol_15min
FROM miovision_api.volumes_15min_mvt AS v
JOIN miovision_api.classifications AS cl USING (classification_uid)
--anti join holidays
LEFT JOIN ref.holiday AS hol ON hol.dt = v.datetime_bin::date
--anti join anomalous ranges. See HAVING clause.
--NOTE: this method is omitting the whole classification_type if
--one classification is missing. May be undesired.
LEFT JOIN miovision_api.anomalous_ranges ar ON
(
ar.intersection_uid = v.intersection_uid
OR ar.intersection_uid IS NULL
) AND (
ar.classification_uid = v.classification_uid
OR ar.classification_uid IS NULL
)
AND v.datetime_bin >= ar.range_start
AND (
v.datetime_bin < ar.range_end
OR ar.range_end IS NULL
)
WHERE
v.datetime_bin >= _month
AND v.datetime_bin < _month + interval '1 month'
AND v.intersection_uid = ANY(target_intersections)
AND hol.holiday IS NULL
AND date_part('isodow', v.datetime_bin) <= 5 --weekday
--AND classification_uid NOT IN (2,7,10) --exclude bikes due to reliability?
GROUP BY
v.intersection_uid,
classification_type,
v.datetime_bin
),

hourly_data AS (
--find rolling 1 hour volume
SELECT
intersection_uid,
classification_type,
datetime_bin,
datetime_bin::date AS dt,
CASE WHEN date_part('hour', datetime_bin) < 12 THEN 'AM' ELSE 'PM' END AS am_pm,
vol_15min,
SUM(vol_15min) OVER (
PARTITION BY intersection_uid, classification_type
ORDER BY datetime_bin
RANGE BETWEEN '45 minutes' PRECEDING AND CURRENT ROW
) AS hr_vol
FROM v15
),

highest_daily_volume AS (
--find highest volume each day
SELECT
intersection_uid,
classification_type,
am_pm,
dt,
MAX(hr_vol) AS max_hr_volume
FROM hourly_data
GROUP BY
intersection_uid,
classification_type,
am_pm,
dt
),

inserted AS (
INSERT INTO gwolofs.miovision_open_data_monthly_summary (
intersection_uid, intersection_long_name, mnth, avg_daily_vol_auto, avg_daily_vol_truckbus, avg_daily_vol_ped, avg_daily_vol_bike,
avg_am_peak_hour_vol_auto, avg_am_peak_hour_vol_truckbus, avg_am_peak_hour_vol_ped, avg_am_peak_hour_vol_bike,
avg_pm_peak_hour_vol_auto, avg_pm_peak_hour_vol_truckbus, avg_pm_peak_hour_vol_ped, avg_pm_peak_hour_vol_bike
)
SELECT
coalesce(dv.intersection_uid, hv.intersection_uid) AS intersection_uid,
i.api_name AS intersection_long_name,
date_trunc('month', coalesce(dv.dt, hv.dt)) AS mnth,
ROUND(AVG(dv.total_vol) FILTER (WHERE dv.classification_type = 'Light Auto'), 0) AS avg_daily_vol_auto,
ROUND(AVG(dv.total_vol) FILTER (WHERE dv.classification_type = 'Truck/Bus'), 0) AS avg_daily_vol_truckbus,
ROUND(AVG(dv.total_vol) FILTER (WHERE dv.classification_type = 'Pedestrians'), 0) AS avg_daily_vol_ped,
ROUND(AVG(dv.total_vol) FILTER (WHERE dv.classification_type = 'Cyclists'), 0) AS avg_daily_vol_bike,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Light Auto' AND hv.am_pm = 'AM'), 0) AS avg_am_peak_hour_vol_auto,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Truck/Bus' AND hv.am_pm = 'AM'), 0) AS avg_am_peak_hour_vol_truckbus,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Pedestrians' AND hv.am_pm = 'AM'), 0) AS avg_am_peak_hour_vol_ped,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Cyclists' AND hv.am_pm = 'AM'), 0) AS avg_am_peak_hour_vol_bike,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Light Auto' AND hv.am_pm = 'PM'), 0) AS avg_pm_peak_hour_vol_auto,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Truck/Bus' AND hv.am_pm = 'PM'), 0) AS avg_pm_peak_hour_vol_truckbus,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Pedestrians' AND hv.am_pm = 'PM'), 0) AS avg_pm_peak_hour_vol_ped,
ROUND(AVG(hv.max_hr_volume) FILTER (WHERE hv.classification_type = 'Cyclists' AND hv.am_pm = 'PM'), 0) AS avg_pm_peak_hour_vol_bike
--array_agg(DISTINCT ar.notes) FILTER (WHERE ar.uid IS NOT NULL) AS notes
FROM daily_volumes AS dv
FULL JOIN highest_daily_volume AS hv USING (intersection_uid, dt, classification_type)
LEFT JOIN miovision_api.intersections AS i ON
i.intersection_uid = coalesce(dv.intersection_uid, hv.intersection_uid)
GROUP BY
coalesce(dv.intersection_uid, hv.intersection_uid),
i.api_name,
date_trunc('month', coalesce(dv.dt, hv.dt))
ORDER BY
coalesce(dv.intersection_uid, hv.intersection_uid),
date_trunc('month', coalesce(dv.dt, hv.dt))
RETURNING *
)

SELECT COUNT(*) INTO n_inserted
FROM inserted;

RAISE NOTICE 'Inserted % rows into gwolofs.miovision_open_data_monthly_summary for month %.', n_inserted, _month;

END;
$BODY$;

ALTER FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(date, integer[])
OWNER TO gwolofs;

GRANT EXECUTE ON FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(date, integer[])
TO miovision_admins;

GRANT EXECUTE ON FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(date, integer[])
TO miovision_api_bot;

REVOKE ALL ON FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(date, integer[])
FROM PUBLIC;

COMMENT ON FUNCTION gwolofs.insert_miovision_open_data_monthly_summary(date, integer[])
IS 'Function for first deleting then inserting monthly summary miovision
open data into gwolofs.miovision_open_data_monthly_summary.
Contains an optional intersection parameter in case one just one
intersection needs to be refreshed.';

--testing, indexes work
--~50s for 1 day, ~40 minutes for 1 month (5M rows)
SELECT gwolofs.insert_miovision_open_data_monthly_summary('2024-02-01'::date);
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- DROP TABLE IF EXISTS gwolofs.miovision_15min_open_data;

CREATE TABLE IF NOT EXISTS gwolofs.miovision_15min_open_data
(
intersection_uid integer NOT NULL,
intersection_long_name text COLLATE pg_catalog."default",
datetime_15min timestamp without time zone NOT NULL,
classification_type text COLLATE pg_catalog."default" NOT NULL,
entry_leg text COLLATE pg_catalog."default" NOT NULL,
entry_dir text COLLATE pg_catalog."default",
movement text COLLATE pg_catalog."default" NOT NULL,
exit_leg text COLLATE pg_catalog."default",
exit_dir text COLLATE pg_catalog."default",
volume_15min smallint,
CONSTRAINT miovision_open_data_15min_pkey PRIMARY KEY (
intersection_uid, datetime_15min, classification_type, entry_leg, movement
)
)

TABLESPACE pg_default;

ALTER TABLE IF EXISTS gwolofs.miovision_15min_open_data
OWNER TO gwolofs;

REVOKE ALL ON TABLE gwolofs.miovision_15min_open_data FROM bdit_humans;

GRANT SELECT ON TABLE gwolofs.miovision_15min_open_data TO bdit_humans;

GRANT ALL ON TABLE gwolofs.miovision_15min_open_data TO gwolofs;

GRANT ALL ON TABLE gwolofs.miovision_15min_open_data TO miovision_admins;

COMMENT ON TABLE gwolofs.miovision_15min_open_data
IS 'Table to store Miovision 15min open data. Updated monthly.
Schema is a blend of TMC and ATR style data to cover different
types of data requests.';
Loading

0 comments on commit d76d8a6

Please sign in to comment.