Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lilian dbt training #65

Merged
merged 17 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions transform/models/_models.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
# dbt model properties: descriptions and column tests for the PeMS models.
version: 2

models:
  # Staging model that selects from the CLEARINGHOUSE.STATION_META source.
  - name: stg_pems__station_meta
    description: reformatting table columns
    config:
      materialized: table
      # NOTE(review): looks like a personal test schema - confirm before
      # this configuration is relied on outside of training.
      schema: DBT_ZWU_TEST
    columns:
      # Built in the model with DATE_FROM_PARTS over slices of FILENAME.
      - name: META_DATE
        description: date
      - name: ID
        description: "station ID"
        tests:
          - not_null

  # One row per county; the model groups the staging rows by county.
  - name: int_pems__stations_per_county_counted
    description: intermediate model with 2023 station count by county
    columns:
      - name: County
        description: county id
        tests:
          - not_null
          - unique
      # Must match the alias produced by the model (`count(id) as sta_counts`).
      - name: sta_counts
        description: number of stations in this county
85 changes: 85 additions & 0 deletions transform/models/_sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# dbt source definitions for the raw CLEARINGHOUSE tables in RAW_PRD.
version: 2

sources:
  - name: CLEARINGHOUSE
    database: RAW_PRD
    schema: CLEARINGHOUSE
    description: Station Information
    tables:
      - name: STATION_META
        description: meta data for stations
        columns:
          - name: FILENAME
            description: file name
          - name: ID
            description: station ID
          - name: FWY
            description: freeway number
          - name: DIR
            description: traffic flow direction
          - name: DISTRICT
            description: district number
          - name: COUNTY
            description: county name
          - name: CITY
            description: city name
          - name: STATE_PM
            description: state postmile
          - name: ABS_PM
            description: absolute postmile
          - name: LATITUDE
            description: coordinate-latitude
          - name: LONGITUDE
            description: coordinate-longitude
          - name: LENGTH
            description: distance
          # NOTE(review): description was a "<>" placeholder; wording below is
          # inferred from the column name - confirm against the data dictionary.
          - name: TYPE
            description: station type
          - name: LANES
            description: number of lanes
          - name: NAME
            description: station name
          - name: USER_ID_1
            description: user id 1
          # NOTE(review): USER_ID_2..4 were "<>" placeholders; filled in to
          # follow the USER_ID_1 wording - confirm their actual meaning.
          - name: USER_ID_2
            description: user id 2
          - name: USER_ID_3
            description: user id 3
          - name: USER_ID_4
            description: user id 4

      - name: STATION_RAW
        description: Raw traffic data collected at each station
        columns:
          - name: FILENAME
            description: file name
          - name: SAMPLE_TIMESTAMP
            description: timestamp
          - name: SAMPLE_DATE
            description: date
          - name: ID
            description: station ID
          - name: FLOW_1
            description: flow in lane 1
          - name: OCCUPANCY_1
            description: lane 1 occupancy
          - name: SPEED_1
            description: average travel speed in lane 1
          - name: FLOW_2
            description: flow in lane 2
          - name: OCCUPANCY_2
            description: lane 2 occupancy
          - name: SPEED_2
            description: average travel speed in lane 2
          - name: FLOW_3
            description: flow in lane 3
          - name: OCCUPANCY_3
            description: lane 3 occupancy
          - name: SPEED_3
            description: average travel speed in lane 3

      - name: STATION_STATUS
        description: station status information
        columns:
          - name: FILENAME
            description: file name
          - name: CONTENT
            description: content
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Counts non-null station ids per county from stg_pems__station_meta,
-- keeping only rows whose meta_date is in 2023, highest count first.
with

station_meta as (
    select * from {{ ref("stg_pems__station_meta") }}
),

stations_per_county as (
    select
        county,
        count(id) as sta_counts
    from station_meta
    where year(meta_date) = 2023
    group by county
)

select *
from stations_per_county
order by sta_counts desc
12 changes: 0 additions & 12 deletions transform/models/sample_data.sql

This file was deleted.

24 changes: 24 additions & 0 deletions transform/models/staging/stg_pems__station_meta.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Staging model over the CLEARINGHOUSE.STATION_META source.
-- Adds META_DATE and renames the abbreviated columns
-- (FWY, DIR, STATE_PM, ABS_PM); FILENAME and USER_ID_* are not selected.
with

source as (
    select * from {{ source('CLEARINGHOUSE','STATION_META') }}
),

renamed as (
    select
        -- The three date parts are sliced out of FILENAME by fixed
        -- character position: two from the left, the last from the right.
        -- NOTE(review): presumably year, month, day of the file - confirm
        -- against the actual FILENAME layout.
        date_from_parts(
            substring(filename, 18, 4),
            substring(filename, 23, 2),
            left(right(filename, 6), 2)
        ) as meta_date,
        id,
        fwy as freeway,
        dir as direction,
        district,
        county,
        city,
        state_pm as state_postmile,
        abs_pm as absolute_postmile,
        latitude,
        longitude,
        length,
        type,
        lanes,
        name
    from source
)

select * from renamed
Loading