Skip to content

Commit

Permalink
Using argcmdr for dirtyduck (Related to #749)
Browse files Browse the repository at this point in the history
  • Loading branch information
nanounanue committed Mar 20, 2021
1 parent d003dd1 commit ffabeb1
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 13 deletions.
19 changes: 19 additions & 0 deletions dirtyduck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import argparse
from pathlib import Path

from argcmdr import local, LocalRoot, Local

ROOT_PATH = Path(__file__).parent.resolve()

class DirtyDuck(LocalRoot):
    """Commands for the dirtyduck tutorial."""

@DirtyDuck.register
def db_setup(context, args):
    """Set up the dirtyduck database.

    Runs every ``*.sql`` script found under the ``dirtyduck`` directory
    (next to this file) through ``psql -f``, one command per script.

    The following environment variables should be available:
    PGHOST, PGDATABASE, PGUSER, PGPASSWORD, PGPORT
    and they should point to a PostgreSQL database.
    """
    # Anchor the search at this file's directory so the command does not
    # depend on the caller's current working directory.
    sql_dir = ROOT_PATH / 'dirtyduck'

    # The scripts carry numeric prefixes (01_, 02_, 04_, ...) and later ones
    # read tables created by earlier ones; rglob makes no ordering promise,
    # so sort explicitly before running them.
    for sql_file in sorted(sql_dir.rglob('*.sql')):
        yield context.local['psql']['-f', str(sql_file)]
2 changes: 1 addition & 1 deletion dirtyduck/food_db/01_create_inspections_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ create table if not exists raw.inspections (
location text
);

copy raw.inspections from program 'bzcat /tmp/inspections_2014_2017.csv.bz2' HEADER CSV QUOTE '"';
copy raw.inspections from program 'bzcat ./inspections_2014_2017.csv.bz2' HEADER CSV QUOTE '"';
5 changes: 2 additions & 3 deletions dirtyduck/food_db/02_create_cleaned_inspections_table.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@ create table cleaned.inspections as (
btrim(lower(regexp_replace(type, 'liquor', 'task force', 'gi')))
from 'canvass|task force|complaint|food poisoning|consultation|license|tag removal') as type,
date,
-- point(longitude, latitude) as location
ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)::geography as location -- We use geography so the measurements are in meters
point(longitude, latitude) as location
--ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)::geography as location -- We use geography so the measurements are in meters
from raw.inspections
where zip is not null -- removing NULL zip codes
)

select * from cleaned where type is not null
);
14 changes: 5 additions & 9 deletions dirtyduck/food_db/04_create_semantic_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ create table semantic.entities as (
license_num, facility, facility_aka, facility_type, address,
date asc -- IMPORTANT!!
)

select
row_number() over (order by start_time asc ) as entity_id,
license_num,
Expand All @@ -53,23 +52,21 @@ create index entities_facility_type_ix on semantic.entities (facility_type);
create index entities_zip_code_ix on semantic.entities (zip_code);

-- Spatial index
create index entities_location_gix on semantic.entities using gist (location);
-- create index entities_location_gix on semantic.entities using gist (location);

create index entities_full_key_ix on semantic.entities (license_num, facility, facility_aka, facility_type, address);

drop table if exists semantic.events cascade;

create table semantic.events as (

with entities as (
select * from semantic.entities
),

inspections as (
select
i.inspection, i.type, i.date, i.risk, i.result,
i.license_num, i.facility, i.facility_aka,
i.facility_type, i.address, i.zip_code, i.location,
i.facility_type, i.address, i.zip_code,-- i.location,
jsonb_agg(
jsonb_build_object(
'code', v.code,
Expand All @@ -86,14 +83,13 @@ create table semantic.events as (
on i.inspection = v.inspection
group by
i.inspection, i.type, i.license_num, i.facility,
i.facility_aka, i.facility_type, i.address, i.zip_code, i.location,
i.facility_aka, i.facility_type, i.address, i.zip_code, --i.location,
i.date, i.risk, i.result
)

select
i.inspection as event_id,
e.entity_id, i.type, i.date, i.risk, i.result,
e.facility_type, e.zip_code, e.location,
e.facility_type, e.zip_code, --e.location,
i.violations
from
entities as e
Expand All @@ -111,7 +107,7 @@ create index events_facility_type_ix on semantic.events (facility_type);
create index events_zip_code_ix on semantic.events (zip_code);

-- Spatial index
create index events_location_gix on semantic.events using gist (location);
-- create index events_location_gix on semantic.events using gist (location);

-- JSONB indices
create index events_violations on semantic.events using gin(violations);
Expand Down

0 comments on commit ffabeb1

Please sign in to comment.