From ffabeb111347968f147528997a64c620245f3941 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adolfo=20De=20Un=C3=A1nue?=
Date: Sat, 20 Mar 2021 17:15:12 -0600
Subject: [PATCH] Using argcmdr for dirtyduck (Related to #749)

---
Notes (not part of the commit message):
* db_setup runs the SQL scripts in sorted order and resolves them relative
  to ROOT_PATH; the blob id on the dirtyduck.py "index" line is therefore
  stale.
* Leading whitespace on SQL context lines was reconstructed; use
  `git apply --ignore-whitespace` if the context does not match.

 dirtyduck.py                                  | 25 +++++++++++++++++++++++++
 .../food_db/01_create_inspections_table.sql   |  2 +-
 .../02_create_cleaned_inspections_table.sql   |  5 ++---
 .../food_db/04_create_semantic_tables.sql     | 14 +++++---------
 4 files changed, 33 insertions(+), 13 deletions(-)
 create mode 100644 dirtyduck.py

diff --git a/dirtyduck.py b/dirtyduck.py
new file mode 100644
index 000000000..619fd94d4
--- /dev/null
+++ b/dirtyduck.py
@@ -0,0 +1,25 @@
+import argparse
+from pathlib import Path
+
+from argcmdr import local, LocalRoot, Local
+
+ROOT_PATH = Path(__file__).parent.resolve()
+
+class DirtyDuck(LocalRoot):
+    """Commands for the Dirtyduck tutorial"""
+    pass
+
+@DirtyDuck.register
+def db_setup(context, args):
+    """Set up the dirtyduck database.
+
+    Runs every *.sql file found under dirtyduck/ through psql.
+    The following environment variables should be available:
+    PGHOST, PGDATABASE, PGUSER, PGPASSWORD, PGPORT
+    and obviously they should point to a PostgreSQL database."""
+    # The scripts are numbered (01_, 02_, ...) and later ones read tables
+    # created by earlier ones, so run them in sorted order; rglob() by itself
+    # does not promise any ordering. Anchoring on ROOT_PATH keeps the command
+    # independent of the current working directory.
+    for sql_file in sorted((ROOT_PATH / 'dirtyduck').rglob('*.sql')):
+        yield context.local['psql']['-f', str(sql_file)]
diff --git a/dirtyduck/food_db/01_create_inspections_table.sql b/dirtyduck/food_db/01_create_inspections_table.sql
index 7875cd90e..a89a5c786 100644
--- a/dirtyduck/food_db/01_create_inspections_table.sql
+++ b/dirtyduck/food_db/01_create_inspections_table.sql
@@ -21,4 +21,4 @@ create table if not exists raw.inspections (
     location text
 );
 
-copy raw.inspections from program 'bzcat /tmp/inspections_2014_2017.csv.bz2' HEADER CSV QUOTE '"';
+copy raw.inspections from program 'bzcat ./inspections_2014_2017.csv.bz2' HEADER CSV QUOTE '"';
diff --git a/dirtyduck/food_db/02_create_cleaned_inspections_table.sql b/dirtyduck/food_db/02_create_cleaned_inspections_table.sql
index ba44e4810..a2c019a83 100644
--- a/dirtyduck/food_db/02_create_cleaned_inspections_table.sql
+++ b/dirtyduck/food_db/02_create_cleaned_inspections_table.sql
@@ -21,11 +21,10 @@ create table cleaned.inspections as (
                 btrim(lower(regexp_replace(type, 'liquor', 'task force', 'gi')))
             from 'canvass|task force|complaint|food poisoning|consultation|license|tag removal') as type,
             date,
-            -- point(longitude, latitude) as location
-            ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)::geography as location -- We use geography so the measurements are in meters
+            point(longitude, latitude) as location
+            --ST_SetSRID(ST_MakePoint(longitude, latitude), 4326)::geography as location -- We use geography so the measurements are in meters
         from raw.inspections
         where zip is not null -- removing NULL zip codes
             )
-
     select * from cleaned where type is not null
         );
diff --git a/dirtyduck/food_db/04_create_semantic_tables.sql b/dirtyduck/food_db/04_create_semantic_tables.sql
index 958fcd5db..7e1a0bc2c 100644
--- a/dirtyduck/food_db/04_create_semantic_tables.sql
+++ b/dirtyduck/food_db/04_create_semantic_tables.sql
@@ -30,7 +30,6 @@ create table semantic.entities as (
             license_num, facility, facility_aka, facility_type, address,
             date asc -- IMPORTANT!!
             )
-
     select
         row_number() over (order by start_time asc ) as entity_id,
         license_num,
@@ -53,23 +52,21 @@ create index entities_facility_type_ix on semantic.entities (facility_type);
 create index entities_zip_code_ix on semantic.entities (zip_code);
 
 -- Spatial index
-create index entities_location_gix on semantic.entities using gist (location);
+-- create index entities_location_gix on semantic.entities using gist (location);
 
 create index entities_full_key_ix on semantic.entities (license_num, facility, facility_aka, facility_type, address);
 
 drop table if exists semantic.events cascade;
 
 create table semantic.events as (
-
         with entities as (
         select * from semantic.entities
             ),
-
         inspections as (
         select
             i.inspection, i.type, i.date, i.risk, i.result,
             i.license_num, i.facility, i.facility_aka,
-            i.facility_type, i.address, i.zip_code, i.location,
+            i.facility_type, i.address, i.zip_code,-- i.location,
             jsonb_agg(
                 jsonb_build_object(
                     'code', v.code,
@@ -86,14 +83,13 @@ create table semantic.events as (
             on i.inspection = v.inspection
         group by
             i.inspection, i.type, i.license_num, i.facility,
-            i.facility_aka, i.facility_type, i.address, i.zip_code, i.location,
+            i.facility_aka, i.facility_type, i.address, i.zip_code, --i.location,
             i.date, i.risk, i.result
             )
-
     select
         i.inspection as event_id,
         e.entity_id, i.type, i.date, i.risk, i.result,
-        e.facility_type, e.zip_code, e.location,
+        e.facility_type, e.zip_code, --e.location,
         i.violations
     from
         entities as e
@@ -111,7 +107,7 @@ create index events_facility_type_ix on semantic.events (facility_type);
 create index events_zip_code_ix on semantic.events (zip_code);
 
 -- Spatial index
-create index events_location_gix on semantic.events using gist (location);
+-- create index events_location_gix on semantic.events using gist (location);
 
 -- JSONB indices
 create index events_violations on semantic.events using gin(violations);