diff --git a/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf index 7e84ac4a03..7c96dcea72 100644 --- a/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/OzoneCensusTractPollution.tmcf @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation observationDate: C:OzoneCTPollution->date variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: PartsPerBillion value: C:OzoneCTPollution->Value Node: E:OzoneCTPollution->E2 diff --git a/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf index 95c8551eee..579a07dcb0 100644 --- a/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/OzoneCountyPollution.tmcf @@ -6,34 +6,34 @@ Node: E:OzoneCountyPollution->E1 observationAbout: C:OzoneCountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date -value: C:OzoneCountyPollution->O3_mean_pred +value: C:OzoneCountyPollution->o3_mean_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: PartsPerBillion variableMeasured: dcs:Mean_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E2 observationAbout: C:OzoneCountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date -value: C:OzoneCountyPollution->O3_med_pred +value: C:OzoneCountyPollution->o3_med_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: PartsPerBillion variableMeasured: dcs:Median_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E3 observationAbout: C:OzoneCountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date -value: 
C:OzoneCountyPollution->O3_max_pred +value: C:OzoneCountyPollution->o3_max_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: PartsPerBillion variableMeasured: dcs:Max_Concentration_AirPollutant_Ozone Node: E:OzoneCountyPollution->E4 observationAbout: C:OzoneCountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:OzoneCountyPollution->date -value: C:OzoneCountyPollution->O3_pop_pred +value: C:OzoneCountyPollution->o3_pop_pred observationPeriod: "P8H" -unit: parts per billion (ppb) +unit: PartsPerBillion variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_Ozone diff --git a/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf index 3b97d68a94..5d14798ff1 100644 --- a/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/PM25CensusTractPollution.tmcf @@ -4,7 +4,7 @@ typeOf: dcs:StatVarObservation observationDate: C:PM25CTPollution->date variableMeasured: dcs:Mean_Concentration_AirPollutant_PM2.5 observationPeriod: "P24H" -unit: μg/m3 +unit: dcs:MicrogramsPerCubicMeter value: C:PM25CTPollution->Value Node: E:PM25CTPollution->E2 diff --git a/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf b/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf index c2a5620056..26fb09c53c 100644 --- a/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf +++ b/scripts/us_cdc/environmental_health_toxicology/PM25CountyPollution.tmcf @@ -2,34 +2,34 @@ Node: E:PM25CountyPollution->E1 observationAbout: C:PM25CountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date -value: C:PM25CountyPollution->PM25_mean_pred +value: C:PM25CountyPollution->pm25_mean_pred observationPeriod: "P24H" -unit: μg/m3 +unit: MicrogramsPerCubicMeter variableMeasured: 
dcs:Mean_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E2 observationAbout: C:PM25CountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:PPM25CountyPollution->date -value: C:PM25CountyPollution->PM25_med_pred +value: C:PM25CountyPollution->pm25_med_pred observationPeriod: "P24H" -unit: μg/m3 +unit: MicrogramsPerCubicMeter variableMeasured: dcs:Median_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E3 observationAbout: C:PM25CountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date -value: C:PM25CountyPollution->PM25_max_pred +value: C:PM25CountyPollution->pm25_max_pred observationPeriod: "P24H" -unit: μg/m3 +unit: MicrogramsPerCubicMeter variableMeasured: dcs:Max_Concentration_AirPollutant_PM2.5 Node: E:PM25CountyPollution->E4 observationAbout: C:PM25CountyPollution->dcid typeOf: dcs:StatVarObservation observationDate: C:PM25CountyPollution->date -value: C:PM25CountyPollution->PM25_pop_pred +value: C:PM25CountyPollution->pm25_pop_pred observationPeriod: "P24H" -unit: μg/m3 +unit: MicrogramsPerCubicMeter variableMeasured: dcs:PopulationWeighted_Concentration_AirPollutant_PM2.5 diff --git a/scripts/us_cdc/environmental_health_toxicology/README.md b/scripts/us_cdc/environmental_health_toxicology/README.md index 5189046f66..7061594d56 100644 --- a/scripts/us_cdc/environmental_health_toxicology/README.md +++ b/scripts/us_cdc/environmental_health_toxicology/README.md @@ -109,18 +109,63 @@ The expected output of this test can be found in [`small_Palmer_expected.csv`](h #### Processing Steps -`@input_file_name` - path to the input csv file to be cleaned +To clean the precipitation index data files, run: -`@output_file_name` - path to write the cleaned csv file +```bash +$ python3 parse_precipitation_index.py input_file_name output_file_name +``` To clean the air quality data files, run: -```bash -$ python3 parse_air_quality.py input_file_name output_file_name -``` +1. 
Import name: CDC_PM25CensusTract -To clean the precipitation index data files, run: + Command to download the file + ============================ + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_PM25CensusTract --mode=download + -```bash -$ python3 parse_precipitation_index.py input_file_name output_file_name -``` + Command to process the file + =========================== + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_PM25CensusTract --mode=process + + +2. Import name: CDC_OzoneCensusTract + + Command to download the file + ============================ + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_OzoneCensusTract --mode=download + + + Command to process the file + =========================== + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_OzoneCensusTract --mode=process + +3. Import name: CDC_PM25County + + Command to download the file + ============================ + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_PM25County --mode=download + + + Command to process the file + =========================== + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_PM25County --mode=process + +4. 
Import name: CDC_OzoneCounty + + Command to download the file + ============================ + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_OzoneCounty --mode=download + + + Command to process the file + =========================== + + $ python3 scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py CDC_OzoneCounty --mode=process \ No newline at end of file diff --git a/scripts/us_cdc/environmental_health_toxicology/manifest.json b/scripts/us_cdc/environmental_health_toxicology/manifest.json new file mode 100644 index 0000000000..38bd0cf99b --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/manifest.json @@ -0,0 +1,104 @@ +{ + "import_specifications": [ + { + "import_name": "CDC_PM25CensusTract", + "curator_emails": [ + "sudhisha@google.com" + ], + "provenance_url": "https://data.cdc.gov/browse?category=Environmental+Health+%26+Toxicology", + "provenance_description": "Environmental Health Toxicology", + "scripts": [ + "parse_air_quality.py CDC_PM25CensusTract" + ], + "import_inputs": [ + { + "template_mcf": "PM25CensusTractPollution.tmcf", + "cleaned_csv": "output/PM2.5CensusTract_0.csv" + }, + { + "template_mcf": "PM25CensusTractPollution.tmcf", + "cleaned_csv": "output/PM2.5CensusTract_1.csv" + }, + { + "template_mcf": "PM25CensusTractPollution.tmcf", + "cleaned_csv": "output/PM2.5CensusTract_2.csv" + }, + { + "template_mcf": "PM25CensusTractPollution.tmcf", + "cleaned_csv": "output/PM2.5CensusTract_3.csv" + } + + ], + "cron_schedule": "0 1 2 * *" + }, + { + "import_name": "CDC_OzoneCensusTract", + "curator_emails": [ + "sudhisha@google.com" + ], + "provenance_url": "https://data.cdc.gov/browse?category=Environmental+Health+%26+Toxicology", + "provenance_description": "Environmental Health Toxicology", + "scripts": [ + "parse_air_quality.py CDC_OzoneCensusTract" + ], + "import_inputs": [ + { + "template_mcf": "OzoneCensusTractPollution.tmcf", + "cleaned_csv": 
"output/Census_Tract_Level_Ozone_Concentrations_0.csv" + }, + { + "template_mcf": "OzoneCensusTractPollution.tmcf", + "cleaned_csv": "output/Census_Tract_Level_Ozone_Concentrations_1.csv" + }, + { + "template_mcf": "OzoneCensusTractPollution.tmcf", + "cleaned_csv": "output/Census_Tract_Level_Ozone_Concentrations_2.csv" + }, + { + "template_mcf": "OzoneCensusTractPollution.tmcf", + "cleaned_csv": "output/Census_Tract_Level_Ozone_Concentrations_3.csv" + } + + ], + "cron_schedule": "0 1 3 * *" + }, + { + "import_name": "CDC_PM25County", + "curator_emails": [ + "sudhisha@google.com" + ], + "provenance_url": "https://data.cdc.gov/browse?category=Environmental+Health+%26+Toxicology", + "provenance_description": "Environmental Health Toxicology", + "scripts": [ + "parse_air_quality.py CDC_PM25County" + ], + "import_inputs": [ + { + "template_mcf": "PM25CountyPollution.tmcf", + "cleaned_csv": "output/PM25county.csv" + } + + ], + "cron_schedule": "0 1 4 * *" + }, + { + "import_name": "CDC_OzoneCounty", + "curator_emails": [ + "sudhisha@google.com" + ], + "provenance_url": "https://data.cdc.gov/browse?category=Environmental+Health+%26+Toxicology", + "provenance_description": "Environmental Health Toxicology", + "scripts": [ + "parse_air_quality.py CDC_OzoneCounty" + ], + "import_inputs": [ + { + "template_mcf": "OzoneCountyPollution.tmcf", + "cleaned_csv": "output/OzoneCounty.csv" + } + + ], + "cron_schedule": "0 1 5 * *" + } + ] + } diff --git a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py index 1c658b3d8a..5f52ae7226 100644 --- a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py +++ b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality.py @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -22,12 +22,101 @@ python3 parse_air_quality.py input_file output_file ''' -import sys +import json +import os +import requests import pandas as pd +from absl import app, logging, flags +from retry import retry +from pathlib import Path +import sys + +_FLAGS = flags.FLAGS + +flags.DEFINE_string('mode', '', 'Options: download or process') +flags.DEFINE_string('input_file_path', 'input_files', 'Input files path') +flags.DEFINE_string('output_file_path', 'output', 'Output files path') + +_MODULE_DIR = os.path.dirname(os.path.abspath(__file__)) +_INPUT_FILE_PATH = None +_OUTOUT_FILE_PATH = None + +# List of the 4 imports: for each import, the source URL of every file to +# download and the input/output file names used for that file. +import_configs = [{ + "import_name": + "CDC_PM25CensusTract", + "files": [{ + "url": "https://data.cdc.gov/resource/v5qq-ktfc.csv", + "input_file_name": "PM2.5CensusTractPollution_input_0.csv", + "output_file_name": "PM2.5CensusTract_0.csv" + }, { + "url": "https://data.cdc.gov/resource/ujra-cbx5.csv", + "input_file_name": "PM2.5CensusTractPollution_input_1.csv", + "output_file_name": "PM2.5CensusTract_1.csv" + }, { + "url": "https://data.cdc.gov/resource/qjxm-7fny.csv", + "input_file_name": "PM2.5CensusTractPollution_input_2.csv", + "output_file_name": "PM2.5CensusTract_2.csv" + }, { + "url": "https://data.cdc.gov/resource/96sd-hxdt.csv", + "input_file_name": "PM2.5CensusTractPollution_input_3.csv", + "output_file_name": "PM2.5CensusTract_3.csv" + }] +}, { + "import_name": + "CDC_OzoneCensusTract", + "files": [{ + "url": "https://data.cdc.gov/resource/v76h-zdce.csv", + "input_file_name": "Census_Tract_Level_Ozone_Concentrations_input.csv", + "output_file_name": "Census_Tract_Level_Ozone_Concentrations_0.csv" + }, { + "url": + "https://data.cdc.gov/resource/xm94-zmtx.csv", + "input_file_name": + "Census_Tract_Level_Ozone_Concentrations_input_1.csv", + "output_file_name": + "Census_Tract_Level_Ozone_Concentrations_1.csv" + }, { + "url": + 
"https://data.cdc.gov/resource/847z-pxin.csv", + "input_file_name": + "Census_Tract_Level_Ozone_Concentrations_input_2.csv", + "output_file_name": + "Census_Tract_Level_Ozone_Concentrations_2.csv" + }, { + "url": + "https://data.cdc.gov/resource/hf2a-3ebq.csv", + "input_file_name": + "Census_Tract_Level_Ozone_Concentrations_input_3.csv", + "output_file_name": + "Census_Tract_Level_Ozone_Concentrations_3.csv" + }] +}, { + "import_name": + "CDC_PM25County", + "files": [{ + "url": "https://data.cdc.gov/resource/dqwm-pbi7.csv", + "input_file_name": "PM2.5County_input.csv", + "output_file_name": "PM25county.csv" + }] +}, { + "import_name": + "CDC_OzoneCounty", + "files": [{ + "url": "https://data.cdc.gov/resource/jcn4-jcv5.csv", + "input_file_name": "OzoneCounty_input.csv", + "output_file_name": "OzoneCounty.csv" + }] +}] + +record_count_query = '?$query=select%20count(*)%20as%20COLUMN_ALIAS_GUARD__count' # Mapping of column names in file to StatVar names. STATVARS = { "DS_PM_pred": "Mean_Concentration_AirPollutant_PM2.5", + "ds_pm_pred": "Mean_Concentration_AirPollutant_PM2.5", + "ds_pm_stdd": "Mean_Concentration_AirPollutant_PM2.5_StandardError", "DS_O3_pred": "Mean_Concentration_AirPollutant_Ozone", "PM25_max_pred": "Max_Concentration_AirPollutant_PM2.5", "PM25_med_pred": "Median_Concentration_AirPollutant_PM2.5", @@ -56,14 +145,12 @@ } -def main(): - """Main function to generate the cleaned csv file.""" - file_path = sys.argv[1] - output_file = sys.argv[2] - clean_air_quality_data(file_path, output_file) +# this method is applicable only for "census tract PM25" +def add_prefix_zero(value, length): + return value.zfill(length) -def clean_air_quality_data(file_path, output_file): +def clean_air_quality_data(file_path, output_file, importname): """ Args: file_path: path to a comma-separated CDC air quality data file @@ -71,39 +158,164 @@ def clean_air_quality_data(file_path, output_file): Returns: a cleaned csv file """ - print("Cleaning file...") - data = 
pd.read_csv(file_path) - if "Ozone" in file_path and "County" in file_path: - data["Month"] = data["Month"].map(MONTH_MAP) - data["date"] = pd.to_datetime(data[["Year", "Month", "Day"]], - yearfirst=True) - else: - data["date"] = pd.to_datetime(data["date"], yearfirst=True) - if "PM2.5" in file_path: - census_tract = "DS_PM" - elif "Ozone" in file_path: - census_tract = "DS_O3" - if "Census" in file_path: - data = pd.melt(data, - id_vars=[ - 'year', 'date', 'statefips', 'countyfips', 'ctfips', - 'latitude', 'longitude', census_tract + '_stdd' - ], - value_vars=[str(census_tract + '_pred')], - var_name='StatisticalVariable', - value_name='Value') - data.rename(columns={census_tract + '_stdd': 'Error'}, inplace=True) - data["dcid"] = "geoId/" + data["ctfips"].astype(str) - data['StatisticalVariable'] = data['StatisticalVariable'].map(STATVARS) - elif "County" in file_path and "PM" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) - elif "County" in file_path and "Ozone" in file_path: - data["countyfips"] = "1200" + data["countyfips"].astype(str) - data["dcid"] = "geoId/" + data["countyfips"].astype(str) - data.to_csv(output_file, float_format='%.6f', index=False) - print("Finished cleaning file!") + global output_file_name + logging.info(f"import name from command line {importname}") + for config1 in import_configs: + if config1["import_name"] == importname: + files = config1["files"] + for file_info in files: + output_file_name = file_info["output_file_name"] + logging.info(f"output_file_name: {output_file_name}") + + for file in os.listdir(file_path): + logging.info(f"file_path: {file_path}") + + if str(file).endswith('.csv'): + logging.info(f"Cleaning {file} ....") + data = pd.read_csv(os.path.join(file_path, file)) + data["date"] = pd.to_datetime(data["date"], + yearfirst=True) + data["date"] = pd.to_datetime(data["date"], + format="%Y-%m-%d") + + if "PM2.5" in file: + 
census_tract = "ds_pm" + elif "Ozone" in file: + census_tract = "ds_o3" + if "Census" in file: + if "PM2.5" in file: + data = pd.melt(data, + id_vars=[ + 'year', 'date', 'statefips', + 'countyfips', 'ctfips', + 'latitude', 'longitude' + ], + value_vars=[ + str(census_tract + '_pred'), + str(census_tract + '_stdd') + ], + var_name='StatisticalVariable', + value_name='Value') + + elif "Ozone" in file: + data = pd.melt( + data, + id_vars=[ + 'year', 'date', 'statefips', + 'countyfips', 'ctfips', 'latitude', + 'longitude', census_tract + '_stdd' + ], + value_vars=[str(census_tract + '_pred')], + var_name='StatisticalVariable', + value_name='Value') + data.rename( + columns={census_tract + '_stdd': 'Error'}, + inplace=True) + max_length = data['ctfips'].astype( + str).str.len().max() + data['ctfips'] = data['ctfips'].astype(str).apply( + lambda x: add_prefix_zero(x, max_length)) + data["dcid"] = "geoId/" + data["ctfips"].astype(str) + data['StatisticalVariable'] = data[ + 'StatisticalVariable'].map(STATVARS) + elif "County" in file and "PM" in file: + data["statefips"] = data["statefips"].astype( + str).str.zfill(2) + data["countyfips"] = data["countyfips"].astype( + str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data[ + "countyfips"] + elif "County" in file and "Ozone" in file: + data["statefips"] = data["statefips"].astype( + str).str.zfill(2) + data["countyfips"] = data["countyfips"].astype( + str).str.zfill(3) + data["dcid"] = "geoId/" + data["statefips"] + data[ + "countyfips"] + output_file_path_with_file_name = output_file + "/" + output_file_name + data.to_csv(output_file_path_with_file_name, + float_format='%.6f', + index=False) + logging.info( + f"Finished cleaning file {output_file_name}!") + + +def download_files(importname): + global _INPUT_FILE_PATH + global import_name + global url_new + + @retry(tries=3, delay=2, backoff=2) + def download_with_retry(url, input_file_name): + logging.info(f"Downloading file from URL: {url}") + response = 
requests.get(url) + response.raise_for_status() + if response.status_code == 200: + if not response.content: + logging.fatal( + f"No data available for URL: {url}. Aborting download.") + return + filename = os.path.join(_INPUT_FILE_PATH, input_file_name) + with open(filename, 'wb') as f: + f.write(response.content) + else: + logging.error( + f"Failed to download file from URL: {url}. Status code: {response.status_code}" + ) + + try: + logging.info(f"import name from command line {importname}") + for config in import_configs: + import_name = config["import_name"] + for config1 in import_configs: + if config1["import_name"] == importname: + import_name = config1["import_name"] + files = config1["files"] + for file_info in files: + url_new = file_info["url"] + logging.info(f"URL from link {url_new}") + input_file_name = file_info["input_file_name"] + logging.info(f"Input File Name {input_file_name}") + + get_record_count = requests.get( + url_new.replace('.csv', record_count_query)) + if get_record_count.status_code == 200: + record_count = json.loads( + get_record_count.text + )[0]['COLUMN_ALIAS_GUARD__count'] + logging.info( + f"Numbers of records found for the URL {url_new} is {record_count}" + ) + url_new = f"{url_new}?$limit={record_count}&$offset=0" + download_with_retry(url_new, input_file_name) + + except Exception as e: + logging.fatal(f"Error downloading URL {url_new} - {e}") + + +def main(_): + """Main function to generate the cleaned csv file.""" + global _INPUT_FILE_PATH, _OUTOUT_FILE_PATH + _INPUT_FILE_PATH = os.path.join(_MODULE_DIR, _FLAGS.input_file_path) + _OUTOUT_FILE_PATH = os.path.join(_MODULE_DIR, _FLAGS.output_file_path) + Path(_INPUT_FILE_PATH).mkdir(parents=True, exist_ok=True) + Path(_OUTOUT_FILE_PATH).mkdir(parents=True, exist_ok=True) + mode = _FLAGS.mode + # Get command-line arguments + importname = sys.argv[1] + + if mode == "": + logging.info(f"Inside mode download and process") + download_files(importname) + 
clean_air_quality_data(_INPUT_FILE_PATH, _OUTOUT_FILE_PATH, importname) + + if mode == "download": + logging.info(f"Inside mode download") + download_files(importname) + if mode == "process": + logging.info(f"Inside mode process") + clean_air_quality_data(_INPUT_FILE_PATH, _OUTOUT_FILE_PATH, importname) if __name__ == "__main__": - main() + app.run(main) diff --git a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality_test.py b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality_test.py index 53231f7433..60d241e246 100644 --- a/scripts/us_cdc/environmental_health_toxicology/parse_air_quality_test.py +++ b/scripts/us_cdc/environmental_health_toxicology/parse_air_quality_test.py @@ -11,45 +11,97 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -''' -Author: Padma Gundapaneni @padma-g -Date: 7/28/21 -Description: This script contains unit tests for the parse_air_quality.py script. 
-@input_file filepath to the original csv that needs to be cleaned -@output_file filepath to the csv to which the cleaned data is written -python3 parse_air_quality_test.py input_file output_file -''' - import unittest import os + +from absl import logging + +_MODULE_DIR = os.path.dirname(__file__) +TEST_DATA_DIR = os.path.join(_MODULE_DIR, 'test_data') +INPUT_DIR = 'input_files' +OUTPUT_DIR = 'actual_output_files' +OUTPUT_FILES = 'expected_output_files' + from .parse_air_quality import clean_air_quality_data -module_dir_ = os.path.dirname(__file__) +# test data for each import type +TEST_DATA = { + "CDC_OzoneCensusTract": { + "input_dir": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCensusTract", INPUT_DIR), + "output_dir": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCensusTract", OUTPUT_DIR), + "expected_file": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCensusTract", OUTPUT_FILES, + "Census_Tract_Level_Ozone_Concentrations_0.csv"), + "actual_file": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCensusTract", OUTPUT_DIR, + "Census_Tract_Level_Ozone_Concentrations_0.csv"), + }, + "CDC_OzoneCounty": { + "input_dir": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCounty", INPUT_DIR), + "output_dir": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCounty", OUTPUT_DIR), + "expected_file": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCounty", OUTPUT_FILES, + "OzoneCounty.csv"), + "actual_file": + os.path.join(TEST_DATA_DIR, "CDC_OzoneCounty", OUTPUT_DIR, + "OzoneCounty.csv"), + }, + "CDC_PM25CensusTract": { + "input_dir": + os.path.join(TEST_DATA_DIR, "CDC_PM25CensusTract", INPUT_DIR), + "output_dir": + os.path.join(TEST_DATA_DIR, "CDC_PM25CensusTract", OUTPUT_DIR), + "expected_file": + os.path.join(TEST_DATA_DIR, "CDC_PM25CensusTract", OUTPUT_FILES, + "PM2.5CensusTract_0.csv"), + "actual_file": + os.path.join(TEST_DATA_DIR, "CDC_PM25CensusTract", OUTPUT_DIR, + "PM2.5CensusTract_0.csv"), + }, + "CDC_PM25County": { + "input_dir": + os.path.join(TEST_DATA_DIR, "CDC_PM25County", INPUT_DIR), + "output_dir": + 
os.path.join(TEST_DATA_DIR, "CDC_PM25County", OUTPUT_DIR), + "expected_file": + os.path.join(TEST_DATA_DIR, "CDC_PM25County", OUTPUT_FILES, + "PM25county.csv"), + "actual_file": + os.path.join(TEST_DATA_DIR, "CDC_PM25County", OUTPUT_DIR, + "PM25county.csv"), + }, +} class TestParseAirQuality(unittest.TestCase): - """ - Tests the functions in parse_air_quality.py. - """ + + def __init__(self, methodName: str = ...) -> None: + super().__init__(methodName) def test_clean_air_quality_data(self): """ - Tests the clean_air_quality_data function. + Tests the clean_air_quality_data function for all the 4 imports. """ - print(module_dir_) - test_csv = os.path.join(module_dir_, 'test_data/small_Ozone_County.csv') - output_csv = os.path.join(module_dir_, - 'test_data/small_Ozone_County_output.csv') - clean_air_quality_data(test_csv, output_csv) - - expected_csv = os.path.join( - module_dir_, 'test_data/small_Ozone_County_expected.csv') - with open(output_csv, 'r') as test: - test_str: str = test.read() - with open(expected_csv, 'r') as expected: - expected_str: str = expected.read() - self.assertEqual(test_str, expected_str) - os.remove(output_csv) + for import_type, test_data in TEST_DATA.items(): + output_dir = test_data["output_dir"] + os.makedirs(output_dir, exist_ok=True) + + clean_air_quality_data(test_data["input_dir"], + test_data["output_dir"], import_type) + + with open(test_data["actual_file"], + encoding="utf-8") as actual_csv_file: + actual_csv_data = actual_csv_file.read().strip() + + with open(test_data["expected_file"], + encoding="utf-8") as expected_csv_file: + expected_csv_data = expected_csv_file.read().strip() + + self.assertEqual(expected_csv_data, actual_csv_data) if __name__ == '__main__': diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_0.csv 
b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_0.csv new file mode 100644 index 0000000000..14ad39ed91 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_0.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,Error,StatisticalVariable,Value,dcid +2001,2001-01-01,1,1,1001020100,32.477180,-86.490010,5.931458,,31.659713,geoId/1001020100 +2001,2001-01-01,1,1,1001020200,32.474250,-86.473390,5.703084,,31.939058,geoId/1001020200 +2001,2001-01-01,1,1,1001020300,32.475440,-86.460200,5.779281,,31.859155,geoId/1001020300 +2001,2001-01-01,1,1,1001020400,32.472040,-86.443700,5.784730,,31.681802,geoId/1001020400 +2001,2001-01-01,1,1,1001020500,32.458920,-86.422710,5.797620,,31.752820,geoId/1001020500 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_1.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_1.csv new file mode 100644 index 0000000000..14ad39ed91 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_1.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,Error,StatisticalVariable,Value,dcid +2001,2001-01-01,1,1,1001020100,32.477180,-86.490010,5.931458,,31.659713,geoId/1001020100 +2001,2001-01-01,1,1,1001020200,32.474250,-86.473390,5.703084,,31.939058,geoId/1001020200 +2001,2001-01-01,1,1,1001020300,32.475440,-86.460200,5.779281,,31.859155,geoId/1001020300 +2001,2001-01-01,1,1,1001020400,32.472040,-86.443700,5.784730,,31.681802,geoId/1001020400 
+2001,2001-01-01,1,1,1001020500,32.458920,-86.422710,5.797620,,31.752820,geoId/1001020500 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_2.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_2.csv new file mode 100644 index 0000000000..14ad39ed91 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_2.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,Error,StatisticalVariable,Value,dcid +2001,2001-01-01,1,1,1001020100,32.477180,-86.490010,5.931458,,31.659713,geoId/1001020100 +2001,2001-01-01,1,1,1001020200,32.474250,-86.473390,5.703084,,31.939058,geoId/1001020200 +2001,2001-01-01,1,1,1001020300,32.475440,-86.460200,5.779281,,31.859155,geoId/1001020300 +2001,2001-01-01,1,1,1001020400,32.472040,-86.443700,5.784730,,31.681802,geoId/1001020400 +2001,2001-01-01,1,1,1001020500,32.458920,-86.422710,5.797620,,31.752820,geoId/1001020500 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_3.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_3.csv new file mode 100644 index 0000000000..14ad39ed91 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/expected_output_files/Census_Tract_Level_Ozone_Concentrations_3.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,Error,StatisticalVariable,Value,dcid +2001,2001-01-01,1,1,1001020100,32.477180,-86.490010,5.931458,,31.659713,geoId/1001020100 +2001,2001-01-01,1,1,1001020200,32.474250,-86.473390,5.703084,,31.939058,geoId/1001020200 
+2001,2001-01-01,1,1,1001020300,32.475440,-86.460200,5.779281,,31.859155,geoId/1001020300 +2001,2001-01-01,1,1,1001020400,32.472040,-86.443700,5.784730,,31.681802,geoId/1001020400 +2001,2001-01-01,1,1,1001020500,32.458920,-86.422710,5.797620,,31.752820,geoId/1001020500 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _2.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _2.csv new file mode 100644 index 0000000000..2f67a8a1c4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _2.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_o3_pred,ds_o3_stdd +2001,01JAN2001,1,1,1001020100,32.47718,-86.49001,31.659713,5.931458 +2001,01JAN2001,1,1,1001020200,32.47425,-86.47339,31.939058,5.703084 +2001,01JAN2001,1,1,1001020300,32.47544,-86.4602,31.859155,5.779281 +2001,01JAN2001,1,1,1001020400,32.47204,-86.4437,31.681802,5.78473 +2001,01JAN2001,1,1,1001020500,32.45892,-86.42271,31.75282,5.79762 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _3.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _3.csv new file mode 100644 index 0000000000..2f67a8a1c4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input _3.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_o3_pred,ds_o3_stdd +2001,01JAN2001,1,1,1001020100,32.47718,-86.49001,31.659713,5.931458 +2001,01JAN2001,1,1,1001020200,32.47425,-86.47339,31.939058,5.703084 
+2001,01JAN2001,1,1,1001020300,32.47544,-86.4602,31.859155,5.779281 +2001,01JAN2001,1,1,1001020400,32.47204,-86.4437,31.681802,5.78473 +2001,01JAN2001,1,1,1001020500,32.45892,-86.42271,31.75282,5.79762 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input.csv new file mode 100644 index 0000000000..2f67a8a1c4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_o3_pred,ds_o3_stdd +2001,01JAN2001,1,1,1001020100,32.47718,-86.49001,31.659713,5.931458 +2001,01JAN2001,1,1,1001020200,32.47425,-86.47339,31.939058,5.703084 +2001,01JAN2001,1,1,1001020300,32.47544,-86.4602,31.859155,5.779281 +2001,01JAN2001,1,1,1001020400,32.47204,-86.4437,31.681802,5.78473 +2001,01JAN2001,1,1,1001020500,32.45892,-86.42271,31.75282,5.79762 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input_1.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input_1.csv new file mode 100644 index 0000000000..2f67a8a1c4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCensusTract/input_files/Census_Tract_Level_Ozone_Concentrations_input_1.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_o3_pred,ds_o3_stdd +2001,01JAN2001,1,1,1001020100,32.47718,-86.49001,31.659713,5.931458 +2001,01JAN2001,1,1,1001020200,32.47425,-86.47339,31.939058,5.703084 +2001,01JAN2001,1,1,1001020300,32.47544,-86.4602,31.859155,5.779281 
+2001,01JAN2001,1,1,1001020400,32.47204,-86.4437,31.681802,5.78473 +2001,01JAN2001,1,1,1001020500,32.45892,-86.42271,31.75282,5.79762 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/expected_output_files/OzoneCounty.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/expected_output_files/OzoneCounty.csv new file mode 100644 index 0000000000..9a3f5d334b --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/expected_output_files/OzoneCounty.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,o3_max_pred,o3_med_pred,o3_mean_pred,o3_pop_pred,dcid +2016,2016-01-01,01,001,26.653300,26.402050,26.388967,26.431634,geoId/01001 +2016,2016-01-01,01,003,27.285500,26.058600,26.173639,26.089479,geoId/01003 +2016,2016-01-01,01,005,25.441200,25.020300,25.091133,25.080339,geoId/01005 +2016,2016-01-01,01,007,28.644400,28.328550,28.275825,28.361489,geoId/01007 +2016,2016-01-01,01,009,28.497900,28.098400,28.127722,28.109563,geoId/01009 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/input_files/OzoneCounty_input.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/input_files/OzoneCounty_input.csv new file mode 100644 index 0000000000..54d53a7dc5 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_OzoneCounty/input_files/OzoneCounty_input.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,o3_max_pred,o3_med_pred,o3_mean_pred,o3_pop_pred +2016,01JAN2016,1,1,26.6533,26.40205,26.3889666666666,26.4316337917991 +2016,01JAN2016,1,3,27.2855,26.0586,26.1736387096774,26.0894791171099 +2016,01JAN2016,1,5,25.4412,25.0203,25.0911333333333,25.0803386405726 +2016,01JAN2016,1,7,28.6444,28.32855,28.275825,28.3614885378008 +2016,01JAN2016,1,9,28.4979,28.0984,28.1277222222222,28.109563351208 diff --git 
a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_0.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_0.csv new file mode 100644 index 0000000000..edb1b396b4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_0.csv @@ -0,0 +1,9 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,StatisticalVariable,Value,dcid +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5,8.119343,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5,8.086446,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5,8.015730,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5,7.982991,geoId/50001961000 +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.401500,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.512964,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.392995,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.397569,geoId/50001961000 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_1.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_1.csv new file mode 100644 index 0000000000..edb1b396b4 --- /dev/null +++ 
b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_1.csv @@ -0,0 +1,9 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,StatisticalVariable,Value,dcid +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5,8.119343,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5,8.086446,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5,8.015730,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5,7.982991,geoId/50001961000 +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.401500,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.512964,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.392995,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.397569,geoId/50001961000 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_2.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_2.csv new file mode 100644 index 0000000000..edb1b396b4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_2.csv @@ -0,0 +1,9 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,StatisticalVariable,Value,dcid +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5,8.119343,geoId/50001960700 
+2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5,8.086446,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5,8.015730,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5,7.982991,geoId/50001961000 +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.401500,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.512964,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.392995,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.397569,geoId/50001961000 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_3.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_3.csv new file mode 100644 index 0000000000..edb1b396b4 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/expected_output_files/PM2.5CensusTract_3.csv @@ -0,0 +1,9 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,StatisticalVariable,Value,dcid +2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5,8.119343,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5,8.086446,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5,8.015730,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5,7.982991,geoId/50001961000 
+2001,2001-06-17,50,1,50001960700,44.010730,-73.106380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.401500,geoId/50001960700 +2001,2001-06-17,50,1,50001960800,43.988310,-73.159380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.512964,geoId/50001960800 +2001,2001-06-17,50,1,50001960900,43.901520,-73.289390,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.392995,geoId/50001960900 +2001,2001-06-17,50,1,50001961000,43.900050,-73.105380,Mean_Concentration_AirPollutant_PM2.5_StandardError,2.397569,geoId/50001961000 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 2.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 2.csv new file mode 100644 index 0000000000..9f042a0ab7 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 2.csv @@ -0,0 +1,5 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_pm_pred,ds_pm_stdd +2001,17JUN2001,50,1,50001960700,44.01073,-73.10638,8.119343,2.4015 +2001,17JUN2001,50,1,50001960800,43.98831,-73.15938,8.086446,2.512964 +2001,17JUN2001,50,1,50001960900,43.90152,-73.28939,8.01573,2.392995 +2001,17JUN2001,50,1,50001961000,43.90005,-73.10538,7.982991,2.397569 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 3.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 3.csv new file mode 100644 index 0000000000..9f042a0ab7 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_ 3.csv @@ -0,0 +1,5 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_pm_pred,ds_pm_stdd 
+2001,17JUN2001,50,1,50001960700,44.01073,-73.10638,8.119343,2.4015 +2001,17JUN2001,50,1,50001960800,43.98831,-73.15938,8.086446,2.512964 +2001,17JUN2001,50,1,50001960900,43.90152,-73.28939,8.01573,2.392995 +2001,17JUN2001,50,1,50001961000,43.90005,-73.10538,7.982991,2.397569 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_0.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_0.csv new file mode 100644 index 0000000000..9f042a0ab7 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_0.csv @@ -0,0 +1,5 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_pm_pred,ds_pm_stdd +2001,17JUN2001,50,1,50001960700,44.01073,-73.10638,8.119343,2.4015 +2001,17JUN2001,50,1,50001960800,43.98831,-73.15938,8.086446,2.512964 +2001,17JUN2001,50,1,50001960900,43.90152,-73.28939,8.01573,2.392995 +2001,17JUN2001,50,1,50001961000,43.90005,-73.10538,7.982991,2.397569 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_1.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_1.csv new file mode 100644 index 0000000000..9f042a0ab7 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25CensusTract/input_files/PM2.5CensusTractPollution_input_1.csv @@ -0,0 +1,5 @@ +year,date,statefips,countyfips,ctfips,latitude,longitude,ds_pm_pred,ds_pm_stdd +2001,17JUN2001,50,1,50001960700,44.01073,-73.10638,8.119343,2.4015 +2001,17JUN2001,50,1,50001960800,43.98831,-73.15938,8.086446,2.512964 +2001,17JUN2001,50,1,50001960900,43.90152,-73.28939,8.01573,2.392995 +2001,17JUN2001,50,1,50001961000,43.90005,-73.10538,7.982991,2.397569 diff --git 
a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/expected_output_files/PM25county.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/expected_output_files/PM25county.csv new file mode 100644 index 0000000000..8e287f03d0 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/expected_output_files/PM25county.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,pm25_max_pred,pm25_med_pred,pm25_mean_pred,pm25_pop_pred,dcid +2016,2016-01-01,01,001,12.035500,11.173300,11.307642,11.313675,geoId/01001 +2016,2016-01-01,01,003,9.117200,8.486000,8.497219,8.483280,geoId/01003 +2016,2016-01-01,01,005,8.898900,8.331700,8.422811,8.435805,geoId/01005 +2016,2016-01-01,01,007,11.730300,10.851400,10.992450,10.864566,geoId/01007 +2016,2016-01-01,01,009,13.224700,12.895900,12.703789,12.725276,geoId/01009 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/input_files/PM2.5County_input.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/input_files/PM2.5County_input.csv new file mode 100644 index 0000000000..e074a4f085 --- /dev/null +++ b/scripts/us_cdc/environmental_health_toxicology/test_data/CDC_PM25County/input_files/PM2.5County_input.csv @@ -0,0 +1,6 @@ +year,date,statefips,countyfips,pm25_max_pred,pm25_med_pred,pm25_mean_pred,pm25_pop_pred +2016,01JAN2016,1,1,12.0355,11.1733,11.3076416666666,11.3136752729304 +2016,01JAN2016,1,3,9.1172,8.486,8.49721935483871,8.48328045295752 +2016,01JAN2016,1,5,8.8989,8.3317,8.42281111111111,8.43580460984033 +2016,01JAN2016,1,7,11.7303,10.8514,10.99245,10.8645659208991 +2016,01JAN2016,1,9,13.2247,12.8959,12.7037888888888,12.7252755613783 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County.csv deleted file mode 100644 index a90abf5a4d..0000000000 --- 
a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County.csv +++ /dev/null @@ -1,20 +0,0 @@ -Year,Month,Day,statefips,countyfips,O3_max_pred,O3_med_pred,O3_mean_pred,O3_pop_pred -2001,JAN,1,01,1,31.939058,31.691988,31.6808585833333,31.6712262223708 -2001,JAN,1,01,3,33.646855,33.170271,32.9947749354838,32.9350697957369 -2001,JAN,1,01,5,34.288917,34.068507,34.0779537777777,34.0866306011945 -2001,JAN,1,01,7,30.349767,30.036093,29.93175575,29.991733426271 -2001,JAN,1,01,9,26.472655,25.776595,25.8575708888888,25.8724720383971 -2001,JAN,1,01,11,33.607696,33.252916,33.3046376666666,33.1696366148066 -2001,JAN,1,01,13,33.385263,32.992025,32.9911305555555,32.9857658772139 -2001,JAN,1,01,15,29.553653,28.873731,28.7909083870967,28.7797639908072 -2001,JAN,1,01,17,32.269248,32.032674,31.963699,31.9937919549028 -2001,JAN,1,01,19,28.613335,28.4270775,28.3153388333333,28.292539279195 -2001,JAN,1,01,21,31.132989,30.743824,30.6450868888888,30.572113369223 -2001,JAN,1,01,23,33.950801,33.7021405,33.686666,33.6776395035716 -2001,JAN,1,01,25,33.887748,33.648907,33.6081631111111,33.5661207622421 -2001,JAN,1,01,27,30.688122,30.1181705,30.11563425,30.1605159267872 -2001,JAN,1,01,29,30.855293,30.120469,30.20309275,30.2681012427865 -2001,JAN,1,01,31,34.196473,33.9589405,33.9934574285714,33.9955487814326 -2001,JAN,1,01,33,27.533899,25.682831,25.8164068571428,25.7950421543323 -2001,JAN,1,01,35,33.752266,33.303454,33.3758202,33.4532000275929 -2001,JAN,1,01,37,31.138993,30.106737,30.3926723333333,30.3744259493023 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_expected.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_expected.csv deleted file mode 100644 index 807711df0c..0000000000 --- a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_expected.csv +++ /dev/null @@ -1,20 +0,0 @@ -Year,Month,Day,statefips,countyfips,O3_max_pred,O3_med_pred,O3_mean_pred,O3_pop_pred,date,dcid 
-2001,1,1,1,12001,31.939058,31.691988,31.680859,31.671226,2001-01-01,geoId/12001 -2001,1,1,1,12003,33.646855,33.170271,32.994775,32.935070,2001-01-01,geoId/12003 -2001,1,1,1,12005,34.288917,34.068507,34.077954,34.086631,2001-01-01,geoId/12005 -2001,1,1,1,12007,30.349767,30.036093,29.931756,29.991733,2001-01-01,geoId/12007 -2001,1,1,1,12009,26.472655,25.776595,25.857571,25.872472,2001-01-01,geoId/12009 -2001,1,1,1,120011,33.607696,33.252916,33.304638,33.169637,2001-01-01,geoId/120011 -2001,1,1,1,120013,33.385263,32.992025,32.991131,32.985766,2001-01-01,geoId/120013 -2001,1,1,1,120015,29.553653,28.873731,28.790908,28.779764,2001-01-01,geoId/120015 -2001,1,1,1,120017,32.269248,32.032674,31.963699,31.993792,2001-01-01,geoId/120017 -2001,1,1,1,120019,28.613335,28.427077,28.315339,28.292539,2001-01-01,geoId/120019 -2001,1,1,1,120021,31.132989,30.743824,30.645087,30.572113,2001-01-01,geoId/120021 -2001,1,1,1,120023,33.950801,33.702140,33.686666,33.677640,2001-01-01,geoId/120023 -2001,1,1,1,120025,33.887748,33.648907,33.608163,33.566121,2001-01-01,geoId/120025 -2001,1,1,1,120027,30.688122,30.118171,30.115634,30.160516,2001-01-01,geoId/120027 -2001,1,1,1,120029,30.855293,30.120469,30.203093,30.268101,2001-01-01,geoId/120029 -2001,1,1,1,120031,34.196473,33.958940,33.993457,33.995549,2001-01-01,geoId/120031 -2001,1,1,1,120033,27.533899,25.682831,25.816407,25.795042,2001-01-01,geoId/120033 -2001,1,1,1,120035,33.752266,33.303454,33.375820,33.453200,2001-01-01,geoId/120035 -2001,1,1,1,120037,31.138993,30.106737,30.392672,30.374426,2001-01-01,geoId/120037 diff --git a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_output.csv b/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_output.csv deleted file mode 100644 index f7a4093668..0000000000 --- a/scripts/us_cdc/environmental_health_toxicology/test_data/small_Ozone_County_output.csv +++ /dev/null @@ -1,20 +0,0 @@ 
-Year,Month,Day,statefips,countyfips,O3_max_pred,O3_med_pred,O3_mean_pred,O3_pop_pred,date,dcid -2001,1,1,1,12001,31.939058,31.691988,31.6808585833333,31.6712262223708,2001-01-01,geoId/12001 -2001,1,1,1,12003,33.646855,33.170271,32.9947749354838,32.9350697957369,2001-01-01,geoId/12003 -2001,1,1,1,12005,34.288917,34.068507,34.0779537777777,34.0866306011945,2001-01-01,geoId/12005 -2001,1,1,1,12007,30.349767,30.036093,29.93175575,29.991733426271,2001-01-01,geoId/12007 -2001,1,1,1,12009,26.472655,25.776595,25.8575708888888,25.8724720383971,2001-01-01,geoId/12009 -2001,1,1,1,120011,33.607696,33.252916,33.3046376666666,33.1696366148066,2001-01-01,geoId/120011 -2001,1,1,1,120013,33.385263,32.992025,32.9911305555555,32.9857658772139,2001-01-01,geoId/120013 -2001,1,1,1,120015,29.553653,28.873731,28.7909083870967,28.7797639908072,2001-01-01,geoId/120015 -2001,1,1,1,120017,32.269248,32.032674,31.963699,31.9937919549028,2001-01-01,geoId/120017 -2001,1,1,1,120019,28.613335,28.4270775,28.3153388333333,28.292539279195,2001-01-01,geoId/120019 -2001,1,1,1,120021,31.132989,30.743824,30.6450868888888,30.572113369223,2001-01-01,geoId/120021 -2001,1,1,1,120023,33.950801,33.7021405,33.686666,33.6776395035716,2001-01-01,geoId/120023 -2001,1,1,1,120025,33.887748,33.648907,33.6081631111111,33.5661207622421,2001-01-01,geoId/120025 -2001,1,1,1,120027,30.688122,30.1181705,30.11563425,30.1605159267872,2001-01-01,geoId/120027 -2001,1,1,1,120029,30.855293,30.120469,30.20309275,30.2681012427865,2001-01-01,geoId/120029 -2001,1,1,1,120031,34.196473,33.9589405,33.9934574285714,33.9955487814326,2001-01-01,geoId/120031 -2001,1,1,1,120033,27.533899,25.682831,25.8164068571428,25.7950421543323,2001-01-01,geoId/120033 -2001,1,1,1,120035,33.752266,33.303454,33.3758202,33.4532000275929,2001-01-01,geoId/120035 -2001,1,1,1,120037,31.138993,30.106737,30.3926723333333,30.3744259493023,2001-01-01,geoId/120037