# 0_historic.yml
target_default: 0_historic

include:
  - 0_config.yml

packages:
  - dataRetrieval
  - tidyverse

sources:
  - 0_historic/src/do_historic_gw_fetch.R
  - 0_historic/src/fetch_nwis_historic.R
  - 0_historic/src/filter_historic_fetched_data.R
  - 0_historic/src/calculate_historic_quantiles.R
  - 0_historic/src/do_historic_addl_param_fetch.R

targets:
  0_historic:
    depends:
      - gw-conditions/historic_gw_site_info_unfiltered.rds.ind
      - gw-conditions/historic_gw_data_unfiltered.csv.ind
      - gw-conditions/historic_gw_data_filtered.csv.ind
      - gw-conditions/historic_gw_site_info_filtered.rds.ind
      - gw-conditions/historic_gw_quantiles.csv.ind

  ##-- All GW sites with continuous data for all time --##
  historic_gw_sites_all:
    command: fetch_gw_historic_sites(historic_start_date, historic_end_date, historic_fetch_bounding_box, gw_param_cd)

  historic_gw_sites_dv:
    command: pull_sites_by_service(historic_gw_sites_all, I("dv"))

  historic_gw_sites_uv:
    command: pull_sites_by_service(historic_gw_sites_all, I("uv"))
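  # `pull_sites_by_service()` is defined in the sourced scripts above. A minimal
  # sketch of the idea (not the actual implementation), assuming the site table
  # carries a `data_type_cd` column like the one returned by
  # dataRetrieval::whatNWISdata():
  #
  #   pull_sites_by_service <- function(site_df, service) {
  #     # keep sites whose NWIS record offers the requested service ("dv" or "uv")
  #     # and return their site numbers as a character vector
  #     site_df %>%
  #       dplyr::filter(data_type_cd == service) %>%
  #       dplyr::pull(site_no) %>%
  #       unique()
  #   }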
  0_historic/out/historic_gw_data_dv.csv:
    command: do_historic_gw_fetch(
      final_target = target_name,
      task_makefile = I('0_historic_fetch_tasks.yml'),
      gw_site_nums = historic_gw_sites_dv,
      gw_site_nums_obj_nm = I('historic_gw_sites_dv'),
      param_cd = gw_param_cd,
      service_cd = I('dv'),
      request_limit = historic_dv_fetch_size_limit,
      '0_historic/src/do_historic_gw_fetch.R',
      '0_historic/src/fetch_nwis_historic.R')
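  # `do_historic_gw_fetch()` builds a task table from `0_historic_fetch_tasks.yml`
  # and presumably caps each NWIS request at `request_limit` sites. A hedged
  # sketch of what a single `dv` chunk fetch might look like (`fetch_dv_chunk`
  # is a hypothetical name; the real logic lives in the sourced scripts):
  #
  #   fetch_dv_chunk <- function(site_nums, param_cd, start_date, end_date) {
  #     dataRetrieval::readNWISdv(
  #       siteNumbers = site_nums,
  #       parameterCd = param_cd,
  #       startDate = start_date,
  #       endDate = end_date) %>%
  #       dataRetrieval::renameNWISColumns()
  #   }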
  # The task table for `uv` includes a step that averages the instantaneous data
  # to daily values; that step needs the timezone of each site (a sketch of the
  # averaging step follows the `uv` fetch target below).
  historic_gw_sites_uv_tz:
    command: fetch_gw_site_tz(historic_gw_sites_uv)

  0_historic/out/historic_gw_data_uv.csv:
    command: do_historic_gw_fetch(
      final_target = target_name,
      task_makefile = I('0_historic_fetch_tasks.yml'),
      gw_site_nums = historic_gw_sites_uv,
      gw_site_nums_obj_nm = I('historic_gw_sites_uv'),
      param_cd = gw_param_cd,
      service_cd = I('uv'),
      request_limit = historic_uv_fetch_size_limit,
      '0_historic/src/do_historic_gw_fetch.R',
      '0_historic/src/fetch_nwis_historic.R',
      gw_site_tz_xwalk_nm = I('historic_gw_sites_uv_tz'))
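  # A minimal sketch of the daily-averaging step referenced above (hypothetical
  # helper and column names; the assumed inputs are instantaneous values with a
  # UTC `dateTime` column plus a site_no-to-timezone crosswalk whose `tz_nm`
  # column holds an Olson name such as "America/Chicago"):
  #
  #   average_uv_to_daily <- function(uv_data, tz_xwalk) {
  #     uv_data %>%
  #       dplyr::left_join(tz_xwalk, by = "site_no") %>%
  #       dplyr::group_by(site_no) %>%
  #       # convert each site's UTC timestamps to its local calendar date
  #       dplyr::mutate(Date = as.Date(dateTime, tz = tz_nm[1])) %>%
  #       dplyr::group_by(site_no, Date) %>%
  #       dplyr::summarize(GWL = mean(GWL_inst, na.rm = TRUE), .groups = "drop")
  #   }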
  # Special pull for KS and FL using parameter code `62610`, plus HI using `72150`.
  # Read all about why on GitHub: https://github.com/USGS-VIZLAB/gw-conditions/issues/9
  0_historic/out/historic_gw_data_uv_addl.csv:
    command: do_historic_addl_param_fetch(
      target_name,
      addl_states,
      addl_gw_param_cds,
      historic_start_date,
      historic_end_date)
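  # `do_historic_addl_param_fetch()` is defined in
  # 0_historic/src/do_historic_addl_param_fetch.R. A hedged sketch of the
  # per-state pull it likely wraps (hypothetical helper name; real functions
  # from dataRetrieval):
  #
  #   fetch_addl_state_param <- function(state_cd, param_cd, start_date, end_date) {
  #     # find that state's sites offering the extra parameter code, then pull
  #     # their instantaneous values
  #     sites <- dataRetrieval::whatNWISsites(stateCd = state_cd, parameterCd = param_cd)
  #     dataRetrieval::readNWISuv(
  #       siteNumbers = sites$site_no,
  #       parameterCd = param_cd,
  #       startDate = start_date,
  #       endDate = end_date)
  #   }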
  addl_site_nums:
    command: pull_site_nums('0_historic/out/historic_gw_data_uv_addl.csv')

  0_historic/out/historic_gw_data.csv:
    command: combine_gw_fetches(
      target_name,
      '0_historic/out/historic_gw_data_dv.csv',
      '0_historic/out/historic_gw_data_uv.csv',
      '0_historic/out/historic_gw_data_uv_addl.csv')

  historic_gw_sites:
    command: combine_gw_sites(historic_gw_sites_all, addl_site_nums, gw_param_cd, addl_gw_param_cds, addl_states)

  historic_gw_site_info:
    command: fetch_gw_site_info('0_historic/out/historic_gw_data.csv')

  0_historic/out/historic_gw_site_info.rds:
    command: saveRDS(file = target_name, historic_gw_site_info)

  ##-- Use historic data, filter to sites that meet a min years requirement, and calculate quantiles --##
  0_historic/out/filtered_historic_gw_data.csv:
    command: filter_historic_fetched_data(
      target_name,
      historic_gw_data_fn = "0_historic/out/historic_gw_data.csv",
      min_years = historic_min_yrs_per_site_for_quantile_calc,
      allowed_site_types = historic_site_types_for_quantile_calc)
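  # The core of the min-years filter might look like the sketch below (assumed
  # `site_no` and `Date` columns in the combined CSV; the site-type check against
  # `allowed_site_types` is omitted here):
  #
  #   keep_long_record_sites <- function(gw_data, min_years) {
  #     gw_data %>%
  #       dplyr::group_by(site_no) %>%
  #       # require at least `min_years` distinct calendar years of record
  #       dplyr::filter(dplyr::n_distinct(format(Date, "%Y")) >= min_years) %>%
  #       dplyr::ungroup()
  #   }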
  0_historic/out/filtered_historic_gw_site_info.rds:
    command: gather_metadata_of_sites_used(
      target_name,
      site_data_filtered_fn = "0_historic/out/filtered_historic_gw_data.csv",
      site_data_service_to_pull = historic_gw_sites,
      site_metadata = historic_gw_site_info)

  quantiles_to_calc:
    command: seq(0, 1, by = 0.05)

  # Sites that report "depth below" as their gw level need to be inverted. In the
  # current implementation, this means any site that used pcode == '72019'.
  depth_below_sites:
    command: historic_gw_sites_all[[I('site_no')]]

  0_historic/out/historic_gw_quantiles.csv:
    command: calculate_historic_quantiles(
      target_name,
      "0_historic/out/filtered_historic_gw_data.csv",
      quantiles_to_calc,
      inverse_sites = depth_below_sites)
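  # A sketch of the per-site quantile calculation (assumed `GWL` value column;
  # `site_quantiles` is a hypothetical name, not the function sourced above):
  #
  #   site_quantiles <- function(gw_data, probs, inverse_sites) {
  #     gw_data %>%
  #       # flip the sign for depth-below-surface sites so higher always means wetter
  #       dplyr::mutate(GWL = ifelse(site_no %in% inverse_sites, -GWL, GWL)) %>%
  #       dplyr::group_by(site_no) %>%
  #       dplyr::summarize(
  #         quantiles = list(quantile(GWL, probs = probs, na.rm = TRUE)),
  #         .groups = "drop") %>%
  #       tidyr::unnest_longer(quantiles, values_to = "quantile_va", indices_to = "quantile_nm")
  #   }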
  ##-- Now push historic data and quantiles to S3 to be used in the rest of the pipeline --##
  gw-conditions/historic_gw_site_info_unfiltered.rds.ind:
    command: s3_put(remote_ind = target_name, local_source = '0_historic/out/historic_gw_site_info.rds')

  gw-conditions/historic_gw_data_unfiltered.csv.ind:
    command: s3_put(target_name, local_source = '0_historic/out/historic_gw_data.csv')

  gw-conditions/historic_gw_data_filtered.csv.ind:
    command: s3_put(target_name, local_source = '0_historic/out/filtered_historic_gw_data.csv')

  gw-conditions/historic_gw_site_info_filtered.rds.ind:
    command: s3_put(remote_ind = target_name, local_source = '0_historic/out/filtered_historic_gw_site_info.rds')

  gw-conditions/historic_gw_quantiles.csv.ind:
    command: s3_put(target_name, local_source = '0_historic/out/historic_gw_quantiles.csv')
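  # `s3_put()` is not defined in this file's sources, so it presumably comes from
  # shared pipeline helpers loaded elsewhere. A hedged sketch of the pattern
  # (hypothetical bucket name; assumes the aws.s3 package and that each .ind file
  # records a hash plus timestamp of the uploaded object):
  #
  #   s3_put <- function(remote_ind, local_source) {
  #     # the S3 key is the indicator path without the .ind suffix
  #     s3_key <- gsub("\\.ind$", "", remote_ind)
  #     aws.s3::put_object(file = local_source, object = s3_key, bucket = "hypothetical-bucket")
  #     # write the indicator file that downstream targets depend on
  #     writeLines(
  #       c(paste0("hash: ", unname(tools::md5sum(local_source))),
  #         paste0("time: ", format(Sys.time(), tz = "UTC"))),
  #       remote_ind)
  #   }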