generated from degauss-org/degauss_template
-
Notifications
You must be signed in to change notification settings - Fork 2
/
entrypoint.R
executable file
·68 lines (50 loc) · 2.17 KB
/
entrypoint.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/local/bin/Rscript
dht::greeting()
## load libraries without messages or warnings
withr::with_message_sink("/dev/null", library(dplyr))
withr::with_message_sink("/dev/null", library(tidyr))
withr::with_message_sink("/dev/null", library(sf))
doc <- "
Usage:
entrypoint.R <filename> [<census_year>]
"
opt <- docopt::docopt(doc)
## for interactive testing
## opt <- docopt::docopt(doc, args = 'test/my_address_file_geocoded.csv')
if (is.null(opt$census_year)) {
opt$census_year <- 2010
cli::cli_alert("No census year provided. Using 2010.")
}
if(opt$census_year == 2010 | opt$census_year == 2020) {
dht::check_ram(5)
}
if(! opt$census_year %in% c('2020', '2010', '2000', '1990', '1980', '1970')) {
cli::cli_alert_danger('Available census geographies include years 1970, 1980, 1990, 2000, 2010, and 2020.')
stop()
}
if(opt$census_year %in% c('1980', '1970')) {
cli::cli_alert_warning('Block groups are not available for the selected year. Only tract identifiers will be returned.')
}
message("reading input file...")
d <- dht::read_lat_lon_csv(opt$filename, nest_df = T, sf = T, project_to_crs = 5072)
dht::check_for_column(d$raw_data, "lat", d$raw_data$lat)
dht::check_for_column(d$raw_data, "lon", d$raw_data$lon)
message('loading census shape files...')
if (opt$census_year %in% c('1980', '1970')) {
geography <- readRDS(file=paste0("/app/tracts_", opt$census_year, "_5072.rds"))
} else {
geography <- readRDS(file=paste0("/app/block_groups_", opt$census_year, "_5072.rds"))
}
message('finding containing geography for each point...')
d$d <- suppressWarnings( sf::st_join(d$d, geography, left = FALSE, largest = TRUE) )
if(! opt$census_year %in% c('1980', '1970')) {
d$d <- d$d %>%
mutate_at(vars(starts_with(glue::glue('census_block_group_id_{opt$census_year}'))),
list(census_tract_id = ~stringr::str_sub(.x, 1, 11)))
names(d$d)[ncol(d$d)] <- glue::glue('{names(d$d)[ncol(d$d)]}_{opt$census_year}')
}
## merge back on .row after unnesting .rows into .row
dht::write_geomarker_file(d = d$d,
raw_data = d$raw_data,
filename = opt$filename,
argument = opt$census_year)