Skip to content

Commit

Permalink
Rewrite neighborhood weights
Browse files Browse the repository at this point in the history
  • Loading branch information
camille-s committed May 22, 2024
1 parent 58208a7 commit 73949ad
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 95 deletions.
5 changes: 1 addition & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,7 @@ data/occ_codes.rda data/naics_codes.rda &: data-raw/make_lehd.R
data/msa.rda: data-raw/make_msas.R
$(SRC)

data/%_tracts.rda &: data-raw/make_neighborhood_weights.R data/town_sf.rda data/%_sf.rda
$(SRC)

data/%_tracts19.rda &: data-raw/make_neighborhood_weights19.R data/town_sf.rda data/%_sf19.rda
data/%_tracts.rda &: data-raw/make_neighborhood_weights.R data/town_sf.rda data/%_sf.rda
$(SRC)

data/regions.rda: data-raw/make_regions.R data-raw/files/town_region_lookup.csv
Expand Down
80 changes: 71 additions & 9 deletions data-raw/make_neighborhood_weights.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,62 @@
# previously had an error where spatial joins had tracts assigned to wrong towns
# for nhv manually move long wharf to hill
# for hartford drop north meadows
# combine 2020, 2019 in one script

# for 2022 switch to COGs, need to add COG-based fips codes as another column
sf::sf_use_s2(FALSE)
min_hh <- 3
nhoods <- list(bridgeport = bridgeport_sf,
new_haven = new_haven_sf,
hartford = hartford_sf,
stamford = stamford_sf) |>
nhoods <- list(bridgeport = cwi::bridgeport_sf,
new_haven = cwi::new_haven_sf,
hartford = cwi::hartford_sf,
stamford = cwi::stamford_sf) |>
dplyr::bind_rows(.id = "city") |>
dplyr::mutate(name = dplyr::recode(name, "Long Wharf" = "Hill")) |>
dplyr::mutate(town = coalesce(town, city) |>
dplyr::mutate(town = dplyr::coalesce(town, city) |>
stringr::str_replace_all("_", " ") |>
stringr::str_to_title()) |>
dplyr::filter(!name %in% c("North Meadows"))

# 2020 shapefile includes pop & households
hh20 <- tigris::blocks(state = "09", year = 2020, refresh = FALSE) |>
janitor::clean_names() |>
dplyr::filter(aland20 > 0) |>
dplyr::filter(aland20 > 0, housing20 > 0) |>
dplyr::select(block = geoid20, hh = housing20) |>
dplyr::left_join(dplyr::select(cwi::xwalk, block, tract, town), by = "block")

# 2010 shapefile doesn't
hh10 <- tidycensus::get_decennial("block", variables = c(hh10 = "H003001"), year = 2010, sumfile = "sf1",
state = "09", geometry = TRUE, keep_geo_vars = TRUE) |>
janitor::clean_names() |>
dplyr::filter(aland10 > 0, value > 0) |>
dplyr::select(block10 = geoid, hh = value) |>
# use towns since don't have xwalk for old geos
sf::st_join(cwi::town_sf |> dplyr::select(town = name), left = FALSE, largest = TRUE)

# assign each block to 1 neighborhood
block2nhood <- hh20 |>
sf::st_join(nhoods, left = TRUE, largest = TRUE) |>
# check that town it's assigned to is correct and in set of cities
dplyr::filter(town.x == town.y) |>
sf::st_drop_geometry()

block2nhood10 <- hh10 |>
sf::st_join(nhoods, left = TRUE, largest = TRUE) |>
dplyr::filter(town.x == town.y) |>
dplyr::mutate(tract10 = substr(block10, 1, 11)) |>
sf::st_drop_geometry()


# tract pops for denominators
tract_pops <- block2nhood |>
dplyr::group_by(city, town = town.x, tract) |>
dplyr::summarise(tract_hh = sum(hh))
dplyr::summarise(tract_hh = sum(hh)) |>
dplyr::filter(tract_hh >= min_hh)

tract_pops10 <- block2nhood10 |>
dplyr::group_by(city, town = town.x, tract10) |>
dplyr::summarise(tract_hh = sum(hh)) |>
dplyr::filter(tract_hh >= min_hh)

tract2nhood <- block2nhood |>
dplyr::group_by(city, town = town.x, name, tract) |>
Expand All @@ -46,18 +71,49 @@ tract2nhood <- block2nhood |>
dplyr::left_join(dplyr::distinct(cwi::xwalk, tract, tract_cog), by = "tract") |>
dplyr::select(city, town, name, geoid = tract, geoid_cog = tract_cog, weight)

tract2nhood10 <- block2nhood10 |>
dplyr::group_by(city, town = town.x, name, tract10) |>
dplyr::summarise(inter_hh = sum(hh)) |>
dplyr::ungroup() |>
dplyr::left_join(tract_pops10, by = c("city", "town", "tract10")) |>
dplyr::mutate(weight = round(inter_hh / tract_hh, digits = 3)) |>
dplyr::filter(weight > 0.01) |>
dplyr::select(city, town, name, geoid10 = tract10, weight)

# sanity check:
# tract2nhood |>
# dplyr::left_join(cwi::tract_sf, by = c("geoid" = "name")) |>
# sf::st_as_sf() |>
# split(~city) |>
# purrr::map(sf::st_geometry) |>
# purrr::map(plot)
# tract2nhood10 |>
# dplyr::left_join(cwi::tract_sf19, by = c("geoid10" = "name")) |>
# sf::st_as_sf() |>
# split(~city) |>
# purrr::map(sf::st_geometry) |>
# purrr::map(plot)

out <- tract2nhood |>
split(~city) |>
purrr::map(janitor::remove_constant) |>
# purrr::map(janitor::remove_constant) |>
purrr::map(function(df) {
if (length(unique(df$town)) == 1) {
df$town <- NULL
}
dplyr::select(df, -city)
}) |>
rlang::set_names(\(x) paste(x, "tracts", sep = "_"))
out10 <- tract2nhood10 |>
split(~city) |>
# purrr::map(janitor::remove_constant) |>
purrr::map(function(df) {
if (length(unique(df$town)) == 1) {
df$town <- NULL
}
dplyr::select(df, -city)
}) |>
rlang::set_names(\(x) paste(x, "tracts19", sep = "_"))

# dropping block groups for now, but can add back in if we want
# block2nhood |>
Expand All @@ -66,9 +122,15 @@ out <- tract2nhood |>
# dplyr::mutate(weight = round(hh / sum(hh), 3))

# wow can't believe i'm doing list2env
list2env(tract2nhood, .GlobalEnv)
list2env(out, .GlobalEnv)
list2env(out10, .GlobalEnv)
usethis::use_data(bridgeport_tracts,
hartford_tracts,
new_haven_tracts,
stamford_tracts,
overwrite = TRUE)
usethis::use_data(bridgeport_tracts19,
hartford_tracts19,
new_haven_tracts19,
stamford_tracts19,
overwrite = TRUE)
66 changes: 0 additions & 66 deletions data-raw/make_neighborhood_weights19.R

This file was deleted.

Binary file modified data/bridgeport_tracts.rda
Binary file not shown.
Binary file modified data/bridgeport_tracts19.rda
Binary file not shown.
Binary file modified data/hartford_tracts.rda
Binary file not shown.
Binary file modified data/hartford_tracts19.rda
Binary file not shown.
Binary file modified data/new_haven_tracts.rda
Binary file not shown.
Binary file modified data/new_haven_tracts19.rda
Binary file not shown.
Binary file modified data/stamford_tracts.rda
Binary file not shown.
Binary file modified data/stamford_tracts19.rda
Binary file not shown.
16 changes: 8 additions & 8 deletions man/neighborhood_tracts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions tests/testthat/test-multi_geo_fetch.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,20 @@ test_that("multi_geo_* handles tables that don't exist", {
})

test_that("multi_geo_* handles neighborhood geoids", {
dummy_nhood_00 <- dplyr::mutate(new_haven_tracts19, geoid = paste0(geoid, "00"))
dummy_nhood_bg <- dplyr::mutate(new_haven_tracts19, geoid = paste0(geoid, "0"))
dummy_nhood_nm <- dplyr::rename(new_haven_tracts19, fips = geoid)
expect_message(multi_test(neighborhoods = new_haven_tracts19, nhood_geoid = "geoid"), "tracts")
expect_message(multi_test(neighborhoods = dummy_nhood_00, nhood_geoid = "geoid"))
expect_message(multi_test(neighborhoods = dummy_nhood_bg, nhood_geoid = "geoid"))
dummy_nhood_00 <- dplyr::mutate(new_haven_tracts19, geoid10 = paste0(geoid10, "00"))
dummy_nhood_bg <- dplyr::mutate(new_haven_tracts19, geoid10 = paste0(geoid10, "0"))
dummy_nhood_nm <- dplyr::rename(new_haven_tracts19, fips = geoid10)
expect_message(multi_test(neighborhoods = new_haven_tracts19, nhood_geoid = "geoid10"), "tracts")
expect_message(multi_test(neighborhoods = dummy_nhood_00, nhood_geoid = "geoid10"))
expect_message(multi_test(neighborhoods = dummy_nhood_bg, nhood_geoid = "geoid10"))
expect_silent(multi_test(neighborhoods = dummy_nhood_nm, nhood_geoid = "fips", verbose = FALSE))
})

test_that("multi_geo_* handles neighborhood names", {
# previously was returning name, name_2 columns
skip_on_ci()
acs_df <- multi_geo_acs("B01003", year = 2019, neighborhoods = new_haven_tracts19, nhood_geoid = "geoid")
dec_df <- multi_geo_decennial("P001", year = 2010, neighborhoods = new_haven_tracts19, nhood_geoid = "geoid", sumfile = "sf1")
acs_df <- multi_geo_acs("B01003", year = 2019, neighborhoods = new_haven_tracts19, nhood_geoid = "geoid10")
dec_df <- multi_geo_decennial("P001", year = 2010, neighborhoods = new_haven_tracts19, nhood_geoid = "geoid10", sumfile = "sf1")
expect_false(any(grepl("\\d", names(acs_df))))
expect_false(any(grepl("\\d", names(dec_df))))
})
Expand Down

0 comments on commit 73949ad

Please sign in to comment.