-
Notifications
You must be signed in to change notification settings - Fork 3
/
appears-bbox-check.R
161 lines (131 loc) · 6.23 KB
/
appears-bbox-check.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
## ------------------------------------------------------- ##
# Silica WG - Spatial Data - Check Bounding Boxes
## ------------------------------------------------------- ##
# Written by:
## Nick J Lyon
# Purpose:
## Make sure the site shapefiles fit in manually-drawn bounding boxes (used in AppEEARS data)
## ------------------------------------------------------- ##
# Housekeeping ----
## ------------------------------------------------------- ##
# Load needed libraries
# install.packages("librarian")
librarian::shelf(tidyverse, sf, terra, NCEAS/scicomptools, googledrive, readxl)
# Clear environment
rm(list = ls())
# Identify path to location of shared data
(path <- scicomptools::wd_loc(local = F, remote_path = file.path('/', "home", "shares", "lter-si", "si-watershed-extract")))
# Load in site names with lat/longs
sites <- readxl::read_excel(path = file.path(path, "site-coordinates",
"silica-coords_RAW.xlsx")) %>%
## Pare down to minimum needed columns
dplyr::select(LTER, Stream_Name, Discharge_Site_Name, Shapefile_Name) %>%
## Drop duplicate rows (if any)
dplyr::distinct() %>%
## Remove any watersheds without a shapefile
dplyr::filter(!is.na(Shapefile_Name) &
nchar(Shapefile_Name) != 0 &
!Shapefile_Name %in% c("?", "MISSING"))
# Check it out
dplyr::glimpse(sites)
# Grab the shapefiles the previous script (see PURPOSE section) created
sheds <- sf::st_read(dsn = file.path(path, "site-coordinates", "silica-watersheds.shp")) %>%
# Expand names to what they were before
dplyr::rename(Shapefile_Name = file_name,
expert_area_km2 = exp_area,
shape_area_km2 = real_area)
# Check that out
dplyr::glimpse(sheds)
# Clean up environment
rm(list = setdiff(ls(), c('path', 'sites', 'sheds')))
## ------------------------------------------------------- ##
# Identify Spatial Files ----
## ------------------------------------------------------- ##
# Identify the driver and file pattern
driver_name <- "raw-evapo-modis16a2-v006"
file_pattern <- "MOD16A2.006_ET_500m_"
# Make an empty list
file_list <- list()
# Identify files for each region
for(region in c("north-america-usa", "north-america-arctic",
"cropped-russia-west", "cropped-russia-west-2",
"cropped-russia-center", "cropped-russia-east",
"puerto-rico", "scandinavia")){
# Identify files in that folder
file_df <- data.frame("region" = region,
"files" = dir(path = file.path(path, "raw-driver-data",
driver_name, region),
pattern = file_pattern))
# Add that set of files to the list
file_list[[region]] <- file_df }
# Wrangle the list
file_all <- file_list %>%
# Unlist the loop's output
purrr::list_rbind() %>%
# Identify date from file name
dplyr::mutate(date_raw = stringr::str_extract(string = files,
pattern = "_doy[[:digit:]]{7}")) %>%
# Simplify that column
dplyr::mutate(date = gsub(pattern = "_doy", replacement = "", x = date_raw)) %>%
# Identify day of year & year
dplyr::mutate(year = stringr::str_sub(string = date, start = 1, end = 4),
doy = stringr::str_sub(string = date, start = 5, end = 7)) %>%
# Drop 'raw' version
dplyr::select(-date_raw)
# Glimpse it
dplyr::glimpse(file_all)
# Clean up environment
rm(list = setdiff(ls(), c('path', 'sites', 'sheds', 'file_all')))
## ------------------------------------------------------- ##
# Bounding Box Check ----
## ------------------------------------------------------- ##
# Let's check to make sure each of my manual bounding boxes fits the sites for that region
# Filter to only one row per 'region'
(viz_files <- file_all %>%
# Find first file per region
dplyr::group_by(region) %>%
dplyr::summarize(files = dplyr::first(x = files)) %>%
dplyr::ungroup() )
# Read in one raster of each region
rast1 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[1], viz_files$files[1]))
rast2 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[2], viz_files$files[2]))
rast3 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[3], viz_files$files[3]))
rast4 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[4], viz_files$files[4]))
rast5 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[5], viz_files$files[5]))
rast6 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[6], viz_files$files[6]))
rast7 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[7], viz_files$files[7]))
rast8 <- terra::rast(file.path(path, "raw-driver-data", "raw-evapo-modis16a2-v006",
viz_files$region[8], viz_files$files[8]))
# Plot each "tile" of data against the watersheds polygons
## Russia Composite (Cropped)
frame_rast <- terra::rast(terra::ext(55, 140, 45, 80))
suppressWarnings(plot(frame_rast, axes = T, reset = F, main = "Russia COMPOSITE"))
plot(rast1, axes = T, add = T)
plot(rast3, axes = T, add = T)
plot(rast4, axes = T, add = T)
plot(sheds, axes = T, add = T)
## Russia East (Cropped)
plot(rast2, axes = T, reset = F, main = viz_files$region[2])
plot(sheds, axes = T, add = T)
## North America Arctic
plot(rast5, axes = T, reset = F, main = viz_files$region[5])
plot(sheds, axes = T, add = T)
## USA
plot(rast6, axes = T, reset = F, main = viz_files$region[6])
plot(sheds, axes = T, add = T)
## Puerto Rico
plot(rast7, axes = T, reset = F, main = viz_files$region[7])
plot(sheds, axes = T, add = T)
## Scandinavia
plot(rast8, axes = T, reset = F, main = viz_files$region[8])
plot(sheds, axes = T, add = T)
# Clean up environment
rm(list = setdiff(ls(), c('path', 'sites', 'sheds', 'file_all')))
# End ----