-
Notifications
You must be signed in to change notification settings - Fork 0
/
modeling_bart.R
129 lines (94 loc) · 4.91 KB
/
modeling_bart.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#######################################################
#######################################################
##### Modeling Bayesian Additive Regression Trees #####
#######################################################
#######################################################
source("00_loading_data.R")
{
#Only modeling "common" species
for(s in sp){
print(paste('Modeling', s))
sites_sp <- sites[, c("x", "y", "ua", s,
"PC1", "PC2", "PC3", "PC4", "PC5")]
names(sites_sp)[names(sites_sp) == s] <- "occ"
if(sum(sites_sp$occ) >= 30){
pa_data <- st_as_sf(sites_sp,
coords = c("x", "y"),
crs = crs(env_terra))
#Separating the data into blocks of 165 km size (no more spatial autocorrelation)
sb <- spatialBlock(speciesData = pa_data,
species = "occ",
rasterLayer = env_raster,
theRange = 165000, #Size of the blocks (m) from prior SAC analysis
k = 5, #Arbitrary
selection = "random",
iteration = 200, #Find evenly dispersed folds
biomod2Format = FALSE,
verbose = FALSE,
progress = FALSE)
#Adding the partitioned blocks info to the initial data set
sites_sp$block <- sb$foldID
#Separate training-testing by pre-defined spatial blocks
split <- initial_split(sites_sp, prop = .7, strata = block) #70/30 in each block
train <- training(split)
test <- testing(split)
#Running the BART model
#Model training here
bart_train <- bart.step(x.data = train[, 5:9],
y.data = train$occ)
#Model testing here
test_pred <- dbarts:::predict.bart(bart_train, test)
#Since 1000 models are runned we need to extract the mean
test_pred <- colMeans(test_pred)
#ROC-AUC and PRC-AUC evaluation
precrec_obj <- evalmod(scores = test_pred,
labels = test$occ)
#Prediction maps based on trained data
bartpred <- predict2.bart(object = bart_train,
x.layers = env_raster,
splitby = 20,
quiet = TRUE)
#Saving the output prediction raster
writeRaster(bartpred,
paste("results/bart/rasters/", s, ".tif", sep = ""),
overwrite = TRUE)
#Evaluation table for each species
result = rbind(result,
data.frame(spp = s,
roc = round(precrec::auc(precrec_obj)$aucs[1], 4),
prc = round(precrec::auc(precrec_obj)$aucs[2], 4)))
} else {
#Separate training-testing by pre-defined spatial blocks
split <- initial_split(sites_sp, prop = .7) #70/30 in each block
train <- training(split)
test <- testing(split)
#Running the BART model
#Model training here
bart_train <- bart.step(x.data = train[, 5:9],
y.data = train$occ)
#Model testing here
test_pred <- dbarts:::predict.bart(bart_train, test)
#Since 1000 models are runned we need to extract the mean
test_pred <- colMeans(test_pred)
#ROC-AUC and PRC-AUC evaluation
precrec_obj <- evalmod(scores = test_pred,
labels = test$occ)
#Prediction maps based on trained data
bartpred <- predict2.bart(object = bart_train,
x.layers = env_raster,
splitby = 20,
quiet = TRUE)
#Saving the output prediction raster
writeRaster(bartpred,
paste("results/bart/rasters/", s, ".tif", sep = ""),
overwrite = TRUE)
#Evaluation table for each species
result = rbind(result,
data.frame(spp = s,
roc = round(precrec::auc(precrec_obj)$aucs[1], 4),
prc = round(precrec::auc(precrec_obj)$aucs[2], 4)))
}
}
#Table
write.csv(result, "results/bart/evaluation.csv")
}