Skip to content

Commit

Permalink
Merge pull request #22 from USEPA/develop
Browse files Browse the repository at this point in the history
CRAN v0.7.0
  • Loading branch information
michaeldumelle authored Jul 18, 2024
2 parents f636ff5 + f3c66cf commit b3e1dab
Show file tree
Hide file tree
Showing 142 changed files with 18,097 additions and 6,506 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: spmodel
Title: Spatial Statistical Modeling and Prediction
Version: 0.6.0
Version: 0.7.0
Authors@R: c(
person(given = "Michael",
family = "Dumelle",
Expand Down Expand Up @@ -28,7 +28,7 @@ License: GPL-3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Depends:
R (>= 3.5.0)
Imports:
Expand All @@ -45,7 +45,8 @@ Suggests:
testthat (>= 3.0.0),
ggplot2,
ranger,
statmod
statmod,
pROC
VignetteBuilder: knitr
Config/testthat/edition: 3
URL: https://usepa.github.io/spmodel/
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ S3method(AICc,spautor)
S3method(AICc,spgautor)
S3method(AICc,spglm)
S3method(AICc,splm)
S3method(AUROC,spgautor)
S3method(AUROC,spglm)
S3method(BIC,spautor)
S3method(BIC,spgautor)
S3method(BIC,spglm)
S3method(BIC,splm)
S3method(anova,spautor)
S3method(anova,spgautor)
S3method(anova,spglm)
Expand Down Expand Up @@ -345,6 +351,7 @@ S3method(vcov,spgautor)
S3method(vcov,spglm)
S3method(vcov,splm)
export(AICc)
export(AUROC)
export(augment)
export(covmatrix)
export(dispersion_initial)
Expand Down Expand Up @@ -391,8 +398,10 @@ importFrom(sf,st_coordinates)
importFrom(sf,st_crs)
importFrom(sf,st_drop_geometry)
importFrom(sf,st_intersects)
importFrom(sf,st_is_longlat)
importFrom(stats,.getXlevels)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,anova)
importFrom(stats,coef)
importFrom(stats,coefficients)
Expand Down
18 changes: 18 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# spmodel 0.7.0

## Minor Updates

* Added `AUROC()` functions to compute the area under the receiver operating characteristic (AUROC) curve for `spglm()` and `spgautor()` models when `family` is `"binomial"` and the response is binary (i.e., represents a single success or failure).
* Added a `BIC()` function to compute the Bayesian Information Criterion (BIC) for `splm()`, `spautor()`, `spglm()`, and `spgautor()` models when `estmethod` is `"reml"` (restricted maximum likelihood; the default) or `"ml"` (maximum likelihood).
* Added a `type` argument to `loocv()` when `cv_predict = TRUE` and using `spglm()` or `spgautor()` models so that predictions may be obtained on the link or response scale.
* Added a warning message when `data` is an `sf` object and a geographic (i.e., degrees) coordinate system is used instead of a projected coordinate system.
* Changed the default behavior of `local` in `predict.spmodel` so that it depends only on the observed data sample size. Now, when the observed data sample size exceeds 10,000 `local` is set to `TRUE` by default. This change was made because prediction for big data depends almost exclusively on the observed data sample size, not the number of predictions desired.
* Minor external data updates (for package testing).
* Minor vignette updates.
* Minor documentation updates.
* Minor error message updates.

## Bug Fixes

* Fixed a bug that prohibited proper indexing when calling `predict()` with the `local` method `"distance"` on a model object fit with a random effect or partition factor.

# spmodel 0.6.0

## Minor Updates
Expand Down
58 changes: 58 additions & 0 deletions R/AUROC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#' Area Under Receiver Operating Characteristic Curve
#'
#' @description Compare area under the receiver operating characteristic curve (AUROC) for binary (e.g.,
#' logistic) models. The area under the ROC curve provides a measure of the
#' model's classification accuracy averaged over all possible threshold values.
#'
#' @param object A fitted model object from [spglm()] or [spgautor()]) where \code{family = "binomial"}
#' and the response values are binary, representing a single success or failure for each datum.
#' @param ... Additional arguments to [pROC::auc()].
#'
#' @return The area under the receiver operating characteristic curve.
#'
#' @order 1
#' @export
#'
#' @examples
#' spgmod <- spglm(presence ~ elev,
#' family = "binomial", data = moose,
#' spcov_type = "exponential"
#' )
#' AUROC(spgmod)
#' @references
#' Robin, X., Turck, N., Hainard, A., Tiberti, N., Lisacek, F., Sanchez, J. C., & Müller, M.
#' (2011). pROC: an open-source package for R and S+ to analyze and compare ROC curves.
#' \emph{BMC bioinformatics}, 12, 1-8.
AUROC <- function(object, ...) {
UseMethod("AUROC", object)
}

#' @rdname AUROC
#' @method AUROC spglm
#' @order 2
#' @export
AUROC.spglm <- function(object, ...) {
if (!requireNamespace("pROC", quietly = TRUE)) {
stop("Install the pROC package before using AUROC().", call. = FALSE)
} else {

if (object$family != "binomial") {
stop("AUROC() only available when family is \"binomial\".", call. = FALSE)
}

if (any(object$size != 1)) {
stop("AUROC() only available for binary models (i.e., models whose response indicates a single success or failure).", call. = FALSE)
}
dotlist <- list(...)
if (!("quiet" %in% names(dotlist))) {
dotlist$quiet <- TRUE
}
as.numeric(do.call(pROC::auc, c(list(response = as.vector(object$y), predictor = fitted(object)), dotlist)))
}
}

#' @rdname AUROC
#' @method AUROC spgautor
#' @order 3
#' @export
AUROC.spgautor <- AUROC.spglm
127 changes: 127 additions & 0 deletions R/BIC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#' Compute BIC of fitted model objects
#'
#' @description Compute BIC for one or
#' several fitted model objects for which a log-likelihood
#' value can be obtained.
#'
#' @param object A fitted model object from [splm()], [spautor()], [spglm()], or [spgautor()]
#' where \code{estmethod} is \code{"ml"} or \code{"reml"}.
#' @param ... Optionally more fitted model objects.
#'
#' @details When comparing models fit by maximum or restricted maximum
#' likelihood, the smaller the BIC, the better the fit. The theory of
#' BIC requires that the log-likelihood has been maximized, and hence,
#' no BIC methods exist for models where \code{estmethod} is not
#' \code{"ml"} or \code{"reml"}. Additionally, BIC comparisons between \code{"ml"}
#' and \code{"reml"} models are meaningless -- comparisons should only be made
#' within a set of models estimated using \code{"ml"} or a set of models estimated
#' using \code{"reml"}. BIC comparisons for \code{"reml"} must
#' use the same fixed effects. To vary the covariance parameters and
#' fixed effects simultaneously, use \code{"ml"}.
#'
#' BIC is defined as \eqn{-2loglik + log(n)(estparams)}, where \eqn{n} is the sample size
#' and \eqn{estparams} is the number of estimated parameters. For \code{"ml"}, \eqn{estparams} is
#' the number of estimated covariance parameters plus the number of estimated
#' fixed effects. For \code{"reml"}, \eqn{estparams} is the number of estimated covariance
#' parameters.
#'
#' @return If just one object is provided, a numeric value with the corresponding
#' BIC.
#'
#' If multiple objects are provided, a \code{data.frame} with rows corresponding
#' to the objects and columns representing the number of parameters estimated
#' (\code{df}) and the BIC.
#'
#' @name BIC.spmodel
#' @method BIC splm
#' @order 1
#' @export
#'
#' @examples
#' spmod <- splm(z ~ water + tarp,
#' data = caribou,
#' spcov_type = "exponential", xcoord = x, ycoord = y
#' )
#' BIC(spmod)
BIC.splm <- function(object, ...) {

# set k as log of sample size
k <- log(object$n)

# store object and ...
object_list <- list(object, ...)

# see if ... has any elements
if (length(object_list) == 1) {

# number of estimated parameters
if (object$estmethod == "ml") {
n_est_param <- object$npar + object$p
} else {
n_est_param <- object$npar
}

# error if not ml or reml
if (!object$estmethod %in% c("ml", "reml")) {
stop("BIC is only defined is estmethod is \"ml\" or \"reml\".", call. = FALSE)
}
# compute BIC
BIC_val <- -2 * logLik(object) + k * (n_est_param)
} else {


# warning if ml and reml in same call
est_methods <- vapply(object_list, function(x) x$estmethod, character(1))
if ("ml" %in% est_methods && "reml" %in% est_methods) {
warning("BIC and BICc should not compare models fit with
\"ml\" to models fit with \"reml\"", call. = FALSE)
}

# warning if reml and fixed effects change
est_methods_reml <- which(est_methods == "reml")
if (length(est_methods_reml) > 1) {
if (any(vapply(
est_methods_reml,
function(x) !identical(formula(object_list[[x]]), formula(object_list[[1]])), logical(1)
))) {
warning("BIC and BICc should not be used to compare models fit with \"reml\" whose fixed effect formulas differ.", call. = FALSE)
}
}

# find model names provided
object_list_names <- as.character(c(substitute(object), (as.list(substitute(list(...)))[-1])))

# error if any names duplicated
if (any(duplicated(object_list_names))) {
stop("Each model object must have a unique name", call. = FALSE)
}
# iterate through each model
object_BIC <- lapply(object_list, function(x) {
# warning if estmethod not ml or reml
if (!object$estmethod %in% c("ml", "reml")) {
stop("BIC is only defined is estmethod is \"ml\" or \"reml\".", call. = FALSE)
}

if (x$estmethod == "ml") {
n_est_param <- x$npar + x$p
} else {
n_est_param <- x$npar
}

# store degrees of freedom (parames estimated) and BIC
data.frame(df = n_est_param, BIC = -2 * logLik(x) + k * (n_est_param))
})
# put all BIC data frames together
BIC_val <- do.call("rbind", object_BIC)
# set rownames as model names
row.names(BIC_val) <- object_list_names
}
# return BIC value
BIC_val
}

#' @rdname BIC.spmodel
#' @method BIC spautor
#' @order 2
#' @export
BIC.spautor <- BIC.splm
11 changes: 11 additions & 0 deletions R/BIC_glm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#' @rdname BIC.spmodel
#' @method BIC spglm
#' @order 3
#' @export
BIC.spglm <- BIC.splm

#' @rdname BIC.spmodel
#' @method BIC spgautor
#' @order 4
#' @export
BIC.spgautor <- BIC.spautor
16 changes: 15 additions & 1 deletion R/get_data_object.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,20 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
stop("Coordinates must be numeric.", call. = FALSE)
}

# check if coordinates are projected
if (is_sf) {
if (st_is_longlat(crs)) {
warning("Coordinates are in a geographic coordinate system. For the most accurate results, please ensure
coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
}
} else {
# possible revisit this later and add explicit warning
# if (any(abs(c(data[[xcoord]], data[[ycoord]])) <= 360)) {
# warning("Coordinates may be in a geographic coordinate system. For the most accurate results, please ensure
# coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
# }
}

# subsetting by na and not na values
## find response variabale name
na_index <- is.na(data[[all.vars(formula)[1]]])
Expand Down Expand Up @@ -206,7 +220,7 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
if (is.null(local)) {
if (n > 5000) {
local <- TRUE
message("Because the sample size exceeds 5000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun splm() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
message("Because the sample size exceeds 5,000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
} else {
local <- FALSE
}
Expand Down
16 changes: 15 additions & 1 deletion R/get_data_object_glm.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,20 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
stop("Coordinates must be numeric.", call. = FALSE)
}

# check if coordinates are projected
if (is_sf) {
if (st_is_longlat(crs)) {
warning("Coordinates are in a geographic coordinate system. For the most accurate results, please ensure
coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
}
} else {
# possible revisit this later and add explicit warning
# if (any(abs(c(data[[xcoord]], data[[ycoord]])) <= 360)) {
# warning("Coordinates may be in a geographic coordinate system. For the most accurate results, please ensure
# coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
# }
}

# subsetting by na and not na values
## find response variable name
# na_index <- is.na(data[[all.vars(formula)[1]]])
Expand Down Expand Up @@ -239,7 +253,7 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
if (is.null(local)) {
if (n > 3000) {
local <- TRUE
message("Because the sample size exceeds 3000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun splm() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
message("Because the sample size exceeds 3,000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
} else {
local <- FALSE
}
Expand Down
8 changes: 7 additions & 1 deletion R/get_local_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,13 @@ get_local_list_estimation <- function(local, data, xcoord, ycoord, n, partition_

# setting var adjust
if (!"var_adjust" %in% names_local) {
local$var_adjust <- "theoretical"
if (n <= 100000) {
local$var_adjust <- "theoretical"
} else {
message('var_adjust was not specified and the sample size exceeds 100,000, so the default var_adjust value is being changed from "theoretical" to "none". To override this behavior, rerun and set var_adjust in local. Be aware that setting var_adjust to "theoretical" may result in exceedingly long computational times.')
local$var_adjust <- "none"
}

} # "none", "empirical", "theoretical", and "pooled"

# setting partition factor
Expand Down
10 changes: 6 additions & 4 deletions R/glance.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@
#' \item \code{n} The sample size.
#' \item \code{p} The number of fixed effects.
#' \item \code{npar} The number of estimated covariance parameters.
#' \item \code{value} The optimized value of the fitting function
#' \item \code{value} The optimized value of the fitting function.
#' \item \code{AIC} The AIC.
#' \item \code{AICc} The AICc.
#' \item \code{logLik} The log-likelihood
#' \item \code{BIC} The BIC.
#' \item \code{logLik} The log-likelihood.
#' \item \code{deviance} The deviance.
#' \item \code{pseudo.r.squared} The pseudo r-squared
#' \item \code{pseudo.r.squared} The pseudo r-squared.
#' }
#'
#' @name glance.spmodel
#' @method glance splm
#' @order 1
#' @export
#'
#' @seealso [AIC.spmodel()] [AICc()] [logLik.spmodel()] [deviance.spmodel()] [pseudoR2()] [tidy.spmodel()] [augment.spmodel()]
#' @seealso [AIC.spmodel()] [AICc()] [BIC.spmodel()] [logLik.spmodel()] [deviance.spmodel()] [pseudoR2()] [tidy.spmodel()] [augment.spmodel()]
#'
#' @examples
#' spmod <- splm(z ~ water + tarp,
Expand All @@ -44,6 +45,7 @@ glance.splm <- function(x, ...) {
value = x$optim$value,
AIC = ifelse(is_likbased, AIC(x), NA),
AICc = ifelse(is_likbased, AICc(x), NA),
BIC = ifelse(is_likbased, BIC(x), NA),
logLik = ifelse(is_likbased, logLik(x), NA),
deviance = ifelse(is_likbased, deviance(x), NA),
pseudo.r.squared = pseudoR2(x),
Expand Down
Loading

0 comments on commit b3e1dab

Please sign in to comment.