Skip to content

Commit

Permalink
merge with master
Browse files Browse the repository at this point in the history
  • Loading branch information
mnwright committed May 16, 2024
2 parents c0402b1 + f570f7a commit 7da74a8
Show file tree
Hide file tree
Showing 31 changed files with 589 additions and 170 deletions.
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Dependabot configuration (schema version 2).
version: 2

updates:
# Keep dependencies for GitHub Actions up-to-date
- package-ecosystem: "github-actions"
# Location that is scanned for the "github-actions" ecosystem
directory: "/"
schedule:
# Check for updates once a month
interval: "monthly"
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cpp-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Build
run: |
sudo apt-get install cmake
Expand All @@ -21,7 +21,7 @@ jobs:
macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Build
run: |
mkdir build && pushd build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -41,7 +41,7 @@ jobs:

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.4.1
uses: JamesIves/github-pages-deploy-action@v4.6.0
with:
clean: false
branch: gh-pages
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Suggests:
survival,
testthat
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
URL: http://imbs-hl.github.io/ranger/,
https://github.com/imbs-hl/ranger
BugReports: https://github.com/imbs-hl/ranger/issues
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

# ranger 0.16.1
* Set num.threads=2 as default; respect environment variables and options
* Add hierarchical shrinkage
* Allow vector min.node.size and min.bucket for class-specific limits

# ranger 0.16.0
* New CRAN version
Expand Down
22 changes: 22 additions & 0 deletions R/onAttach.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

## Package attach hook. In interactive sessions, print a startup message with
## the ranger version and the thread count taken from the first of these that
## is set: environment variable R_RANGER_NUM_THREADS,
## options(ranger.num.threads), options(Ncpus). If none is set, the message
## reports the default of 2 threads. Non-interactive sessions get no message.
.onAttach <- function(libname, pkgname) {
  ## Stay silent in scripts and batch jobs
  if (!interactive()) {
    return()
  }

  env_threads <- Sys.getenv("R_RANGER_NUM_THREADS")
  ranger_opt <- getOption("ranger.num.threads")
  ncpus_opt <- getOption("Ncpus")

  ## First configured source wins: environment variable, ranger option, Ncpus
  thread_string <- if (nzchar(env_threads)) {
    paste(env_threads, "threads as set by environment variable R_RANGER_NUM_THREADS. Can be overwritten with num.threads.")
  } else if (!is.null(ranger_opt)) {
    paste(ranger_opt, "threads as set by options(ranger.num.threads = N). Can be overwritten with num.threads.")
  } else if (!is.null(ncpus_opt)) {
    paste(ncpus_opt, "threads as set by options(Ncpus = N). Can be overwritten with num.threads.")
  } else {
    "2 threads (default). Change with num.threads in ranger() and predict(), options(Ncpus = N), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS."
  }

  startup_text <- paste("ranger", packageVersion("ranger"), "using", thread_string)
  packageStartupMessage(startup_text)
}
12 changes: 9 additions & 3 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
##'
##' For classification and \code{predict.all = TRUE}, factor levels are returned as numerics.
##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object.
##'
##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
##'
##' @title Ranger prediction
##' @param object Ranger \code{ranger.forest} object.
Expand All @@ -45,7 +48,7 @@
##' @param type Type of prediction. One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details.
##' @param se.method Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details.
##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.
##' @param num.threads Number of threads. Default is number of CPUs available.
##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).
##' @param verbose Verbose output on or off.
##' @param inbag.counts Number of times the observations are in-bag in the trees.
##' @param ... further arguments passed to or from other methods.
Expand Down Expand Up @@ -193,7 +196,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
## Num threads
## Default: R_RANGER_NUM_THREADS, else options ranger.num.threads / Ncpus, else 2. A value of 0 -> detect from system in C++.
if (is.null(num.threads)) {
num.threads = 0
num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L))))
} else if (!is.numeric(num.threads) || num.threads < 0) {
stop("Error: Invalid value for num.threads")
}
Expand Down Expand Up @@ -433,6 +436,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
##'
##' For classification and \code{predict.all = TRUE}, factor levels are returned as numerics.
##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object.
##'
##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
##'
##' @title Ranger prediction
##' @param object Ranger \code{ranger} object.
Expand All @@ -444,7 +450,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
##' @param quantiles Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use.
##' @param what User specified function for quantile prediction used instead of \code{quantile}. Must return numeric vector, see examples.
##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.
##' @param num.threads Number of threads. Default is number of CPUs available.
##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).
##' @param verbose Verbose output on or off.
##' @param ... further arguments passed to or from other methods.
##' @return Object of class \code{ranger.prediction} with elements
Expand Down
68 changes: 56 additions & 12 deletions R/ranger.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@
##' To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula.
##' Note that missing values are treated as an extra category while splitting.
##'
##' See \url{https://github.com/imbs-hl/ranger} for the development version.
##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
##'
##' With recent R versions, multithreading on Windows platforms should just work.
##' If you compile yourself, the new RTools toolchain is required.
##' See \url{https://github.com/imbs-hl/ranger} for the development version.
##'
##' @title Ranger
##' @param formula Object of class \code{formula} or \code{character} describing the model to fit. Interaction terms supported only for numerical variables.
Expand All @@ -109,8 +109,8 @@
##' @param importance Variable importance mode, one of 'none', 'impurity', 'impurity_corrected', 'permutation'. The 'impurity' measure is the Gini index for classification, the variance of the responses for regression and the sum of test statistics (see \code{splitrule}) for survival.
##' @param write.forest Save \code{ranger.forest} object, required for prediction. Set to \code{FALSE} to reduce memory usage if no prediction intended.
##' @param probability Grow a probability forest as in Malley et al. (2012).
##' @param min.node.size Minimal node size to split at. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability.
##' @param min.bucket Minimal terminal node size. No nodes smaller than this value can occur. Default 3 for survival and 1 for all other tree types.
##' @param min.node.size Minimal node size to split at. Default 1 for classification, 5 for regression, 3 for survival, and 10 for probability. For classification, this can be a vector of class-specific values.
##' @param min.bucket Minimal terminal node size. No nodes smaller than this value can occur. Default 3 for survival and 1 for all other tree types. For classification, this can be a vector of class-specific values.
##' @param max.depth Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree).
##' @param replace Sample with replacement.
##' @param sample.fraction Fraction of observations to sample. Default is 1 for sampling with replacement and 0.632 for sampling without replacement. For classification, this can be a vector of class-specific values.
Expand All @@ -133,7 +133,7 @@
##' @param quantreg Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). Regression only. Set \code{keep.inbag = TRUE} to prepare out-of-bag quantile prediction.
##' @param time.interest Time points of interest (survival only). Can be \code{NULL} (default, use all observed time points), a vector of time points or a single number to use as many time points (grid over observed time points).
##' @param oob.error Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests.
##' @param num.threads Number of threads. Default is number of CPUs available.
##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).
##' @param save.memory Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.
##' @param verbose Show computation status and estimated runtime.
##' @param node.stats Save node statistics. Set to \code{TRUE} to save prediction, number of observations and split statistics for each node.
Expand Down Expand Up @@ -359,6 +359,15 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
stop("Error: Unsupported type of dependent variable.")
}

## Number of levels
if (treetype %in% c(1, 9)) {
if (is.factor(y)) {
num_levels <- nlevels(y)
} else {
num_levels <- length(unique(y))
}
}

## Quantile prediction only for regression
if (quantreg && treetype != 3) {
stop("Error: Quantile prediction implemented only for regression outcomes.")
Expand Down Expand Up @@ -514,24 +523,54 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
## Num threads
## Default: R_RANGER_NUM_THREADS, else options ranger.num.threads / Ncpus, else 2. A value of 0 -> detect from system in C++.
if (is.null(num.threads)) {
num.threads = 0
num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L))))
} else if (!is.numeric(num.threads) || num.threads < 0) {
stop("Error: Invalid value for num.threads")
}

## Minimum node size
if (is.null(min.node.size)) {
min.node.size <- 0
} else if (!is.numeric(min.node.size) || min.node.size < 0) {
stop("Error: Invalid value for min.node.size")
} else if (!is.numeric(min.node.size)) {
stop("Error: Invalid value for min.node.size.")
}
if (length(min.node.size) > 1) {
if (!(treetype %in% c(1, 9))) {
stop("Error: Invalid value for min.node.size. Vector values only valid for classification forests.")
}
if (any(min.node.size < 0)) {
stop("Error: Invalid value for min.node.size. Please give a nonnegative value or a vector of nonnegative values.")
}
if (length(min.node.size) != num_levels) {
stop("Error: Invalid value for min.node.size Expecting ", num_levels, " values, provided ", length(min.node.size), ".")
}
} else {
if (min.node.size < 0) {
stop("Error: Invalid value for min.node.size. Please give a nonnegative value or a vector of nonnegative values.")
}
}

## Minimum bucket size
if (is.null(min.bucket)) {
min.bucket <- 0
} else if (!is.numeric(min.bucket) || min.bucket < 0) {
} else if (!is.numeric(min.bucket)) {
stop("Error: Invalid value for min.bucket")
}
if (length(min.bucket) > 1) {
if (!(treetype %in% c(1, 9))) {
stop("Error: Invalid value for min.bucket Vector values only valid for classification forests.")
}
if (any(min.bucket < 0)) {
stop("Error: Invalid value for min.bucket Please give a nonnegative value or a vector of nonnegative values.")
}
if (length(min.bucket) != num_levels) {
stop("Error: Invalid value for min.bucket Expecting ", num_levels, " values, provided ", length(min.bucket), ".")
}
} else {
if (min.bucket < 0) {
stop("Error: Invalid value for min.bucket Please give a nonnegative value or a vector of nonnegative values.")
}
}

## Tree depth
if (is.null(max.depth)) {
Expand All @@ -554,8 +593,8 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
if (sum(sample.fraction) <= 0) {
stop("Error: Invalid value for sample.fraction. Sum of values must be >0.")
}
if (length(sample.fraction) != nlevels(y)) {
stop("Error: Invalid value for sample.fraction. Expecting ", nlevels(y), " values, provided ", length(sample.fraction), ".")
if (length(sample.fraction) != num_levels) {
stop("Error: Invalid value for sample.fraction. Expecting ", num_levels, " values, provided ", length(sample.fraction), ".")
}
if (!replace & any(sample.fraction * length(y) > table(y))) {
idx <- which(sample.fraction * length(y) > table(y))[1]
Expand Down Expand Up @@ -1037,6 +1076,11 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL,
result$dependent.variable.name <- dependent.variable.name
result$status.variable.name <- status.variable.name

## Save max.depth
if (!is.null(max.depth)) {
result$max.depth <- max.depth
}

class(result) <- "ranger"

## Prepare quantile prediction
Expand Down
5 changes: 4 additions & 1 deletion man/predict.ranger.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/predict.ranger.forest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/ranger.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7da74a8

Please sign in to comment.