diff --git a/R/onAttach.R b/R/onAttach.R
new file mode 100644
index 00000000..61b69dda
--- /dev/null
+++ b/R/onAttach.R
@@ -0,0 +1,51 @@
+## Package attach hook. In interactive sessions it prints a startup message
+## with the package version and the number of threads used by default.
+##
+## The thread count is looked up in this order: environment variable
+## R_RANGER_NUM_THREADS, options(ranger.num.threads = N), options(Ncpus = N),
+## and finally the built-in default of 2.
+##
+## libname: library directory the package was found in (unused).
+## pkgname: name of the attached package, used for the version lookup.
+.onAttach <- function(libname, pkgname) {
+  ## Stay silent when R is not running interactively.
+  if (!interactive()) {
+    return(invisible(NULL))
+  }
+
+  threads_env <- Sys.getenv("R_RANGER_NUM_THREADS")
+  threads_option1 <- getOption("ranger.num.threads")
+  threads_option2 <- getOption("Ncpus")
+  overwrite_hint <- "Can be overwritten with num.threads."
+
+  if (nzchar(threads_env)) {
+    thread_string <- paste(
+      threads_env,
+      "threads as set by environment variable R_RANGER_NUM_THREADS.",
+      overwrite_hint
+    )
+  } else if (!is.null(threads_option1)) {
+    thread_string <- paste(
+      threads_option1,
+      "threads as set by options(ranger.num.threads = N).",
+      overwrite_hint
+    )
+  } else if (!is.null(threads_option2)) {
+    thread_string <- paste(
+      threads_option2,
+      "threads as set by options(Ncpus = N).",
+      overwrite_hint
+    )
+  } else {
+    thread_string <- paste(
+      "2 threads (default). Change with num.threads in ranger() and",
+      "predict(), options(Ncpus = N), options(ranger.num.threads = N) or",
+      "environment variable R_RANGER_NUM_THREADS."
+    )
+  }
+
+  ## Use pkgname rather than a hard-coded name for the label and version.
+  packageStartupMessage(
+    paste(pkgname, packageVersion(pkgname), "using", thread_string)
+  )
+}
diff --git a/R/predict.R b/R/predict.R
index 82599ab0..d11c453e 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -36,6 +36,9 @@
 ##'
 ##' For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics.
 ##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object.
+##'
+##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
+##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
 ##'
 ##' @title Ranger prediction
 ##' @param object Ranger \code{ranger.forest} object.
@@ -45,7 +48,7 @@
 ##' @param type Type of prediction. 
One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details. ##' @param se.method Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param inbag.counts Number of times the observations are in-bag in the trees. ##' @param ... further arguments passed to or from other methods. @@ -193,7 +196,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads = 0 + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L)))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } @@ -433,6 +436,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ##' ##' For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. ##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. +##' +##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. ##' ##' @title Ranger prediction ##' @param object Ranger \code{ranger} object. 
@@ -444,7 +450,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE, ##' @param quantiles Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use. ##' @param what User specified function for quantile prediction used instead of \code{quantile}. Must return numeric vector, see examples. ##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param verbose Verbose output on or off. ##' @param ... further arguments passed to or from other methods. ##' @return Object of class \code{ranger.prediction} with elements diff --git a/R/ranger.R b/R/ranger.R index 51d64a9b..1cc11ff7 100644 --- a/R/ranger.R +++ b/R/ranger.R @@ -96,10 +96,10 @@ ##' To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. ##' Note that missing values are treated as an extra category while splitting. ##' -##' See \url{https://github.com/imbs-hl/ranger} for the development version. +##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. ##' -##' With recent R versions, multithreading on Windows platforms should just work. -##' If you compile yourself, the new RTools toolchain is required. +##' See \url{https://github.com/imbs-hl/ranger} for the development version. ##' ##' @title Ranger ##' @param formula Object of class \code{formula} or \code{character} describing the model to fit. 
Interaction terms supported only for numerical variables. @@ -133,7 +133,7 @@ ##' @param quantreg Prepare quantile prediction as in quantile regression forests (Meinshausen 2006). Regression only. Set \code{keep.inbag = TRUE} to prepare out-of-bag quantile prediction. ##' @param time.interest Time points of interest (survival only). Can be \code{NULL} (default, use all observed time points), a vector of time points or a single number to use as many time points (grid over observed time points). ##' @param oob.error Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests. -##' @param num.threads Number of threads. Default is number of CPUs available. +##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below). ##' @param save.memory Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems. ##' @param verbose Show computation status and estimated runtime. ##' @param node.stats Save node statistics. Set to \code{TRUE} to save prediction, number of observations and split statistics for each node. @@ -523,7 +523,7 @@ ranger <- function(formula = NULL, data = NULL, num.trees = 500, mtry = NULL, ## Num threads ## Default 0 -> detect from system in C++. if (is.null(num.threads)) { - num.threads = 0 + num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L)))) } else if (!is.numeric(num.threads) || num.threads < 0) { stop("Error: Invalid value for num.threads") } diff --git a/man/predict.ranger.Rd b/man/predict.ranger.Rd index 362befca..2f9c63ac 100644 --- a/man/predict.ranger.Rd +++ b/man/predict.ranger.Rd @@ -38,7 +38,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. 
The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} @@ -70,6 +70,9 @@ If \code{type = 'se'} is selected, the method to estimate the variances can be c For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. + +By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. } \examples{ ## Classification forest diff --git a/man/predict.ranger.forest.Rd b/man/predict.ranger.forest.Rd index ba018b0e..805effda 100644 --- a/man/predict.ranger.forest.Rd +++ b/man/predict.ranger.forest.Rd @@ -33,7 +33,7 @@ \item{seed}{Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{verbose}{Verbose output on or off.} @@ -66,6 +66,9 @@ If \code{type = 'se'} is selected, the method to estimate the variances can be c For classification and \code{predict.all = TRUE}, a factor levels are returned as numerics. To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object. + +By default, ranger uses 2 threads. 
The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. } \references{ \itemize{ diff --git a/man/ranger.Rd b/man/ranger.Rd index 6b7465b8..9f8036b7 100644 --- a/man/ranger.Rd +++ b/man/ranger.Rd @@ -112,7 +112,7 @@ ranger( \item{oob.error}{Compute OOB prediction error. Set to \code{FALSE} to save computation time, e.g. for large survival forests.} -\item{num.threads}{Number of threads. Default is number of CPUs available.} +\item{num.threads}{Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).} \item{save.memory}{Use memory saving (but slower) splitting mode. No effect for survival and GWAS data. Warning: This option slows down the tree growing, use only if you encounter memory problems.} @@ -230,10 +230,10 @@ All SNPs in the \code{GenABEL} object will be used for splitting. To use only the SNPs without sex or other covariates from the phenotype file, use \code{0} on the right hand side of the formula. Note that missing values are treated as an extra category while splitting. -See \url{https://github.com/imbs-hl/ranger} for the development version. +By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable +R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order. -With recent R versions, multithreading on Windows platforms should just work. -If you compile yourself, the new RTools toolchain is required. +See \url{https://github.com/imbs-hl/ranger} for the development version. } \examples{ ## Classification forest with default settings