From 2485f938961569cd375de067056eab2822b7f911 Mon Sep 17 00:00:00 2001 From: assignUser Date: Sat, 3 Oct 2020 16:09:20 +0200 Subject: [PATCH 1/4] add environment managment for ..vars Fixes #59 --- R/add_correlated_data.R | 32 +++- R/add_data.R | 27 +++- R/define_data.R | 2 +- R/generate_data.R | 15 +- R/generate_dist.R | 222 ++++++++++++++++++++-------- R/internal_utility.R | 4 +- R/simstudy-package.R | 2 +- tests/testthat/test-generate_dist.R | 32 ++-- 8 files changed, 243 insertions(+), 93 deletions(-) diff --git a/R/add_correlated_data.R b/R/add_correlated_data.R index 0437d419..c2535ee9 100644 --- a/R/add_correlated_data.R +++ b/R/add_correlated_data.R @@ -95,6 +95,8 @@ addCorData <- function(dtOld, idname, mu, sigma, corMatrix = NULL, #' symmetrical and positive semi-definite. It is not a required field; if a #' matrix is not provided, then a structure and correlation coefficient rho must #' be specified. +#' @param envir Environment the data definitions are evaluated in. +#' Defaults to [base::parent.frame]. #' @return data.table with added column(s) of correlated data #' @examples #' defC <- defData( @@ -143,9 +145,10 @@ addCorData <- function(dtOld, idname, mu, sigma, corMatrix = NULL, #' # Check global correlations - should not be as correlated #' cor(di[, list(A, B, C, D)]) #' @concept correlated +#' @md #' @export addCorFlex <- function(dt, defs, rho = 0, tau = NULL, corstr = "cs", - corMatrix = NULL) { + corMatrix = NULL, envir = parent.frame()) { # "Declare" vars to avoid R CMD warning @@ -203,15 +206,30 @@ addCorFlex <- function(dt, defs, rho = 0, tau = NULL, corstr = "cs", iLink <- corDefs[i, link] if (iDist == "binary") { - params <- .getBinaryMean(dTemp, formula = iFormula, Size = 1, link = iLink) + params <- .getBinaryMean(dTemp, + formula = iFormula, + size = 1, + link = iLink, + envir = envir + ) V <- dTemp[, stats::qbinom(Unew, 1, params[[1]])] } else if (iDist == "poisson") { - param1 <- .getPoissonMean(dTemp, formula = iFormula, link = iLink) + param1 <- .getPoissonMean( + dtSim = dTemp, + formula = iFormula, + link = iLink, + envir = envir + ) V <- dTemp[, stats::qpois(Unew, param1)] } else if (iDist == "gamma") { - mn <- .getGammaMean(dTemp, formula = iFormula, link = iLink) + mn <- .getGammaMean( + dtSim = dTemp, + formula = iFormula, + link = iLink, + envir = envir + ) ### Gamma parameters need to be transformed @@ -221,7 +239,7 @@ addCorFlex <- function(dt, defs, rho = 0, tau = NULL, corstr = "cs", V <- dTemp[, stats::qgamma(Unew, param1, param2)] } else if (iDist == "negBinomial") { - mn <- .getNBmean(dTemp, formula = iFormula, link = iLink) + mn <- .getNBmean(dTemp, formula = iFormula, link = iLink, envir = envir) ### NB parameters need to be transformed @@ -231,7 +249,7 @@ addCorFlex <- function(dt, defs, rho = 0, tau = NULL, corstr = "cs", V <- dTemp[, stats::qnbinom(Unew, param1, param2)] } else if (iDist == "normal") { - param1 <- .getNormalMean(dTemp, formula = iFormula) + param1 <- .getNormalMean(dtSim = dTemp, formula = iFormula, envir = envir) param2 <- sqrt(corDefs[i, variance]) V <- dTemp[, stats::qnorm(Unew, param1, param2)] @@ -645,4 +663,4 @@ addCorGen <- function(dtOld, nvars, idvar = "id", rho, corstr, corMatrix = NULL, return(dtTemp[]) -} +} \ No newline at end of file diff --git a/R/add_data.R b/R/add_data.R index eadcbc7d..04d4fc88 100644 --- a/R/add_data.R +++ b/R/add_data.R @@ -2,6 +2,8 @@ #' #' @param dtDefs name of definitions for added columns #' @param dtOld name of data table that is to be updated +#' @param envir Environment the data definitions are evaluated in. +#' Defaults to [base::parent.frame]. #' @return an updated data.table that contains the added simulated data #' @examples #' # New data set @@ -20,8 +22,9 @@ #' dt <- addColumns(def2, dt) #' dt #' @concept generate_data +#' @md #' @export -addColumns <- function(dtDefs, dtOld) { +addColumns <- function(dtDefs, dtOld, envir = parent.frame()) { # "declares" varname to avoid global NOTE varname <- NULL @@ -52,7 +55,13 @@ addColumns <- function(dtDefs, dtOld) { iter <- nrow(dtDefs) n <- nrow(dtOld) for (i in (1:iter)) { - dtOld <- .generate(dtDefs[i, ], n, dtOld, oldkey) + dtOld <- .generate( + args = dtDefs[i, ], + n = n, + dfSim = dtOld, + idname = oldkey, + envir = envir + ) } dtOld <- data.table::data.table(dtOld) @@ -66,7 +75,8 @@ addColumns <- function(dtDefs, dtOld) { #' @param condDefs Name of definitions for added column #' @param dtOld Name of data table that is to be updated #' @param newvar Name of new column to add -#' +#' @param envir Environment the data definitions are evaluated in. +#' Defaults to [base::parent.frame]. #' @return An updated data.table that contains the added simulated data #' @examples #' @@ -103,9 +113,10 @@ addColumns <- function(dtDefs, dtOld) { #' ggplot(data = dt, aes(x = y, y = NewVar, group = x)) + #' geom_point(aes(color = factor(x))) #' @export +#' @md #' @concept generate_data #' @concept condition -addCondition <- function(condDefs, dtOld, newvar) { +addCondition <- function(condDefs, dtOld, newvar, envir = parent.frame()) { # 'declare' vars varname <- NULL @@ -163,7 +174,13 @@ addCondition <- function(condDefs, dtOld, newvar) { n <- nrow(dtTemp) if (n > 0) { - dtTemp <- .generate(cDefs[i, ], n, dtTemp, oldkey) + dtTemp <- .generate( + args = cDefs[i, ], + n = n, + dfSim = dtTemp, + idname = oldkey, + envir = envir + ) dtTemp <- data.table::data.table(dtTemp) dtTemp <- dtTemp[, list(get(oldkey), get(newvar))] diff --git a/R/define_data.R b/R/define_data.R index ac332858..fed3d029 100644 --- a/R/define_data.R +++ b/R/define_data.R @@ -89,7 +89,7 @@ defCondition <- function(dtDefs = NULL, #' @details The possible data distributions are: `r paste0(.getDists(),collapse = ", ")`. #' #' @examples -#' extVar <<- 2.3 +#' extVar <- 2.3 #' def <- defData(varname = "xNr", dist = "nonrandom", formula = 7, id = "idnum") #' def <- defData(def, varname = "xUni", dist = "uniform", formula = "10;20") #' def <- defData(def, diff --git a/R/generate_data.R b/R/generate_data.R index b6d26f8d..f07e4ac5 100644 --- a/R/generate_data.R +++ b/R/generate_data.R @@ -5,8 +5,11 @@ #' are provided #' a data set with ids only is generated. #' @param id The string defining the id of the record +#' @param envir Environment the data definitions are evaluated in. +#' Defaults to [base::parent.frame]. #' @return A data.table that contains the simulated data. #' @export +#' @md #' @concept generate_data #' @examples #' genData(5) @@ -43,7 +46,7 @@ #' def #' #' genData(5, def) -genData <- function(n, dtDefs = NULL, id = "id") { +genData <- function(n, dtDefs = NULL, id = "id", envir = parent.frame()) { assertNotMissing(n = missing(n)) assertValue(n = n, id = id) assertType(id = id, type = "character") @@ -63,7 +66,13 @@ genData <- function(n, dtDefs = NULL, id = "id") { iter <- nrow(dtDefs) # generate a column of data for each row of dtDefs for (i in (1:iter)) { - dfSimulate <- .generate(dtDefs[i, ], n, dfSimulate, idname) + dfSimulate <- .generate( + args = dtDefs[i, ], + n = n, + dfSim = dfSimulate, + idname = idname, + envir = envir + ) } dt <- data.table::data.table(dfSimulate) @@ -824,4 +833,4 @@ genSurv <- function(dtName, survDefs, digits = 3) { } return(dtSurv[]) -} +} \ No newline at end of file diff --git a/R/generate_dist.R b/R/generate_dist.R index 9f26e3c1..88425793 100644 --- a/R/generate_dist.R +++ b/R/generate_dist.R @@ -6,27 +6,116 @@ #' @param args One row from data definitions data.table #' @param n The number of observations required in the data set #' @param dt Incomplete simulated data.table +#' @param envir Environment the data definitions are evaluated in. +#' Defaults to [base::parent.frame]. #' @return A data.frame with the updated simulated data #' @noRd -.generate <- function(args, n, dfSim, idname) { +.generate <- function(args, n, dfSim, idname, envir = parent.frame()) { newColumn <- switch(args$dist, - beta = .genbeta(n, args$formula, args$variance, args$link, dfSim), + beta = .genbeta( + n = n, + formula = args$formula, + precision = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ), binary = { args$variance <- 1 - .genbinom(n, args$formula, args$variance, args$link, dfSim) + .genbinom( + n = n, + formula = args$formula, + size = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ) }, - binomial = .genbinom(n, args$formula, args$variance, args$link, dfSim), - categorical = .gencat(n, args$formula, args$link, dfSim), - exponential = .genexp(n, args$formula, args$link, dfSim), - gamma = .gengamma(n, args$formula, args$variance, args$link, dfSim), - mixture = .genmixture(n, args$formula, dfSim), - negBinomial = .gennegbinom(n, args$formula, args$variance, args$link, dfSim), - nonrandom = .gendeterm(n, args$formula, args$link, dfSim), - normal = .gennorm(n, args$formula, args$variance, args$link, dfSim), - noZeroPoisson = .genpoisTrunc(n, args$formula, args$link, dfSim), - poisson = .genpois(n, args$formula, args$link, dfSim), - uniform = .genunif(n, args$formula, dfSim), - uniformInt = .genUnifInt(n, args$formula, dfSim), + binomial = .genbinom( + n = n, + formula = args$formula, + size = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ), + categorical = .gencat( + n = n, + formula = args$formula, + link = args$link, + dfSim = dfSim, + envir = envir + ), + exponential = .genexp( + n = n, + formula = args$formula, + link = args$link, + dtSim = dfSim, + envir = envir + ), + gamma = .gengamma( + n = n, + formula = args$formula, + dispersion = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ), + mixture = .genmixture( + n = n, + formula = args$formula, + dtSim = dfSim, + envir = envir + ), + negBinomial = .gennegbinom( + n = n, + formula = args$formula, + dispersion = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ), + nonrandom = .gendeterm( + n = n, + formula = args$formula, + link = args$link, + dtSim = dfSim, + envir = envir + ), + normal = .gennorm( + n = n, + formula = args$formula, + variance = args$variance, + link = args$link, + dtSim = dfSim, + envir = envir + ), + noZeroPoisson = .genpoisTrunc( + n = n, + formula = args$formula, + link = args$link, + dtSim = dfSim, + envir = envir + ), + poisson = .genpois( + n = n, + formula = args$formula, + link = args$link, + dtSim = dfSim, + envir = envir + ), + uniform = .genunif( + n = n, + formula = args$formula, + dtSim = dfSim, + envir = envir + ), + uniformInt = .genUnifInt( + n = n, + formula = args$formula, + dtSim = dfSim, + envir = envir + ), default = stop( paste(args$dist, "not a valid distribution. Please check spelling."), call. = FALSE @@ -107,10 +196,11 @@ return(mean) } -.genbeta <- function(n, formula, precision, link = "identity", dtSim) { +# TODO document internal functions +.genbeta <- function(n, formula, precision, link = "identity", dtSim, envir) { mean <- .getBetaMean(dtSim, formula, link, n) - d <- .evalWith(precision, .parseDotVars(precision), dtSim, n) + d <- .evalWith(precision, .parseDotVars(precision, envir), dtSim, n) sr <- betaGetShapes(mean = mean, precision = d) new <- stats::rbeta(n, shape = sr$shape1, shape2 = sr$shape2) @@ -126,9 +216,14 @@ # @param dtSim Incomplete simulated data.table # @return A data.frame column with the updated simulated data -.getBinaryMean <- function(dtSim, formula, Size, link, n = nrow(dtSim)) { - size <- .evalWith(Size, .parseDotVars(Size), dtSim, n) - p <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.getBinaryMean <- function(dtSim, + formula, + size, + link, + n = nrow(dtSim), + envir = parent.frame()) { + size <- .evalWith(size, .parseDotVars(size, envir), dtSim, n) + p <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "logit") { p <- 1 / (1 + exp(-p)) @@ -137,8 +232,15 @@ return(list(p, size)) } -.genbinom <- function(n, formula, Size, link, dtSim) { - params <- .getBinaryMean(dtSim, formula, Size, link, n) +.genbinom <- function(n, formula, size, link, dtSim, envir) { + params <- .getBinaryMean( + dtSim = dtSim, + formula = formula, + size = size, + link = link, + n = n, + envir = envir + ) return(stats::rbinom(n, params[[2]], params[[1]])) } @@ -149,8 +251,10 @@ # @param formula String that specifies the probabilities, each separated by ";" # @param dfSim Incomplete simulated data set # @param idkey Key of incomplete data set +# @param envir Environment the data definitions are evaluated in. +# Defaults to [base::parent.frame]. # @return A data.frame column with the updated simulated data -.gencat <- function(n, formula, link, dfSim) { +.gencat <- function(n, formula, link, dfSim, envir) { formulas <- .splitFormula(formula) if (length(formulas) < 2) { @@ -161,7 +265,7 @@ } parsedProbs <- - .evalWith(formulas, .parseDotVars(formulas), dfSim, n) + .evalWith(formulas, .parseDotVars(formulas, envir), dfSim, n) if (link == "logit") { parsedProbs <- exp(parsedProbs) @@ -182,8 +286,8 @@ # @param dtSim Incomplete simulated data.table # @return A data.frame column with the updated simulated data -.gendeterm <- function(n, formula, link, dtSim) { - new <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.gendeterm <- function(n, formula, link, dtSim, envir) { + new <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "log") { new <- exp(new) @@ -197,9 +301,8 @@ # @param n The number of observations required in the data set # @param formula String that specifies the mean (lambda) # @return A data.frame column with the updated simulated data - -.genexp <- function(n, formula, link = "identity", dtSim) { - mean <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.genexp <- function(n, formula, link = "identity", dtSim, envir) { + mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "log") { mean <- exp(mean) } @@ -215,8 +318,8 @@ # @param formula String that specifies the probabilities # @return A data.frame column with the updated simulated data -.getGammaMean <- function(dtSim, formula, link, n = nrow(dtSim)) { - mean <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.getGammaMean <- function(dtSim, formula, link, n = nrow(dtSim), envir) { + mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "log") { mean <- exp(mean) } @@ -224,9 +327,9 @@ return(mean) } -.gengamma <- function(n, formula, dispersion, link = "identity", dtSim) { - mean <- .getGammaMean(dtSim, formula, link, n) - d <- .evalWith(dispersion, .parseDotVars(dispersion), dtSim, n) +.gengamma <- function(n, formula, dispersion, link = "identity", dtSim, envir) { + mean <- .getGammaMean(dtSim, formula, link, n, envir) + d <- .evalWith(dispersion, .parseDotVars(dispersion, envir), dtSim, n) sr <- gammaGetShapeRate(mean = mean, dispersion = d) new <- stats::rgamma(n, shape = sr$shape, rate = sr$rate) @@ -234,14 +337,14 @@ return(new) } -.genmixture <- function(n, formula, dtSim) { +.genmixture <- function(n, formula, dtSim, envir) { origFormula <- formula formula <- .rmWS(formula) var_pr <- strsplit(formula, "+", fixed = T) var_dt <- strsplit(var_pr[[1]], "|", fixed = T) formDT <- as.data.table(do.call(rbind, var_dt)) ps <- - cumsum(.evalWith(unlist(formDT[, 2]), .parseDotVars(formDT[, 2]))) + cumsum(.evalWith(unlist(formDT[, 2]), .parseDotVars(formDT[, 2], envir))) if (!isTRUE(all.equal(max(ps), 1))) { valueError(origFormula, @@ -271,8 +374,8 @@ # @param formula String that specifies the mean # @return A data.frame column with the updated simulated data -.getNBmean <- function(dtSim, formula, link, n = nrow(dtSim)) { - mean <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.getNBmean <- function(dtSim, formula, link, n = nrow(dtSim), envir = parent.frame()) { + mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "log") { mean <- exp(mean) } @@ -280,8 +383,8 @@ return(mean) } -.gennegbinom <- function(n, formula, dispersion, link = "identity", dtSim) { - mean <- .getNBmean(dtSim, formula, link, n) +.gennegbinom <- function(n, formula, dispersion, link = "identity", dtSim, envir) { + mean <- .getNBmean(dtSim, formula, link, n, envir) d <- as.numeric(as.character(dispersion)) sp <- negbinomGetSizeProb(mean = mean, dispersion = d) @@ -299,13 +402,16 @@ # @param dtSim Incomplete simulated data.table # @return A data.frame column with the updated simulated data -.getNormalMean <- function(dtSim, formula, n = nrow(dtSim)) { - .evalWith(formula, .parseDotVars(formula), dtSim, n) +.getNormalMean <- function(dtSim, + formula, + n = nrow(dtSim), + envir = parent.frame()) { + .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) } -.gennorm <- function(n, formula, variance, link, dtSim) { - mean <- .getNormalMean(dtSim, formula, n) - v <- .evalWith(variance, .parseDotVars(variance), dtSim, n) +.gennorm <- function(n, formula, variance, link, dtSim, envir) { + mean <- .getNormalMean(dtSim, formula, n, envir) + v <- .evalWith(variance, .parseDotVars(variance, envir), dtSim, n) return(stats::rnorm(n, mean, sqrt(v))) } @@ -318,8 +424,8 @@ # @param dtSim Incomplete simulated data.table # @return A data.frame column with the updated simulated data -.getPoissonMean <- function(dtSim, formula, link, n = nrow(dtSim)) { - mean <- .evalWith(formula, .parseDotVars(formula), dtSim, n) +.getPoissonMean <- function(dtSim, formula, link, n = nrow(dtSim), envir = parent.frame()) { + mean <- .evalWith(formula, .parseDotVars(formula, envir), dtSim, n) if (link == "log") { mean <- exp(mean) @@ -328,8 +434,8 @@ return(mean) } -.genpois <- function(n, formula, link, dtSim) { - mean <- .getPoissonMean(dtSim, formula, link, n) +.genpois <- function(n, formula, link, dtSim, envir) { + mean <- .getPoissonMean(dtSim, formula, link, n, envir) return(stats::rpois(n, mean)) } @@ -343,8 +449,8 @@ # @param dtSim Incomplete simulated data.table # @return A data.frame column with the updated simulated data -.genpoisTrunc <- function(n, formula, link, dtSim) { - mean <- .getPoissonMean(dtSim, formula, link, n) +.genpoisTrunc <- function(n, formula, link, dtSim, envir) { + mean <- .getPoissonMean(dtSim, formula, link, n, envir) u <- stats::runif(n, min = 0, max = 1) @@ -362,17 +468,17 @@ # @param dtSim Incomplete simulated data set # @return A data.frame column with the updated simulated data -.genunif <- function(n, formula, dtSim) { +.genunif <- function(n, formula, dtSim, envir) { if (!is.null(dtSim) && n != nrow(dtSim)) { stop("Length mismatch between 'n' and 'dtSim'") } - range <- .parseUnifFormula(formula, dtSim, n) + range <- .parseUnifFormula(formula, dtSim, n, envir) return(stats::runif(n, range$min, range$max)) } -.parseUnifFormula <- function(formula, dtSim, n) { +.parseUnifFormula <- function(formula, dtSim, n, envir) { range <- .splitFormula(formula) if (length(range) != 2) { @@ -384,7 +490,7 @@ ) } - parsedRange <- .evalWith(range, .parseDotVars(range), dtSim, n) + parsedRange <- .evalWith(range, .parseDotVars(range, envir), dtSim, n) r_min <- parsedRange[, 1] r_max <- parsedRange[, 2] @@ -416,8 +522,8 @@ # @param dtSim Incomplete simulated data set # @return A data.frame column with the updated simulated data -.genUnifInt <- function(n, formula, dtSim) { - range <- .parseUnifFormula(formula, dtSim, n) +.genUnifInt <- function(n, formula, dtSim, envir) { + range <- .parseUnifFormula(formula, dtSim, n, envir) if (any(!sapply(range, function(x) floor(x) == x))) { stop(paste( @@ -429,4 +535,4 @@ unifCont <- stats::runif(n, range$min, range$max + 1) return(as.integer(floor(unifCont))) -} +} \ No newline at end of file diff --git a/R/internal_utility.R b/R/internal_utility.R index 1bb58803..c6976cb5 100644 --- a/R/internal_utility.R +++ b/R/internal_utility.R @@ -13,13 +13,13 @@ #' .parseDotVars(c("a + ..extVar1", "b + ..extVar2")) #' .parseDotVars(data.frame("a + ..extVar1", "b + ..extVar2")) #' @noRd -.parseDotVars <- function(formula) { +.parseDotVars <- function(formula, envir = parent.frame()) { vars <- all.vars(parse(text = formula)) dotVars <- startsWith(vars, "..") # TODO clarify inheritance in case of non globalEnvs in documentation varValues <- mget( sub("..", "", vars[dotVars]), - envir = parent.frame(), + envir = envir, inherits = TRUE, ifnotfound = NA ) diff --git a/R/simstudy-package.R b/R/simstudy-package.R index b184d178..36397309 100644 --- a/R/simstudy-package.R +++ b/R/simstudy-package.R @@ -51,7 +51,7 @@ NULL #' `p_1,...,p_n`. For more information see #' [rdatagen.net](https://www.rdatagen.net/post/adding-mixture-distributions-to-simstudy/). #' @examples -#' ext_var <<- 2.9 +#' ext_var <- 2.9 #' def <- defData(varname = "external", formula = "3 + log(..ext_var)", variance = .5) #' def #' genData(5, def) diff --git a/tests/testthat/test-generate_dist.R b/tests/testthat/test-generate_dist.R index 3b45ce8b..77fcd08c 100644 --- a/tests/testthat/test-generate_dist.R +++ b/tests/testthat/test-generate_dist.R @@ -1,7 +1,7 @@ # .gencat ---- test_that(".gencat throws errors", { - expect_error(.gencat(n, "1;", "identity", NULL), "two probabilities") - expect_error(.gencat(n, "1; ", "identity", NULL), "two probabilities") + expect_error(.gencat(n, "1;", "identity", NULL, environment()), "two probabilities") + expect_error(.gencat(n, "1; ", "identity", NULL, environment()), "two probabilities") }) # .genunif ---- @@ -12,10 +12,10 @@ test_that("unif data is generated as expected.", { dt <- genData(n, def) dterr <- genData(n - 5, def) - expect_error(.genunif(n, "test;test2", dterr), "Length mismatch") - expect_length(.genunif(n, "test;test2", dt), n) - expect_true(all(!is.na(.genunif(n, "test;test2", dt)))) - expect_length(.genunif(n, "1.3;100.2", dt), n) + expect_error(.genunif(n, "test;test2", dterr, environment()), "Length mismatch") + expect_length(.genunif(n, "test;test2", dt, environment()), n) + expect_true(all(!is.na(.genunif(n, "test;test2", dt, environment())))) + expect_length(.genunif(n, "1.3;100.2", dt, environment()), n) }) test_that("'uniform' formula checked correctly", { @@ -28,14 +28,14 @@ test_that("'uniform' formula checked correctly", { x }), function(x) { - expect_silent(.genunif(x$n, x$range, NULL)) + expect_silent(.genunif(x$n, x$range, NULL, environment())) } ) - expect_warning(.genunif(3, "1;1", NULL), "are equal") - expect_error(.genunif(3, "", NULL), "format") - expect_error(.genunif(3, "1;2;3", NULL), "format") - expect_error(.genunif(3, "2;1", NULL), "'max' < 'min'") + expect_warning(.genunif(3, "1;1", NULL, environment()), "are equal") + expect_error(.genunif(3, "", NULL, environment()), "format") + expect_error(.genunif(3, "1;2;3", NULL, environment()), "format") + expect_error(.genunif(3, "2;1", NULL, environment()), "'max' < 'min'") }) # .genUnifInt ---- @@ -47,9 +47,9 @@ test_that("unifInt data is generated as expected.", { dt <- genData(n, def) dt$test2 <- ceiling(dt$test2) - expect_length(.genUnifInt(n, "test;test2", dt), n) - expect_true(all(!is.na(.genUnifInt(n, "test;test2", dt)))) - expect_length(.genUnifInt(n, "1;100", dt), n) + expect_length(.genUnifInt(n, "test;test2", dt, environment()), n) + expect_true(all(!is.na(.genUnifInt(n, "test;test2", dt, environment())))) + expect_length(.genUnifInt(n, "1;100", dt, environment()), n) }) test_that("'uniformInt' formula checked correctly", { @@ -62,9 +62,9 @@ test_that("'uniformInt' formula checked correctly", { x }), function(x) { - expect_silent(.genUnifInt(x$n, x$range, NULL)) + expect_silent(.genUnifInt(x$n, x$range, NULL, environment())) } ) - expect_error(.genUnifInt(3, "1.1;2.4", NULL), "must be integer") + expect_error(.genUnifInt(3, "1.1;2.4", NULL, environment()), "must be integer") }) From cf8b89e555ce8f3cd6605cdbd5971d0a56057855 Mon Sep 17 00:00:00 2001 From: assignUser Date: Sat, 3 Oct 2020 16:09:34 +0200 Subject: [PATCH 2/4] Document --- NEWS.md | 2 +- man/addColumns.Rd | 5 ++++- man/addCondition.Rd | 5 ++++- man/addCorFlex.Rd | 15 +++++++++++++-- man/defData.Rd | 2 +- man/distributions.Rd | 2 +- man/genData.Rd | 7 +++++-- 7 files changed, 29 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4247db22..cbb17a9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * Moved genCorOrdCat's functionality into genOrdCat. genCorOrdCat is now deprecated. * Introduced a new system for formula definitions and completely reworked the - underlying code. See vignette "NAME". + underlying code. See vignette "double-dot". * Added new vignettes. * Created documentation page: https://kgoldfeld.github.io/simstudy/ * Fixed bug in trtAssign related new ratio argument. diff --git a/man/addColumns.Rd b/man/addColumns.Rd index 982dac8f..b1d41825 100644 --- a/man/addColumns.Rd +++ b/man/addColumns.Rd @@ -4,12 +4,15 @@ \alias{addColumns} \title{Add columns to existing data set} \usage{ -addColumns(dtDefs, dtOld) +addColumns(dtDefs, dtOld, envir = parent.frame()) } \arguments{ \item{dtDefs}{name of definitions for added columns} \item{dtOld}{name of data table that is to be updated} + +\item{envir}{Environment the data definitions are evaluated in. +Defaults to \link[base:sys.parent]{base::parent.frame}.} } \value{ an updated data.table that contains the added simulated data diff --git a/man/addCondition.Rd b/man/addCondition.Rd index 124fe2af..97fb63c6 100644 --- a/man/addCondition.Rd +++ b/man/addCondition.Rd @@ -4,7 +4,7 @@ \alias{addCondition} \title{Add a single column to existing data set based on a condition} \usage{ -addCondition(condDefs, dtOld, newvar) +addCondition(condDefs, dtOld, newvar, envir = parent.frame()) } \arguments{ \item{condDefs}{Name of definitions for added column} @@ -12,6 +12,9 @@ addCondition(condDefs, dtOld, newvar) \item{dtOld}{Name of data table that is to be updated} \item{newvar}{Name of new column to add} + +\item{envir}{Environment the data definitions are evaluated in. +Defaults to \link[base:sys.parent]{base::parent.frame}.} } \value{ An updated data.table that contains the added simulated data diff --git a/man/addCorFlex.Rd b/man/addCorFlex.Rd index c08a3f87..81af26ef 100644 --- a/man/addCorFlex.Rd +++ b/man/addCorFlex.Rd @@ -4,12 +4,20 @@ \alias{addCorFlex} \title{Create multivariate (correlated) data - for general distributions} \usage{ -addCorFlex(dt, defs, rho = 0, tau = NULL, corstr = "cs", corMatrix = NULL) +addCorFlex( + dt, + defs, + rho = 0, + tau = NULL, + corstr = "cs", + corMatrix = NULL, + envir = parent.frame() +) } \arguments{ \item{dt}{Data table that will be updated.} -\item{defs}{Field definition table created by function `defDataAdd`.} +\item{defs}{Field definition table created by function \code{defDataAdd}.} \item{rho}{Correlation coefficient, -1 <= rho <= 1. Use if corMatrix is not provided.} @@ -26,6 +34,9 @@ and "ar1" for an autoregressive structure. Defaults to "cs".} symmetrical and positive semi-definite. It is not a required field; if a matrix is not provided, then a structure and correlation coefficient rho must be specified.} + +\item{envir}{Environment the data definitions are evaluated in. +Defaults to \link[base:sys.parent]{base::parent.frame}.} } \value{ data.table with added column(s) of correlated data diff --git a/man/defData.Rd b/man/defData.Rd index c184cc45..b01ee4ab 100644 --- a/man/defData.Rd +++ b/man/defData.Rd @@ -39,7 +39,7 @@ Add single row to definitions table The possible data distributions are: normal, binary, binomial, poisson, noZeroPoisson, uniform, categorical, gamma, beta, nonrandom, uniformInt, negBinomial, exponential, mixture. } \examples{ -extVar <<- 2.3 +extVar <- 2.3 def <- defData(varname = "xNr", dist = "nonrandom", formula = 7, id = "idnum") def <- defData(def, varname = "xUni", dist = "uniform", formula = "10;20") def <- defData(def, diff --git a/man/distributions.Rd b/man/distributions.Rd index c1918181..3630b8cd 100644 --- a/man/distributions.Rd +++ b/man/distributions.Rd @@ -62,7 +62,7 @@ part of the new distribution \verb{x_1,...,X_n} is assigned a probability } \examples{ -ext_var <<- 2.9 +ext_var <- 2.9 def <- defData(varname = "external", formula = "3 + log(..ext_var)", variance = .5) def genData(5, def) diff --git a/man/genData.Rd b/man/genData.Rd index 7620efa9..f5dbd422 100644 --- a/man/genData.Rd +++ b/man/genData.Rd @@ -4,16 +4,19 @@ \alias{genData} \title{Calling function to simulate data} \usage{ -genData(n, dtDefs = NULL, id = "id") +genData(n, dtDefs = NULL, id = "id", envir = parent.frame()) } \arguments{ \item{n}{the number of observations required in the data set.} \item{dtDefs}{name of definitions data.table/data.frame. If no definitions - are provided +are provided a data set with ids only is generated.} \item{id}{The string defining the id of the record} + +\item{envir}{Environment the data definitions are evaluated in. +Defaults to \link[base:sys.parent]{base::parent.frame}.} } \value{ A data.table that contains the simulated data. From d474356f9966bc22ef75569330984e25df509a57 Mon Sep 17 00:00:00 2001 From: assignUser Date: Sat, 3 Oct 2020 16:33:44 +0200 Subject: [PATCH 3/4] test pkgdown dev version --- DESCRIPTION | 2 +- _pkgdown.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index dde5ef48..a2983e80 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: simstudy Title: Simulation of Study Data -Version: 0.2.0 +Version: 0.1.16.9000 Date: 2020-09-25 Authors@R: c(person(given = "Keith", diff --git a/_pkgdown.yml b/_pkgdown.yml index 10633a71..4b3d7ee5 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,6 +3,8 @@ useurl: https://kgoldfeld.github.io/simstudy authors: Keith Goldfeld: href: "https://www.rdatagen.net/" +development: + mode: auto reference: - title: Define Data - contents: From 2005fafa47da1f4d2c67639210c8f1b70ce912b9 Mon Sep 17 00:00:00 2001 From: assignUser Date: Sat, 3 Oct 2020 16:40:47 +0200 Subject: [PATCH 4/4] adjust action --- .github/workflows/pkgdown.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 1f83b70e..a1a65781 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -1,6 +1,8 @@ on: push: - branches: release + branches: + - main + - release name: pkgdown