Skip to content

Commit

Permalink
Release v3.5.1
Browse files Browse the repository at this point in the history
Release v3.5.1
  • Loading branch information
anthonysena authored Apr 22, 2024
2 parents 4a10c69 + d2aa800 commit a93992b
Show file tree
Hide file tree
Showing 87 changed files with 1,006 additions and 811 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: FeatureExtraction
Type: Package
Title: Generating Features for a Cohort
Version: 3.5.0
Date: 2024-04-18
Version: 3.5.1
Date: 2024-04-22
Authors@R: c(
person("Martijn", "Schuemie", , "[email protected]", role = c("aut")),
person("Marc", "Suchard", role = c("aut")),
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
FeatureExtraction 3.5.1
=======================

Bug Fixes:

- Minimum threshold for covariate mean should be inclusive of the value parameter (#249)

FeatureExtraction 3.5.0
=======================

Expand Down
36 changes: 22 additions & 14 deletions R/CovariateData.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,20 +279,28 @@ createEmptyCovariateData <- function(cohortIds, aggregated, temporal) {
if (!is.null(temporal) && temporal) {
dummy$timeId <- 1
}
covariateData <- Andromeda::andromeda(covariates = dummy[!1, ],
covariateRef = tibble(covariateId = 1,
covariateName = "",
analysisId = 1,
conceptId = 1)[!1, ],
analysisRef = tibble(analysisId = 1,
analysisName = "",
domainId = "",
startDay = 1,
endDay = 1,
isBinary = "",
missingMeansZero = "")[!1, ])
attr(covariateData, "metaData") <- list(populationSize = 0,
cohortIds = cohortIds)
covariateData <- Andromeda::andromeda(
covariates = dummy[!1, ],
covariateRef = tibble(
covariateId = 1,
covariateName = "",
analysisId = 1,
conceptId = 1
)[!1, ],
analysisRef = tibble(
analysisId = 1,
analysisName = "",
domainId = "",
startDay = 1,
endDay = 1,
isBinary = "",
missingMeansZero = ""
)[!1, ]
)
attr(covariateData, "metaData") <- list(
populationSize = 0,
cohortIds = cohortIds
)
class(covariateData) <- "CovariateData"
return(covariateData)
}
50 changes: 29 additions & 21 deletions R/GetCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
#' of the createCovariate functions, or a list of such objects.
#' @param aggregated Should aggregate statistics be computed instead of covariates per
#' cohort entry?
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#'
Expand Down Expand Up @@ -115,13 +115,13 @@ getDbCovariateData <- function(connectionDetails = NULL,
if (cdmVersion == "4") {
stop("CDM version 4 is not supported any more")
}
if (!missing(cohortId)) {
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
errorMessages <- checkmate::makeAssertCollection()
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, upper = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)
if (!is.null(connectionDetails)) {
connection <- DatabaseConnector::connect(connectionDetails)
Expand All @@ -137,12 +137,16 @@ getDbCovariateData <- function(connectionDetails = NULL,
cohortDatabaseSchemaTable <- paste(cohortDatabaseSchema, cohortTable, sep = ".")
}
sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? {WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;"
sql <- SqlRender::render(sql = sql,
cohort_database_schema_table = cohortDatabaseSchemaTable,
cohort_ids = cohortIds)
sql <- SqlRender::translate(sql = sql,
targetDialect = attr(connection, "dbms"),
oracleTempSchema = oracleTempSchema)
sql <- SqlRender::render(
sql = sql,
cohort_database_schema_table = cohortDatabaseSchemaTable,
cohort_ids = cohortIds
)
sql <- SqlRender::translate(
sql = sql,
targetDialect = attr(connection, "dbms"),
oracleTempSchema = oracleTempSchema
)
temp <- DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE)
if (aggregated) {
populationSize <- temp$populationSize
Expand All @@ -164,16 +168,18 @@ getDbCovariateData <- function(connectionDetails = NULL,
}
for (i in 1:length(covariateSettings)) {
fun <- attr(covariateSettings[[i]], "fun")
args <- list(connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortTable = cohortDatabaseSchemaTable,
cohortIds = cohortIds,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = covariateSettings[[i]],
aggregated = aggregated,
minCharacterizationMean = minCharacterizationMean)
args <- list(
connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortTable = cohortDatabaseSchemaTable,
cohortIds = cohortIds,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = covariateSettings[[i]],
aggregated = aggregated,
minCharacterizationMean = minCharacterizationMean
)
tempCovariateData <- do.call(eval(parse(text = fun)), args)
if (is.null(covariateData)) {
covariateData <- tempCovariateData
Expand All @@ -199,8 +205,10 @@ getDbCovariateData <- function(connectionDetails = NULL,
attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]]
} else {
attr(covariateData, "metaData")[[name]] <- list(
c(unlist(attr(covariateData, "metaData")[[name]]),
attr(tempCovariateData, "metaData")[[name]])
c(
unlist(attr(covariateData, "metaData")[[name]]),
attr(tempCovariateData, "metaData")[[name]]
)
)
}
}
Expand Down
10 changes: 5 additions & 5 deletions R/GetCovariatesFromCohortAttributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ getDbCohortAttrCovariatesData <- function(connection,
if (cdmVersion == "4") {
stop("Common Data Model version 4 is not supported")
}
if (!missing(cohortId)) {
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
Expand Down Expand Up @@ -160,10 +160,10 @@ getDbCohortAttrCovariatesData <- function(connection,
writeLines(paste("Loading took", signif(delta, 3), attr(delta, "units")))

result <- createEmptyCovariateData(cohortIds, aggregated, covariateSettings$temporal)
result$covariates = covariates
result$covariateRef = covariateRef
result$analysisRef = analysisRef
result$covariates <- covariates
result$covariateRef <- covariateRef
result$analysisRef <- analysisRef

return(result)
}

Expand Down
6 changes: 3 additions & 3 deletions R/GetCovariatesFromOtherCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#' @param covariateSettings An object of type \code{covariateSettings} as created using the
#' \code{\link{createCohortBasedCovariateSettings}} or
#' \code{\link{createCohortBasedTemporalCovariateSettings}} functions.
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#' @template GetCovarParams
Expand Down Expand Up @@ -50,9 +50,9 @@ getDbCohortBasedCovariatesData <- function(connection,
checkmate::assertClass(covariateSettings, "covariateSettings", add = errorMessages)
checkmate::assertLogical(aggregated, len = 1, add = errorMessages)
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, upper = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)
if (!missing(cohortId)) {
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
Expand Down
23 changes: 12 additions & 11 deletions R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#' it is a temp table, do not specify \code{targetDatabaseSchema}.
#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored.
#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored.
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#'
Expand Down Expand Up @@ -79,15 +79,15 @@ getDbDefaultCovariateData <- function(connection,
if (!missing(targetCovariateTable) && !is.null(targetCovariateTable) && aggregated) {
stop("Writing aggregated results to database is currently not supported")
}
if (!missing(cohortId)) {
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
errorMessages <- checkmate::makeAssertCollection()
minCharacterizationMean <- utils::type.convert(minCharacterizationMean, as.is = TRUE)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, add = errorMessages)
checkmate::assertNumeric(x = minCharacterizationMean, lower = 0, upper = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)

settings <- .toJson(covariateSettings)
rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction"))
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema)
Expand Down Expand Up @@ -152,7 +152,6 @@ getDbDefaultCovariateData <- function(connection,
andromedaTableName = "covariatesContinuous",
snakeCaseToCamelCase = TRUE
)
filterCovariateDataCovariates(covariateData, "covariatesContinuous", minCharacterizationMean)
}

# Covariate reference
Expand Down Expand Up @@ -275,9 +274,11 @@ getDbDefaultCovariateData <- function(connection,
attr(covariateData, "metaData") <- list()
if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) {
warning("No data found, probably because no covariates were specified.")
covariateData <- createEmptyCovariateData(cohortIds = cohortIds,
aggregated = aggregated,
temporal = covariateSettings$temporal)
covariateData <- createEmptyCovariateData(
cohortIds = cohortIds,
aggregated = aggregated,
temporal = covariateSettings$temporal
)
}
class(covariateData) <- "CovariateData"
attr(class(covariateData), "package") <- "FeatureExtraction"
Expand All @@ -289,12 +290,12 @@ getDbDefaultCovariateData <- function(connection,
#'
#' @param covariateData The covariate data
#' @param covariatesName The name of the covariates object inside the covariateData
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
filterCovariateDataCovariates <- function(covariateData, covariatesName, minCharacterizationMean = 0) {
if ("averageValue" %in% colnames(covariateData[[covariatesName]]) && minCharacterizationMean != 0) {
covariateData[[covariatesName]] <- covariateData[[covariatesName]] %>%
dplyr::filter(.data$averageValue > minCharacterizationMean)
dplyr::filter(.data$averageValue >= minCharacterizationMean)
}
}
}
6 changes: 3 additions & 3 deletions R/HelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ filterByRowId <- function(covariateData, rowIds) {
#' }
#'
#' @export
filterByCohortDefinitionId <- function(covariateData,
filterByCohortDefinitionId <- function(covariateData,
cohortId = 1,
cohortIds = c(1)) {
if (!isCovariateData(covariateData)) {
Expand All @@ -98,11 +98,11 @@ filterByCohortDefinitionId <- function(covariateData,
if (!isAggregatedCovariateData(covariateData)) {
stop("Can only filter aggregated data by cohortIds")
}
if (!missing(cohortId)) {
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}

if (is.null(covariateData$covariates)) {
covariates <- NULL
} else {
Expand Down
31 changes: 17 additions & 14 deletions R/UnitTestHelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
#' of the createCovariate functions, or a list of such objects.
#' @param aggregated Should aggregate statistics be computed instead of covariates per
#' cohort entry?
#' @param minCharacterizationMean The minimum mean value for characterization output. Values below this will be cut off from output. This
#' @param minCharacterizationMean The minimum mean value for binary characterization output. Values below this will be cut off from output. This
#' will help reduce the file size of the characterization output, but will remove information
#' on covariates that have very low values. The default is 0.
#' @return
Expand Down Expand Up @@ -107,20 +107,23 @@
}

# Some SQL to construct the covariate:
sql <- paste("SELECT @row_id_field AS row_id, 1 AS covariate_id,",
"DATEDIFF(DAY, observation_period_start_date, cohort_start_date)",
"AS covariate_value",
"FROM @cohort_table c",
"INNER JOIN @cdm_database_schema.observation_period op",
"ON op.person_id = c.subject_id",
"WHERE cohort_start_date >= observation_period_start_date",
"AND cohort_start_date <= observation_period_end_date",
"{@cohort_ids != -1} ? {AND cohort_definition_id IN @cohort_ids}")
sql <- paste(
"SELECT @row_id_field AS row_id, 1 AS covariate_id,",
"DATEDIFF(DAY, observation_period_start_date, cohort_start_date)",
"AS covariate_value",
"FROM @cohort_table c",
"INNER JOIN @cdm_database_schema.observation_period op",
"ON op.person_id = c.subject_id",
"WHERE cohort_start_date >= observation_period_start_date",
"AND cohort_start_date <= observation_period_end_date",
"{@cohort_ids != -1} ? {AND cohort_definition_id IN @cohort_ids}"
)
sql <- SqlRender::render(sql,
cohort_table = cohortTable,
cohort_ids = cohortIds,
row_id_field = rowIdField,
cdm_database_schema = cdmDatabaseSchema)
cohort_table = cohortTable,
cohort_ids = cohortIds,
row_id_field = rowIdField,
cdm_database_schema = cdmDatabaseSchema
)
sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms"))
# Retrieve the covariate:
covariates <- DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE)
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/articles/CreatingCovariatesBasedOnOtherCohorts.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a93992b

Please sign in to comment.