Skip to content

Commit

Permalink
Merge pull request #193 from OHDSI/cohortIds
Browse files Browse the repository at this point in the history
Deprecate cohortId and use cohortIds
  • Loading branch information
ginberg authored Jan 16, 2024
2 parents be11ea5 + b7c342e commit 8b0e7d7
Show file tree
Hide file tree
Showing 45 changed files with 730 additions and 787 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export(createTable1)
export(createTable1CovariateSettings)
export(createTemporalCovariateSettings)
export(createTemporalSequenceCovariateSettings)
export(filterByCohortDefinitionId)
export(filterByCohortDefinitionIds)
export(filterByRowId)
export(getDbCohortAttrCovariatesData)
export(getDbCohortBasedCovariatesData)
Expand Down
2 changes: 1 addition & 1 deletion R/Aggregation.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' @examples
#' \dontrun{
#' covariateData <- FeatureExtraction:::createEmptyCovariateData(
#' cohortId = 1,
#' cohortIds = 1,
#' aggregated = FALSE,
#' temporal = FALSE
#' )
Expand Down
65 changes: 30 additions & 35 deletions R/CovariateData.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ setClass("CovariateData", contains = "Andromeda")
#' @examples
#' \dontrun{
#' covariateData <- FeatureExtraction:::createEmptyCovariateData(
#' cohortId = 1,
#' cohortIds = 1,
#' aggregated = FALSE,
#' temporal = FALSE
#' )
Expand Down Expand Up @@ -128,13 +128,15 @@ loadCovariateData <- function(file, readOnly) {
setMethod("show", "CovariateData", function(object) {
cli::cat_line(pillar::style_subtle("# CovariateData object"))
cli::cat_line("")
cohortId <- attr(object, "metaData")$cohortId
if (length(cohortId) > 1) {
cli::cat_line(paste("Cohorts of interest IDs:", paste(cohortId, collapse = ", ")))
} else if (cohortId == -1) {
cli::cat_line("All cohorts")
} else {
cli::cat_line(paste("Cohort of interest ID:", cohortId))
cohortIds <- attr(object, "metaData")$cohortIds
if (!is.null(cohortIds)) {
if (length(cohortIds) > 1) {
cli::cat_line(paste("Cohorts of interest IDs:", paste(cohortIds, collapse = ", ")))
} else if (cohortIds == -1) {
cli::cat_line("All cohorts")
} else {
cli::cat_line(paste("Cohort of interest ID:", cohortIds))
}
}
cli::cat_line("")
cli::cat_line(pillar::style_subtle("Inherits from Andromeda:"))
Expand Down Expand Up @@ -204,7 +206,7 @@ isCovariateData <- function(x) {
#' @examples
#' \dontrun{
#' covariateData <- FeatureExtraction:::createEmptyCovariateData(
#' cohortId = 1,
#' cohortIds = 1,
#' aggregated = FALSE,
#' temporal = FALSE
#' )
Expand Down Expand Up @@ -232,7 +234,7 @@ isAggregatedCovariateData <- function(x) {
#' @examples
#' \dontrun{
#' covariateData <- FeatureExtraction:::createEmptyCovariateData(
#' cohortId = 1,
#' cohortIds = 1,
#' aggregated = FALSE,
#' temporal = FALSE
#' )
Expand All @@ -252,20 +254,21 @@ isTemporalCovariateData <- function(x) {

#' Creates an empty covariate data object
#'
#' @param cohortId cohort number
#' @param cohortIds For which cohort IDs should the covariate data be created?
#' @param aggregated if the data should be aggregated
#' @param temporal if the data is temporary
#'
#' @examples
#' \dontrun{
#' covariateData <- FeatureExtraction:::createEmptyCovariateData(
#' cohortId = 1,
#' cohortIds = 1,
#' aggregated = FALSE,
#' temporal = FALSE
#' )
#' }
#' @return the empty CovariateData object
#'
createEmptyCovariateData <- function(cohortId, aggregated, temporal) {
createEmptyCovariateData <- function(cohortIds, aggregated, temporal) {
dummy <- tibble(
covariateId = 1,
covariateValue = 1
Expand All @@ -276,28 +279,20 @@ createEmptyCovariateData <- function(cohortId, aggregated, temporal) {
if (!is.null(temporal) && temporal) {
dummy$timeId <- 1
}
covariateData <- Andromeda::andromeda(
covariates = dummy[!1, ],
covariateRef = tibble(
covariateId = 1,
covariateName = "",
analysisId = 1,
conceptId = 1
)[!1, ],
analysisRef = tibble(
analysisId = 1,
analysisName = "",
domainId = "",
startDay = 1,
endDay = 1,
isBinary = "",
missingMeansZero = ""
)[!1, ]
)
attr(covariateData, "metaData") <- list(
populationSize = 0,
cohortId = cohortId
)
covariateData <- Andromeda::andromeda(covariates = dummy[!1, ],
covariateRef = tibble(covariateId = 1,
covariateName = "",
analysisId = 1,
conceptId = 1)[!1, ],
analysisRef = tibble(analysisId = 1,
analysisName = "",
domainId = "",
startDay = 1,
endDay = 1,
isBinary = "",
missingMeansZero = "")[!1, ])
attr(covariateData, "metaData") <- list(populationSize = 0,
cohortIds = cohortIds)
class(covariateData) <- "CovariateData"
return(covariateData)
}
54 changes: 28 additions & 26 deletions R/GetCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@
#' specify both the database and the schema, so for example
#' 'cdm_instance.dbo'.
#' @param cohortTableIsTemp Is the cohort table a temp table?
#' @param cohortId For which cohort ID(s) should covariates be constructed? If set to -1,
#' @param cohortId DEPRECATED:For which cohort ID(s) should covariates be constructed? If set to -1,
#' covariates will be constructed for all cohorts in the specified cohort
#' table.
#' @param cohortIds For which cohort ID(s) should covariates be constructed? If set to c(-1),
#' covariates will be constructed for all cohorts in the specified cohort
#' table.
#' @param rowIdField The name of the field in the cohort table that is to be used as the
Expand Down Expand Up @@ -78,7 +81,7 @@
#' cohortTable = "cohort",
#' cohortDatabaseSchema = "main",
#' cohortTableIsTemp = FALSE,
#' cohortId = -1,
#' cohortIds = -1,
#' rowIdField = "subject_id",
#' covariateSettings = covSettings,
#' aggregated = FALSE
Expand All @@ -95,6 +98,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
cohortDatabaseSchema = cdmDatabaseSchema,
cohortTableIsTemp = FALSE,
cohortId = -1,
cohortIds = c(-1),
rowIdField = "subject_id",
covariateSettings,
aggregated = FALSE) {
Expand All @@ -107,6 +111,10 @@ getDbCovariateData <- function(connectionDetails = NULL,
if (cdmVersion == "4") {
stop("CDM version 4 is not supported any more")
}
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
if (!is.null(connectionDetails)) {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
Expand All @@ -120,17 +128,13 @@ getDbCovariateData <- function(connectionDetails = NULL,
} else {
cohortDatabaseSchemaTable <- paste(cohortDatabaseSchema, cohortTable, sep = ".")
}
sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_id != -1} ? {WHERE cohort_definition_id IN (@cohort_id)} GROUP BY cohort_definition_id;"
sql <- SqlRender::render(
sql = sql,
cohort_database_schema_table = cohortDatabaseSchemaTable,
cohort_id = cohortId
)
sql <- SqlRender::translate(
sql = sql,
targetDialect = attr(connection, "dbms"),
oracleTempSchema = oracleTempSchema
)
sql <- "SELECT cohort_definition_id, COUNT_BIG(*) AS population_size FROM @cohort_database_schema_table {@cohort_ids != -1} ? {WHERE cohort_definition_id IN (@cohort_ids)} GROUP BY cohort_definition_id;"
sql <- SqlRender::render(sql = sql,
cohort_database_schema_table = cohortDatabaseSchemaTable,
cohort_ids = cohortIds)
sql <- SqlRender::translate(sql = sql,
targetDialect = attr(connection, "dbms"),
oracleTempSchema = oracleTempSchema)
temp <- DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE)
if (aggregated) {
populationSize <- temp$populationSize
Expand All @@ -139,7 +143,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
populationSize <- sum(temp$populationSize)
}
if (sum(populationSize) == 0) {
covariateData <- createEmptyCovariateData(cohortId, aggregated, covariateSettings$temporal)
covariateData <- createEmptyCovariateData(cohortIds, aggregated, covariateSettings$temporal)
warning("Population is empty. No covariates were constructed")
} else {
if (inherits(covariateSettings, "covariateSettings")) {
Expand All @@ -152,17 +156,15 @@ getDbCovariateData <- function(connectionDetails = NULL,
}
for (i in 1:length(covariateSettings)) {
fun <- attr(covariateSettings[[i]], "fun")
args <- list(
connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortTable = cohortDatabaseSchemaTable,
cohortId = cohortId,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = covariateSettings[[i]],
aggregated = aggregated
)
args <- list(connection = connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortTable = cohortDatabaseSchemaTable,
cohortIds = cohortIds,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
covariateSettings = covariateSettings[[i]],
aggregated = aggregated)
tempCovariateData <- do.call(eval(parse(text = fun)), args)
if (is.null(covariateData)) {
covariateData <- tempCovariateData
Expand Down Expand Up @@ -197,7 +199,7 @@ getDbCovariateData <- function(connectionDetails = NULL,
}
}
attr(covariateData, "metaData")$populationSize <- populationSize
attr(covariateData, "metaData")$cohortId <- cohortId
attr(covariateData, "metaData")$cohortIds <- cohortIds
}
return(covariateData)
}
17 changes: 11 additions & 6 deletions R/GetCovariatesFromCohortAttributes.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
#' cdmDatabaseSchema = "main",
#' cdmVersion = "5",
#' cohortTable = "cohort",
#' cohortId = 1,
#' cohortIds = 1,
#' rowIdField = "subject_id",
#' covariateSettings = covariateSettings,
#' aggregated = FALSE
Expand All @@ -74,6 +74,7 @@ getDbCohortAttrCovariatesData <- function(connection,
cdmDatabaseSchema,
cohortTable = "#cohort_person",
cohortId = -1,
cohortIds = c(-1),
cdmVersion = "5",
rowIdField = "subject_id",
covariateSettings,
Expand All @@ -84,6 +85,10 @@ getDbCohortAttrCovariatesData <- function(connection,
if (cdmVersion == "4") {
stop("Common Data Model version 4 is not supported")
}
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
start <- Sys.time()
writeLines("Constructing covariates from cohort attributes table")

Expand Down Expand Up @@ -154,11 +159,11 @@ getDbCohortAttrCovariatesData <- function(connection,
delta <- Sys.time() - start
writeLines(paste("Loading took", signif(delta, 3), attr(delta, "units")))

result <- createEmptyCovariateData(cohortId, aggregated, covariateSettings$temporal)
result$covariates <- covariates
result$covariateRef <- covariateRef
result$analysisRef <- analysisRef

result <- createEmptyCovariateData(cohortIds, aggregated, covariateSettings$temporal)
result$covariates = covariates
result$covariateRef = covariateRef
result$analysisRef = analysisRef
return(result)
}

Expand Down
9 changes: 7 additions & 2 deletions R/GetCovariatesFromOtherCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ getDbCohortBasedCovariatesData <- function(connection,
cdmDatabaseSchema,
cohortTable = "#cohort_person",
cohortId = -1,
cohortIds = c(-1),
cdmVersion = "5",
rowIdField = "subject_id",
covariateSettings,
Expand All @@ -46,6 +47,10 @@ getDbCohortBasedCovariatesData <- function(connection,
checkmate::assertClass(covariateSettings, "covariateSettings", add = errorMessages)
checkmate::assertLogical(aggregated, len = 1, add = errorMessages)
checkmate::reportAssertions(collection = errorMessages)
if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}

start <- Sys.time()
message("Constructing covariates from other cohorts")
Expand Down Expand Up @@ -130,10 +135,10 @@ getDbCohortBasedCovariatesData <- function(connection,
oracleTempSchema = oracleTempSchema,
cdmDatabaseSchema = cdmDatabaseSchema,
cohortTable = cohortTable,
cohortId = cohortId,
cohortIds = cohortIds,
cdmVersion = cdmVersion,
rowIdField = rowIdField,
detailledSettings,
covariateSettings = detailledSettings,
aggregated = aggregated
)

Expand Down
16 changes: 9 additions & 7 deletions R/GetDefaultCovariates.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ getDbDefaultCovariateData <- function(connection,
cdmDatabaseSchema,
cohortTable = "#cohort_person",
cohortId = -1,
cohortIds = c(-1),
cdmVersion = "5",
rowIdField = "subject_id",
covariateSettings,
Expand All @@ -74,10 +75,13 @@ getDbDefaultCovariateData <- function(connection,
if (!missing(targetCovariateTable) && !is.null(targetCovariateTable) && aggregated) {
stop("Writing aggregated results to database is currently not supported")
}

if (!missing(cohortId)) {
warning("cohortId argument has been deprecated, please use cohortIds")
cohortIds <- cohortId
}
settings <- .toJson(covariateSettings)
rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction"))
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortId)), cdmDatabaseSchema)
json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortIds)), cdmDatabaseSchema)
todo <- .fromJson(json)
if (length(todo$tempTables) != 0) {
ParallelLogger::logInfo("Sending temp tables to server")
Expand Down Expand Up @@ -260,11 +264,9 @@ getDbDefaultCovariateData <- function(connection,
attr(covariateData, "metaData") <- list()
if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) {
warning("No data found, probably because no covariates were specified.")
covariateData <- createEmptyCovariateData(
cohortId = cohortId,
aggregated = aggregated,
temporal = covariateSettings$temporal
)
covariateData <- createEmptyCovariateData(cohortIds = cohortIds,
aggregated = aggregated,
temporal = covariateSettings$temporal)
}
class(covariateData) <- "CovariateData"
attr(class(covariateData), "package") <- "FeatureExtraction"
Expand Down
Loading

0 comments on commit 8b0e7d7

Please sign in to comment.