Skip to content

Commit

Permalink
Merge pull request #457 from OHDSI/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
katy-sadowski authored May 21, 2023
2 parents 162e709 + 7a2d0e5 commit 741f748
Show file tree
Hide file tree
Showing 63 changed files with 8,398 additions and 158 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: DataQualityDashboard
Type: Package
Title: Execute and View Data Quality Checks on OMOP CDM Database
Version: 2.2.0
Date: 2023-05-05
Version: 2.3.0
Date: 2023-05-21
Authors@R: c(
person("Katy", "Sadowski", email = "[email protected]", role = c("aut", "cre")),
person("Clair", "Blacketer", role = c("aut")),
Expand All @@ -27,7 +27,7 @@ Imports:
dplyr,
jsonlite,
rJava,
SqlRender (>= 1.6.0),
SqlRender (>= 1.10.1),
plyr,
stringr,
rlang,
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(convertJsonResultsFileCase)
export(executeDqChecks)
export(listDqChecks)
export(reEvaluateThresholds)
Expand All @@ -8,8 +9,14 @@ export(writeJsonResultsToCsv)
export(writeJsonResultsToTable)
import(DatabaseConnector)
import(magrittr)
importFrom(SqlRender,camelCaseToSnakeCase)
importFrom(SqlRender,snakeCaseToCamelCase)
importFrom(dplyr,case_when)
importFrom(dplyr,mutate)
importFrom(dplyr,rename_with)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,parse_json)
importFrom(jsonlite,toJSON)
importFrom(magrittr,"%>%")
importFrom(readr,read_csv)
importFrom(rlang,.data)
Expand All @@ -18,6 +25,7 @@ importFrom(stats,setNames)
importFrom(stringr,regex)
importFrom(stringr,str_detect)
importFrom(tidyselect,all_of)
importFrom(tools,file_path_sans_ext)
importFrom(utils,install.packages)
importFrom(utils,menu)
importFrom(utils,packageVersion)
Expand Down
16 changes: 15 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
DataQualityDashboard 2.3.0
==========================
This release includes:

### New features

- *New SQL-only Mode:* Setting `sqlOnly` and `sqlOnlyIncrementalInsert` to TRUE in `executeDqChecks` will return (but not run) a set of SQL queries that, when executed, will calculate the results of the DQ checks and insert them into a database table. Additionally, `sqlOnlyUnionCount` can be used to specify a number of SQL queries to union for each check type, allowing for parallel execution of these queries and potentially large performance gains. See the [SqlOnly vignette](https://ohdsi.github.io/DataQualityDashboard/articles/SqlOnly.html) for details
- *Results File Case Converter:* The new function `convertJsonResultsFileCase` can be used to convert the keys in a DQD results JSON file between snakecase and camelcase. This allows reading of v2.1.0+ JSON files in older DQD versions, and other conversions which may be necessary for secondary use of the DQD results file. See [function documentation](https://ohdsi.github.io/DataQualityDashboard/reference/convertJsonResultsFileCase.html) for details

### Bugfixes

- In the v2.1.0 release, all DQD variables were converted from snakecase to camelcase, including those in the results JSON file. This resulted in errors for users trying to view results files generated by older DQD versions in DQD v2.1.0+. This issue has now been fixed. `viewDqDashboard` will now automatically convert the case of pre-v2.1.0 results files to camelcase so that older results files may be viewed in v2.3.0+


DataQualityDashboard 2.2.0
==========================
This release includes:
Expand Down Expand Up @@ -60,7 +74,7 @@ This release includes:
- **withinVisitDates** looks at clinical facts and the visits they are associated with to make sure that the visit dates occur within one week on either side of the visit
- **plausibleUnitConceptIds** identifies records with invalid Unit_Concept_Ids by Measurement_Concept_Id

### outputFolder input paramater
### outputFolder input parameter

- The `outputFolder` parameter for the `executeDqChecks` function is now REQUIRED and no longer has a default value. **This may be a breaking change for users who have not specified this parameter in their script to run DQD.**

Expand Down
81 changes: 81 additions & 0 deletions R/convertResultsCase.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Copyright 2023 Observational Health Data Sciences and Informatics
#
# This file is part of DataQualityDashboard
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#' @title Convert JSON results file case
#'
#' @description Convert a DQD JSON results file between camelcase and (all-caps) snakecase. Enables viewing of pre-v.2.1.0 results files in later DQD versions, and vice versa
#'
#' @param jsonFilePath Path to the JSON results file to be converted
#' @param writeToFile Whether or not to write the converted results back to a file (must be either TRUE or FALSE)
#' @param outputFolder The folder to output the converted JSON results file to (required when writeToFile is TRUE)
#' @param outputFile (OPTIONAL) File to write converted results JSON object to. Default is name of input file with a "_camel" or "_snake" postfix
#' @param targetCase Case into which the results file parameters should be converted (must be either "camel" or "snake")
#'
#' @returns DQD results object (a named list)
#'
#' @importFrom jsonlite fromJSON
#' @importFrom SqlRender snakeCaseToCamelCase camelCaseToSnakeCase
#' @importFrom dplyr rename_with
#' @importFrom tools file_path_sans_ext
#'
#' @export

convertJsonResultsFileCase <- function(
    jsonFilePath,
    writeToFile,
    outputFolder = NA,
    outputFile = "",
    targetCase) {
  # Validate targetCase strictly: it must be a single string, exactly "camel" or
  # "snake". The previous check (!any(targetCase %in% ...)) accepted invalid
  # vectors such as c("camel", "bogus"), which then failed confusingly later at
  # the scalar comparison targetCase == "camel".
  if (!(is.character(targetCase) && length(targetCase) == 1 && targetCase %in% c("camel", "snake"))) {
    stop("targetCase must be either 'camel' or 'snake'.")
  }
  # Reject NA as well: NA is logical, but `writeToFile && ...` below would
  # error with "missing value where TRUE/FALSE needed".
  stopifnot(is.logical(writeToFile), length(writeToFile) == 1, !is.na(writeToFile))
  if (writeToFile && is.na(outputFolder)) {
    stop("You must specify an output folder if writing to file.")
  }

  results <- jsonlite::fromJSON(jsonFilePath)

  # If the file is already in the requested case, return it unchanged.
  # numViolatedRows / NUM_VIOLATED_ROWS are used as sentinel column names.
  if ("numViolatedRows" %in% names(results$CheckResults) && targetCase == "camel") {
    warning("File is already in camelcase! No conversion will be performed.")
    return(results)
  }
  if ("NUM_VIOLATED_ROWS" %in% names(results$CheckResults) && targetCase == "snake") {
    warning("File is already in snakecase! No conversion will be performed.")
    return(results)
  }

  # Choose the name-conversion function. Snakecase output is upper-cased to
  # match the all-caps convention of pre-v2.1.0 results files.
  if (targetCase == "camel") {
    swapFunction <- SqlRender::snakeCaseToCamelCase
  } else {
    swapFunction <- function(x) {
      toupper(SqlRender::camelCaseToSnakeCase(x))
    }
  }

  # Rename all columns; checkId is deliberately left untouched in CheckResults.
  results$Metadata <- dplyr::rename_with(results$Metadata, swapFunction)
  results$CheckResults <- dplyr::rename_with(results$CheckResults, swapFunction, -c("checkId"))

  if (writeToFile) {
    if (nchar(outputFile) == 0) {
      # Default output name: <input file name without extension>_<targetCase>.json
      jsonFile <- tools::file_path_sans_ext(basename(jsonFilePath))
      outputFile <- paste0(jsonFile, "_", targetCase, ".json")
    }
    .writeResultsToJson(results, outputFolder, outputFile)
  }

  return(results)
}
20 changes: 17 additions & 3 deletions R/executeDqChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@
#' @param numThreads The number of concurrent threads to use to execute the queries
#' @param cdmSourceName The name of the CDM data source
#' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)?
#' @param sqlOnlyUnionCount (OPTIONAL) In sqlOnlyIncrementalInsert mode, how many SQL commands to union in each query to insert check results into results table (can speed processing when queries done in parallel). Default is 1.
#' @param sqlOnlyIncrementalInsert (OPTIONAL) In sqlOnly mode, boolean to determine whether to generate SQL queries that insert check results and associated metadata into results table. Default is FALSE (for backwards compatibility to <= v2.2.0)
#' @param outputFolder The folder to output logs, SQL files, and JSON results file to
#' @param outputFile (OPTIONAL) File to write results JSON object
#' @param verboseMode Boolean to determine if the console will show all execution steps. Default is FALSE
#' @param writeToTable Boolean to indicate if the check results will be written to the dqdashboard_results table in the resultsDatabaseSchema. Default is TRUE
#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`.
#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`. Used when sqlOnly or writeToTable is TRUE.
#' @param writeToCsv Boolean to indicate if the check results will be written to a csv file. Default is FALSE
#' @param csvFile (OPTIONAL) CSV file to write results
#' @param checkLevels Choose which DQ check levels to execute. Default is all 3 (TABLE, FIELD, CONCEPT)
Expand Down Expand Up @@ -64,6 +66,8 @@ executeDqChecks <- function(connectionDetails,
cdmSourceName,
numThreads = 1,
sqlOnly = FALSE,
sqlOnlyUnionCount = 1,
sqlOnlyIncrementalInsert = FALSE,
outputFolder,
outputFile = "",
verboseMode = FALSE,
Expand Down Expand Up @@ -93,6 +97,8 @@ executeDqChecks <- function(connectionDetails,
stopifnot(is.character(cdmDatabaseSchema), is.character(resultsDatabaseSchema), is.numeric(numThreads))
stopifnot(is.character(cdmSourceName), is.logical(sqlOnly), is.character(outputFolder), is.logical(verboseMode))
stopifnot(is.logical(writeToTable), is.character(checkLevels))
stopifnot(is.numeric(sqlOnlyUnionCount) && sqlOnlyUnionCount > 0)
stopifnot(is.logical(sqlOnlyIncrementalInsert))
stopifnot(is.character(cohortDatabaseSchema), is.character(cohortTableName))

if (!all(checkLevels %in% c("TABLE", "FIELD", "CONCEPT"))) {
Expand Down Expand Up @@ -128,7 +134,10 @@ executeDqChecks <- function(connectionDetails,
metadata$dqdVersion <- as.character(packageVersion("DataQualityDashboard"))
DatabaseConnector::disconnect(connection)
} else {
metadata <- NA
metadata <- data.frame(
dqdVersion = as.character(packageVersion("DataQualityDashboard")),
cdmSourceName = cdmSourceName
)
}

# Setup output folder ------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -259,10 +268,14 @@ executeDqChecks <- function(connectionDetails,
connection,
cdmDatabaseSchema,
vocabDatabaseSchema,
resultsDatabaseSchema,
writeTableName,
cohortDatabaseSchema,
cohortTableName,
cohortDefinitionId,
outputFolder,
sqlOnlyUnionCount,
sqlOnlyIncrementalInsert,
sqlOnly,
progressBar = TRUE
)
Expand Down Expand Up @@ -310,9 +323,10 @@ executeDqChecks <- function(connectionDetails,
.writeResultsToJson(allResults, outputFolder, outputFile)

ParallelLogger::logInfo("Execution Complete")
} else {
.writeDDL(resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder)
}


# write to table ----------------------------------------------------------------------

if (!sqlOnly && writeToTable) {
Expand Down
68 changes: 12 additions & 56 deletions R/listChecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,21 @@ listDqChecks <- function(cdmVersion = "5.3", tableCheckThresholdLoc = "default",
sprintf("OMOP_CDMv%s_Check_Descriptions.csv", cdmVersion),
package = "DataQualityDashboard"
))
dqChecks$checkDescriptions <- as.data.frame(dqChecks$checkDescriptions)

dqChecks$tableChecks <- .readThresholdFile(
checkThresholdLoc = tableCheckThresholdLoc,
defaultLoc = sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion)
)

if (tableCheckThresholdLoc == "default") {
dqChecks$tableChecks <-
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
na = c(" ", "")
)
dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
} else {
dqChecks$tableChecks <- read_csv(
tableCheckThresholdLoc,
na = c(" ", "")
)
dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks)
}
dqChecks$fieldChecks <- .readThresholdFile(
checkThresholdLoc = fieldCheckThresholdLoc,
defaultLoc = sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion)
)

if (fieldCheckThresholdLoc == "default") {
dqChecks$fieldChecks <-
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
na = c(" ", "")
)
dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
} else {
dqChecks$fieldChecks <- read_csv(
fieldCheckThresholdLoc,
na = c(" ", "")
)
dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks)
}

if (conceptCheckThresholdLoc == "default") {
dqChecks$conceptChecks <-
read_csv(
system.file(
"csv",
sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion),
package = "DataQualityDashboard"
),
na = c(" ", "")
)
dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
} else {
dqChecks$conceptChecks <- read_csv(
conceptCheckThresholdLoc,
na = c(" ", "")
)
dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks)
}
dqChecks$conceptChecks <- .readThresholdFile(
checkThresholdLoc = conceptCheckThresholdLoc,
defaultLoc = sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion)
)

return(dqChecks)
}
38 changes: 32 additions & 6 deletions R/runCheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@
#' @param connection A connection for connecting to the CDM database using the DatabaseConnector::connect(connectionDetails) function.
#' @param cdmDatabaseSchema The fully qualified database name of the CDM schema
#' @param vocabDatabaseSchema The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema)
#' @param resultsDatabaseSchema The fully qualified database name of the results schema
#' @param writeTableName The table to write DQD results to. Used when sqlOnly or writeToTable is TRUE.
#' @param cohortDatabaseSchema The schema where the cohort table is located.
#' @param cohortTableName The name of the cohort table.
#' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. The package assumes a standard OHDSI cohort table called 'Cohort'
#' @param outputFolder The folder to output logs and SQL files to
#' @param sqlOnlyUnionCount (OPTIONAL) How many SQL commands to union before inserting them into output table (speeds processing when queries done in parallel). Default is 1.
#' @param sqlOnlyIncrementalInsert (OPTIONAL) Boolean to determine whether to insert check results and associated metadata into the output table. Default is FALSE (for backwards compatibility to <= v2.2.0)
#' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)?
#'
#' @import magrittr
Expand All @@ -42,10 +46,14 @@
connection,
cdmDatabaseSchema,
vocabDatabaseSchema,
resultsDatabaseSchema,
writeTableName,
cohortDatabaseSchema,
cohortTableName,
cohortDefinitionId,
outputFolder,
sqlOnlyUnionCount,
sqlOnlyIncrementalInsert,
sqlOnly) {
ParallelLogger::logInfo(sprintf("Processing check description: %s", checkDescription$checkName))

Expand All @@ -62,10 +70,6 @@
cohort <- FALSE
}

if (sqlOnly) {
unlink(file.path(outputFolder, sprintf("%s.sql", checkDescription$checkName)))
}

if (nrow(checks) > 0) {
dfs <- apply(X = checks, MARGIN = 1, function(check) {
columns <- lapply(names(check), function(c) {
Expand All @@ -88,7 +92,19 @@

sql <- do.call(SqlRender::loadRenderTranslateSql, params)

if (sqlOnly) {
if (sqlOnly && sqlOnlyIncrementalInsert) {
checkQuery <- .createSqlOnlyQueries(
params,
check,
tableChecks,
fieldChecks,
conceptChecks,
sql,
connectionDetails,
checkDescription
)
data.frame(query = checkQuery)
} else if (sqlOnly) {
write(x = sql, file = file.path(
outputFolder,
sprintf("%s.sql", checkDescription$checkName)
Expand All @@ -105,7 +121,17 @@
)
}
})
do.call(rbind, dfs)

dfs <- do.call(rbind, dfs)

if (sqlOnlyIncrementalInsert) {
sqlToUnion <- dfs$query
if (length(sqlToUnion) > 0) {
.writeSqlOnlyQueries(sqlToUnion, sqlOnlyUnionCount, resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder, checkDescription)
}
} else {
dfs
}
} else {
ParallelLogger::logWarn(paste0("Warning: Evaluation resulted in no checks: ", filterExpression))
data.frame()
Expand Down
Loading

0 comments on commit 741f748

Please sign in to comment.