diff --git a/DESCRIPTION b/DESCRIPTION index e53bd4a0..42ffb66f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: DataQualityDashboard Type: Package Title: Execute and View Data Quality Checks on OMOP CDM Database -Version: 2.2.0 -Date: 2023-05-05 +Version: 2.3.0 +Date: 2023-05-21 Authors@R: c( person("Katy", "Sadowski", email = "sadowski@ohdsi.org", role = c("aut", "cre")), person("Clair", "Blacketer", role = c("aut")), @@ -27,7 +27,7 @@ Imports: dplyr, jsonlite, rJava, - SqlRender (>= 1.6.0), + SqlRender (>= 1.10.1), plyr, stringr, rlang, diff --git a/NAMESPACE b/NAMESPACE index d8da3bad..69aad71c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(convertJsonResultsFileCase) export(executeDqChecks) export(listDqChecks) export(reEvaluateThresholds) @@ -8,8 +9,14 @@ export(writeJsonResultsToCsv) export(writeJsonResultsToTable) import(DatabaseConnector) import(magrittr) +importFrom(SqlRender,camelCaseToSnakeCase) +importFrom(SqlRender,snakeCaseToCamelCase) importFrom(dplyr,case_when) importFrom(dplyr,mutate) +importFrom(dplyr,rename_with) +importFrom(jsonlite,fromJSON) +importFrom(jsonlite,parse_json) +importFrom(jsonlite,toJSON) importFrom(magrittr,"%>%") importFrom(readr,read_csv) importFrom(rlang,.data) @@ -18,6 +25,7 @@ importFrom(stats,setNames) importFrom(stringr,regex) importFrom(stringr,str_detect) importFrom(tidyselect,all_of) +importFrom(tools,file_path_sans_ext) importFrom(utils,install.packages) importFrom(utils,menu) importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md index 5f1db1fd..05d569ec 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,17 @@ +DataQualityDashboard 2.3.0 +========================== +This release includes: + +### New features + +- *New SQL-only Mode:* Setting `sqlOnly` and `sqlOnlyIncrementalInsert` to TRUE in `executeDqChecks` will return (but not run) a set of SQL queries that, when executed, will calculate the results of the DQ checks and insert 
them into a database table. Additionally, `sqlOnlyUnionCount` can be used to specify a number of SQL queries to union for each check type, allowing for parallel execution of these queries and potentially large performance gains. See the [SqlOnly vignette](https://ohdsi.github.io/DataQualityDashboard/articles/SqlOnly.html) for details +- *Results File Case Converter:* The new function `convertJsonResultsFileCase` can be used to convert the keys in a DQD results JSON file between snakecase and camelcase. This allows reading of v2.1.0+ JSON files in older DQD versions, and other conversions which may be necessary for secondary use of the DQD results file. See [function documentation](https://ohdsi.github.io/DataQualityDashboard/reference/convertJsonResultsFileCase.html) for details + +### Bugfixes + +- In the v2.1.0 release, all DQD variables were converted from snakecase to camelcase, including those in the results JSON file. This resulted in errors for users trying to view results files generated by older DQD versions in DQD v2.1.0+. This issue has now been fixed. `viewDqDashboard` will now automatically convert the case of pre-v2.1.0 results files to camelcase so that older results files may be viewed in v2.3.0+ + + DataQualityDashboard 2.2.0 ========================== This release includes: @@ -60,7 +74,7 @@ This release includes: - **withinVisitDates** looks at clinical facts and the visits they are associated with to make sure that the visit dates occur within one week on either side of the visit - **plausibleUnitConceptIds** identifies records with invalid Unit_Concept_Ids by Measurement_Concept_Id -### outputFolder input paramater +### outputFolder input parameter - The `outputFolder` parameter for the `executeDqChecks` function is now REQUIRED and no longer has a default value. 
**This may be a breaking change for users who have not specified this parameter in their script to run DQD.** diff --git a/R/convertResultsCase.R b/R/convertResultsCase.R new file mode 100644 index 00000000..b6d4f357 --- /dev/null +++ b/R/convertResultsCase.R @@ -0,0 +1,81 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DataQualityDashboard +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' @title Convert JSON results file case +#' +#' @description Convert a DQD JSON results file between camelcase and (all-caps) snakecase. Enables viewing of pre-v.2.1.0 results files in later DQD versions, and vice versa +#' +#' @param jsonFilePath Path to the JSON results file to be converted +#' @param writeToFile Whether or not to write the converted results back to a file (must be either TRUE or FALSE) +#' @param outputFolder The folder to output the converted JSON results file to +#' @param outputFile (OPTIONAL) File to write converted results JSON object to. 
Default is name of input file with a "_camel" or "_snake" postfix +#' @param targetCase Case into which the results file parameters should be converted (must be either "camel" or "snake") +#' +#' @returns DQD results object (a named list) +#' +#' @importFrom jsonlite fromJSON +#' @importFrom SqlRender snakeCaseToCamelCase camelCaseToSnakeCase +#' @importFrom dplyr rename_with +#' @importFrom tools file_path_sans_ext +#' +#' @export + +convertJsonResultsFileCase <- function( + jsonFilePath, + writeToFile, + outputFolder = NA, + outputFile = "", + targetCase) { + if (!any(targetCase %in% c("camel", "snake"))) { + stop("targetCase must be either 'camel' or 'snake'.") + } + stopifnot(is.logical(writeToFile)) + if (writeToFile && is.na(outputFolder)) { + stop("You must specify an output folder if writing to file.") + } + + results <- jsonlite::fromJSON(jsonFilePath) + + if ("numViolatedRows" %in% names(results$CheckResults) && targetCase == "camel") { + warning("File is already in camelcase! No conversion will be performed.") + return(results) + } + if ("NUM_VIOLATED_ROWS" %in% names(results$CheckResults) && targetCase == "snake") { + warning("File is already in snakecase! 
No conversion will be performed.") + return(results) + } + + if (targetCase == "camel") { + swapFunction <- SqlRender::snakeCaseToCamelCase + } else { + swapFunction <- function(x) { + toupper(SqlRender::camelCaseToSnakeCase(x)) + } + } + + results$Metadata <- dplyr::rename_with(results$Metadata, swapFunction) + results$CheckResults <- dplyr::rename_with(results$CheckResults, swapFunction, -c("checkId")) + + if (writeToFile) { + if (nchar(outputFile) == 0) { + jsonFile <- tools::file_path_sans_ext(basename(jsonFilePath)) + outputFile <- paste(jsonFile, "_", targetCase, ".json", sep = "") + } + .writeResultsToJson(results, outputFolder, outputFile) + } + + return(results) +} diff --git a/R/executeDqChecks.R b/R/executeDqChecks.R index 1363c2de..7f81fd5b 100644 --- a/R/executeDqChecks.R +++ b/R/executeDqChecks.R @@ -25,11 +25,13 @@ #' @param numThreads The number of concurrent threads to use to execute the queries #' @param cdmSourceName The name of the CDM data source #' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)? +#' @param sqlOnlyUnionCount (OPTIONAL) In sqlOnlyIncrementalInsert mode, how many SQL commands to union in each query to insert check results into results table (can speed processing when queries done in parallel). Default is 1. +#' @param sqlOnlyIncrementalInsert (OPTIONAL) In sqlOnly mode, boolean to determine whether to generate SQL queries that insert check results and associated metadata into results table. Default is FALSE (for backwards compatibility to <= v2.2.0) #' @param outputFolder The folder to output logs, SQL files, and JSON results file to #' @param outputFile (OPTIONAL) File to write results JSON object #' @param verboseMode Boolean to determine if the console will show all execution steps. Default is FALSE #' @param writeToTable Boolean to indicate if the check results will be written to the dqdashboard_results table in the resultsDatabaseSchema. 
Default is TRUE -#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`. +#' @param writeTableName The name of the results table. Defaults to `dqdashboard_results`. Used when sqlOnly or writeToTable is True. #' @param writeToCsv Boolean to indicate if the check results will be written to a csv file. Default is FALSE #' @param csvFile (OPTIONAL) CSV file to write results #' @param checkLevels Choose which DQ check levels to execute. Default is all 3 (TABLE, FIELD, CONCEPT) @@ -64,6 +66,8 @@ executeDqChecks <- function(connectionDetails, cdmSourceName, numThreads = 1, sqlOnly = FALSE, + sqlOnlyUnionCount = 1, + sqlOnlyIncrementalInsert = FALSE, outputFolder, outputFile = "", verboseMode = FALSE, @@ -93,6 +97,8 @@ executeDqChecks <- function(connectionDetails, stopifnot(is.character(cdmDatabaseSchema), is.character(resultsDatabaseSchema), is.numeric(numThreads)) stopifnot(is.character(cdmSourceName), is.logical(sqlOnly), is.character(outputFolder), is.logical(verboseMode)) stopifnot(is.logical(writeToTable), is.character(checkLevels)) + stopifnot(is.numeric(sqlOnlyUnionCount) && sqlOnlyUnionCount > 0) + stopifnot(is.logical(sqlOnlyIncrementalInsert)) stopifnot(is.character(cohortDatabaseSchema), is.character(cohortTableName)) if (!all(checkLevels %in% c("TABLE", "FIELD", "CONCEPT"))) { @@ -128,7 +134,10 @@ executeDqChecks <- function(connectionDetails, metadata$dqdVersion <- as.character(packageVersion("DataQualityDashboard")) DatabaseConnector::disconnect(connection) } else { - metadata <- NA + metadata <- data.frame( + dqdVersion = as.character(packageVersion("DataQualityDashboard")), + cdmSourceName = cdmSourceName + ) } # Setup output folder ------------------------------------------------------------------------------------------------------------ @@ -259,10 +268,14 @@ executeDqChecks <- function(connectionDetails, connection, cdmDatabaseSchema, vocabDatabaseSchema, + resultsDatabaseSchema, + writeTableName, 
cohortDatabaseSchema, cohortTableName, cohortDefinitionId, outputFolder, + sqlOnlyUnionCount, + sqlOnlyIncrementalInsert, sqlOnly, progressBar = TRUE ) @@ -310,9 +323,10 @@ executeDqChecks <- function(connectionDetails, .writeResultsToJson(allResults, outputFolder, outputFile) ParallelLogger::logInfo("Execution Complete") + } else { + .writeDDL(resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder) } - # write to table ---------------------------------------------------------------------- if (!sqlOnly && writeToTable) { diff --git a/R/listChecks.R b/R/listChecks.R index 402af927..ddeccbb4 100644 --- a/R/listChecks.R +++ b/R/listChecks.R @@ -35,65 +35,21 @@ listDqChecks <- function(cdmVersion = "5.3", tableCheckThresholdLoc = "default", sprintf("OMOP_CDMv%s_Check_Descriptions.csv", cdmVersion), package = "DataQualityDashboard" )) - dqChecks$checkDescriptions <- as.data.frame(dqChecks$checkDescriptions) + dqChecks$tableChecks <- .readThresholdFile( + checkThresholdLoc = tableCheckThresholdLoc, + defaultLoc = sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion) + ) - if (tableCheckThresholdLoc == "default") { - dqChecks$tableChecks <- - read_csv( - system.file( - "csv", - sprintf("OMOP_CDMv%s_Table_Level.csv", cdmVersion), - package = "DataQualityDashboard" - ), - na = c(" ", "") - ) - dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks) - } else { - dqChecks$tableChecks <- read_csv( - tableCheckThresholdLoc, - na = c(" ", "") - ) - dqChecks$tableChecks <- as.data.frame(dqChecks$tableChecks) - } + dqChecks$fieldChecks <- .readThresholdFile( + checkThresholdLoc = fieldCheckThresholdLoc, + defaultLoc = sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion) + ) - if (fieldCheckThresholdLoc == "default") { - dqChecks$fieldChecks <- - read_csv( - system.file( - "csv", - sprintf("OMOP_CDMv%s_Field_Level.csv", cdmVersion), - package = "DataQualityDashboard" - ), - na = c(" ", "") - ) - dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks) - } 
else { - dqChecks$fieldChecks <- read_csv( - fieldCheckThresholdLoc, - na = c(" ", "") - ) - dqChecks$fieldChecks <- as.data.frame(dqChecks$fieldChecks) - } - - if (conceptCheckThresholdLoc == "default") { - dqChecks$conceptChecks <- - read_csv( - system.file( - "csv", - sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion), - package = "DataQualityDashboard" - ), - na = c(" ", "") - ) - dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks) - } else { - dqChecks$conceptChecks <- read_csv( - conceptCheckThresholdLoc, - na = c(" ", "") - ) - dqChecks$conceptChecks <- as.data.frame(dqChecks$conceptChecks) - } + dqChecks$conceptChecks <- .readThresholdFile( + checkThresholdLoc = conceptCheckThresholdLoc, + defaultLoc = sprintf("OMOP_CDMv%s_Concept_Level.csv", cdmVersion) + ) return(dqChecks) } diff --git a/R/runCheck.R b/R/runCheck.R index efcd05bf..85c31945 100644 --- a/R/runCheck.R +++ b/R/runCheck.R @@ -24,10 +24,14 @@ #' @param connection A connection for connecting to the CDM database using the DatabaseConnector::connect(connectionDetails) function. #' @param cdmDatabaseSchema The fully qualified database name of the CDM schema #' @param vocabDatabaseSchema The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema) +#' @param resultsDatabaseSchema The fully qualified database name of the results schema +#' @param writeTableName The table tor write DQD results to. Used when sqlOnly or writeToTable is True. #' @param cohortDatabaseSchema The schema where the cohort table is located. #' @param cohortTableName The name of the cohort table. #' @param cohortDefinitionId The cohort definition id for the cohort you wish to run the DQD on. 
The package assumes a standard OHDSI cohort table called 'Cohort' #' @param outputFolder The folder to output logs and SQL files to +#' @param sqlOnlyUnionCount (OPTIONAL) How many SQL commands to union before inserting them into output table (speeds processing when queries done in parallel). Default is 1. +#' @param sqlOnlyIncrementalInsert (OPTIONAL) Boolean to determine whether to insert check results and associated metadata into output table. Default is FALSE (for backwards compatibility to <= v2.2.0) #' @param sqlOnly Should the SQLs be executed (FALSE) or just returned (TRUE)? #' #' @import magrittr @@ -42,10 +46,14 @@ connection, cdmDatabaseSchema, vocabDatabaseSchema, + resultsDatabaseSchema, + writeTableName, cohortDatabaseSchema, cohortTableName, cohortDefinitionId, outputFolder, + sqlOnlyUnionCount, + sqlOnlyIncrementalInsert, sqlOnly) { ParallelLogger::logInfo(sprintf("Processing check description: %s", checkDescription$checkName)) @@ -62,10 +70,6 @@ cohort <- FALSE } - if (sqlOnly) { - unlink(file.path(outputFolder, sprintf("%s.sql", checkDescription$checkName))) - } - if (nrow(checks) > 0) { dfs <- apply(X = checks, MARGIN = 1, function(check) { columns <- lapply(names(check), function(c) { @@ -88,7 +92,19 @@ sql <- do.call(SqlRender::loadRenderTranslateSql, params) - if (sqlOnly) { + if (sqlOnly && sqlOnlyIncrementalInsert) { + checkQuery <- .createSqlOnlyQueries( + params, + check, + tableChecks, + fieldChecks, + conceptChecks, + sql, + connectionDetails, + checkDescription + ) + data.frame(query = checkQuery) + } else if (sqlOnly) { write(x = sql, file = file.path( outputFolder, sprintf("%s.sql", checkDescription$checkName) @@ -105,7 +121,17 @@ ) } }) - do.call(rbind, dfs) + + dfs <- do.call(rbind, dfs) + + if (sqlOnlyIncrementalInsert) { + sqlToUnion <- dfs$query + if (length(sqlToUnion) > 0) { + .writeSqlOnlyQueries(sqlToUnion, sqlOnlyUnionCount, resultsDatabaseSchema, writeTableName, connectionDetails$dbms, outputFolder, checkDescription) + } + } 
else { + dfs + } } else { ParallelLogger::logWarn(paste0("Warning: Evaluation resulted in no checks: ", filterExpression)) data.frame() diff --git a/R/sqlOnly.R b/R/sqlOnly.R new file mode 100644 index 00000000..74219902 --- /dev/null +++ b/R/sqlOnly.R @@ -0,0 +1,267 @@ +# Copyright 2023 Observational Health Data Sciences and Informatics +# +# This file is part of DataQualityDashboard +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#' Internal function to create queries when running in "incremental insert" sqlOnly mode +#' +#' @param params Collection of parameters from .runCheck +#' @param check Create SQL for this specific check type +#' @param tablechecks A dataframe containing the table checks +#' @param fieldChecks A dataframe containing the field checks +#' @param conceptChecks A dataframe containing the concept checks +#' @param sql The rendered SQL for this check +#' @param connectionDetails A connectionDetails object for connecting to the CDM database +#' @param checkDescription The description of the data quality check +#' +#' @return A rendered SQL query to add into the incremental insert sqlOnly query + +#' @noRd +#' @keywords internal +#' +.createSqlOnlyQueries <- function( + params, + check, + tableChecks, + fieldChecks, + conceptChecks, + sql, + connectionDetails, + checkDescription) { + resultShell <- .recordResult(check = check, checkDescription = checkDescription, sql = sql) + + resultShell$queryText <- gsub(";", "", resultShell$queryText) + 
resultShell$checkDescription <- gsub("\t", " ", gsub("\r", " ", gsub("\n", " ", gsub("'", "''", resultShell$checkDescription)))) + + # Retrieve the numeric threshold value for the specific check. + thresholdValue <- .getThreshold( + checkName = resultShell$checkName, + checkLevel = resultShell$checkLevel, + cdmTableName = resultShell$cdmTableName, + cdmFieldName = resultShell$cdmFieldName, + conceptId = resultShell$conceptId, + unitConceptId = resultShell$unitConceptId, + tableChecks = tableChecks, + fieldChecks = fieldChecks, + conceptChecks = conceptChecks + ) + + # Generate the wrapping query for the desired check. This creates a final row for insertion that includes nearly all the metadata for the query (in addition to calling the check query itself) + # The only metadata that are not included in this wrapping query include: + # 1. execution_time -- since this query is not being executed (only the SQL is generated), execution_time is not available + # 2. queryText -- although this could be included, it seemed redundant since it is part of the generated SQL file + # 3. warning -- not available since the SQL is not executed + # 4. error -- not available since the SQL is not executed + # 5. not_applicable_reason -- this currently requires post-processing + # 6. 
notes_value -- although this could be included, it seemed redundant + checkQuery <- SqlRender::loadRenderTranslateSql( + sqlFilename = file.path("sqlOnly", "cte_sql_for_results_table.sql"), + packageName = "DataQualityDashboard", + dbms = connectionDetails$dbms, + queryText = resultShell$queryText, + checkName = resultShell$checkName, + checkLevel = resultShell$checkLevel, + renderedCheckDescription = resultShell$checkDescription, + cdmTableName = resultShell$cdmTableName, + cdmFieldName = resultShell$cdmFieldName, + conceptId = resultShell$conceptId, + unitConceptId = resultShell$unitConceptId, + sqlFile = checkDescription$sqlFile, + category = resultShell$category, + subcategory = resultShell$subcategory, + context = resultShell$context, + checkId = resultShell$checkId, + thresholdValue = thresholdValue + ) + + return(checkQuery) +} + + +#' Internal function to write queries when running in sqlOnly mode +#' +#' @param sqlToUnion List of one or more SQL queries to union +#' @param sqlOnlyUnionCount Value of @sqlOnlyUnionCount - determines max # of sql queries to union in a single cte +#' @param resultsDatabaseSchema The fully qualified database name of the results schema +#' @param writeTableName The table to write DQD results to. Used when sqlOnly or writeToTable is TRUE. +#' @param dbms The database type (e.g. 
spark, sql server) - needed for proper query rendering +#' @param outputFolder Location to write the generated SQL files +#' @param checkDescription The description of the data quality check + +#' @noRd +#' @keywords internal +#' +.writeSqlOnlyQueries <- function( + sqlToUnion, + sqlOnlyUnionCount, + resultsDatabaseSchema, + writeTableName, + dbms, + outputFolder, + checkDescription) { + outFile <- file.path( + outputFolder, + sprintf("%s_%s.sql", checkDescription$checkLevel, checkDescription$checkName) + ) + + # Delete existing file + unlink(outFile) + + ustart <- 1 + while (ustart <= length(sqlToUnion)) { + uend <- min(ustart + sqlOnlyUnionCount - 1, length(sqlToUnion)) + + sqlUnioned <- paste(sqlToUnion[ustart:uend], collapse = " UNION ALL ") + + # Generate INSERT commands to insert results + metadata into results table + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = file.path("sqlOnly", "insert_ctes_into_result_table.sql"), + packageName = "DataQualityDashboard", + dbms = dbms, + resultsDatabaseSchema = resultsDatabaseSchema, + tableName = writeTableName, + queryText = sqlUnioned + ) + + write( + x = sql, + file = outFile, + append = TRUE + ) + + ustart <- ustart + sqlOnlyUnionCount + } +} + + +#' Internal function to write the DDL to outputFolder + +#' @param resultsDatabaseSchema The fully qualified database name of the results schema +#' @param writeTableName The table tor write DQD results to. Used when sqlOnly or writeToTable is True. +#' @param dbms The database type (e.g. 
spark, sql server) - needed for proper query rendering +#' @param outputFolder Location to write the generated SQL files + +#' @noRd +#' @keywords internal +.writeDDL <- function( + resultsDatabaseSchema, + writeTableName, + dbms, + outputFolder) { + tableName <- sprintf("%s.%s", resultsDatabaseSchema, writeTableName) + + sql <- SqlRender::loadRenderTranslateSql( + sqlFilename = "result_dataframe_ddl.sql", + packageName = "DataQualityDashboard", + dbms = dbms, + tableName = tableName + ) + + write( + x = sql, + file = file.path( + outputFolder, + "ddlDqdResults.sql" + ) + ) +} + + +#' Internal function to get one threshold +#' Note: this does not evaluate is_error or not_applicable status + +#' @param checkName The name of the check - such as measurePersonCompleteness +#' @param checkLevel The check level - such as TABLE +#' @param cdmTableName The name of the CDM table - such as MEASUREMENT +#' @param cdmFieldName Then name of the CDM field - such as MEASUREMENT_CONCEPT_ID +#' @param conceptId The specific concept_id being checked - a valid concept_id number +#' @param unitConceptId The specific unit concept id being checked - a valid concept_id number +#' @param tableChecks A dataframe containing the table checks +#' @param fieldChecks A dataframe containing the field checks +#' @param conceptChecks A dataframe containing the concept checks + +#' @noRd +#' @keywords internal +.getThreshold <- function( + checkName, + checkLevel, + cdmTableName, + cdmFieldName, + conceptId, + unitConceptId, + tableChecks, + fieldChecks, + conceptChecks) { + thresholdField <- sprintf("%sThreshold", checkName) + + # find if field exists ----------------------------------------------- + thresholdFieldExists <- eval(parse( + text = sprintf( + "'%s' %%in%% colnames(%sChecks)", + thresholdField, + tolower(checkLevel) + ) + )) + + if (!thresholdFieldExists) { + thresholdValue <- NA + } else { + if (checkLevel == "TABLE") { + thresholdFilter <- sprintf( + 
"tableChecks$%s[tableChecks$cdmTableName == '%s']", + thresholdField, cdmTableName + ) + } else if (checkLevel == "FIELD") { + thresholdFilter <- sprintf( + "fieldChecks$%s[fieldChecks$cdmTableName == '%s' & + fieldChecks$cdmFieldName == '%s']", + thresholdField, + cdmTableName, + cdmFieldName + ) + } else if (checkLevel == "CONCEPT") { + if (is.na(unitConceptId)) { + thresholdFilter <- sprintf( + "conceptChecks$%s[conceptChecks$cdmTableName == '%s' & + conceptChecks$cdmFieldName == '%s' & + conceptChecks$conceptId == %s]", + thresholdField, + cdmTableName, + cdmFieldName, + conceptId + ) + } else { + thresholdFilter <- sprintf( + "conceptChecks$%s[conceptChecks$cdmTableName == '%s' & + conceptChecks$cdmFieldName == '%s' & + conceptChecks$conceptId == %s & + conceptChecks$unitConceptId == '%s']", + thresholdField, + cdmTableName, + cdmFieldName, + conceptId, + as.integer(unitConceptId) + ) + } + } + thresholdValue <- eval(parse(text = thresholdFilter)) + } + + # Need value of 0 for NA in generated SQL + if (is.na(thresholdValue)) { + thresholdValue <- 0 + } + + thresholdValue +} diff --git a/R/view.R b/R/view.R index 681db601..1304459a 100644 --- a/R/view.R +++ b/R/view.R @@ -23,6 +23,7 @@ #' @param ... Extra parameters for shiny::runApp() like "port" or "host" #' #' @importFrom utils menu install.packages +#' @importFrom jsonlite toJSON parse_json #' #' @export viewDqDashboard <- function(jsonPath, launch.browser = NULL, display.mode = NULL, ...) { diff --git a/README.md b/README.md index a540338d..fdd1d407 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ cdm_source_name | The name of the CDM instance. | Descriptive name for the sourc cdm_source_abbreviation | The abbreviation of the CDM instance. | The abbreviation should consistent for different release from the same source. cdm_holder | The holder of the CDM instance. | The institution that controls access to the data.  
If possible include contact information for who to contact to request access to the data. source_description | The description of the CDM instance. | Add notes, caveats, special characteristics about the source data that would not be assumed from the general descriptive name.  This description intended to help analysts determine if the data is suitable for the problem they are studying. -source_documentation_reference | Reference to where one can find documentation about the source data. | Can include URL's, file name, source data experts contact information (if they agree to it) +source_documentation_reference | Reference to where one can find documentation about the source data. | Can include URLs, file name, source data experts contact information (if they agree to it) cdm_etl_reference | Reference to where one can find documentation about the source to ETL into OMOP CDM. | Assuming there is a document or files (such as Rabbit in the Hat) describing the ETL.  May be the location of the ETL source and documentation repository. source_release_date | The release date of the source data. | When the source data was made available for ETL'ing.  For sites doing incremental updates, the date the last increment made available.  This implies that for sites doing incremental updates the CDM Source table should be updated to reflect that changes were made to the CDM. cdm_release_date | The release date of the CDM instance. | When the source data was made available for general use.  For sites doing incremental updates, this implies that the CDM Source table should be updated to reflect that changes were made to the CDM. @@ -104,7 +104,7 @@ DataQualityDashboard is being developed in R Studio. ### Development status -V2.0 ready for use. +DataQualityDashboard latest release (representing code in the `main` branch) is ready for use. # Acknowledgements - This project is supported in part through the National Science Foundation grant IIS 1251151. 
diff --git a/_pkgdown.yml b/_pkgdown.yml index 1f4aee6f..57941eb8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -59,3 +59,7 @@ reference: desc: > Function to write the JSON results to a csv file contents: writeJsonResultsToCsv + - title: "Convert results JSON file case" + desc: > + Function to convert the case of a results JSON file between snakecase and camelcase + contents: convertJsonResultsFileCase diff --git a/docs/404.html b/docs/404.html index cbc0b5b3..7188b296 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -69,6 +69,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index a0bc4280..8a7fd530 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/articles/AddNewCheck.html b/docs/articles/AddNewCheck.html index 5d65e549..5769a243 100644 --- a/docs/articles/AddNewCheck.html +++ b/docs/articles/AddNewCheck.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -105,7 +108,7 @@

    Add a New Data Quality Check

    Don Torok

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/AddNewCheck.rmd diff --git a/docs/articles/CheckStatusDefinitions.html b/docs/articles/CheckStatusDefinitions.html index 0790dc03..5bf3ecd3 100644 --- a/docs/articles/CheckStatusDefinitions.html +++ b/docs/articles/CheckStatusDefinitions.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -106,7 +109,7 @@

    Check Status Descriptions

    Dmitry Ilyn

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/CheckStatusDefinitions.rmd diff --git a/docs/articles/CheckTypeDescriptions.html b/docs/articles/CheckTypeDescriptions.html index 52631621..81fbd664 100644 --- a/docs/articles/CheckTypeDescriptions.html +++ b/docs/articles/CheckTypeDescriptions.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -106,7 +109,7 @@

    Data Quality Check Type Definitions

    Clair Blacketer

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/CheckTypeDescriptions.rmd diff --git a/docs/articles/DataQualityDashboard.html b/docs/articles/DataQualityDashboard.html index 023e7d79..48f7a11b 100644 --- a/docs/articles/DataQualityDashboard.html +++ b/docs/articles/DataQualityDashboard.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -106,7 +109,7 @@

    Getting Started

    Clair Blacketer

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/DataQualityDashboard.rmd @@ -147,32 +150,58 @@

    Executing Data Quality Checks
     
     # fill out the connection details -----------------------------------------------------------------------
    -connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "", 
    -                                                              user = "", 
    -                                                              password = "", 
    -                                                              server = "", 
    -                                                              port = "", 
    -                                                              extraSettings = "")
    +connectionDetails <- DatabaseConnector::createConnectionDetails(
    +dbms = "", 
    +user = "", 
    +password = "", 
    +server = "", 
    +port = "", 
    +extraSettings = "",
    +pathToDriver = ""
    +)
     
     cdmDatabaseSchema <- "yourCdmSchema" # the fully qualified database schema name of the CDM
     resultsDatabaseSchema <- "yourResultsSchema" # the fully qualified database schema name of the results schema (that you can write to)
     cdmSourceName <- "Your CDM Source" # a human readable name for your CDM source
     +cdmVersion <- "5.4" # the CDM version you are targeting. Currently supports 5.2, 5.3, and 5.4
     
     # determine how many threads (concurrent SQL sessions) to use ----------------------------------------
     numThreads <- 1 # on Redshift, 3 seems to work well
     
     # specify if you want to execute the queries or inspect them ------------------------------------------
     sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries
    +sqlOnlyIncrementalInsert <- FALSE # set to TRUE if you want the generated SQL queries to calculate DQD results and insert them into a database table (@resultsDatabaseSchema.@writeTableName)
    +sqlOnlyUnionCount <- 1  # in sqlOnlyIncrementalInsert mode, the number of check sqls to union in a single query; higher numbers can improve performance in some DBMS (e.g. a value of 25 may be 25x faster)
    +
    +# NOTES specific to sqlOnly <- TRUE option ------------------------------------------------------------
    +# 1. You do not need a live database connection.  Instead, connectionDetails only needs these parameters:
    +#      connectionDetails <- DatabaseConnector::createConnectionDetails(
    +#        dbms = "", # specify your dbms
    +#        pathToDriver = "/"
    +#      )
    +# 2. Since these are fully functional queries, this can help with debugging.
    +# 3. In the results output by the sqlOnlyIncrementalInsert queries, placeholders are populated for execution_time, query_text, and warnings/errors; and the NOT_APPLICABLE rules are not applied.
     +# 4. In order to use the generated SQL to insert metadata and check results into output table, you must set sqlOnlyIncrementalInsert = TRUE.  Otherwise sqlOnly is backwards compatible with <= v2.2.0, generating queries which run the checks but don't store the results.
     
    -# where should the logs go? -------------------------------------------------------------------------
    +
    +# where should the results and logs go? ----------------------------------------------------------------
     outputFolder <- "output"
    +outputFile <- "results.json"
    +
     
     # logging type -------------------------------------------------------------------------------------
    -verboseMode <- FALSE # set to TRUE if you want to see activity written to the console
    +verboseMode <- TRUE # set to FALSE if you don't want the logs to be printed to the console
     
     # write results to table? ------------------------------------------------------------------------------
     writeToTable <- TRUE # set to FALSE if you want to skip writing to a SQL table in the results schema
     
    +# specify the name of the results table (used when writeToTable = TRUE and when sqlOnlyIncrementalInsert = TRUE)
    +writeTableName <- "dqdashboard_results"
    +
    +# write results to a csv file? -----------------------------------------------------------------------
     +writeToCsv <- FALSE # set to TRUE if you want to write results to a csv file
    +csvFile <- "" # only needed if writeToCsv is set to TRUE
    +
     # if writing to table and using Redshift, bulk loading can be initialized -------------------------------
     
     # Sys.setenv("AWS_ACCESS_KEY_ID" = "",
    @@ -187,21 +216,28 @@ 

    Executing Data Quality CheckscheckLevels <- c("TABLE", "FIELD", "CONCEPT") # which DQ checks to run? ------------------------------------ - checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv +# which CDM tables to exclude? ------------------------------------ +tablesToExclude <- c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN") # list of CDM table names to skip evaluating checks against; by default DQD excludes the vocab tables + # run the job -------------------------------------------------------------------------------------- DataQualityDashboard::executeDqChecks(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - resultsDatabaseSchema = resultsDatabaseSchema, - cdmSourceName = cdmSourceName, - numThreads = numThreads, - sqlOnly = sqlOnly, - outputFolder = outputFolder, - verboseMode = verboseMode, - writeToTable = writeToTable, - checkLevels = checkLevels, - checkNames = checkNames) + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + cdmSourceName = cdmSourceName, + numThreads = numThreads, + sqlOnly = sqlOnly, + sqlOnlyUnionCount = sqlOnlyUnionCount, + sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, + outputFolder = outputFolder, + verboseMode = verboseMode, + writeToTable = writeToTable, + writeToCsv = writeToCsv, + csvFile = csvFile, + checkLevels = checkLevels, + tablesToExclude = tablesToExclude, + checkNames = checkNames) # inspect logs ---------------------------------------------------------------------------- ParallelLogger::launchLogViewer(logFileName = file.path(outputFolder, cdmSourceName, diff --git a/docs/articles/DqdForCohorts.html b/docs/articles/DqdForCohorts.html index fb5539e1..beb2486f 100644 --- a/docs/articles/DqdForCohorts.html +++ b/docs/articles/DqdForCohorts.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -106,7 +109,7 @@

    Running the DQD on a Cohort

    Clair Blacketer

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/DqdForCohorts.rmd diff --git a/docs/articles/SqlOnly.html b/docs/articles/SqlOnly.html new file mode 100644 index 00000000..1a3c3721 --- /dev/null +++ b/docs/articles/SqlOnly.html @@ -0,0 +1,405 @@ + + + + + + + +SqlOnly • DataQualityDashboard + + + + + + + + + + + + +
    +
    + + + + +
    +
    + + + + + +
    +

    Description +

    +

    This article describes how to use DQD to generate only the SQL that +executes all DataQualityDashboard checks, without actually executing +them. There are a few main advantages of running DQD in Sql-only +mode:

    +
      +
    • Create queries locally, before sending to server. This allows for +generation of the SQL on one machine and execution on another (e.g. when +R cannot connect directly to the database server, or you want to run the +DQD SQL as part of your ETL).
    • +
    • Since these are fully functional queries, this can help with +debugging.
    • +
    • +[NEW in v2.3.0!] Performance. If you use +sqlOnlyIncrementalInsert = TRUE and +sqlOnlyUnionCount > 1, multiple checks are unioned +within a cte in the output SQL query to speed performance. When testing +on Spark, this resulted in a 10x or higher performance gain. +
        +
      • Performance for these queries has NOT been benchmarked on all +database systems. In order to obtain optimal results in your database +you may need to adjust the sqlOnlyUnionCount and/or tune +database parameters such as indexing and parallelism
      • +
      +
    • +
    +

    The new sqlOnlyIncrementalInsert mode generates SQL +queries that will actually populate a DQD results table in your database +with the results of the checks. There are currently some differences in +the result when running these queries, compared to a normal DQD run:

    +
      +
    • If you set sqlOnlyUnionCount > 1, if one check +results in an error, multiple checks might fail (since the queries are +unioned in ctes).
    • +
    • The status not_applicable is not evaluated. A check +fails or passes.
    • +
    • The query text is not shown in the results table.
    • +
    • Notes from threshold file are not included in results.
    • +
    • Execution metadata is not automatically added (total and query +execution time; CDM_SOURCE metadata).
    • +
    +

    Running DQD with sqlOnly = TRUE and +sqlOnlyIncrementalInsert = FALSE will generate SQL queries +that can be run to generate the result of each DQ check, but which will +not write the results back to the database.

    +
    +
    +

    Generating the “Incremental Insert” DQD SQL +

    +

    A few things to note:

    +
      +
    • A dummy connectionDetails object is needed where only +the dbms is used during SQL-only execution. +
        +
      • By setting the dbms to ‘sql server’ the output SQL can still be +rendered to any other dialect using SqlRender (see example +below).
      • +
      +
    • +
    • +sqlOnlyUnionCount determines the number of check sqls +to union in a single query. A smaller number gives more control and +progress information, a higher number typically gives a higher +performance. Here, 100 is used.
    • +
    +
    +library(DataQualityDashboard)
    +
    +# ConnectionDetails object needed for sql dialect
    +dbmsConnectionDetails <- DatabaseConnector::createConnectionDetails(
    +  dbms = "sql server",  # can be rendered to any dbms upon execution
    +  pathToDriver = "/"
    +)
    +
    +# Database parameters that are pre-filled in the written queries
    +# Use @-syntax if creating a template-sql at execution-time (e.g. "@cdmDatabaseSchema")
    +cdmDatabaseSchema <- "@cdmDatabaseSchema"   # the fully qualified database schema name of the CDM
    +resultsDatabaseSchema <- "@resultsDatabaseSchema"   # the fully qualified database schema name of the results schema (that you can write to)
    +writeTableName <- "@writeTableName"
    +
    +sqlFolder <- "./results_sql_only"
    +cdmSourceName <- "Synthea"
    +
    +sqlOnly <- TRUE
    +sqlOnlyIncrementalInsert <- TRUE    # this will generate an insert SQL query for each check type that will compute check results and insert them into a database table
    +sqlOnlyUnionCount <- 100            # this unions up to 100 queries in each insert query
    +
    +verboseMode <- TRUE
    +
    +cdmVersion <- "5.4"
    +checkLevels <- c("TABLE", "FIELD", "CONCEPT")
    +tablesToExclude <- c()
    +checkNames <- c()
    +
    +# Run DQD with sqlOnly=TRUE and sqlOnlyIncrementalInsert=TRUE. This will create a sql file for each check type in the output folder
    +DataQualityDashboard::executeDqChecks(
    +  connectionDetails = dbmsConnectionDetails,
    +  cdmDatabaseSchema = cdmDatabaseSchema,
    +  resultsDatabaseSchema = resultsDatabaseSchema,
    +  writeTableName = writeTableName,
    +  cdmSourceName = cdmSourceName,
    +  sqlOnly = sqlOnly,
    +  sqlOnlyUnionCount = sqlOnlyUnionCount,
    +  sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert,
    +  outputFolder = sqlFolder,
    +  checkLevels = checkLevels,
    +  verboseMode = verboseMode,
    +  cdmVersion = cdmVersion,
    +  tablesToExclude = tablesToExclude,
    +  checkNames = checkNames
    +)
    +

    After running above code, you will end up with a number of sql files +in the specified output directory:

    +
      +
    • One sql file per check type: +TABLE|FIELD|CONCEPT_<check_name>.sql.
    • +
    • +ddlDqdResults.sql with the result table creation +query.
    • +
    +

    The queries can then be run in any SQL client, making sure to run +ddlDqdResults.sql first. The order of the check queries is +not important, and can even be run in parallel. This will run the check, +and store the result in the specified writeTableName. In +order to show this result in the DQD Dashboard Shiny app, this table has +to be exported and converted to the .json format. See below for example +code of how this can be achieved.

    +
    +
    +

    (OPTIONAL) Execute queries +

    +

    Below code snippet shows how you can run the generated queries on an +OMOP CDM database using OHDSI R packages, and display the results in the +DQD Dashboard. Note that this approach uses two non-exported DQD +functions (.summarizeResults, +.writeResultsToJson) that are not tested for this purpose. +In the future we plan to expand support for incremental-insert mode with +a more robust set of public functions. Please reach out with feedback on +our GitHub +page if you’d like to have input on the development of this new +feature!

    +
    +library(DatabaseConnector)
    +cdmSourceName <- "<YourSourceName>"
    +sqlFolder <- "./results_sql_only"
    +jsonOutputFolder <- sqlFolder
    +jsonOutputFile <- "sql_only_results.json"
    +
    +dbms <- Sys.getenv("DBMS")
    +server <- Sys.getenv("DB_SERVER")
    +port <- Sys.getenv("DB_PORT")
    +user <- Sys.getenv("DB_USER")
    +password <- Sys.getenv("DB_PASSWORD")
    +pathToDriver <- Sys.getenv("PATH_TO_DRIVER")
    +connectionDetails <- DatabaseConnector::createConnectionDetails(
    +  dbms = dbms,
    +  server = server,
    +  port = port,
    +  user = user,
    +  password = password,
    +  pathToDriver = pathToDriver
    +)
    +cdmDatabaseSchema <- '<YourCdmSchemaName>'
    +resultsDatabaseSchema <- '<YourResultsSchemaName>'
    +writeTableName <- 'dqd_results' # or whatever you want to name your results table
    +
    +c <- DatabaseConnector::connect(connectionDetails)
    +
    +# Create results table
    +ddlFile <- file.path(sqlFolder, "ddlDqdResults.sql")
    +DatabaseConnector::renderTranslateExecuteSql(
    +  connection = c,
    +  sql = readChar(ddlFile, file.info(ddlFile)$size),
    +  resultsDatabaseSchema = resultsDatabaseSchema,
    +  writeTableName = writeTableName
    +)
    +
    +# Run checks
    +dqdSqlFiles <- Sys.glob(file.path(sqlFolder, "*.sql"))
    +for (dqdSqlFile in dqdSqlFiles) {
    +  if (dqdSqlFile == ddlFile) {
    +    next
    +  }
    +  print(dqdSqlFile)
    +  tryCatch(
    +    expr = {
    +      DatabaseConnector::renderTranslateExecuteSql(
    +        connection = c,
    +        sql = readChar(dqdSqlFile, file.info(dqdSqlFile)$size),
    +        cdmDatabaseSchema = cdmDatabaseSchema,
    +        resultsDatabaseSchema = resultsDatabaseSchema,
    +        writeTableName = writeTableName
    +      )
    +    },
    +    error = function(e) {
    +     print(sprintf("Writing table failed for check %s with error %s", dqdSqlFile, e$message))
    +    }
    +  )
    +}
    +
    +# Get results
    +checkResults <- DatabaseConnector::querySql(
    +  c,
    +  SqlRender::render(
    +    "SELECT * FROM @resultsDatabaseSchema.@writeTableName",
    +    resultsDatabaseSchema = resultsDatabaseSchema,
    +    writeTableName = writeTableName
    +  ),
    +  snakeCaseToCamelCase = TRUE
    +)
    +DatabaseConnector::disconnect(c)
    +
    +# convert check ID column name to correct format
    +colnames(checkResults)[colnames(checkResults) == "checkid"] ="checkId"
    +
    +# Get overview of DQD results
    +library(DataQualityDashboard)
    +overview <- DataQualityDashboard:::.summarizeResults(checkResults = checkResults)
    +
    +# Create results object, adding fake metadata
    +result <- list(
    +  startTimestamp = Sys.time(),
    +  endTimestamp = Sys.time(),
    +  executionTime = "",
    +  Metadata = data.frame(
    +    cdmSourceName = cdmSourceName,
    +    cdmSourceAbbreviation = cdmSourceName,
    +    cdmHolder = "",
    +    sourceDescription = "",
    +    sourceDocumentationReference = "",
    +    cdmEtlReference = "",
    +    sourceReleaseDate = "",
    +    cdmReleaseDate = "",
    +    cdmVersion = cdmVersion,
    +    cdmVersionConceptId = 0,
    +    vocabularyVersion = "",
    +    dqdVersion = as.character(packageVersion("DataQualityDashboard"))
    +  ),
    +  Overview = overview,
    +  CheckResults = checkResults
    +)
    +
    +DataQualityDashboard:::.writeResultsToJson(result, jsonOutputFolder, jsonOutputFile)
    +
    +jsonFilePath <- R.utils::getAbsolutePath(file.path(jsonOutputFolder, jsonOutputFile))
    +DataQualityDashboard::viewDqDashboard(jsonFilePath)
    +
    +
    + + + +
    + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.7.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/articles/Thresholds.html b/docs/articles/Thresholds.html index 1769297c..1e7693a1 100644 --- a/docs/articles/Thresholds.html +++ b/docs/articles/Thresholds.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -106,7 +109,7 @@

    Failure Thresholds and How to Change Them

    Clair Blacketer

    -

    2023-05-05

    +

    2023-05-21

    Source: vignettes/Thresholds.rmd @@ -144,7 +147,7 @@

    DQD Failure Thresholds

    DQD Control Files

    -

    There is a set of three csv files that underly the DQD. These files +

    There is a set of three csv files that underlie the DQD. These files indicate which checks should be run and what their failure thresholds should be. There is one file per check level: TABLE, FIELD, and CONCEPT. This vignette will walk through how to update the field level check diff --git a/docs/articles/index.html b/docs/articles/index.html index cc25aed5..4a799f32 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@

  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -94,6 +97,8 @@

    All vignettes

    Running the DQD on a Cohort
    +
    SqlOnly
    +
    Failure Thresholds and How to Change Them
    diff --git a/docs/authors.html b/docs/authors.html index ce535f60..d98f6bef 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/index.html b/docs/index.html index 2070c898..2dd5cdbf 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -70,6 +70,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -177,7 +180,7 @@

    Data Requirements source_documentation_reference Reference to where one can find documentation about the source data. -Can include URL’s, file name, source data experts contact information (if they agree to it) +Can include URLs, file name, source data experts contact information (if they agree to it) cdm_etl_reference @@ -259,7 +262,7 @@

    Development

    Development status

    -

    V2.0 ready for use.

    +

    DataQualityDashboard latest release (representing code in the main branch) is ready for use.

    diff --git a/docs/news/index.html b/docs/news/index.html index 8a7ced01..aa51707f 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0
    @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -81,6 +84,21 @@

    Changelog

    Source: NEWS.md +
    + +

    This release includes:

    +
    +

    New features

    +
    • +New SQL-only Mode: Setting sqlOnly and sqlOnlyIncrementalInsert to TRUE in executeDqChecks will return (but not run) a set of SQL queries that, when executed, will calculate the results of the DQ checks and insert them into a database table. Additionally, sqlOnlyUnionCount can be used to specify a number of SQL queries to union for each check type, allowing for parallel execution of these queries and potentially large performance gains. See the SqlOnly vignette for details
    • +
    • +Results File Case Converter: The new function convertJsonResultsFileCase can be used to convert the keys in a DQD results JSON file between snakecase and camelcase. This allows reading of v2.1.0+ JSON files in older DQD versions, and other conversions which may be necessary for secondary use of the DQD results file. See function documentation for details
    • +
    +
    +

    Bugfixes

    +
    • In the v2.1.0 release, all DQD variables were converted from snakecase to camelcase, including those in the results JSON file. This resulted in errors for users trying to view results files generated by older DQD versions in DQD v2.1.0+. This issue has now been fixed. viewDqDashboard will now automatically convert the case of pre-v2.1.0 results files to camelcase so that older results files may be viewed in v2.3.0+
    • +
    +

    This release includes:

    @@ -147,7 +165,7 @@

    New Checks -

    outputFolder input paramater

    +

    outputFolder input parameter

    • The outputFolder parameter for the executeDqChecks function is now REQUIRED and no longer has a default value. This may be a breaking change for users who have not specified this parameter in their script to run DQD.
    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 97c34260..7982666c 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -7,6 +7,7 @@ articles: CheckTypeDescriptions: CheckTypeDescriptions.html DataQualityDashboard: DataQualityDashboard.html DqdForCohorts: DqdForCohorts.html + SqlOnly: SqlOnly.html Thresholds: Thresholds.html -last_built: 2023-05-05T21:20Z +last_built: 2023-05-21T18:40Z diff --git a/docs/pull_request_template.html b/docs/pull_request_template.html index 2f54d804..73f4a6b2 100644 --- a/docs/pull_request_template.html +++ b/docs/pull_request_template.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/convertJsonResultsFileCase.html b/docs/reference/convertJsonResultsFileCase.html new file mode 100644 index 00000000..9cd0e651 --- /dev/null +++ b/docs/reference/convertJsonResultsFileCase.html @@ -0,0 +1,154 @@ + +Convert JSON results file case — convertJsonResultsFileCase • DataQualityDashboard + + +
    +
    + + + +
    +
    + + +
    +

    Convert a DQD JSON results file between camelcase and (all-caps) snakecase. Enables viewing of pre-v.2.1.0 results files in later DQD versions, and vice versa

    +
    + +
    +
    convertJsonResultsFileCase(
    +  jsonFilePath,
    +  writeToFile,
    +  outputFolder = NA,
    +  outputFile = "",
    +  targetCase
    +)
    +
    + +
    +

    Arguments

    +
    jsonFilePath
    +

    Path to the JSON results file to be converted

    + + +
    writeToFile
    +

    Whether or not to write the converted results back to a file (must be either TRUE or FALSE)

    + + +
    outputFolder
    +

    The folder to output the converted JSON results file to

    + + +
    outputFile
    +

    (OPTIONAL) File to write converted results JSON object to. Default is name of input file with a "_camel" or "_snake" postfix

    + + +
    targetCase
    +

    Case into which the results file parameters should be converted (must be either "camel" or "snake")

    + +
    +
    +

    Value

    + + +

    DQD results object (a named list)

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/dot-evaluateThresholds.html b/docs/reference/dot-evaluateThresholds.html index b72a73f2..f2b5a00b 100644 --- a/docs/reference/dot-evaluateThresholds.html +++ b/docs/reference/dot-evaluateThresholds.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-getCheckId.html b/docs/reference/dot-getCheckId.html index 83b7f046..062d8cc0 100644 --- a/docs/reference/dot-getCheckId.html +++ b/docs/reference/dot-getCheckId.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-processCheck.html b/docs/reference/dot-processCheck.html index 61b584e2..10fcaa63 100644 --- a/docs/reference/dot-processCheck.html +++ b/docs/reference/dot-processCheck.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-recordResult.html b/docs/reference/dot-recordResult.html index 66a518be..ca5dd5c1 100644 --- a/docs/reference/dot-recordResult.html +++ b/docs/reference/dot-recordResult.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-runCheck.html b/docs/reference/dot-runCheck.html index d8c2c140..9f3d02cc 100644 --- a/docs/reference/dot-runCheck.html +++ b/docs/reference/dot-runCheck.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -96,10 +99,14 @@

    Internal function to run and process each data quality check.

    connection, cdmDatabaseSchema, vocabDatabaseSchema, + resultsDatabaseSchema, + writeTableName, cohortDatabaseSchema, cohortTableName, cohortDefinitionId, outputFolder, + sqlOnlyUnionCount, + sqlOnlyIncrementalInsert, sqlOnly )
    @@ -134,6 +141,14 @@

    Arguments

    The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema)

    +
    resultsDatabaseSchema
    +

    The fully qualified database name of the results schema

    + + +
    writeTableName
    +

    The table to write DQD results to. Used when sqlOnly or writeToTable is True.

    + +
    cohortDatabaseSchema

    The schema where the cohort table is located.

    @@ -150,6 +165,14 @@

    Arguments

    The folder to output logs and SQL files to

    +
    sqlOnlyUnionCount
    +

    (OPTIONAL) How many SQL commands to union before inserting them into output table (speeds processing when queries done in parallel). Default is 1.

    + + +
    sqlOnlyIncrementalInsert
    +

    (OPTIONAL) Boolean to determine whether to insert check results and associated metadata into output table. Default is FALSE (for backwards compatibility to <= v2.2.0)

    + +
    sqlOnly

    Should the SQLs be executed (FALSE) or just returned (TRUE)?

    diff --git a/docs/reference/dot-summarizeResults.html b/docs/reference/dot-summarizeResults.html index 67d842b5..1cb3b6a9 100644 --- a/docs/reference/dot-summarizeResults.html +++ b/docs/reference/dot-summarizeResults.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-writeResultsToCsv.html b/docs/reference/dot-writeResultsToCsv.html index a22caa7f..7ea3836b 100644 --- a/docs/reference/dot-writeResultsToCsv.html +++ b/docs/reference/dot-writeResultsToCsv.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-writeResultsToJson.html b/docs/reference/dot-writeResultsToJson.html index 5d9c451f..a0f1bd76 100644 --- a/docs/reference/dot-writeResultsToJson.html +++ b/docs/reference/dot-writeResultsToJson.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/dot-writeResultsToTable.html b/docs/reference/dot-writeResultsToTable.html index 79245c9f..1944a661 100644 --- a/docs/reference/dot-writeResultsToTable.html +++ b/docs/reference/dot-writeResultsToTable.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/executeDqChecks.html b/docs/reference/executeDqChecks.html index 3fff9ccf..d06ea101 100644 --- a/docs/reference/executeDqChecks.html +++ b/docs/reference/executeDqChecks.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -95,6 +98,8 @@

    Execute DQ checks

    cdmSourceName, numThreads = 1, sqlOnly = FALSE, + sqlOnlyUnionCount = 1, + sqlOnlyIncrementalInsert = FALSE, outputFolder, outputFile = "", verboseMode = FALSE, @@ -146,6 +151,14 @@

    Arguments

    Should the SQLs be executed (FALSE) or just returned (TRUE)?

    +
    sqlOnlyUnionCount
    +

    (OPTIONAL) In sqlOnlyIncrementalInsert mode, how many SQL commands to union in each query to insert check results into results table (can speed processing when queries done in parallel). Default is 1.

    + + +
    sqlOnlyIncrementalInsert
    +

    (OPTIONAL) In sqlOnly mode, boolean to determine whether to generate SQL queries that insert check results and associated metadata into results table. Default is FALSE (for backwards compatibility to <= v2.2.0)

    + +
    outputFolder

    The folder to output logs, SQL files, and JSON results file to

    @@ -163,7 +176,7 @@

    Arguments

    writeTableName
    -

    The name of the results table. Defaults to `dqdashboard_results`.

    +

    The name of the results table. Defaults to `dqdashboard_results`. Used when sqlOnly or writeToTable is True.

    writeToCsv
    diff --git a/docs/reference/index.html b/docs/reference/index.html index 6c501848..f8d63ac3 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -124,6 +127,14 @@

    Write DQD results to a CSV writeJsonResultsToCsv()

    Write JSON Results to CSV file

    + +

    Convert results JSON file case

    +

    Function to convert the case of a results JSON file between snakecase and camelcase

    + + +

    convertJsonResultsFileCase()

    + +

    Convert JSON results file case

    @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/reEvaluateThresholds.html b/docs/reference/reEvaluateThresholds.html index b8f3b157..b15a7fcb 100644 --- a/docs/reference/reEvaluateThresholds.html +++ b/docs/reference/reEvaluateThresholds.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/viewDqDashboard.html b/docs/reference/viewDqDashboard.html index 48ca1452..7ee8d4be 100644 --- a/docs/reference/viewDqDashboard.html +++ b/docs/reference/viewDqDashboard.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • @@ -97,11 +100,11 @@

    Arguments

    launch.browser
    -

    Passed on to shiny::runApp

    +

    Passed on to shiny::runApp

    display.mode
    -

    Passed on to shiny::runApp

    +

    Passed on to shiny::runApp

    ...
    diff --git a/docs/reference/writeJsonResultsToCsv.html b/docs/reference/writeJsonResultsToCsv.html index 855425ae..cea6f937 100644 --- a/docs/reference/writeJsonResultsToCsv.html +++ b/docs/reference/writeJsonResultsToCsv.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/reference/writeJsonResultsToTable.html b/docs/reference/writeJsonResultsToTable.html index e3de6361..55e340e7 100644 --- a/docs/reference/writeJsonResultsToTable.html +++ b/docs/reference/writeJsonResultsToTable.html @@ -17,7 +17,7 @@ DataQualityDashboard - 2.2.0 + 2.3.0 @@ -52,6 +52,9 @@
  • Running the DQD on a Cohort
  • +
  • + SqlOnly +
  • Failure Thresholds and How to Change Them
  • diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 0012aaf2..99f8a396 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -48,6 +48,9 @@ /articles/GettingStarted.html + + /articles/SqlOnly.html + /articles/Thresholds.html @@ -69,6 +72,9 @@ /pull_request_template.html + + /reference/convertJsonResultsFileCase.html + /reference/dot-evaluateThresholds.html diff --git a/extras/DataQualityDashboard.pdf b/extras/DataQualityDashboard.pdf index 057d821f..34e136a4 100644 Binary files a/extras/DataQualityDashboard.pdf and b/extras/DataQualityDashboard.pdf differ diff --git a/extras/PackageMaintenance.R b/extras/PackageMaintenance.R index 347f5768..547a0555 100644 --- a/extras/PackageMaintenance.R +++ b/extras/PackageMaintenance.R @@ -25,8 +25,8 @@ devtools::document() # Create manual and vignettes: unlink("extras/DataQualityDashboard.pdf") -shell("R CMD Rd2pdf ./ --output=extras/DataQualityDashboard.pdf") -# on Mac: system("R CMD Rd2pdf ./ --output=extras/DataQualityDashboard.pdf") +shell("R CMD Rd2pdf ./ --output=extras/DataQualityDashboard.pdf") # PC +system("R CMD Rd2pdf ./ --output=extras/DataQualityDashboard.pdf") # Mac rmarkdown::render("vignettes/AddNewCheck.Rmd", output_file = "../inst/doc/AddNewCheck.pdf", diff --git a/extras/codeToRun.R b/extras/codeToRun.R index db9abb92..4807d3c2 100644 --- a/extras/codeToRun.R +++ b/extras/codeToRun.R @@ -37,7 +37,8 @@ cdmVersion <- "5.4" # the CDM version you are targetting. Currently supporst 5.2 numThreads <- 1 # on Redshift, 3 seems to work well # specify if you want to execute the queries or inspect them ------------------------------------------ -sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries +sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries. See codeToRun_sqlOnly.R for other sqlOnly parameters + # where should the results and logs go? 
---------------------------------------------------------------- outputFolder <- "output" diff --git a/extras/codeToRun_sqlOnly.R b/extras/codeToRun_sqlOnly.R new file mode 100644 index 00000000..e1dcfbc5 --- /dev/null +++ b/extras/codeToRun_sqlOnly.R @@ -0,0 +1,49 @@ +#' This is an example of how to run DQD in sqlOnlyIncrementalInsert mode +#' There are two main advantages of running DQD in sqlOnlyIncrementalInsert mode: +#' - Create queries locally, before sending to server. This allows for inspection of code before execution. +#' - Faster. With sqlOnlyUnionCount > 1 multiple checks can be executed in parallel in one query. + +library(DataQualityDashboard) + +# ConnectionDetails object needed for sql dialect +dbmsConnectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "spark", # any valid options - such as 'redshift', 'sql server', etc. + pathToDriver = "/" +) + +# Database parameters that are pre-filled in the written queries +# Use @-syntax if creating a template-sql at execution-time (e.g. "@cdmDatabaseSchema") +cdmDatabaseSchema <- "yourCdmSchema" # the fully qualified database schema name of the CDM +resultsDatabaseSchema <- "yourResultsSchema" # the fully qualified database schema name of the results schema (that you can write to) +writeTableName <- "dqdashboard_results" + +sqlFolder <- "./results" +cdmSourceName <- "Your CDM Source" # a human readable name for your CDM source + +sqlOnly <- TRUE +sqlOnlyUnionCount <- 100 # Number of check sqls to union in a single query. A smaller number gives more control and progress information, a higher number typically gives a higher performance. +sqlOnlyIncrementalInsert <- TRUE # If FALSE, then pre v2.3.0 format. 
If TRUE, then wraps check query in cte with all metadata and inserts into result table + +verboseMode <- TRUE + +cdmVersion <- "5.3" # version of your CDM +checkLevels <- c("TABLE", "FIELD", "CONCEPT") +tablesToExclude <- c() +checkNames <- c() + +DataQualityDashboard::executeDqChecks( + connectionDetails = dbmsConnectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName, + cdmSourceName = cdmSourceName, + sqlOnly = sqlOnly, + sqlOnlyUnionCount = sqlOnlyUnionCount, + sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, + outputFolder = sqlFolder, + checkLevels = checkLevels, + verboseMode = verboseMode, + cdmVersion = cdmVersion, + tablesToExclude = tablesToExclude, + checkNames = checkNames +) diff --git a/inst/shinyApps/app.R b/inst/shinyApps/app.R index edf4b9c1..51f17076 100755 --- a/inst/shinyApps/app.R +++ b/inst/shinyApps/app.R @@ -1,7 +1,9 @@ library(shiny) server <- function(input, output, session) { observe({ - results <- jsonlite::read_json(path = Sys.getenv("jsonPath")) + jsonPath <- Sys.getenv("jsonPath") + results <- convertJsonResultsFileCase(jsonPath, writeToFile = FALSE, targetCase = "camel") + results <- jsonlite::parse_json(jsonlite::toJSON(results)) session$sendCustomMessage("results", results) }) } diff --git a/inst/sql/sql_server/sqlOnly/cte_sql_for_results_table.sql b/inst/sql/sql_server/sqlOnly/cte_sql_for_results_table.sql new file mode 100644 index 00000000..8c917e5e --- /dev/null +++ b/inst/sql/sql_server/sqlOnly/cte_sql_for_results_table.sql @@ -0,0 +1,35 @@ +/********* +SQL to create query for insertion into results table. These may be unioned together prior to insert. +Note that this does not include information about SQL errors or performance. 
+**********/ + +SELECT + cte.num_violated_rows + ,cte.pct_violated_rows + ,cte.num_denominator_rows + ,'' as execution_time + ,'' as query_text + ,'@checkName' as check_name + ,'@checkLevel' as check_level + ,'@renderedCheckDescription' as check_description + ,'@cdmTableName' as cdm_table_name + ,'@cdmFieldName' as cdm_field_name + ,'@conceptId' as concept_id + ,'@unitConceptId' as unit_concept_id + ,'@sqlFile' as sql_file + ,'@category' as category + ,'@subcategory' as subcategory + ,'@context' as context + ,'' as warning + ,'' as error + ,'@checkId' as checkid + ,0 as is_error + ,0 as not_applicable + ,CASE WHEN (cte.pct_violated_rows * 100) > @thresholdValue THEN 1 ELSE 0 END as failed + ,CASE WHEN (cte.pct_violated_rows * 100) <= @thresholdValue THEN 1 ELSE 0 END as passed + ,NULL as not_applicable_reason + ,@thresholdValue as threshold_value + ,NULL as notes_value +FROM ( + @queryText +) cte diff --git a/inst/sql/sql_server/sqlOnly/insert_ctes_into_result_table.sql b/inst/sql/sql_server/sqlOnly/insert_ctes_into_result_table.sql new file mode 100644 index 00000000..952be3f9 --- /dev/null +++ b/inst/sql/sql_server/sqlOnly/insert_ctes_into_result_table.sql @@ -0,0 +1,12 @@ +/********* +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. +Note that this does not include information about SQL errors or performance +**********/ + +WITH cte_all AS ( + @queryText +) +INSERT INTO @resultsDatabaseSchema.@tableName +SELECT * +FROM cte_all +; diff --git a/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-insert.sql b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-insert.sql new file mode 100644 index 00000000..e3ddf7bd --- /dev/null +++ b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-insert.sql @@ -0,0 +1,2715 @@ +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. 
+ +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the OBSERVATION_PERIOD table' as check_description + + ,'OBSERVATION_PERIOD' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_observation_period' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 0 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 0 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,0 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM 
@yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the VISIT_OCCURRENCE table' as check_description + + ,'VISIT_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_visit_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + 
+MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the CONDITION_OCCURRENCE table' as check_description + + ,'CONDITION_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_condition_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM 
@yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DRUG_EXPOSURE table' as check_description + + ,'DRUG_EXPOSURE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_drug_exposure' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 
1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the PROCEDURE_OCCURRENCE table' as check_description + + ,'PROCEDURE_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_procedure_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PROCEDURE_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PROCEDURE_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM 
@yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DEVICE_EXPOSURE table' as check_description + + ,'DEVICE_EXPOSURE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_device_exposure' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DEVICE_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + 
+ ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DEVICE_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the MEASUREMENT table' as check_description + + ,'MEASUREMENT' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_measurement' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = MEASUREMENT + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.MEASUREMENT cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) 
denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the VISIT_DETAIL table' as check_description + + ,'VISIT_DETAIL' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_visit_detail' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_DETAIL + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + 
+ END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_DETAIL cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the NOTE table' as check_description + + ,'NOTE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_note' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = NOTE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.NOTE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + 
+INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the OBSERVATION table' as check_description + + ,'OBSERVATION' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_observation' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + 
denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the SPECIMEN table' as check_description + + ,'SPECIMEN' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_specimen' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = SPECIMEN + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.SPECIMEN cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + 
+ +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the PAYER_PLAN_PERIOD table' as check_description + + ,'PAYER_PLAN_PERIOD' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_payer_plan_period' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PAYER_PLAN_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + 
+ END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PAYER_PLAN_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DRUG_ERA table' as check_description + + ,'DRUG_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_drug_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + 
+) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DOSE_ERA table' as check_description + + ,'DOSE_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_dose_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DOSE_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + 
denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DOSE_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the CONDITION_ERA table' as check_description + + ,'CONDITION_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_condition_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + 
+) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; diff --git a/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-legacy.sql b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-legacy.sql new file mode 100644 index 00000000..dacbe470 --- /dev/null +++ b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=1-legacy.sql @@ -0,0 +1,1335 @@ + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS 
num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PROCEDURE_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PROCEDURE_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DEVICE_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + 
SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DEVICE_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = MEASUREMENT + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.MEASUREMENT cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_DETAIL + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_DETAIL cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = NOTE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.NOTE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = SPECIMEN + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.SPECIMEN cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PAYER_PLAN_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PAYER_PLAN_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DOSE_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DOSE_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + +; diff --git a/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=4-insert.sql b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=4-insert.sql new file mode 100644 index 00000000..6bac6787 --- /dev/null +++ b/inst/testdata/TABLE_measurePersonCompleteness-mssql-union=4-insert.sql @@ -0,0 +1,2462 @@ +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. 
+ +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the OBSERVATION_PERIOD table' as check_description + + ,'OBSERVATION_PERIOD' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_observation_period' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 0 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 0 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,0 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM 
@yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the VISIT_OCCURRENCE table' as check_description + + ,'VISIT_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_visit_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 
1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the CONDITION_OCCURRENCE table' as check_description + + ,'CONDITION_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_condition_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of 
persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DRUG_EXPOSURE table' as check_description + + ,'DRUG_EXPOSURE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_drug_exposure' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + 
+) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the PROCEDURE_OCCURRENCE table' as check_description + + ,'PROCEDURE_OCCURRENCE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_procedure_occurrence' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PROCEDURE_OCCURRENCE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 
1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PROCEDURE_OCCURRENCE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DEVICE_EXPOSURE table' as check_description + + ,'DEVICE_EXPOSURE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_device_exposure' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons 
have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DEVICE_EXPOSURE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DEVICE_EXPOSURE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the MEASUREMENT table' as check_description + + ,'MEASUREMENT' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_measurement' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = MEASUREMENT + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.MEASUREMENT cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) 
denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the VISIT_DETAIL table' as check_description + + ,'VISIT_DETAIL' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_visit_detail' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = VISIT_DETAIL + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.VISIT_DETAIL cdmTable2 + + ON 
cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the NOTE table' as check_description + + ,'NOTE' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_note' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + 
+schema = @yourCdmSchema + +cdmTableName = NOTE + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.NOTE cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the OBSERVATION table' as check_description + + ,'OBSERVATION' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_observation' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = OBSERVATION + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.OBSERVATION cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) 
denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the SPECIMEN table' as check_description + + ,'SPECIMEN' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_specimen' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = SPECIMEN + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.SPECIMEN cdmTable2 + + ON cdmTable.person_id = 
cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the PAYER_PLAN_PERIOD table' as check_description + + ,'PAYER_PLAN_PERIOD' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_payer_plan_period' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = PAYER_PLAN_PERIOD + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + 
SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.PAYER_PLAN_PERIOD cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; +/********* + +SQL to insert individual DQD results directly into output table, rather than waiting until collecting all results. + +Note that this does not include information about SQL errors or performance + +**********/ + + + +WITH cte_all AS ( + + /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. 
+ +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DRUG_ERA table' as check_description + + ,'DRUG_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_drug_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DRUG_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DRUG_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + 
+) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the DOSE_ERA table' as check_description + + ,'DOSE_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_dose_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 100 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 100 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,100 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = DOSE_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.DOSE_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + 
WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + UNION ALL /********* + +SQL to create query for insertion into results table. These may be unioned together prior to insert. + +Note that this does not include information about SQL errors or performance. + +**********/ + + + +SELECT + + cte.num_violated_rows + + ,cte.pct_violated_rows + + ,cte.num_denominator_rows + + ,'' as execution_time + + ,'' as query_text + + ,'measurePersonCompleteness' as check_name + + ,'TABLE' as check_level + + ,'The number and percent of persons in the CDM that do not have at least one record in the CONDITION_ERA table' as check_description + + ,'CONDITION_ERA' as cdm_table_name + + ,'NA' as cdm_field_name + + ,'NA' as concept_id + + ,'NA' as unit_concept_id + + ,'table_person_completeness.sql' as sql_file + + ,'Completeness' as category + + ,'NA' as subcategory + + ,'Validation' as context + + ,'' as warning + + ,'' as error + + ,'table_measurepersoncompleteness_condition_era' as checkid + + ,0 as is_error + + ,0 as not_applicable + + ,CASE WHEN (cte.pct_violated_rows * 100) > 95 THEN 1 ELSE 0 END as failed + + ,CASE WHEN (cte.pct_violated_rows * 100) <= 95 THEN 1 ELSE 0 END as passed + + ,NULL as not_applicable_reason + + ,95 as threshold_value + + ,NULL as notes_value + +FROM ( + + + +/********* + +Table Level: + +MEASURE_PERSON_COMPLETENESS + +Determine what #/% of persons have at least one record in the cdmTable + + + +Parameters used in this template: + +schema = @yourCdmSchema + +cdmTableName = CONDITION_ERA + + + +**********/ + + + + + +SELECT + + num_violated_rows, + + CASE + + WHEN denominator.num_rows = 0 THEN 0 + + ELSE 1.0*num_violated_rows/denominator.num_rows + + END AS pct_violated_rows, + + denominator.num_rows AS num_denominator_rows + +FROM + +( + + SELECT + + 
COUNT_BIG(violated_rows.person_id) AS num_violated_rows + + FROM + + ( + + /*violatedRowsBegin*/ + + SELECT + + cdmTable.* + + FROM @yourCdmSchema.person cdmTable + + + + LEFT JOIN @yourCdmSchema.CONDITION_ERA cdmTable2 + + ON cdmTable.person_id = cdmTable2.person_id + + WHERE cdmTable2.person_id IS NULL + + /*violatedRowsEnd*/ + + ) violated_rows + +) violated_row_count, + +( + + SELECT + + COUNT_BIG(*) AS num_rows + + FROM @yourCdmSchema.person cdmTable + + + +) denominator + + + + + +) cte + + + +) + +INSERT INTO @yourResultsSchema.dqdashboard_results + +SELECT * + +FROM cte_all + +; diff --git a/man/convertJsonResultsFileCase.Rd b/man/convertJsonResultsFileCase.Rd new file mode 100644 index 00000000..34c328d3 --- /dev/null +++ b/man/convertJsonResultsFileCase.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convertResultsCase.R +\name{convertJsonResultsFileCase} +\alias{convertJsonResultsFileCase} +\title{Convert JSON results file case} +\usage{ +convertJsonResultsFileCase( + jsonFilePath, + writeToFile, + outputFolder = NA, + outputFile = "", + targetCase +) +} +\arguments{ +\item{jsonFilePath}{Path to the JSON results file to be converted} + +\item{writeToFile}{Whether or not to write the converted results back to a file (must be either TRUE or FALSE)} + +\item{outputFolder}{The folder to output the converted JSON results file to} + +\item{outputFile}{(OPTIONAL) File to write converted results JSON object to. Default is name of input file with a "_camel" or "_snake" postfix} + +\item{targetCase}{Case into which the results file parameters should be converted (must be either "camel" or "snake")} +} +\value{ +DQD results object (a named list) +} +\description{ +Convert a DQD JSON results file between camelcase and (all-caps) snakecase. 
Enables viewing of pre-v.2.1.0 results files in later DQD versions, and vice versa +} diff --git a/man/dot-runCheck.Rd b/man/dot-runCheck.Rd index bbb215ae..ba197cba 100644 --- a/man/dot-runCheck.Rd +++ b/man/dot-runCheck.Rd @@ -13,10 +13,14 @@ connection, cdmDatabaseSchema, vocabDatabaseSchema, + resultsDatabaseSchema, + writeTableName, cohortDatabaseSchema, cohortTableName, cohortDefinitionId, outputFolder, + sqlOnlyUnionCount, + sqlOnlyIncrementalInsert, sqlOnly ) } @@ -35,6 +39,10 @@ \item{vocabDatabaseSchema}{The fully qualified database name of the vocabulary schema (default is to set it as the cdmDatabaseSchema)} +\item{resultsDatabaseSchema}{The fully qualified database name of the results schema} + +\item{writeTableName}{The table to write DQD results to. Used when sqlOnly or writeToTable is True.} + \item{cohortDatabaseSchema}{The schema where the cohort table is located.} \item{cohortTableName}{The name of the cohort table.} @@ -43,6 +51,10 @@ \item{outputFolder}{The folder to output logs and SQL files to} +\item{sqlOnlyUnionCount}{(OPTIONAL) How many SQL commands to union before inserting them into output table (speeds processing when queries done in parallel). Default is 1.} + +\item{sqlOnlyIncrementalInsert}{(OPTIONAL) Boolean to determine whether to insert check results and associated metadata into output table. 
Default is FALSE (for backwards compatibility to <= v2.2.0)} + \item{sqlOnly}{Should the SQLs be executed (FALSE) or just returned (TRUE)?} \item{tablechecks}{A dataframe containing the table checks} diff --git a/man/executeDqChecks.Rd b/man/executeDqChecks.Rd index 6c5b38b5..1ae7d2e2 100644 --- a/man/executeDqChecks.Rd +++ b/man/executeDqChecks.Rd @@ -12,6 +12,8 @@ executeDqChecks( cdmSourceName, numThreads = 1, sqlOnly = FALSE, + sqlOnlyUnionCount = 1, + sqlOnlyIncrementalInsert = FALSE, outputFolder, outputFile = "", verboseMode = FALSE, @@ -47,6 +49,10 @@ executeDqChecks( \item{sqlOnly}{Should the SQLs be executed (FALSE) or just returned (TRUE)?} +\item{sqlOnlyUnionCount}{(OPTIONAL) In sqlOnlyIncrementalInsert mode, how many SQL commands to union in each query to insert check results into results table (can speed processing when queries done in parallel). Default is 1.} + +\item{sqlOnlyIncrementalInsert}{(OPTIONAL) In sqlOnly mode, boolean to determine whether to generate SQL queries that insert check results and associated metadata into results table. Default is FALSE (for backwards compatibility to <= v2.2.0)} + \item{outputFolder}{The folder to output logs, SQL files, and JSON results file to} \item{outputFile}{(OPTIONAL) File to write results JSON object} @@ -55,7 +61,7 @@ executeDqChecks( \item{writeToTable}{Boolean to indicate if the check results will be written to the dqdashboard_results table in the resultsDatabaseSchema. Default is TRUE} -\item{writeTableName}{The name of the results table. Defaults to `dqdashboard_results`.} +\item{writeTableName}{The name of the results table. Defaults to `dqdashboard_results`. Used when sqlOnly or writeToTable is True.} \item{writeToCsv}{Boolean to indicate if the check results will be written to a csv file. 
Default is FALSE} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 59ecd3be..147aeb64 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -12,6 +12,15 @@ connectionDetailsEunomia <- Eunomia::getEunomiaConnectionDetails() cdmDatabaseSchemaEunomia <- "main" resultsDatabaseSchemaEunomia <- "main" +remove_sql_comments <- function(sql) { + sql0 <- gsub("--.*?\\n|--.*?\\r", " ", sql) # remove single-line SQL comments + sql1 <- gsub("\\r|\\n|\\t", " ", sql0) # convert tabs and newlines to spaces + sql2 <- gsub("/*", "@@@@ ", sql1, fixed = TRUE) # must add spaces between multi-line comments for quote removal to work + sql3 <- gsub("*/", " @@@@", sql2, fixed = TRUE) # must add spaces between multi-line comments for quote removal to work + sql4 <- gsub("@@@@ .+? @@@@", " ", sql3, ) # remove multi-line comments + sql5 <- gsub("\\s+", " ", sql4) # remove multiple spaces +} + # dbms <- getOption("dbms", default = "sqlite") # if (dbms == "sqlite") { # connectionDetails <- Eunomia::getEunomiaConnectionDetails() diff --git a/tests/testthat/test-convertResultsCase.R b/tests/testthat/test-convertResultsCase.R new file mode 100644 index 00000000..3e792dbd --- /dev/null +++ b/tests/testthat/test-convertResultsCase.R @@ -0,0 +1,62 @@ +library(testthat) + +test_that("Camel correctly converted to snake and back", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + + expect_warning( + results <- executeDqChecks( + connectionDetails = connectionDetailsEunomia, + cdmDatabaseSchema = cdmDatabaseSchemaEunomia, + resultsDatabaseSchema = resultsDatabaseSchemaEunomia, + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + outputFile = "foo.json", + writeToTable = FALSE + ), + regexp = "^Missing check names.*" + ) + + jsonFilePath <- file.path(outputFolder, "foo.json") + expect_warning( + convertJsonResultsFileCase(jsonFilePath, writeToFile = F, targetCase = "camel"), + regexp 
= "^File is already in camelcase!" + ) + snakeResults <- convertJsonResultsFileCase(jsonFilePath, writeToFile = T, outputFolder, outputFile = "snake.json", targetCase = "snake") + snakeNames <- c("NUM_VIOLATED_ROWS", "PCT_VIOLATED_ROWS", "NUM_DENOMINATOR_ROWS", "EXECUTION_TIME", "QUERY_TEXT", "CHECK_NAME", "CHECK_LEVEL", "CHECK_DESCRIPTION", "CDM_TABLE_NAME", "SQL_FILE", "CATEGORY", "CONTEXT", "checkId", "FAILED", "PASSED", "IS_ERROR", "NOT_APPLICABLE", "THRESHOLD_VALUE") + + expect_equal(length(snakeResults), 6) + expect_true(setequal(names(snakeResults$CheckResults), snakeNames)) + + snakeFilePath <- file.path(outputFolder, "snake.json") + expect_warning( + convertJsonResultsFileCase(snakeFilePath, writeToFile = F, targetCase = "snake"), + regexp = "^File is already in snakecase!" + ) + camelResults <- convertJsonResultsFileCase(snakeFilePath, writeToFile = T, outputFolder, targetCase = "camel") + camelNames <- c("numViolatedRows", "pctViolatedRows", "numDenominatorRows", "executionTime", "queryText", "checkName", "checkLevel", "checkDescription", "cdmTableName", "sqlFile", "category", "context", "checkId", "failed", "passed", "isError", "notApplicable", "thresholdValue") + camelFilePath <- file.path(outputFolder, "snake_camel.json") + + expect_equal(length(camelResults), 6) + expect_true(setequal(names(camelResults$CheckResults), camelNames)) + expect_true(file.exists(camelFilePath)) + + origJson <- jsonlite::toJSON(results) + reconvertedJson <- jsonlite::toJSON(camelResults) + expect_equal(origJson, reconvertedJson) +}) + +test_that("Invalid case throws error", { + expect_error( + convertJsonResultsFileCase("bar.json", writeToFile = F, targetCase = "foo"), + regexp = "^targetCase must be either 'camel' or 'snake'." + ) +}) + +test_that("Output folder required when writing to file", { + expect_error( + convertJsonResultsFileCase("bar.json", writeToFile = T, targetCase = "camel"), + regexp = "^You must specify an output folder if writing to file." 
+ ) +}) diff --git a/tests/testthat/test-execute.R b/tests/testthat/test-execute.R index 972c9dd8..646d20cb 100644 --- a/tests/testthat/test-execute.R +++ b/tests/testthat/test-execute.R @@ -273,3 +273,147 @@ test_that("Execute reEvaluateThresholds on Synthea/Eunomia", { ) expect_is(results2, "list") }) + +test_that("Execute DQ checks using sqlOnly=TRUE and sqlOnlyUnionCount=4 and sqlOnlyIncrementalInsert=TRUE", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + sqlOnlyConnectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server", pathToDriver = "/") + + expect_warning( + results <- executeDqChecks( + connectionDetails = sqlOnlyConnectionDetails, + cdmDatabaseSchema = "@yourCdmSchema", + resultsDatabaseSchema = "@yourResultsSchema", + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = FALSE, + sqlOnly = TRUE, + sqlOnlyUnionCount = 4, + sqlOnlyIncrementalInsert = TRUE, + writeTableName = "dqdashboard_results" + ), + regexp = "^Missing check names.*" + ) + expect_true("ddlDqdResults.sql" %in% list.files(outputFolder)) + dqdSqlFile <- "TABLE_measurePersonCompleteness.sql" + expect_true(dqdSqlFile %in% list.files(outputFolder)) + + dqdSqlFilePath <- file.path(outputFolder, dqdSqlFile) + sql <- SqlRender::readSql(dqdSqlFilePath) + + # comparison + expectedSqlFile <- system.file("testdata", "TABLE_measurePersonCompleteness-mssql-union=4-insert.sql", package = "DataQualityDashboard") + sqlExpected <- SqlRender::readSql(expectedSqlFile) + + # test if identical, removing comments and excess whitespace + expect_equal(remove_sql_comments(sql), remove_sql_comments(sqlExpected)) +}) + +test_that("Execute DQ checks using sqlOnly=TRUE and sqlOnlyUnionCount=1 and sqlOnlyIncrementalInsert=TRUE", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + sqlOnlyConnectionDetails <- 
DatabaseConnector::createConnectionDetails(dbms = "sql server", pathToDriver = "/") + + expect_warning( + results <- executeDqChecks( + connectionDetails = sqlOnlyConnectionDetails, + cdmDatabaseSchema = "@yourCdmSchema", + resultsDatabaseSchema = "@yourResultsSchema", + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = FALSE, + sqlOnly = TRUE, + sqlOnlyUnionCount = 1, + sqlOnlyIncrementalInsert = TRUE, + writeTableName = "dqdashboard_results" + ), + regexp = "^Missing check names.*" + ) + expect_true("ddlDqdResults.sql" %in% list.files(outputFolder)) + dqdSqlFile <- "TABLE_measurePersonCompleteness.sql" + expect_true(dqdSqlFile %in% list.files(outputFolder)) + + dqdSqlFilePath <- file.path(outputFolder, dqdSqlFile) + sql <- SqlRender::readSql(dqdSqlFilePath) + + # comparison + expectedSqlFile <- system.file("testdata", "TABLE_measurePersonCompleteness-mssql-union=1-insert.sql", package = "DataQualityDashboard") + sqlExpected <- SqlRender::readSql(expectedSqlFile) + + # test if identical, removing comments and excess whitespace + expect_equal(remove_sql_comments(sql), remove_sql_comments(sqlExpected)) +}) + +test_that("Execute DQ checks using sqlOnly=TRUE and sqlOnlyUnionCount=1 and sqlOnlyIncrementalInsert=FALSE (the behavior in version <= 2.2.0)", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + sqlOnlyConnectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "sql server", pathToDriver = "/") + + expect_warning( + results <- executeDqChecks( + connectionDetails = sqlOnlyConnectionDetails, + cdmDatabaseSchema = "@yourCdmSchema", + resultsDatabaseSchema = "@yourResultsSchema", + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = FALSE, + sqlOnly = TRUE, + sqlOnlyUnionCount = 1, + sqlOnlyIncrementalInsert = FALSE, + writeTableName = "dqdashboard_results" + ), + regexp = "^Missing 
check names.*" + ) + expect_true("ddlDqdResults.sql" %in% list.files(outputFolder)) + dqdSqlFile <- "measurePersonCompleteness.sql" + expect_true(dqdSqlFile %in% list.files(outputFolder)) + + dqdSqlFilePath <- file.path(outputFolder, dqdSqlFile) + sql <- SqlRender::readSql(dqdSqlFilePath) + + # comparison + expectedSqlFile <- system.file("testdata", "TABLE_measurePersonCompleteness-mssql-union=1-legacy.sql", package = "DataQualityDashboard") + sqlExpected <- SqlRender::readSql(expectedSqlFile) + + # test if identical, removing comments and excess whitespace + expect_equal(remove_sql_comments(sql), remove_sql_comments(sqlExpected)) +}) + +test_that("Incremental insert SQL is valid.", { + outputFolder <- tempfile("dqd_") + on.exit(unlink(outputFolder, recursive = TRUE)) + + expect_warning( + results <- executeDqChecks( + connectionDetails = connectionDetailsEunomia, + cdmDatabaseSchema = cdmDatabaseSchemaEunomia, + resultsDatabaseSchema = resultsDatabaseSchemaEunomia, + cdmSourceName = "Eunomia", + checkNames = "measurePersonCompleteness", + outputFolder = outputFolder, + writeToTable = FALSE, + sqlOnly = TRUE, + sqlOnlyUnionCount = 4, + sqlOnlyIncrementalInsert = TRUE, + writeTableName = "dqd_results" + ), + regexp = "^Missing check names.*" + ) + + ddlSqlFile <- file.path(outputFolder, "ddlDqdResults.sql") + ddlSql <- SqlRender::readSql(ddlSqlFile) + checkSqlFile <- file.path(outputFolder, "TABLE_measurePersonCompleteness.sql") + checkSql <- SqlRender::readSql(checkSqlFile) + + connection <- DatabaseConnector::connect(connectionDetailsEunomia) + on.exit(DatabaseConnector::disconnect(connection), add = TRUE) + DatabaseConnector::executeSql(connection = connection, sql = ddlSql) + DatabaseConnector::executeSql(connection = connection, sql = checkSql) + + checkResults <- DatabaseConnector::renderTranslateQuerySql(connection, "SELECT * FROM @database_schema.dqd_results;", database_schema = resultsDatabaseSchemaEunomia) + expect_true(nrow(checkResults) == 15) + 
DatabaseConnector::renderTranslateExecuteSql(connection, "DROP TABLE @database_schema.dqd_results;", database_schema = resultsDatabaseSchemaEunomia) +}) diff --git a/vignettes/DataQualityDashboard.rmd b/vignettes/DataQualityDashboard.rmd index b0723c35..f9c4ada0 100644 --- a/vignettes/DataQualityDashboard.rmd +++ b/vignettes/DataQualityDashboard.rmd @@ -43,32 +43,58 @@ Executing Data Quality Checks ```r # fill out the connection details ----------------------------------------------------------------------- -connectionDetails <- DatabaseConnector::createConnectionDetails(dbms = "", - user = "", - password = "", - server = "", - port = "", - extraSettings = "") +connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "", + user = "", + password = "", + server = "", + port = "", + extraSettings = "", + pathToDriver = "" +) cdmDatabaseSchema <- "yourCdmSchema" # the fully qualified database schema name of the CDM resultsDatabaseSchema <- "yourResultsSchema" # the fully qualified database schema name of the results schema (that you can write to) cdmSourceName <- "Your CDM Source" # a human readable name for your CDM source +cdmVersion <- "5.4" # the CDM version you are targeting. Currently supports 5.2, 5.3, and 5.4 # determine how many threads (concurrent SQL sessions) to use ---------------------------------------- numThreads <- 1 # on Redshift, 3 seems to work well # specify if you want to execute the queries or inspect them ------------------------------------------ sqlOnly <- FALSE # set to TRUE if you just want to get the SQL scripts and not actually run the queries +sqlOnlyIncrementalInsert <- FALSE # set to TRUE if you want the generated SQL queries to calculate DQD results and insert them into a database table (@resultsDatabaseSchema.@writeTableName) +sqlOnlyUnionCount <- 1 # in sqlOnlyIncrementalInsert mode, the number of check sqls to union in a single query; higher numbers can improve performance in some DBMS (e.g. 
a value of 25 may be 25x faster) -# where should the logs go? ------------------------------------------------------------------------- +# NOTES specific to sqlOnly <- TRUE option ------------------------------------------------------------ +# 1. You do not need a live database connection. Instead, connectionDetails only needs these parameters: +# connectionDetails <- DatabaseConnector::createConnectionDetails( +# dbms = "", # specify your dbms +# pathToDriver = "/" +# ) +# 2. Since these are fully functional queries, this can help with debugging. +# 3. In the results output by the sqlOnlyIncrementalInsert queries, placeholders are populated for execution_time, query_text, and warnings/errors; and the NOT_APPLICABLE rules are not applied. +# 4. In order to use the generated SQL to insert metadata and check results into output table, you must set sqlOnlyIncrementalInsert = TRUE. Otherwise sqlOnly is backwards compatible with <= v2.2.0, generating queries which run the checks but don't store the results. + + +# where should the results and logs go? ---------------------------------------------------------------- outputFolder <- "output" +outputFile <- "results.json" + # logging type ------------------------------------------------------------------------------------- -verboseMode <- FALSE # set to TRUE if you want to see activity written to the console +verboseMode <- TRUE # set to FALSE if you don't want the logs to be printed to the console # write results to table? ------------------------------------------------------------------------------ writeToTable <- TRUE # set to FALSE if you want to skip writing to a SQL table in the results schema +# specify the name of the results table (used when writeToTable = TRUE and when sqlOnlyIncrementalInsert = TRUE) +writeTableName <- "dqdashboard_results" + +# write results to a csv file? 
----------------------------------------------------------------------- +writeToCsv <- FALSE # set to FALSE if you want to skip writing to csv file +csvFile <- "" # only needed if writeToCsv is set to TRUE + # if writing to table and using Redshift, bulk loading can be initialized ------------------------------- # Sys.setenv("AWS_ACCESS_KEY_ID" = "", @@ -83,21 +109,28 @@ writeToTable <- TRUE # set to FALSE if you want to skip writing to a SQL table i checkLevels <- c("TABLE", "FIELD", "CONCEPT") # which DQ checks to run? ------------------------------------ - checkNames <- c() # Names can be found in inst/csv/OMOP_CDM_v5.3_Check_Descriptions.csv +# which CDM tables to exclude? ------------------------------------ +tablesToExclude <- c("CONCEPT", "VOCABULARY", "CONCEPT_ANCESTOR", "CONCEPT_RELATIONSHIP", "CONCEPT_CLASS", "CONCEPT_SYNONYM", "RELATIONSHIP", "DOMAIN") # list of CDM table names to skip evaluating checks against; by default DQD excludes the vocab tables + # run the job -------------------------------------------------------------------------------------- DataQualityDashboard::executeDqChecks(connectionDetails = connectionDetails, - cdmDatabaseSchema = cdmDatabaseSchema, - resultsDatabaseSchema = resultsDatabaseSchema, - cdmSourceName = cdmSourceName, - numThreads = numThreads, - sqlOnly = sqlOnly, - outputFolder = outputFolder, - verboseMode = verboseMode, - writeToTable = writeToTable, - checkLevels = checkLevels, - checkNames = checkNames) + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + cdmSourceName = cdmSourceName, + numThreads = numThreads, + sqlOnly = sqlOnly, + sqlOnlyUnionCount = sqlOnlyUnionCount, + sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, + outputFolder = outputFolder, + verboseMode = verboseMode, + writeToTable = writeToTable, + writeToCsv = writeToCsv, + csvFile = csvFile, + checkLevels = checkLevels, + tablesToExclude = tablesToExclude, + checkNames = checkNames) # inspect logs 
---------------------------------------------------------------------------- ParallelLogger::launchLogViewer(logFileName = file.path(outputFolder, cdmSourceName, diff --git a/vignettes/SqlOnly.rmd b/vignettes/SqlOnly.rmd new file mode 100644 index 00000000..3363632d --- /dev/null +++ b/vignettes/SqlOnly.rmd @@ -0,0 +1,220 @@ +--- +title: "SqlOnly" +author: "Maxim Moinat" +date: "`r Sys.Date()`" +header-includes: + - \usepackage{fancyhdr} + - \pagestyle{fancy} + - \fancyhead{} + - \fancyhead[CO,CE]{Data Quality Check Type Definitions} + - \fancyfoot[CO,CE]{DataQualityDashboard Package Version `r utils::packageVersion("DataQualityDashboard")`} + - \fancyfoot[LE,RO]{\thepage} + - \renewcommand{\headrulewidth}{0.4pt} + - \renewcommand{\footrulewidth}{0.4pt} +output: + html_document: + number_sections: yes + toc: yes +--- + + + +# Description + +This article describes how to use DQD to generate only the SQL that executes all DataQualityDashboard checks, without actually executing them. +There are a few main advantages of running DQD in Sql-only mode: + +* Create queries locally, before sending to server. This allows for generation of the SQL on one machine and execution on another (e.g. when R cannot connect directly to the database server, or you want to run the DQD SQL as part of your ETL). +* Since these are fully functional queries, this can help with debugging. +* **[NEW in v2.3.0!]** Performance. If you use `sqlOnlyIncrementalInsert = TRUE` and `sqlOnlyUnionCount > 1`, multiple checks are unioned within a cte in the output SQL query to speed performance. When testing on Spark, this resulted in a 10x or higher performance gain. + - Performance for these queries has NOT been benchmarked on all database systems. 
In order to obtain optimal results in your database you may need to adjust the `sqlOnlyUnionCount` and/or tune database parameters such as indexing and parallelism + +The new `sqlOnlyIncrementalInsert` mode generates SQL queries that will actually populate a DQD results table in your database with the results of the checks. There are currently some differences in the result when running these queries, compared to a normal DQD run: + +* If you set `sqlOnlyUnionCount` > 1, if one check results in an error, multiple checks might fail (since the queries are unioned in ctes). +* The status `not_applicable` is not evaluated. A check fails or passes. +* The query text is not shown in the results table. +* Notes from threshold file are not included in results. +* Execution metadata is not automatically added (total and query execution time; CDM_SOURCE metadata). + +Running DQD with `sqlOnly = TRUE` and `sqlOnlyIncrementalInsert = FALSE` will generate SQL queries that can be run to generate the result of each DQ check, but which will not write the results back to the database. + +# Generating the "Incremental Insert" DQD SQL +A few things to note: + +* A dummy `connectionDetails` object is needed where only the `dbms` is used during SQL-only execution. + - By setting the dbms to 'sql server' the output SQL can still be rendered to any other dialect using `SqlRender` (see example below). +* `sqlOnlyUnionCount` determines the number of check sqls to union in a single query. A smaller number gives more control and progress information, a higher number typically gives a higher performance. Here, 100 is used. 
+ +```R +library(DataQualityDashboard) + +# ConnectionDetails object needed for sql dialect +dbmsConnectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "sql server", # can be rendered to any dbms upon execution + pathToDriver = "/" +) + +# Database parameters that are pre-filled in the written queries +# Use @-syntax if creating a template-sql at execution-time (e.g. "@cdmDatabaseSchema") +cdmDatabaseSchema <- "@cdmDatabaseSchema" # the fully qualified database schema name of the CDM +resultsDatabaseSchema <- "@resultsDatabaseSchema" # the fully qualified database schema name of the results schema (that you can write to) +writeTableName <- "@writeTableName" + +sqlFolder <- "./results_sql_only" +cdmSourceName <- "Synthea" + +sqlOnly <- TRUE +sqlOnlyIncrementalInsert <- TRUE # this will generate an insert SQL query for each check type that will compute check results and insert them into a database table +sqlOnlyUnionCount <- 100 # this unions up to 100 queries in each insert query + +verboseMode <- TRUE + +cdmVersion <- "5.4" +checkLevels <- c("TABLE", "FIELD", "CONCEPT") +tablesToExclude <- c() +checkNames <- c() + +# Run DQD with sqlOnly=TRUE and sqlOnlyIncrementalInsert=TRUE. This will create a sql file for each check type in the output folder +DataQualityDashboard::executeDqChecks( + connectionDetails = dbmsConnectionDetails, + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName, + cdmSourceName = cdmSourceName, + sqlOnly = sqlOnly, + sqlOnlyUnionCount = sqlOnlyUnionCount, + sqlOnlyIncrementalInsert = sqlOnlyIncrementalInsert, + outputFolder = sqlFolder, + checkLevels = checkLevels, + verboseMode = verboseMode, + cdmVersion = cdmVersion, + tablesToExclude = tablesToExclude, + checkNames = checkNames +) +``` + +After running above code, you will end up with a number of sql files in the specified output directory: + +* One sql file per check type: `TABLE|FIELD|CONCEPT_.sql`. 
+* `ddlDqdResults.sql` with the result table creation query. + +The queries can then be run in any SQL client, making sure to run `ddlDqdResults.sql` first. +The order of the check queries is not important, and can even be run in parallel. +This will run the check, and store the result in the specified `writeTableName`. +In order to show this result in the DQD Dashboard Shiny app, this table has to be exported and converted to the .json format. +See below for example code of how this can be achieved. + +# (OPTIONAL) Execute queries +Below code snippet shows how you can run the generated queries on an OMOP CDM database using OHDSI R packages, and display the results in the DQD Dashboard. +Note that this approach uses two non-exported DQD functions (`.summarizeResults`, `.writeResultsToJson`) that are not tested for this purpose. In the future we plan to expand support for incremental-insert mode with a more robust set of public functions. Please reach out with feedback on our [GitHub page](https://github.com/OHDSI/DataQualityDashboard/issues) if you'd like to have input on the development of this new feature! 
+ +```R +library(DatabaseConnector) +cdmSourceName <- "" +sqlFolder <- "./results_sql_only" +jsonOutputFolder <- sqlFolder +jsonOutputFile <- "sql_only_results.json" + +dbms <- Sys.getenv("DBMS") +server <- Sys.getenv("DB_SERVER") +port <- Sys.getenv("DB_PORT") +user <- Sys.getenv("DB_USER") +password <- Sys.getenv("DB_PASSWORD") +pathToDriver <- Sys.getenv("PATH_TO_DRIVER") +connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = dbms, + server = server, + port = port, + user = user, + password = password, + pathToDriver = pathToDriver +) +cdmDatabaseSchema <- '' +resultsDatabaseSchema <- '' +writeTableName <- 'dqd_results' # or whatever you want to name your results table + +c <- DatabaseConnector::connect(connectionDetails) + +# Create results table +ddlFile <- file.path(sqlFolder, "ddlDqdResults.sql") +DatabaseConnector::renderTranslateExecuteSql( + connection = c, + sql = readChar(ddlFile, file.info(ddlFile)$size), + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName +) + +# Run checks +dqdSqlFiles <- Sys.glob(file.path(sqlFolder, "*.sql")) +for (dqdSqlFile in dqdSqlFiles) { + if (dqdSqlFile == ddlFile) { + next + } + print(dqdSqlFile) + tryCatch( + expr = { + DatabaseConnector::renderTranslateExecuteSql( + connection = c, + sql = readChar(dqdSqlFile, file.info(dqdSqlFile)$size), + cdmDatabaseSchema = cdmDatabaseSchema, + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName + ) + }, + error = function(e) { + print(sprintf("Writing table failed for check %s with error %s", dqdSqlFile, e$message)) + } + ) +} + +# Get results +checkResults <- DatabaseConnector::querySql( + c, + SqlRender::render( + "SELECT * FROM @resultsDatabaseSchema.@writeTableName", + resultsDatabaseSchema = resultsDatabaseSchema, + writeTableName = writeTableName + ), + snakeCaseToCamelCase = TRUE +) +DatabaseConnector::disconnect(c) + +# convert check ID column name to correct format 
+colnames(checkResults)[colnames(checkResults) == "checkid"] ="checkId" + +# Get overview of DQD results +library(DataQualityDashboard) +overview <- DataQualityDashboard:::.summarizeResults(checkResults = checkResults) + +# Create results object, adding fake metadata +result <- list( + startTimestamp = Sys.time(), + endTimestamp = Sys.time(), + executionTime = "", + Metadata = data.frame( + cdmSourceName = cdmSourceName, + cdmSourceAbbreviation = cdmSourceName, + cdmHolder = "", + sourceDescription = "", + sourceDocumentationReference = "", + cdmEtlReference = "", + sourceReleaseDate = "", + cdmReleaseDate = "", + cdmVersion = cdmVersion, + cdmVersionConceptId = 0, + vocabularyVersion = "", + dqdVersion = as.character(packageVersion("DataQualityDashboard")) + ), + Overview = overview, + CheckResults = checkResults +) + +DataQualityDashboard:::.writeResultsToJson(result, jsonOutputFolder, jsonOutputFile) + +jsonFilePath <- R.utils::getAbsolutePath(file.path(jsonOutputFolder, jsonOutputFile)) +DataQualityDashboard::viewDqDashboard(jsonFilePath) +``` diff --git a/vignettes/Thresholds.rmd b/vignettes/Thresholds.rmd index f687b794..d4116d25 100644 --- a/vignettes/Thresholds.rmd +++ b/vignettes/Thresholds.rmd @@ -30,7 +30,7 @@ A default set of failure thresholds are shipped with the package. Many of these ## DQD Control Files -There is a set of three csv files that underly the DQD. These files indicate which checks should be run and what their failure thresholds should be. There is one file per check level: TABLE, FIELD, and CONCEPT. This vignette will walk through how to update the field level check thresholds but the process is the same for all three files. +There is a set of three csv files that underlie the DQD. These files indicate which checks should be run and what their failure thresholds should be. There is one file per check level: TABLE, FIELD, and CONCEPT. 
This vignette will walk through how to update the field level check thresholds but the process is the same for all three files. ### Step 1: Find and copy the control files