From ed96e8dffb4b6aafbeec4367dcfc2cef45796d49 Mon Sep 17 00:00:00 2001 From: Ger Inberg Date: Wed, 27 Dec 2023 11:15:01 +0100 Subject: [PATCH] fix note --- R/CompareCohorts.R | 8 ++++---- R/HelperFunctions.R | 2 +- R/Normalization.R | 48 ++++++++++++++++++++++----------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/R/CompareCohorts.R b/R/CompareCohorts.R index 80cf2074..bb11b2ed 100644 --- a/R/CompareCohorts.R +++ b/R/CompareCohorts.R @@ -64,7 +64,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort covariates1 <- covariateData1$covariates if (!is.null(cohortId1)) { covariates1 <- covariates1 %>% - filter(cohortDefinitionId == cohortId1) + filter(.data$cohortDefinitionId == cohortId1) } covariates1 <- covariates1 %>% select( @@ -76,7 +76,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort covariates2 <- covariateData2$covariates if (!is.null(cohortId2)) { covariates2 <- covariates2 %>% - filter(cohortDefinitionId == cohortId2) + filter(.data$cohortDefinitionId == cohortId2) } covariates2 <- covariates2 %>% select( @@ -108,7 +108,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort covariates1 <- covariateData1$covariatesContinuous if (!is.null(cohortId1)) { covariates1 <- covariates1 %>% - filter(cohortDefinitionId == cohortId1) + filter(.data$cohortDefinitionId == cohortId1) } covariates1 <- covariates1 %>% select( @@ -121,7 +121,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort covariates2 <- covariateData2$covariatesContinuous if (!is.null(cohortId2)) { covariates2 <- covariates2 %>% - filter(cohortDefinitionId == cohortId2) + filter(.data$cohortDefinitionId == cohortId2) } covariates2 <- covariates2 %>% select( diff --git a/R/HelperFunctions.R b/R/HelperFunctions.R index 448639cb..73c697df 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -48,7 +48,7 @@ filterByRowId <- function(covariateData, rowIds) { stop("Cannot filter aggregated data by rowId") } covariates <- covariateData$covariates %>% - filter(rowId %in% rowIds) + filter(.data$rowId %in% rowIds) result <- Andromeda::andromeda( covariates = covariates, diff --git a/R/Normalization.R b/R/Normalization.R index d78a43a9..2c59ab8c 100644 --- a/R/Normalization.R +++ b/R/Normalization.R @@ -71,14 +71,14 @@ tidyCovariateData <- function(covariateData, } else { newCovariates <- covariateData$covariates covariateData$maxValuePerCovariateId <- covariateData$covariates %>% - group_by(covariateId) %>% - summarise(maxValue = max(covariateValue, na.rm = TRUE)) + group_by(.data$covariateId) %>% + summarise(maxValue = max(.data$covariateValue, na.rm = TRUE)) on.exit(covariateData$maxValuePerCovariateId <- NULL) if (removeRedundancy || minFraction != 0) { covariateData$valueCounts <- covariateData$covariates %>% - group_by(covariateId) %>% - summarise(n = count(), nDistinct = n_distinct(covariateValue)) + group_by(.data$covariateId) %>% + summarise(n = count(), nDistinct = n_distinct(.data$covariateValue)) on.exit(covariateData$valueCounts <- NULL, add = TRUE) } @@ -87,8 +87,8 @@ tidyCovariateData <- function(covariateData, if (removeRedundancy) { covariateData$binaryCovariateIds <- covariateData$maxValuePerCovariateId %>% inner_join(covariateData$valueCounts, by = "covariateId") %>% - filter(maxValue == 1 & nDistinct == 1) %>% - select(covariateId = covariateId) + filter(.data$maxValue == 1 & .data$nDistinct == 1) %>% + select(covariateId = .data$covariateId) on.exit(covariateData$binaryCovariateIds <- NULL, add = TRUE) if (covariateData$binaryCovariateIds %>% count() %>% pull() != 0) { @@ -96,30 +96,30 @@ tidyCovariateData <- function(covariateData, # Temporal covariateData$temporalValueCounts <- covariateData$covariates %>% inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - group_by(covariateId, timeId) %>% + group_by(.data$covariateId, .data$timeId) %>% count() on.exit(covariateData$temporalValueCounts <- NULL, add = TRUE) # First, find all single covariates that, for every timeId, appear in every row with the same value covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>% filter(n == populationSize) %>% - select(covariateId, timeId) + select(.data$covariateId, .data$timeId) on.exit(covariateData$deleteCovariateTimeIds <- NULL, add = TRUE) # Next, find groups of covariates (analyses) that together cover everyone: analysisIds <- covariateData$temporalValueCounts %>% anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) %>% inner_join(covariateData$covariateRef, by = "covariateId") %>% - group_by(analysisId) %>% + group_by(.data$analysisId) %>% summarise(n = sum(n, na.rm = TRUE)) %>% filter(n == populationSize) %>% - select(analysisId) + select(.data$analysisId) # For those, find most prevalent covariate, and mark it for deletion: valueCounts <- analysisIds %>% inner_join(covariateData$covariateRef, by = "analysisId") %>% inner_join(covariateData$temporalValueCounts, by = "covariateId") %>% - select(analysisId, covariateId, timeId, n) %>% + select(.data$analysisId, .data$covariateId, .data$timeId, .data$n) %>% collect() valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ] Andromeda::appendToTable( @@ -137,25 +137,25 @@ tidyCovariateData <- function(covariateData, # First, find all single covariates that appear in every row with the same value toDelete <- covariateData$valueCounts %>% inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - filter(n == populationSize) %>% - select(covariateId) %>% + filter(.data$n == populationSize) %>% + select(.data$covariateId) %>% collect() deleteCovariateIds <- toDelete$covariateId # Next, find groups of covariates (analyses) that together cover everyone: analysisIds <- covariateData$valueCounts %>% inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - filter(!covariateId %in% deleteCovariateIds) %>% + filter(!.data$covariateId %in% deleteCovariateIds) %>% inner_join(covariateData$covariateRef, by = "covariateId") %>% - group_by(analysisId) %>% + group_by(.data$analysisId) %>% summarise(n = sum(n, na.rm = TRUE)) %>% - filter(n == populationSize) %>% - select(analysisId) + filter(.data$n == populationSize) %>% + select(.data$analysisId) # For those, find most prevalent covariate, and mark it for deletion: valueCounts <- analysisIds %>% inner_join(covariateData$covariateRef, by = "analysisId") %>% inner_join(covariateData$valueCounts, by = "covariateId") %>% - select(analysisId, covariateId, n) %>% + select(.data$analysisId, .data$covariateId, n) %>% collect() valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ] deleteCovariateIds <- c(deleteCovariateIds, valueCounts$covariateId[!duplicated(valueCounts$analysisId)]) @@ -168,9 +168,9 @@ tidyCovariateData <- function(covariateData, if (minFraction != 0) { minCount <- floor(minFraction * populationSize) toDelete <- covariateData$valueCounts %>% - filter(n < minCount) %>% - filter(!covariateId %in% ignoreCovariateIds) %>% - select(covariateId) %>% + filter(.data$n < minCount) %>% + filter(!.data$covariateId %in% ignoreCovariateIds) %>% + select(.data$covariateId) %>% collect() metaData$deletedInfrequentCovariateIds <- toDelete$covariateId @@ -179,15 +179,15 @@ tidyCovariateData <- function(covariateData, } if (length(deleteCovariateIds) > 0) { newCovariates <- newCovariates %>% - filter(!covariateId %in% deleteCovariateIds) + filter(!.data$covariateId %in% deleteCovariateIds) } if (normalize) { ParallelLogger::logInfo("Normalizing covariates") newCovariates <- newCovariates %>% inner_join(covariateData$maxValuePerCovariateId, by = "covariateId") %>% - mutate(covariateValue = covariateValue / maxValue) %>% - select(-maxValue) + mutate(covariateValue = .data$covariateValue / .data$maxValue) %>% + select(-.data$maxValue) metaData$normFactors <- covariateData$maxValuePerCovariateId %>% collect() }