Skip to content

Commit

Permalink
fix note
Browse files: browse the repository at this point in the history
  • Loading branch information
ginberg committed Dec 27, 2023
1 parent 38c08cc commit ed96e8d
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
8 changes: 4 additions & 4 deletions R/CompareCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
covariates1 <- covariateData1$covariates
if (!is.null(cohortId1)) {
covariates1 <- covariates1 %>%
filter(cohortDefinitionId == cohortId1)
filter(.data$cohortDefinitionId == cohortId1)
}
covariates1 <- covariates1 %>%
select(
Expand All @@ -76,7 +76,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
covariates2 <- covariateData2$covariates
if (!is.null(cohortId2)) {
covariates2 <- covariates2 %>%
filter(cohortDefinitionId == cohortId2)
filter(.data$cohortDefinitionId == cohortId2)
}
covariates2 <- covariates2 %>%
select(
Expand Down Expand Up @@ -108,7 +108,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
covariates1 <- covariateData1$covariatesContinuous
if (!is.null(cohortId1)) {
covariates1 <- covariates1 %>%
filter(cohortDefinitionId == cohortId1)
filter(.data$cohortDefinitionId == cohortId1)
}
covariates1 <- covariates1 %>%
select(
Expand All @@ -121,7 +121,7 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort
covariates2 <- covariateData2$covariatesContinuous
if (!is.null(cohortId2)) {
covariates2 <- covariates2 %>%
filter(cohortDefinitionId == cohortId2)
filter(.data$cohortDefinitionId == cohortId2)
}
covariates2 <- covariates2 %>%
select(
Expand Down
2 changes: 1 addition & 1 deletion R/HelperFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ filterByRowId <- function(covariateData, rowIds) {
stop("Cannot filter aggregated data by rowId")
}
covariates <- covariateData$covariates %>%
filter(rowId %in% rowIds)
filter(.data$rowId %in% rowIds)

result <- Andromeda::andromeda(
covariates = covariates,
Expand Down
48 changes: 24 additions & 24 deletions R/Normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ tidyCovariateData <- function(covariateData,
} else {
newCovariates <- covariateData$covariates
covariateData$maxValuePerCovariateId <- covariateData$covariates %>%
group_by(covariateId) %>%
summarise(maxValue = max(covariateValue, na.rm = TRUE))
group_by(.data$covariateId) %>%
summarise(maxValue = max(.data$covariateValue, na.rm = TRUE))
on.exit(covariateData$maxValuePerCovariateId <- NULL)

if (removeRedundancy || minFraction != 0) {
covariateData$valueCounts <- covariateData$covariates %>%
group_by(covariateId) %>%
summarise(n = count(), nDistinct = n_distinct(covariateValue))
group_by(.data$covariateId) %>%
summarise(n = count(), nDistinct = n_distinct(.data$covariateValue))
on.exit(covariateData$valueCounts <- NULL, add = TRUE)
}

Expand All @@ -87,39 +87,39 @@ tidyCovariateData <- function(covariateData,
if (removeRedundancy) {
covariateData$binaryCovariateIds <- covariateData$maxValuePerCovariateId %>%
inner_join(covariateData$valueCounts, by = "covariateId") %>%
filter(maxValue == 1 & nDistinct == 1) %>%
select(covariateId = covariateId)
filter(.data$maxValue == 1 & .data$nDistinct == 1) %>%
select(covariateId = .data$covariateId)
on.exit(covariateData$binaryCovariateIds <- NULL, add = TRUE)

if (covariateData$binaryCovariateIds %>% count() %>% pull() != 0) {
if (isTemporalCovariateData(covariateData)) {
# Temporal
covariateData$temporalValueCounts <- covariateData$covariates %>%
inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>%
group_by(covariateId, timeId) %>%
group_by(.data$covariateId, .data$timeId) %>%
count()
on.exit(covariateData$temporalValueCounts <- NULL, add = TRUE)

# First, find all single covariates that, for every timeId, appear in every row with the same value
covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>%
filter(n == populationSize) %>%
select(covariateId, timeId)
select(.data$covariateId, .data$timeId)
on.exit(covariateData$deleteCovariateTimeIds <- NULL, add = TRUE)

# Next, find groups of covariates (analyses) that together cover everyone:
analysisIds <- covariateData$temporalValueCounts %>%
anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) %>%
inner_join(covariateData$covariateRef, by = "covariateId") %>%
group_by(analysisId) %>%
group_by(.data$analysisId) %>%
summarise(n = sum(n, na.rm = TRUE)) %>%
filter(n == populationSize) %>%
select(analysisId)
select(.data$analysisId)

# For those, find most prevalent covariate, and mark it for deletion:
valueCounts <- analysisIds %>%
inner_join(covariateData$covariateRef, by = "analysisId") %>%
inner_join(covariateData$temporalValueCounts, by = "covariateId") %>%
select(analysisId, covariateId, timeId, n) %>%
select(.data$analysisId, .data$covariateId, .data$timeId, .data$n) %>%
collect()
valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ]
Andromeda::appendToTable(
Expand All @@ -137,25 +137,25 @@ tidyCovariateData <- function(covariateData,
# First, find all single covariates that appear in every row with the same value
toDelete <- covariateData$valueCounts %>%
inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>%
filter(n == populationSize) %>%
select(covariateId) %>%
filter(.data$n == populationSize) %>%
select(.data$covariateId) %>%
collect()
deleteCovariateIds <- toDelete$covariateId

# Next, find groups of covariates (analyses) that together cover everyone:
analysisIds <- covariateData$valueCounts %>%
inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>%
filter(!covariateId %in% deleteCovariateIds) %>%
filter(!.data$covariateId %in% deleteCovariateIds) %>%
inner_join(covariateData$covariateRef, by = "covariateId") %>%
group_by(analysisId) %>%
group_by(.data$analysisId) %>%
summarise(n = sum(n, na.rm = TRUE)) %>%
filter(n == populationSize) %>%
select(analysisId)
filter(.data$n == populationSize) %>%
select(.data$analysisId)
# For those, find most prevalent covariate, and mark it for deletion:
valueCounts <- analysisIds %>%
inner_join(covariateData$covariateRef, by = "analysisId") %>%
inner_join(covariateData$valueCounts, by = "covariateId") %>%
select(analysisId, covariateId, n) %>%
select(.data$analysisId, .data$covariateId, n) %>%
collect()
valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ]
deleteCovariateIds <- c(deleteCovariateIds, valueCounts$covariateId[!duplicated(valueCounts$analysisId)])
Expand All @@ -168,9 +168,9 @@ tidyCovariateData <- function(covariateData,
if (minFraction != 0) {
minCount <- floor(minFraction * populationSize)
toDelete <- covariateData$valueCounts %>%
filter(n < minCount) %>%
filter(!covariateId %in% ignoreCovariateIds) %>%
select(covariateId) %>%
filter(.data$n < minCount) %>%
filter(!.data$covariateId %in% ignoreCovariateIds) %>%
select(.data$covariateId) %>%
collect()

metaData$deletedInfrequentCovariateIds <- toDelete$covariateId
Expand All @@ -179,15 +179,15 @@ tidyCovariateData <- function(covariateData,
}
if (length(deleteCovariateIds) > 0) {
newCovariates <- newCovariates %>%
filter(!covariateId %in% deleteCovariateIds)
filter(!.data$covariateId %in% deleteCovariateIds)
}

if (normalize) {
ParallelLogger::logInfo("Normalizing covariates")
newCovariates <- newCovariates %>%
inner_join(covariateData$maxValuePerCovariateId, by = "covariateId") %>%
mutate(covariateValue = covariateValue / maxValue) %>%
select(-maxValue)
mutate(covariateValue = .data$covariateValue / .data$maxValue) %>%
select(-.data$maxValue)
metaData$normFactors <- covariateData$maxValuePerCovariateId %>%
collect()
}
Expand Down

0 comments on commit ed96e8d

Please sign in to comment.