From 47716637a12923e988073d6fbcb8d4abf657d936 Mon Sep 17 00:00:00 2001 From: rforaita Date: Wed, 13 Mar 2024 15:43:31 +0100 Subject: [PATCH] Submisstion Scientific Reports --- R/structural.em.rf.R | 72 +++ analysis/03_Imputation.R | 39 +- analysis/07d_Boot2igrpah.R | 18 +- analysis/07e_Bootstrap_RMSEU.R | 101 ++++ analysis/10_Paths.R | 205 +++++++ analysis/10_RMSEU.R | 43 -- analysis/Paths_for_all_modifiable_factors.R | 327 +++++++++++ analysis/Table1.R | 150 +++++ analysis/Table1S.R | 155 +++++ analysis/Table4.R | 71 +++ analysis/TableS_networkcharacteristic.R | 600 ++++++++++++++++++++ analysis/Table_edgefreq.R | 96 ++++ analysis/VisNet.R | 215 ++++++- 13 files changed, 2013 insertions(+), 79 deletions(-) create mode 100644 R/structural.em.rf.R create mode 100644 analysis/07e_Bootstrap_RMSEU.R create mode 100644 analysis/10_Paths.R delete mode 100644 analysis/10_RMSEU.R create mode 100644 analysis/Paths_for_all_modifiable_factors.R create mode 100644 analysis/Table1.R create mode 100644 analysis/Table1S.R create mode 100644 analysis/Table4.R create mode 100644 analysis/TableS_networkcharacteristic.R create mode 100644 analysis/Table_edgefreq.R diff --git a/R/structural.em.rf.R b/R/structural.em.rf.R new file mode 100644 index 0000000..91146d4 --- /dev/null +++ b/R/structural.em.rf.R @@ -0,0 +1,72 @@ +#' Structure learning from missing data +#' +#' The \code{bnlear::\link[bnlearn]{structural.em}} function was adapted +#' to our data since it could not impute some missing values for bmi_m.2 +#' +#' @return +#' @export +#' + +structural.em.rf <- function (x, maximize = "hc", maximize.args = list(), fit = "mle", + fit.args = list(), impute, impute.args = list(), return.all = FALSE, + start = NULL, max.iter = 5, debug = FALSE) +{ + ntests = 0 + data.info = bnlearn:::check.data(x, allow.levels = TRUE, allow.missing = TRUE, + warn.if.no.missing = TRUE, stop.if.all.missing = !is(start, + "bn.fit")) + max.iter = bnlearn:::check.max.iter(max.iter) + bnlearn:::check.logical(debug) + bnlearn:::check.logical(return.all) + bnlearn:::check.learning.algorithm(algorithm = maximize, class = "score") + critical.arguments = c("x", "heuristic", "start", "debug") + bnlearn:::check.unused.args(intersect(critical.arguments, names(maximize.args)), + character(0)) + maximize.args[critical.arguments] = list(x = NULL, heuristic = maximize, + start = NULL, debug = debug) + bnlearn:::check.fitting.method(method = fit, data = x) + fit.args = bnlearn:::check.fitting.args(method = fit, network = NULL, + data = x, extra.args = fit.args) + impute = bnlearn:::check.imputation.method(impute, x) + impute.args = bnlearn:::check.imputation.extra.args(impute, impute.args) + + dag = empty.graph(nodes = names(x)) + fitted = bnlearn:::bn.fit.backend(dag, data = x, method = fit, + extra.args = fit.args, data.info = data.info) + + data.info$complete.nodes[names(x)] = TRUE + for (i in seq(max.iter)) { + print(i) + complete = bnlearn:::impute.backend(fitted = fitted, data = x, + method = impute, extra.args = impute.args, debug = debug) + +# new: start: BMI Mother at FU2 could not be estimated. We imputed here the mean +# value. The number of missing values were < 10. + if(any(is.na(complete))){ + ml <- which(is.na(complete$bmi_m.2)) + complete$bmi_m.2[ml] <- mean(complete$bmi_m.2, na.rm = TRUE) + } +# end + maximize.args$x = complete + maximize.args$start = dag + dag = do.call(bnlearn:::greedy.search, maximize.args) + fitted.new = bnlearn:::bn.fit.backend(dag, data = complete, method = fit, + extra.args = fit.args, data.info = data.info) + + ntests = ntests + dag$learning$ntests + if (isTRUE(all.equal(fitted, fitted.new))) + break + else fitted = fitted.new + } + dag$learning$algo = "sem" + dag$learning$maximize = maximize + dag$learning$impute = impute + dag$learning$fit = fit + dag$learning$ntests = ntests + if (return.all) + invisible(list(dag = dag, imputed = complete, fitted = fitted)) + else invisible(dag) +} + + + diff --git a/analysis/03_Imputation.R b/analysis/03_Imputation.R index 3b562d6..b9ac6c9 100644 --- a/analysis/03_Imputation.R +++ b/analysis/03_Imputation.R @@ -8,15 +8,19 @@ # Purpose: Data imputation # # ------------------------------------------------------------------------------ - -analysis_data <- readRDS("data/data2impute.rds") - +# --- was bisher geschah: ------------------------------------------------------ +# source("data/01_DataPreparation.R") +# source("data/02_CreateAnalysisData.R) +analysis_data <- readRDS("data_not_load/02_data2impute.RDS") +# ------------------------------------------------------------------------------ # --------------------- Missing Value Imputation --------------------------- # 1) Patterns & dump variables (such as constants, id_no, etc.) outlist1 <- c("id_no", "ifam_no", "family_id") +#pattern <- md.pattern(analysis_data) +#saveRDS(pattern, "data/missingness-pattern.RDS") ini <- mice(dplyr::select(analysis_data, -all_of(outlist1)), meth = 'rf', maxit = 0) -table(ini$nmis) +#table(ini$nmis) names(ini$nmis[ini$nmis == 0]) names(ini$nmis[ini$nmis > nrow(analysis_data)/2]) @@ -39,21 +43,34 @@ outlist <- sort(c(outlist1, outlist2, fxout, fxout2, "drink_9_t3.1")) tmp <- analysis_data[, !names(analysis_data) %in% outlist3] fx3 <- flux(tmp) -# plot(fx3$outflux, fx3$influx) +plot(fx3$outflux, fx3$influx) + + + +# 4) Remove variables & quickpred +# quickpred is not used for graphical model imputation +# new.data <- tmp[, !names(tmp) %in% outlist3] +saveRDS(outlist, file = "data_not_load/outlist.RDS") + + +# 2) Check methods for imputation +#make.method(new.data) -# 4) Imputation (1 ca. 10 min, 50 ca. 30 sec) + + +# 6) Imputation (1 ca. 10 min, 50 ca. 30 sec) #system.time( imp <- mice(new.data, m = 10, meth = 'rf', seed = 16873, print = TRUE) #) +# --- SAVE! -------------------------------------------------------------------- +saveRDS(imp, "data/03_MI-10.RDS") +saveRDS(new.data, "data_not_load/03_MI-input.RDS") +# ------------------------------------------------------------------------------ + # 7) Check logged events as.character(imp$loggedEvents[, "out"]) -# --- SAVE! -------------------------------------------------------------------- -saveRDS(imp, "data/MI-10.rds") -saveRDS(new.data, "data_not_load/MI-input.rds") -# ------------------------------------------------------------------------------ - diff --git a/analysis/07d_Boot2igrpah.R b/analysis/07d_Boot2igrpah.R index 8678cd2..da94731 100644 --- a/analysis/07d_Boot2igrpah.R +++ b/analysis/07d_Boot2igrpah.R @@ -8,8 +8,8 @@ # Purpose: Bootstrap zu einem Graphen basteln # # ------------------------------------------------------------------------------ -boot <- readRDS("data/boot100_mi1-graph.RDS") -load("data/graph-10trees.RData") +boot <- readRDS("data_not_load/boot100_mi1-graph.RDS") +load("data_not_load/06_graph-10trees-pa.RData") # g.pa to igraph @@ -60,9 +60,21 @@ V(g.boot)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birt edges <- as.data.frame(as_edgelist(g.boot)) names(edges) <- c("from", "to") edges$weight <- E(g.boot)$weight +# merge two data frames +edges.joint <- left_join(edges.pa, edges, by = c("from", "to")) + + + + +# edges with more than 90% +e90 <- edges[which(edges$weight >= 90),] +e90[order(e90$weight, decreasing = TRUE),] + + # ---! SAVE ------------------------------------------------------- -save(g.boot, edges, file = "data/boot100igraph.RData") +save(g.boot, edges, file = "data_not_load/07_boot100igraph.RData") +# ------------------------------------------------------------------- diff --git a/analysis/07e_Bootstrap_RMSEU.R b/analysis/07e_Bootstrap_RMSEU.R new file mode 100644 index 0000000..92e9da6 --- /dev/null +++ b/analysis/07e_Bootstrap_RMSEU.R @@ -0,0 +1,101 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: RMSEU +# +# ------------------------------------------------------------------------------ +#boot <- readRDS("data_not_load/boot.RDS") +boot <- readRDS("data_not_load/boot100_mi1-graph.RDS") + + +# make adjacency matrix of all pc-graphs +amat <- lapply(boot, function(x){ + tmp <- wgtMatrix(getGraph(x), transpose = FALSE) + wm2 <- (tmp + t(tmp)) + # undirected edges get 0.5 (in sum they count as 1) + tmp[which(wm2 > 1)] <- 0.5 + tmp +}) + +## average edges +g.avg <- Reduce('+', amat) +g.avg <- Reduce('+', amat) / length(amat) + + + +# sum up directed edges +t1 <- t(g.avg)[lower.tri(t(g.avg))] # untere Hälfte +t2 <- g.avg[lower.tri(g.avg)] # obere Hälfte +t3 <- apply(cbind(t1, t2), 1, sum) # Summe + +# select_all_but_diag <- function(x){ +# apply(cbind(t(x)[lower.tri(x, diag = F)], x[lower.tri(x, diag = F)]), 1, sum) +# } +# a <- select_all_but_diag(g.avg) + + + + +t44 <- t50 <- t75 <- t3 +t44[t44 < 0.44] <- 0 +t50[t50 < 0.50] <- 0 +t75[t75 < 0.75] <- 0 +(rmseu0 <- gum(t3, vertices = nrow(g.avg), threshold = 0.5)) +(rmseu44 <- gum(t44, vertices = nrow(g.avg), threshold = 0.5)) +(rmseu50 <- gum(t50, vertices = nrow(g.avg), threshold = 0.5)) +(rmseu75 <- gum(t75, vertices = nrow(g.avg), threshold = 0.5)) + +# compare - RMSEU = 0.183 +gum(rep(0.9085, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu +1-0.9085 +# MEU 0.0443451 +gum(rep(0.005, times = choose(51,2)), vertices = 51, threshold = 0.5)$meu +1-0.9085 + +# compare - RMSEU = 0.246 +gum(rep(0.877, times = choose(51,2)), vertices = 51, threshold = 0.5) +1-0.877 +# compare - MEE = 0.059 +gum(rep(0.0069, times = choose(51,2)), vertices = 51, threshold = 0.5)$mee +1-0.0069 + + +# compare - 0.183 for edges that were at least selected once (here 570) +gum(rep(0.9085, times = 570), vertices = 34, threshold = 0.5)$rmseu +gum(rep(0.877, times = 570), vertices = 34, threshold = 0.5)$rmseu + + + + + +# compare - 0.267481 +gum(rep(0.8665, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu +gum(rep(1-0.8665, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu +gum(matrix(rep(0.0936, times = 51*51), nrow = 51, ncol = 51), threshold = 0.5)$rmseu +gum(matrix(rep(0.35, times = 51*51), nrow = 51, ncol = 51), threshold = 0.5) + +# summary(t3): mean = 0.09917, rmseu = 0.1048 +gum(rep(0.2457, times = choose(51,2)), 51, threshold = 0.75)$rmseu +gum(rep(0.2457, times = choose(51,2)), 51, threshold = 0.25)$rmseu +gum(rep(0.09917, times = choose(51,2)), 51, threshold = 0.05)$rmseu +gum(rep(0.09917, times = choose(51,2)), 51, threshold = 0.05)$mee + +# --- save ----------------------------------------------- +save(rmseu0, rmseu44, rmseu50, rmseu75, file = "results/rmseu.RData") + + +mymeu <- function(x){ + 0.0443451 - gum(rep(x, times = choose(51,2)), vertices = 51, threshold = 0.5)$meu +} +optim(seq(0.01,0.15, length = 100), mymeu) + + + + +# uncertaintz shcie- 10 Knoten -> 45 kanten +medges <- rep(0.5) +gum(medges, vertices = 2, threshold = 0.05)$mee diff --git a/analysis/10_Paths.R b/analysis/10_Paths.R new file mode 100644 index 0000000..0b54442 --- /dev/null +++ b/analysis/10_Paths.R @@ -0,0 +1,205 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: FEB 2023 +# +# Purpose: Pathway structures between repeated measurements +# +# ------------------------------------------------------------------------------ +library(gt) + +load("data_not_load/06_graph-10trees-pa.RData") + +g <- pc2igraph(g.pa) +g <- feature.igraph(g[[1]], labels = V(g[[1]])$name) + +V(g)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birth", + "Breastfeeding", "Birthweight", "Weeks_of_pregnancy", "Formula_milk", + "HH_Diet", "Smoking_pregnancy", + "Age", "School", "AVM", "BMI", "Mothers_BMI", "Familymeal", "PA", + "Sleep", "Well-being", "YHEI", "HOMA", + "Age.FU1", "School.FU1", "AVM.FU1", "BMI.FU1", "Mothers_BMI.FU1", + "Familymeal.FU1", "Income.FU1", "ISCED.FU1","PA.FU1", + "Sleep.FU1", "Well-being.FU1", "YHEI.FU1", "HOMA.FU1", + "Age.FU2", "AVM.FU2", "BMI.FU2", "Mothers_BMI.FU2", "Alcohol.FU2", + "Familymeal.FU2", "Income.FU2", "ISCED.FU2","PA.FU2", "Puberty.FU2", + "Sleep.FU2", "Smoking.FU2", "Well-being.FU2", "YHEI.FU2", "HOMA.FU2") + + + +### Shortest and simple path outgoing from exposure variables at baseline ------ +# Distanz: number of hops +search.for.causes <- function(g, variables, mode, minus = NULL){ + require(gt) + distanz <- igraph::distances(g, mode = mode) + + dist.tab <- t(distanz[which(names(V(g)) %in% variables), ]) + table.dist <- data.frame(nodes = rownames(dist.tab), dist.tab) %>% + gt(rowname_col = "nodes") + if(!is.null(minus)){ + table.dist$'_data' <- table.dist$'_data'[-minus,] + } + table.dist +} + +bmivariables <- c("BMI", "BMI.FU1", "BMI.FU2") +exposurevariables <- c("AVM","PA","Sleep","Well-being","YHEI") + +table.dist.bmi <- search.for.causes(g, bmivariables, mode = "in") +table.dist.exp <- search.for.causes(g, exposurevariables, mode = "out")#, minus = c(1:12)) + +### save table --------------------------------------------------------------- +table.dist.bmi %>% gtsave("table-dist-bmi.rtf", path = "results/") +table.dist.exp %>% gtsave("table-dist.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + +# -------------------------------------------------------------- +whichexposure <- which(names(V(g)) %in% exposurevariables) +makepfade <- function(g, exposure, mode = "out"){ + # shortest paths + # browser() + ash <- all_shortest_paths(g, from = V(g)[exposure], mode = mode)[[1]] + # simpliest paths + asp <- all_simple_paths(g, from = V(g)[exposure], mode = mode) + sp <- which(asp %in% ash) + asp <- lapply(asp, function(x) names(V(g))[as.numeric(x)]) + asp <- lapply(asp, function(x) toString(x)) + if(length(asp) > 0){ + data.frame(SP = c(1:length(asp)) %in% sp, pfad = do.call(c,asp)) %>% gt + } else { + cat("no simple path found") + } +} +avm <- makepfade(g, whichexposure[1]) +pa <- makepfade(g, whichexposure[2]) +sleep <- makepfade(g, whichexposure[3]) +wb <- makepfade(g, whichexposure[4]) +yhei <- makepfade(g, whichexposure[5]) +age <- all_shortest_paths(g, from = "Age.FU1", to = "BMI.FU2") + +table.sp <- rbind(avm$`_data`, pa$`_data`, sleep$`_data`, + wb$`_data`, yhei$`_data`) %>% gt + + +### save table --------------------------------------------------------------- +table.sp %>% gtsave("table-sp.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + + +### Shortest and simple path incoming to BMI.FU2 and WB.FU2 ------------------- +exposurevariables <- c("BMI.FU2", "Well-being.FU2") +whichout <- which(names(V(g)) %in% exposurevariables) +bmi <- makepfade(g, whichout[1], mode = "in") +wb <- makepfade(g, whichout[2], mode = "in") + +table.sp.out <- rbind(bmi$`_data`, wb$`_data`) %>% gt + +### save table --------------------------------------------------------------- +table.sp.out %>% gtsave("table-sp-out.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + +### Shortest and simple path from cultural, social and familial variables ----- +exposurevariables <- c("Country", "Migrant", "Income", "ISCED", "Age_at_birth", "Smoking_pregnancy") +whichbasic <- which(names(V(g)) %in% exposurevariables) +country <- makepfade(g, whichbasic[1]) #1593 +migrant <- makepfade(g, whichbasic[2]) #749 +income <- makepfade(g, whichbasic[3]) #527 +isced <- makepfade(g, whichbasic[4]) #527 +age <- makepfade(g, whichbasic[5]) #287 +smok <- makepfade(g, whichbasic[6]) #527 + +table.sp.basic <- rbind(country$`_data`, migrant$`_data`, age$`_data`, + income$`_data`, isced$`_data`) %>% gt + +### save table --------------------------------------------------------------- +table.sp.basic %>% gtsave("table-sp-basic.rtf", path = "results/") +# ---------------------------------------------------------------------------- +distanz <- igraph::distances(g, mode = "out") +dist.tab <- t(distanz[whichbasic,]) +table.dist <- data.frame(nodes = rownames(dist.tab), dist.tab) %>% + gt(rowname_col = "nodes") + + + + + + + +## Find possible ascendents of BMI ------------------------------------------ +which(names(V(g)) %in% bmivariables) +amat <- as_adj(g, sparse = FALSE) +isValidGraph(t(amat), type = "cpdag", verbose = TRUE) + +## possible ancestors of BMI.FU2 +vorfahren2 <- pcalg::possAn(m = t(amat), x = 39, type = "cpdag") +vf2 <- rownames(amat)[setdiff(vorfahren2,39)] +nvf2 <- length(vf2) +vf2 <- data.frame(vorfahren = setdiff(vorfahren2,39), vf = vf2) + +## possible ancestors of BMI.FU1 +vorfahren1 <- pcalg::possAn(m = t(amat), x = 27, type = "cpdag") +vf1 <- rownames(amat)[setdiff(vorfahren1, 27)] +vf1 <- data.frame(vorfahren = setdiff(vorfahren1, 27), + vf = vf1) + +## possible ancestors of BMI +vorfahren <- pcalg::possAn(m = t(amat), x = 16, type = "cpdag") +vf <- rownames(amat)[setdiff(vorfahren,16)] +vf <- data.frame(vf = vf, + vorfahren = setdiff(vorfahren,16)) + + +a <- full_join(vf2, vf1, by = "vorfahren", suffix = c("2","1")) +posAncestors <- full_join(a,vf, by = "vorfahren")[,c(4:2)] + +### save table --------------------------------------------------------------- +posAncestors %>% gt() %>% gtsave("table-posAn-bmi-pag.rtf", path = "results/") +# ---------------------------------------------------------------------------- + +## Find possible descendents of exposures ------------------------------------- +which(names(V(g)) %in% exposurevariables) + +## AVM +des_avm <- pcalg::possDe(m = t(amat), x = 15, type = "cpdag") +des_avm <- data.frame(avm = rownames(amat)[setdiff(des_avm,15)], + nr = setdiff(des_avm,15)) + +## Physical activity +des_pa <- pcalg::possDe(m = t(amat), x = 19, type = "cpdag") +des_pa <- data.frame(pa = rownames(amat)[setdiff(des_pa,19)], + nr = setdiff(des_pa,19)) + +## Sleep +des_sl <- pcalg::possDe(m = t(amat), x = 20, type = "cpdag") +des_sl <- data.frame(sl = rownames(amat)[setdiff(des_sl, 20)], + nr = setdiff(des_sl, 20)) + +## Well-being +des_wb <- pcalg::possDe(m = t(amat), x = 21, type = "cpdag") +des_wb <- data.frame(wb = rownames(amat)[setdiff(des_wb, 21)], + nr = setdiff(des_wb, 21)) + +##YHEI +des_yh <- pcalg::possDe(m = t(amat), x = 22, type = "cpdag") +des_yh <- data.frame(yh = rownames(amat)[setdiff(des_yh, 22)], + nr = setdiff(des_yh, 22)) + + + +a1 <- full_join(des_avm, des_pa, by = "nr", suffix = c("avm","pa")) +a2 <- full_join(des_sl, des_wb, by = "nr", suffix = c("sl","wb")) +a3 <- full_join(a1, a2, by = "nr") +# a4 <- full_join(a3, des_yh, by = "nr") +posDescen <- full_join(a3, des_yh, by = "nr")[, -c(2)] + + +### save table --------------------------------------------------------------- +posDescen %>% gt() %>% gtsave("table-posDes-exposure.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + diff --git a/analysis/10_RMSEU.R b/analysis/10_RMSEU.R deleted file mode 100644 index eaf41c6..0000000 --- a/analysis/10_RMSEU.R +++ /dev/null @@ -1,43 +0,0 @@ -# ------------------------------------------------------------------------------ -# -# Project: Cohort Causal Graph -# -# Author: R. Foraita -# -# Purpose: RMSEU -# -# ------------------------------------------------------------------------------ -boot <- readRDS("data/boot100_mi1-graph.rds") - - -### make adjacency matrix of all pc-graphs -amat <- lapply(boot, function(x){ - tmp <- wgtMatrix(getGraph(x), transpose = FALSE) - wm2 <- (tmp + t(tmp)) - # undirected edges get 0.5 (in sum they count as 1) - tmp[which(wm2 > 1)] <- 0.5 - tmp -}) - -### average edges -g.avg <- Reduce('+', amat) -g.avg <- Reduce('+', amat) / length(amat) - - - -### sum up directed edges -t1 <- t(g.avg)[lower.tri(t(g.avg))] -t2 <- g.avg[lower.tri(g.avg)] -t3 <- apply(cbind(t1, t2), 1, sum) - - -t44 <- t75 <- t3 -t44[t44 < 0.44] <- 0 -t75[t75 < 0.75] <- 0 -(rmseu0 <- gum(t3, vertices = nrow(g.avg), threshold = 0.5)) -(rmseu44 <- gum(t44, vertices = nrow(g.avg), threshold = 0.5)) -(rmseu75 <- gum(t75, vertices = nrow(g.avg), threshold = 0.5)) - - -# --- save ----------------------------------------------- -save(rmseu0, rmseu44, rmseu75, file = "data/rmseu.RData") \ No newline at end of file diff --git a/analysis/Paths_for_all_modifiable_factors.R b/analysis/Paths_for_all_modifiable_factors.R new file mode 100644 index 0000000..6bb0b60 --- /dev/null +++ b/analysis/Paths_for_all_modifiable_factors.R @@ -0,0 +1,327 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: Apr 2023 +# +# Purpose: Create Pathways +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) + +# load data +boot <- readRDS("data_not_load/boot100_mi1-graph.RDS") +load("data_not_load/06_graph-10trees-pa.RData") + +delete_undirected_edges <- function(adm){ + x <- cbind(as.vector(adm), as.vector(t(adm))) + y <- apply(x, 1, sum) + print(table(y)) + u <- which(y == 4) + print(paste0("Number of undirected edges: ", length(u))) + + adm[u] <- 0 + adm +} + +# Data management ------------------------------------------------------------------------ +g1 <- pc2igraph(g.pa)[[1]] +g1 <- feature.igraph(g1, labels = V(g1)$name) +gb <- lapply(boot, function(x){ + tmp <- pc2igraph(x)[[1]] + feature.igraph(tmp, labels = V(tmp)$name)}) +knoten <- g.pa@graph@nodes + + +# 0: no edge +# 1: directed edge +# 2: undirected edge -> set to 0 +g1.undirected <- wgtMatrix(getGraph(g.pa), transpose = FALSE) +tmp <- delete_undirected_edges(g1.undirected) +g1.undirected <- graph_from_adjacency_matrix(tmp) + +# # only for test purposes +# g1.el <- as_edgelist(g1) +# g1.el <- data.frame(A = paste(g1.el[,1], g1.el[,2], sep="-")) +# g1.un.el <- as_edgelist(g1.undirected) +# g1.un.el <- data.frame(A = paste(g1.un.el[,1], g1.un.el[,2], sep="-"), B = 1) +# gg <- merge(g1.el, g1.un.el, by = "A", all = TRUE) +# gg[is.na(gg$B),] + +# boot[[100]] +# # 0 1 2 4 +# # 2335 250 4 12 +# table(wgtMatrix(getGraph(boot[[100]]), transpose = FALSE)) +# as_edgelist(gb.undirected[[100]]) #125+4 + +gb.undirected <- lapply(boot, function(x){ + graph_from_adjacency_matrix( + delete_undirected_edges( + wgtMatrix(getGraph(x), transpose = FALSE) + ) + ) + }) + +# # Average matrix without undirected edges +# +# boot.adm <- lapply(boot, function(x){ +# wgtMatrix(getGraph(x),transpose = FALSE)}) +# boot.adm.dir <- boot.adm <- Reduce('+', boot.adm) +# tt <- t(boot.adm) +# boot.adm.dir[upper.tri(boot.adm.dir)] <- +# boot.adm.dir[upper.tri(boot.adm.dir)] > tt[upper.tri(boot.adm.dir)] +# boot.adm.dir[lower.tri(boot.adm.dir)] <- +# boot.adm.dir[lower.tri(boot.adm.dir)] > tt[lower.tri(boot.adm.dir)] +# boot.adm.dir[15:25, 15:25] +# g.adm <- graph_from_adjacency_matrix(boot.adm.dir) +# a <- as.data.frame(get.edgelist(g.adm)) +# filter(a, V1 %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2") & +# V2 %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2")) +# +# plot(induced_subgraph(g.adm, vids = which(knoten %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2")))) + + + + +# set.seed(6514) +# mat <- list(matrix(sample(1:20, 16), ncol = 4, nrow = 4), +# matrix(sample(1:20, 16), ncol = 4, nrow = 4), +# matrix(sample(1:20, 16), ncol = 4, nrow = 4)) +# mat.old <- mat <- Reduce('+', mat) +# diag(mat.old) <- diag(mat) <- 0 +# tt <- t(mat) +# mat[upper.tri(mat)] <- +# mat[upper.tri(mat)] > tt[upper.tri(mat)] +# mat[lower.tri(mat)] <- +# mat[lower.tri(mat)] > tt[lower.tri(mat)] + + +# Original graph ------------------------------------------------------------------------- +makepfade <- function(graph, boot, FROM, TO){ + if(class(graph) != "igraph") stop("object graph must be an igraph") + if(!is.list(boot)) stop("object boot must be a list") + if(any(sapply(boot, function(x) class(x) != "igraph"))) stop("all boot list elements must be igraph objects") + + #if(class(graph) == "igraph"){ + # --- shortest paths --- + sp <- unlist(shortest_paths(graph, from = FROM, to = TO, mode = "out")$vpath) + sp_chr <- paste(sp, collapse="-") + + # --- all simple paths --- + ap <- all_simple_paths(graph, from = FROM, to = TO, mode = "out") + ap_chr <- unlist(map(ap, paste, collapse="-")) + + gp <- list("sp" = sp, "sp_chr" = sp_chr, "ap" = ap, "ap_chr" = ap_chr) + # } + # + # if(is.list(graph)){ + # --- shortest paths --- + sp_b <- lapply(boot, function(x) + try(shortest_paths(x, from = FROM, to = TO, mode = "out")$vpath, + silent = TRUE)) + sp_b_ul <- lapply(sp_b, unlist) + # delete graphs without a path + sp_b_short <- discard(sp_b_ul, + function(x) length(x) == 0) + + sp_b_chr <- map(sp_b_short, paste, collapse="-") + + # --- all simple paths --- + # list of all simple path for all BGs (list length 100) + ap_b <- lapply(boot, function(x) + try(all_simple_paths(x, from = FROM, to = TO, mode = "out"), + silent = TRUE)) + # list of all paths (no assignment to BG) + ap_b_short <- flatten(discard(ap_b, function(x) length(x) == 0)) + ap_b_chr <- map(ap_b_short, paste, collapse="-") + # vector of all paths (no assignment to BG) + ap_b_chr_v <- unlist(ap_b_chr) + + + bp <- list("sp_b" = sp_b, "sp_b_short" = sp_b_short, "sp_b_chr" = sp_b_chr, + "ap_b" = ap_b, "ap_b_short" = ap_b_short, "ap_b_chr" = ap_b_chr, + "ap_b_chr_v" = ap_b_chr_v) + # } + list("ccg" = gp, "boot" = bp) + +} + + +descr_paths <- function(liste){ + browser() + out.mat <- matrix(NA, nrow = 12, ncol = 2) + row.names(out.mat) <- c("CCG: Shortest path", + "CCG: Number of all paths", + "Boot: Number of graphs with any path", + "Boot: Number of (unique) paths", + "Boot: Median (IQR) number of paths in each BG", + "Boot: Median (IQR) path length", + "How often is the CCG shortest path in any (shortest) bootstrap?", + "","","","","") + + # Shortest path in original path + rownames(out.mat)[1] <- paste("CCG-SP: ", + paste0( + knoten[as.numeric(unlist(str_split(liste$ccg$sp_chr, "-")))], + collapse = " > ")) + out.mat[1,1] <- 1 + + # Number of paths in original graph + out.mat[2,1] <- length(liste$ccg$ap_chr) + + # Number of BG with any path + out.mat[3,1] <- length(liste$boot$sp_b_short) + # Number of Bootstrap paths + out.mat[4,1] <- length(liste$boot$ap_b_chr_v) + # Number of unique Bootstrap paths + out.mat[4,2] <- length(unique(liste$boot$ap_b_chr_v)) + # Median (IQR) number of paths in each BG + out.mat[5,1] <- median(sapply(liste$boot$ap_b, length)[sapply(liste$boot$ap_b, length) != 0]) + out.mat[5,2] <- IQR(sapply(liste$boot$ap_b, length)[sapply(liste$boot$ap_b, length) != 0]) + # Median path length and IQR in Bootstrap + out.mat[6,1] <- median(sapply(liste$boot$ap_b_short, length)) + out.mat[6,2] <- IQR(sapply(liste$boot$ap_b_short, length)) + + ### how often is the shortest path in the any bootstrap path? + tab_allpaths <- table(liste$boot$ap_b_chr_v) +# out.mat[7,1] <- as.vector(tab_allpaths[which(names(tab_allpaths) == liste$ccg$sp_chr)]) + ### how often is the shortest path in the bootstrap shortest paths? + tab_spaths <- table(unlist(liste$boot$sp_b_chr)) +# out.mat[7,2] <- as.vector(tab_spaths[which(names(tab_spaths) == liste$ccg$sp_chr)]) + + ### which shortest path is the most frequent one? + pfadname <- which(tab_spaths == max(tab_spaths)) + out.mat[8,1] <- as.vector(tab_spaths[pfadname])[1] + tmp <- paste("Boot max% ShoP: ", + paste0( + knoten[as.numeric(unlist(str_split(names(pfadname), "-")))], + collapse = " > ")) + rownames(out.mat)[8] <- str_replace(tmp, "bmi.2 >", "bmi.2;") + + ### which simple path is the most frequent one? + pfadname <- which(tab_allpaths == max(tab_allpaths)) + out.mat[9,1] <- as.vector(tab_allpaths[pfadname])[1] + rownames(out.mat)[9] <- paste("Boot max% AnyP: ", + paste0( + knoten[as.numeric(unlist(str_split(names(pfadname), "-")))], + collapse = " > ")) + + # 3 most visited nodes on any bootstrap graph + # browser() + freq_nodes <- sort(table(unlist(liste$boot$ap_b_short)), decreasing = TRUE) + freq_nodes <- freq_nodes[3:5] + names(freq_nodes) <- knoten[as.numeric(names(freq_nodes))] + perc_nodes <- round(freq_nodes * 100 / length(liste$boot$ap_b_chr_v),0) + out.mat[10,] <- c(freq_nodes[1], perc_nodes[1]) + out.mat[11,] <- c(freq_nodes[2], perc_nodes[2]) + out.mat[12,] <- c(freq_nodes[3], perc_nodes[3]) + rownames(out.mat)[10:12] <- paste(names(freq_nodes), " N(%)") + + out.mat +} + + + +p_avm <- makepfade(g1, gb, FROM = "media.0", TO = "bmi.2") +p_avm_un <- makepfade(g1, gb.undirected, FROM = "media.0", TO = "bmi.2") +p_pa <- makepfade(g1, gb, FROM = "pa.0", TO = "bmi.2") +p_pa_un <- makepfade(g1, gb.undirected, FROM = "pa.0", TO = "bmi.2") +p_sleep <- makepfade(g1, gb, FROM = "sleep.0", TO = "bmi.2") +p_sleep_un <- makepfade(g1, gb.undirected, FROM = "sleep.0", TO = "bmi.2") +p_wb <- makepfade(g1, gb, FROM = "wb.0", TO = "bmi.2") +p_wb_un <- makepfade(g1, gb.undirected, FROM = "wb.0", TO = "bmi.2") +p_yhei <- makepfade(g1, gb, FROM = "yhei.0", TO = "bmi.2") +p_yhei_un <- makepfade(g1, gb.undirected, FROM = "yhei.0", TO = "bmi.2") + +t_avm <- descr_paths(p_avm) +t_avm_un <- descr_paths(p_avm_un) +t_pa <- descr_paths(p_pa) +t_pa_un <- descr_paths(p_pa_un) +t_sleep<- descr_paths(p_sleep) +t_sleep_un<- descr_paths(p_sleep_un) +t_wb <- descr_paths(p_wb) +t_wb_un <- descr_paths(p_wb_un) +t_yhei <- descr_paths(p_yhei) +t_yhei_un <- descr_paths(p_yhei_un) + +# save(t_yhei, file = "data_not_load/oslo_pfad_yhei.RData") + +# RTF output ----------------------------------------------------------------------------- +data.frame(V0 = rownames(t_avm), + as.data.frame(t_avm, row.names = FALSE), + V1 = rownames(t_avm_un), + as.data.frame(t_avm_un, row.names = FALSE)) %>% + as_tibble() %>% gt() %>% + cols_label(V0 = md("**AVM**"), + V1 = md("**N**"), + V2 = md("**N/%**"), + V1.1 = md("**directed**"), + V1.2 = md("**N**"), + V2.1 = md("**N, %**")) %>% + sub_missing(columns = c(2,3,5,6), missing_text = "-") %>% + gtsave("paths_avm.rtf", path = "results/") + +data.frame(V0 = rownames(t_pa), + as.data.frame(t_pa, row.names = FALSE), + V1 = rownames(t_pa_un), + as.data.frame(t_pa_un, row.names = FALSE)) %>% + as_tibble() %>% gt() %>% + cols_label(V0 = md("**Physical Activity**"), + V1 = md("**N**"), + V2 = md("**N/%**"), + V1.1 = md("**directed**"), + V1.2 = md("**N**"), + V2.1 = md("**N, %**")) %>% + sub_missing(columns = c(2,3,5,6), missing_text = "-") %>% + gtsave("paths_pa.rtf", path = "results/") + +data.frame(V0 = rownames(t_sleep), + as.data.frame(t_sleep, row.names = FALSE), + V1 = rownames(t_sleep_un), + as.data.frame(t_sleep_un, row.names = FALSE)) %>% + as_tibble() %>% gt() %>% + cols_label(V0 = md("**Sleep**"), + V1 = md("**N**"), + V2 = md("**N/%**"), + V1.1 = md("**directed**"), + V1.2 = md("**N**"), + V2.1 = md("**N, %**")) %>% + sub_missing(columns = c(2,3,5,6), missing_text = "-") %>% + gtsave("paths_sleep.rtf", path = "results/") + +data.frame(V0 = rownames(t_wb), + as.data.frame(t_wb, row.names = FALSE), + V1 = rownames(t_wb_un), + as.data.frame(t_wb_un, row.names = FALSE)) %>% + as_tibble() %>% gt() %>% + cols_label(V0 = md("**Well-being**"), + V1 = md("**N**"), + V2 = md("**N/%**"), + V1.1 = md("**directed**"), + V1.2 = md("**N**"), + V2.1 = md("**N, %**")) %>% + sub_missing(columns = c(2,3,5,6), missing_text = "-") %>% + gtsave("paths_wb.rtf", path = "results/") + +data.frame(V0 = rownames(t_yhei), + as.data.frame(t_yhei, row.names = FALSE), + V1 = rownames(t_yhei_un), + as.data.frame(t_yhei_un, row.names = FALSE)) %>% + as_tibble() %>% gt() %>% + cols_label(V0 = md("**YHEI**"), + V1 = md("**N**"), + V2 = md("**N/%**"), + V1.1 = md("**directed**"), + V1.2 = md("**N**"), + V2.1 = md("**N, %**")) %>% + sub_missing(columns = c(2,3,5,6), missing_text = "-") %>% + gtsave("paths_yhei.rtf", path = "results/") + + +# ---------------------------------------------------------------------------------------- + +# how many edges have the BGs on average? +my.pc.print(g.pa) +apply(sapply(boot, my.pc.print), 1, mean) diff --git a/analysis/Table1.R b/analysis/Table1.R new file mode 100644 index 0000000..a325aaf --- /dev/null +++ b/analysis/Table1.R @@ -0,0 +1,150 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: Create Table 1 +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) + +load("data_not_load/06_graph-pa.RData") +df <- daten.pa.mids$data + + +df1 <- df %>% rename( + "sex.iv" = "sex", + "country.iv" = "country3", + "migrant.iv" = "migrant", + "income.0" = "income", + "isced.0" = "isced", + "bage.el" = "bage", + "bf.el" = "bf", + "bweight.el" = "bweight", + "preg_week.el" = "preg_week", + "preg_smoke.el" = "preg_smoke", + "formula.el" = "formula", + "hdiet.el" = "hdiet") %>% + mutate(id = 1:nrow(df)) + +levels(df1$country.iv) <- c("1","2","3") # Central North South +levels(df1$familymeal.0) <- c("0","1") # less at least once a day +levels(df1$familymeal.1) <- c("0","1") # less at least once a day +levels(df1$familymeal.2) <- c("0","1") # less at least once a day + +df1 <- df1 %>% mutate_if(is.factor, ~ as.numeric(as.character(.x))) + + +# Review: include BMI categories -------------------------- +analysis_data <- readRDS("data_not_load/02_data2impute.RDS") +# idnr <- analysis_data$id_no +# save(idnr, file = "data_not_load/review_id.rda") + +# prepare the following dataset on the CDS +# C:\Users\foraita\Documents\Foraita\ccg\review +load("data_not_load/bmi_cat.rda") + +# merge BMI categories to df +ad_sub <- subset(analysis_data, select = c(id_no, country, sex, age_t0, bmi_t0)) +ad_sub <- merge(ad_sub, core_out, + by.x = c("id_no", "country", "sex", "age_t0", "bmi_t0"), + by.y = c("id_no", "country", "sex", "age.0", "bmi.0"), + all.x = TRUE, sort = FALSE) +names(ad_sub)[c(4:5, 10:12)] <- c("age.0", "bmi.0", "age.2", "bmi.2", "bmicat.2") + +merkmal <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity") + +# ad_sub$bmicat.0 <- factor(ad_sub$bmicat.0, labels = merkmal) +# ad_sub$bmicat.1 <- factor(ad_sub$bmicat.1, labels = merkmal) +# ad_sub$bmicat.2 <- factor(ad_sub$bmicat.2, labels = merkmal) + +together <- data.frame(df1, subset(ad_sub, + select = c(bmicat.0, bmicat.1, bmicat.2))) + +# review end ---------------------------------------------- + + + + +df2 <- together %>% pivot_longer(cols = c(1:51, 53:55), #starts_with("age"), + names_to = "group", + values_to = "val") + + +df3 <- df2 %>% + separate(group, c("variable","group"), sep="\\.") %>% + pivot_wider(names_from = variable, + values_from = val) %>% + mutate(bf = exp(bf) - 1, + pa = exp(pa) - 1, + media = media / 7, + hdiet = exp(hdiet) - 1, + wb = wb / 48 * 100) %>% + mutate_at(c("group", "sex", "migrant", "country", "school", "income", "isced", + "formula", "preg_smoke", "familymeal", "alc", "pub", "smoke", "bmicat"), + ~ as.factor(.x)) %>% + relocate(c(id, group, + country, sex, migrant, preg_week, preg_smoke, bage, + bweight, bf, formula, hdiet, + age, school, bmi, bmicat, wb, media, pa, sleep, yhei, familymeal, + homa, pub, alc, smoke, bmi_m, income, isced)) + +levels(df3$group) <- c("Baseline", "FU1", "FU2", "EL", "IV") +levels(df3$sex) <- c("no", "yes") +levels(df3$migrant) <- c("yes", "no") +levels(df3$country) <- c("Central", "North", "South") +levels(df3$school) <- c("Kindergarden", "School", "neither") +levels(df3$familymeal) <- c("no", "yes") +levels(df3$income) <- c("low", "middle", "high") +levels(df3$isced) <- c("low", "middle", "high") +levels(df3$formula) <- c("no", "yes") +levels(df3$preg_smoke) <- c("never", "rarely", "several occasions a week", "daily") +levels(df3$alc) <- c("no", "yes") +levels(df3$smoke) <- c("no", "yes") +levels(df3$pub) <- c("no", "yes") +levels(df3$bmicat) <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity") + +table1 <- df3 %>% select(.,-c(id)) %>% + tbl_summary( + by = group, + statistic = list(all_continuous() ~ "{mean} ({sd})", + all_categorical() ~ "{n} ({p}%)"), + digits = list(all_categorical() ~ c(0, 1)), + label = list(country ~ "Country", + sex ~ "Female", + migrant ~ "Migration background", + bweight ~ "Birthweight [g]", + bage ~ "Mother's age at birth", + preg_smoke ~ "Tobacco smoking during pregnancy", + formula ~ "Was fed with formula milk", + bf ~ "Total breastfeeding [months]", + preg_week ~ "Completed weeks of pregnancy", + hdiet ~ "Fully integrated into household's diet [month]", + age ~ "Age", + bmi ~ "BMI z-score", + bmicat ~ "BMI", + bmi_m ~ "Mother's BMI", + school ~ "School", + isced ~ "ISCED", + income ~ "Household's income", + media ~ "Audiovisual media consumption [h/day]", + familymeal ~ "Daily family meals", + pa ~ "Physical activity [h/week]", + sleep ~ "Total sleep [h/day]", + wb ~ "Well-being [%]", + yhei ~ "Youth healthy eating index [%]", + homa ~ "Homa index z-score", + alc ~ "Ever alcohol drinking", + smoke ~ "Ever tobacco smoking", + pub ~ "Pubertal" + ), + missing_text = "Missing") +table1 +### save table --------------------------------------------------------------- +table1 %>% as_gt() %>% gtsave("table1_revision.rtf", path = "results/") + + + diff --git a/analysis/Table1S.R b/analysis/Table1S.R new file mode 100644 index 0000000..8f58f42 --- /dev/null +++ b/analysis/Table1S.R @@ -0,0 +1,155 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: Create Table 1 +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) + +load("data_not_load/06_graph-pa.RData") +rm(fg.pa, ebenen.pa, V.pa) + +df <- mice::complete(daten.pa.mids, include = FALSE, action = "stacked") + + +df1 <- df %>% rename( + "sex.iv" = "sex", + "country.iv" = "country3", + "migrant.iv" = "migrant", + "income.0" = "income", + "isced.0" = "isced", + "bage.el" = "bage", + "bf.el" = "bf", + "bweight.el" = "bweight", + "preg_week.el" = "preg_week", + "preg_smoke.el" = "preg_smoke", + "formula.el" = "formula", + "hdiet.el" = "hdiet") %>% + mutate(id = 1:nrow(df)) + +levels(df1$country.iv) <- c("1","2","3") # Central North South +levels(df1$familymeal.0) <- c("0","1") # less at least once a day +levels(df1$familymeal.1) <- c("0","1") # less at least once a day +levels(df1$familymeal.2) <- c("0","1") # less at least once a day + +df1 <- df1 %>% mutate_if(., is.factor, ~ as.numeric(as.character(.x))) + + + + +# Review: include BMI categories -------------------------- +analysis_data <- readRDS("data_not_load/02_data2impute.RDS") +# idnr <- analysis_data$id_no +# save(idnr, file = "data_not_load/review_id.rda") + +# prepare the following dataset on the CDS +# C:\Users\foraita\Documents\Foraita\ccg\review +load("data_not_load/bmi_cat.rda") + +# merge BMI categories to df +ad_sub <- subset(analysis_data, select = c(id_no, country, sex, age_t0, bmi_t0)) +ad_sub <- merge(ad_sub, core_out, + by.x = c("id_no", "country", "sex", "age_t0", "bmi_t0"), + by.y = c("id_no", "country", "sex", "age.0", "bmi.0"), + all.x = TRUE, sort = FALSE) +names(ad_sub)[c(4:5, 10:12)] <- c("age.0", "bmi.0", "age.2", "bmi.2", "bmicat.2") + +# bind the dataset 10 times +ad_sub_mult <- data.frame(bmicat.0 = rep(ad_sub$bmicat.0, 10), + bmicat.1 = rep(ad_sub$bmicat.1, 10), + bmicat.2 = rep(ad_sub$bmicat.2, 10)) + +together <- data.frame(df1, ad_sub_mult) + +# review end ---------------------------------------------- + + + + +df2 <- together %>% pivot_longer(cols = c(1:51, 53:55), #starts_with("age"), + names_to = "group", + values_to = "val") + + +df3 <- df2 %>% + separate(group, c("variable","group"), sep="\\.") %>% + pivot_wider(names_from = variable, + values_from = val) %>% + mutate(bf = exp(bf) - 1, + pa = exp(pa) - 1, + media = media / 7, + hdiet = exp(hdiet) - 1, + wb = wb / 48 * 100) %>% + mutate_at(c("group", "sex", "migrant", "country", "school", "income", "isced", + "formula", "preg_smoke", "familymeal", "alc", "pub", "smoke", "bmicat"), + ~ as.factor(.x)) %>% + relocate(c(id, group, + country, sex, migrant, preg_week, preg_smoke, bage, + bweight, bf, formula, hdiet, + age, school, bmi, bmicat, wb, media, pa, sleep, yhei, familymeal, + homa, pub, alc, smoke, bmi_m, income, isced)) + +levels(df3$group) <- c("Baseline", "FU1", "FU2", "EL", "IV") +levels(df3$sex) <- c("no", "yes") +levels(df3$migrant) <- c("yes", "no") +levels(df3$country) <- c("Central", "North", "South") +levels(df3$school) <- c("Kindergarden", "School", "neither") +levels(df3$familymeal) <- c("no", "yes") +levels(df3$income) <- c("low", "middle", "high") +levels(df3$isced) <- c("low", "middle", "high") +levels(df3$formula) <- c("no", "yes") +levels(df3$preg_smoke) <- c("never", "rarely", "several occasions a week", "daily") +levels(df3$alc) <- c("no", "yes") +levels(df3$smoke) <- c("no", "yes") +levels(df3$pub) <- c("no", "yes") +levels(df3$bmicat) <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity") + + + +table1s <- df3 %>% select(.,-c(id)) %>% + tbl_summary( + by = group, + statistic = list(all_continuous() ~ "{mean} ({sd})", + all_categorical() ~ "{n} ({p}%)"), + digits = list(all_categorical() ~ c(0, 1)), + label = list(country ~ "Country", + sex ~ "Female", + migrant ~ "Migration background", + bweight ~ "Birthweight [g]", + bage ~ "Mother's age at birth", + preg_smoke ~ "Tobacco smoking during pregnancy", + formula ~ "Was fed with formula milk", + bf ~ "Total breastfeeding [months]", + preg_week ~ "Completed weeks of pregnancy", + hdiet ~ "Fully integrated into household's diet [month]", + age ~ "Age", + bmi ~ "BMI z-score", + bmicat ~ "BMI", + bmi_m ~ "Mother's BMI", + school ~ "School", + isced ~ "ISCED", + income ~ "Household's income", + media ~ "Audiovisual media consumption [h/day]", + familymeal ~ "Daily family meals", + pa ~ "Physical activity [h/week]", + sleep ~ "Total sleep [h/day]", + wb ~ "Well-being [%]", + yhei ~ "Youth healthy eating index [%]", + homa ~ "Homa index z-score", + alc ~ "Ever alcohol drinking", + smoke ~ "Ever tobacco smoking", + pub ~ "Pubertal" + ), + missing_text = "Missing") + + +### save table --------------------------------------------------------------- +table1s %>% as_gt() %>% gtsave("table1s_revision.rtf", path = "results/") +# write_rds(df3, file = "data_not_load/table1s_review.RDS") + + diff --git a/analysis/Table4.R b/analysis/Table4.R new file mode 100644 index 0000000..3d5f6ff --- /dev/null +++ b/analysis/Table4.R @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: Create Table 4 +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) +library(glue) +load("data_not_load/06_graph-10trees-pa.RData") +# load("data_not_load/07_boot-pa.RData") +# load("data_not_load/07_boot100igraph.RData") +load("data_not_load/06_graph-10trees-pa.RData") + +g <- pc2igraph(g.pa) +g1 <- feature.igraph(g[[1]], labels = V(g[[1]])$name) + +V(g1)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birth", + "Breastfeeding", "Birthweight", "Weeks_of_pregnancy", "Formula_milk", + "HH_Diet", "Smoking_pregnancy", + "Age", "School", "AVM", "BMI", "Mothers_BMI", "Familymeal", "PA", + "Sleep", "Well-being", "YHEI", "HOMA", + "Age.FU1", "School.FU1", "AVM.FU1", "BMI.FU1", "Mothers_BMI.FU1", + "Familymeal.FU1", "Income.FU1", "ISCED.FU1","PA.FU1", + "Sleep.FU1", "Well-being.FU1", "YHEI.FU1", "HOMA.FU1", + "Age.FU2", "AVM.FU2", "BMI.FU2", "Mothers_BMI.FU2", "Alcohol.FU2", + "Familymeal.FU2", "Income.FU2", "ISCED.FU2","PA.FU2", "Puberty.FU2", + "Sleep.FU2", "Smoking.FU2", "Well-being.FU2", "YHEI.FU2", "HOMA.FU2") + +edges.pa <- as.data.frame(as_edgelist(g1)) +names(edges.pa) <- c("from", "to") +edges.pa <- edges.pa[order(edges.pa$from, edges.pa$to),] + +edges <- edges[order(edges$from, edges$to),] + +# merge pa-data and bootstrapped data frames +edges.joint <- left_join(edges.pa, edges, by = c("from", "to")) +edges.joint <- edges.joint[order(edges.joint$weight, decreasing = TRUE),] + +# summary table +summary(edges.joint$weight) +IQR(edges.joint$weight) + +# --- Table --- +table4 <- + edges.joint %>% gt %>% + tab_spanner( + label = md("**Edges**"), + columns = c("from", "to")) %>% + cols_label( + from = md("**from**"), + to = md("**to**"), + weight = md("**%**")) %>% + cols_align( + align = "left", columns = c("from", "to")) + +table4 + +### save table --------------------------------------------------------------- +table4 %>% gtsave("table-edge-freqs.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + +Fn <- ecdf(edges.joint$weight) +# Prob. of edge frequencies larger than x P(f(e) > 0.9) = 1 - P(f(e) <= 0.9) +1 - Fn(90) +summary(edges.joint$weight) diff --git a/analysis/TableS_networkcharacteristic.R b/analysis/TableS_networkcharacteristic.R new file mode 100644 index 0000000..e41256e --- /dev/null +++ b/analysis/TableS_networkcharacteristic.R @@ -0,0 +1,600 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: Create Table 2 +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) +library(glue) +load("data_not_load/06_graph-10trees-pa.RData") + +gii <- pc2igraph(g.pa) + +gii1 <- feature.igraph(gii[[1]], labels = V(gii[[1]])$name) +gii2 <- feature.igraph(gii[[2]], labels = V(gii[[2]])$name) +singeltons <- setdiff(V(gii1)$name, V(gii2)$name) + +# articulation points +ap1 <- articulation_points(gii1) +ap2 <- articulation_points(gii2) + +# diameter +diam1 <- get_diameter(gii1) +diam2 <- get_diameter(gii2) + +neighb2 <- V(gii2)$name[which(ego_size(gii2, order = 1, V(gii2), mindist = 1) == + max(ego_size(gii2, order = 1, V(gii2), mindist = 1)))] + + +gd <- graph.descriptives(gii2) +gdt <- tibble( + characteristic = rownames(gd), + val = gd[,1]) + +# table2 <- gdt %>% gt %>% +# fmt_number( +# columns = 2, +# rows = c(4,6:9), +# decimals = 0 +# ) %>% +# fmt_number( +# columns = 2, +# rows = c(5,10:11), +# decimals = 1 +# ) %>% +# fmt_number( +# columns = 2, +# rows = c(1:3,12:15), +# decimals = 2 +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(ap2), ", "), +# locations = cells_body(columns = 1, rows = 6) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam2), " > "), +# locations = cells_body(columns = 1, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(neighb2, ", "), +# locations = cells_body(columns = 1, rows = 9) +# ) %>% +# tab_footnote( +# footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")), +# locations = cells_column_labels(columns = 2) +# ) %>% +# cols_label( +# characteristic = md("**Characteristics**"), +# val = md("**Main graph**")) %>% +# cols_align( +# align = "right", columns = c("val")) +# +# table2 + +### save table --------------------------------------------------------------- +# table2 %>% gtsave("table2.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + + +# ---------------------------------------------------------------------------- +# TWD, SEM, Bootstrap, MI0.1 +# ---------------------------------------------------------------------------- +# load("data_not_load/09_twd-pa.RData") +# g.tw.old <- g.tw +load("data_not_load/09_twd-pa-220718.RData") +# load("data_not_load/10_sem-hc.RData") +# sem.old <- sem +load("data_not_load/10_sem-hc_bic-cg.RData") +#load("data_not_load/07_boot-pa.RData") +load("data_not_load/07_boot100igraph.RData") +load("data_not_load/06_graph-10trees-pa.RData") +load("data_not_load/06sens_graph-10trees-pa-alpha.RData") + +gii <- pc2igraph(g.pa) +bn.pa <- bnlearn::as.bn(gii[[1]]) + +V(g.boot)$name <- V(gii[[1]])$name +g.bot <- g.boot %>% delete_edges(which(E(g.boot)$weight < 44)) +g.bot <- feature.igraph(g.bot, labels = V(g.bot)$name) +(bn.bot <- bnlearn::as.bn(g.bot)) +g.b50 <- g.boot %>% delete_edges(which(E(g.boot)$weight < 50)) +g.b50 <- feature.igraph(g.b50, labels = V(g.b50)$name) +(bn.b50 <- bnlearn::as.bn(g.b50)) +g.b75 <- g.boot %>% delete_edges(which(E(g.boot)$weight < 75)) +g.b75 <- feature.igraph(g.b75, labels = V(g.b75)$name) +(bn.b75 <- bnlearn::as.bn(g.b75)) + + +# g.sem.old <- bnlearn::as.igraph(sem.old) +# g.sem.old <- feature.igraph(g.sem.old, labels = V(g.sem.old)$name) +g.sem <- bnlearn::as.igraph(sem) +g.sem <- feature.igraph(g.sem, labels = V(g.sem)$name) + + +gtw <- pc2igraph(g.tw) +gtw <- feature.igraph(gtw[[1]], labels = V(gtw[[1]])$name) +bn.twd <- bnlearn::as.bn(gtw) + +# gtwold <- pc2igraph(g.tw.old) +# gtwold <- feature.igraph(gtwold[[1]], labels = V(gtwold[[1]])$name) +# bn.twdold <- bnlearn::as.bn(gtwold) + +g.mi <- pc2igraph(g.pa.alpha) +g.mi <- feature.igraph(g.mi[[1]], labels = V(g.mi[[1]])$name) +bn.mi <- bnlearn::as.bn(g.mi) + + +# diameter +diam.twd <- get_diameter(gtw) +diam.sem <- get_diameter(g.sem) +diam.bot <- get_diameter(g.bot) +diam.b50 <- get_diameter(g.b50) +diam.b75 <- get_diameter(g.b75) +diam.mi <- get_diameter(g.mi) + +neighb.twd <- V(gtw)$name[which(ego_size(gtw, order = 1, V(gtw), mindist = 1) == + max(ego_size(gtw, order = 1, V(gtw), mindist = 1)))] +neighb.sem <- V(g.sem)$name[which(ego_size(g.sem, order = 1, V(g.sem), mindist = 1) == + max(ego_size(g.sem, order = 1, V(g.sem), mindist = 1)))] +neighb.bot <- V(g.bot)$name[which(ego_size(g.bot, order = 1, V(g.bot), mindist = 1) == + max(ego_size(g.bot, order = 1, V(g.bot), mindist = 1)))] +neighb.b50 <- V(g.b50)$name[which(ego_size(g.b50, order = 1, V(g.b50), mindist = 1) == + max(ego_size(g.b50, order = 1, V(g.b50), mindist = 1)))] +neighb.b75 <- V(g.b75)$name[which(ego_size(g.b75, order = 1, V(g.b75), mindist = 1) == + max(ego_size(g.b75, order = 1, V(g.b75), mindist = 1)))] +neighb.mi <- V(g.mi)$name[which(ego_size(g.mi, order = 1, V(g.mi), mindist = 1) == + max(ego_size(g.mi, order = 1, V(g.mi), mindist = 1)))] + + +# Hamming distance and structural Haming distance +hamming.twd <- bnlearn::hamming(bn.pa, bn.twd) +# hamming.twd.old <- bnlearn::hamming(bn.pa, bn.twdold) +# hamming.sem.old <- bnlearn::hamming(bn.pa, sem.old) +hamming.sem <- bnlearn::hamming(bn.pa, sem) +hamming.bot <- bnlearn::hamming(bn.pa, bn.bot) +hamming.b50 <- bnlearn::hamming(bn.pa, bn.b50) +hamming.b75 <- bnlearn::hamming(bn.pa, bn.b75) +hamming.mi <- bnlearn::hamming(bn.pa, bn.mi) +shd.twd <- bnlearn::shd(bn.pa, bn.twd) +# shd.twd.old <- bnlearn::shd(bn.pa, bn.twdold) +# shd.sem.old <- bnlearn::shd(bn.pa, sem.old) +shd.sem <- bnlearn::shd(bn.pa, sem) +shd.bot <- bnlearn::shd(bn.pa, bn.bot) +shd.b50 <- bnlearn::shd(bn.pa, bn.b50) +shd.b75 <- bnlearn::shd(bn.pa, bn.b75) +shd.mi <- bnlearn::shd(bn.pa, bn.mi) + +#twd +gd <- graph.descriptives(gtw) +rn <- rownames(gd) +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.twd +gd2[17,1] <- shd.twd +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.twd <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + +#twd.old +# gd <- graph.descriptives(gtwold) +# rn <- rownames(gd) +# gd2 <- matrix(NA, nrow = 17, ncol = 1) +# gd2[1:15,1] <- gd +# gd2[16,1] <- hamming.twd +# gd2[17,1] <- shd.twd +# rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") +# gd.twd.old <- tibble( +# characteristic = rownames(gd2), +# val = gd2[,1]) + +# sem +gd <- graph.descriptives(g.sem) +gd[7,] <- nrow(sem$arcs) +gd[8,] <- 0 +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.sem +gd2[17,1] <- shd.sem +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.sem <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + +# mi +gd <- graph.descriptives(g.mi) +rn <- rownames(gd) +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.mi +gd2[17,1] <- shd.mi +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.mi <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + + +# boot +gd <- graph.descriptives(g.bot) +rn <- rownames(gd) +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.bot +gd2[17,1] <- shd.bot +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.bot <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + +# boot 50 +gd <- graph.descriptives(g.b50) +rn <- rownames(gd) +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.b50 +gd2[17,1] <- shd.b50 +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.b50 <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + +# boot 75 +gd <- graph.descriptives(g.b75) +rn <- rownames(gd) +gd2 <- matrix(NA, nrow = 17, ncol = 1) +gd2[1:15,1] <- gd +gd2[16,1] <- hamming.b75 +gd2[17,1] <- shd.b75 +rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.b75 <- tibble( + characteristic = rownames(gd2), + val = gd2[,1]) + +# Average bootstrap graph +boot <- readRDS("data_not_load/boot100_mi1-graph.RDS") +bi <- lapply(boot, function(x){ + tmp <- pc2igraph(x)[[1]] + feature.igraph(tmp)} ) +bn.bi <- lapply(boot, function(x){ + bnlearn::as.bn(x@graph, check.cycles = FALSE)}) + +tmp <- do.call(cbind, lapply(bi, graph.descriptives)) +# tmp <- rbind(tmp, tmp[7,] - tmp[8,]) +# rownames(tmp)[16] <- "Number of directed edges" +b.mean <- apply(tmp, 1, mean, na.rm = TRUE) + +gd.boot <- matrix(NA, nrow = 17, ncol = 1) +gd.boot[1:15,1] <- b.mean +gd.boot[16,1] <- mean(sapply(bn.bi, function(x){bnlearn::hamming(bn.pa, x)})) +gd.boot[17,1] <- mean(sapply(bn.bi, function(x){bnlearn::shd(bn.pa, x)})) +rownames(gd.boot) <- c(rn, "Hamming distance", "Structural Hamming distance") + +gd.boot <- tibble( + characteristic = rownames(gd.boot), + val = gd.boot[,1]) + +# gd.alle <- bind_cols(gd.mi, gd.twd[,2], gd.sem[,2], gd.bot[,2], gd.b75[,2]) +# names(gd.alle)[2:6] <- c("val.mi", "val.twd", "val.sem", "val.boot35", "val.boot75") +# +# +# +# table.alle <- +# gd.alle[,1:6] %>% gt %>% +# fmt_number( +# columns = 2:6, +# rows = c(4,6:9,16,17), +# decimals = 0 +# ) %>% +# fmt_number( +# columns = 2:6, +# rows = c(5,10:11), +# decimals = 1 +# ) %>% +# fmt_number( +# columns = 2:6, +# rows = c(1:3,12:15), +# decimals = 2 +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam2), " > "), +# locations = cells_body(columns = 3, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam.twd), " > "), +# locations = cells_body(columns = 3, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam.sem), " > "), +# locations = cells_body(columns = 4, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam.bot), " > "), +# locations = cells_body(columns = 5, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(names(diam.b75), " > "), +# locations = cells_body(columns = 5, rows = 4) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(sort(neighb.mi), ", "), +# locations = cells_body(columns = 3, rows = 9) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(sort(neighb.twd), ", "), +# locations = cells_body(columns = 3, rows = 9) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(sort(neighb.sem), ", "), +# locations = cells_body(columns = 4, rows = 9) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(sort(neighb.bot), ", "), +# locations = cells_body(columns = 5, rows = 9) +# ) %>% +# tab_footnote( +# footnote = glue_collapse(sort(neighb.b75), ", "), +# locations = cells_body(columns = 5, rows = 9) +# ) %>% +# # tab_footnote( +# # footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")), +# # locations = cells_column_labels(columns = 2) +# # ) %>% +# # tab_footnote( +# # footnote = paste0("without singletons ",glue_collapse(singeltons.tw, ", ")), +# # locations = cells_column_labels(columns = 3) +# # ) %>% +# cols_label( +# characteristic = md("**Characteristics**"), +# val.mi = md("**MI**"), +# val.twd = md("**TWD**"), +# val.sem = md("**SEM**"), +# val.boot35 = md("**BOOT35**"), +# val.boot75 = md("**BOOT75**") +# ) %>% +# cols_align( +# align = "right", columns = c("val.mi","val.twd", "val.sem", "val.boot35", "val.boot75")) +# +# table.alle +# +# ### save table --------------------------------------------------------------- +# save(gd.alle2, file = "data_not_load/table2_data.RData") +# table.alle %>% gtsave("table2_twd-sem-boot.rtf", path = "results/") +# # ---------------------------------------------------------------------------- +# +# + + +## add rmseu +load(file = "results/rmseu.RData") + + gdt[16:17,1:2] <- NA + gdt[16:17,1] <- c("Hamming distance", "Structural Hamming distance") + gd.alle2 <- bind_cols(gdt, gd.mi[,2], gd.twd[,2], gd.sem[,2], gd.bot[,2], + gd.b50[,2], gd.b75[,2], gd.boot[,2]) + names(gd.alle2)[2:9] <- c("val.main","val.mi","val.twd", "val.sem", "val.boot44", + "val.boot50", "val.boot75", "avg.boot") + + gd.alle2[18,1] <- "Mean edge uncertainty" + gd.alle2[18,6] <- round(rmseu44$meu * 100, 1) + gd.alle2[18,7] <- round(rmseu50$meu * 100, 1) + gd.alle2[18,8] <- round(rmseu75$meu * 100, 1) + gd.alle2[18,9] <- round(rmseu0$meu * 100, 1) + + +table.alle2 <- + gd.alle2[c(7,8,11,9,5,4,16,17,18),] %>% gt %>% + fmt_number( + columns = 2:9, + rows = c(1,2,4,6:8), + decimals = 0 + ) %>% + fmt_number( + columns = 2:9, + rows = c(3,5,9), + decimals = 1 + ) %>% + # fmt_number( + # columns = 2:8, + # rows = c(1:3,12:15), + # decimals = 2 + # ) %>% + tab_footnote( + footnote = glue_collapse(names(diam2), " > "), + locations = cells_body(columns = 2, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.mi), " > "), + locations = cells_body(columns = 3, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.twd), " > "), + locations = cells_body(columns = 4, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.sem), " > "), + locations = cells_body(columns = 5, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.bot), " > "), + locations = cells_body(columns = 6, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.b50), " > "), + locations = cells_body(columns = 7, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam.b75), " > "), + locations = cells_body(columns = 8, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb2), ", "), + locations = cells_body(columns = 2, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.mi), ", "), + locations = cells_body(columns = 3, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.twd), ", "), + locations = cells_body(columns = 4, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.sem), ", "), + locations = cells_body(columns = 5, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.bot), ", "), + locations = cells_body(columns = 6, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.b50), ", "), + locations = cells_body(columns = 7, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb.b75), ", "), + locations = cells_body(columns = 8, rows = 4) + ) %>% + # tab_footnote( + # footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")), + # locations = cells_column_labels(columns = 2) + # ) %>% + # tab_footnote( + # footnote = paste0("without singletons ",glue_collapse(singeltons.tw, ", ")), + # locations = cells_column_labels(columns = 3) + # ) %>% + cols_label( + characteristic = md("**Characteristics**"), + val.main = md("**Main**"), + val.mi = md("**MI**"), + val.twd = md("**TWD**"), + val.sem = md("**SEM**"), + val.boot44 = md("**BOOT44**"), + val.boot50 = md("**BOOT50**"), + val.boot75 = md("**BOOT75**"), + avg.boot = md("**Avg.BOOT**") + ) %>% + cols_align( + align = "right", columns = c("val.main","val.mi","val.twd", "val.sem", "val.boot44", + "val.boot50", "val.boot75", "avg.boot")) + +table.alle2 + + + + +### save table --------------------------------------------------------------- + save(gd.alle2, file = "data_not_load/table2_data.RData") + table.alle2 %>% gtsave("table2_twd-sem-boot.rtf", path = "results/") +# ---------------------------------------------------------------------------- + + + + + + + + # --- MVPA -------------------------------------------------------------- + + load("data_not_load/06_graph-10trees-mvpa.RData") + gmvpa <- pc2igraph(g.mvpa) + + gmvpa1 <- feature.igraph(gmvpa[[1]], labels = V(gmvpa[[1]])$name) + gmvpa2 <- feature.igraph(gmvpa[[2]], labels = V(gmvpa[[2]])$name) + singeltons.mv <- setdiff(V(gmvpa1)$name, V(gmvpa2)$name) + + # articulation points + ap1mv <- articulation_points(gmvpa1) + ap2mv <- articulation_points(gmvpa2) + + # diameter + diam1mv <- get_diameter(gmvpa1) + diam2mv <- get_diameter(gmvpa2) + + + neighb2mv <- V(gmvpa2)$name[which(ego_size(gmvpa2, order = 1, V(gmvpa2), mindist = 1) == + max(ego_size(gmvpa2, order = 1, V(gmvpa2), mindist = 1)))] + + + gd <- graph.descriptives(gmvpa2) + gdt2b <- tibble( + characteristic = rownames(gd), + val = gd[,1]) + + gdt2b <- bind_cols(gdt, gdt2b[,2]) + names(gdt2b)[2:3] <- c("val", "val.mvpa") + + + table2b <- gdt2b %>% gt %>% + fmt_number( + columns = 2:3, + rows = c(4,6:9), + decimals = 0 + ) %>% + fmt_number( + columns = 2:3, + rows = c(5,10:11), + decimals = 1 + ) %>% + fmt_number( + columns = 2:3, + rows = c(1:3,12:15), + decimals = 2 + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam2), " > "), + locations = cells_body(columns = 2, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(names(diam2mv), " > "), + locations = cells_body(columns = 3, rows = 4) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(names(ap2)), ", "), + locations = cells_body(columns = 2, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(names(ap2mv)), ", "), + locations = cells_body(columns = 3, rows = 6) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb2), ", "), + locations = cells_body(columns = 2, rows = 9) + ) %>% + tab_footnote( + footnote = glue_collapse(sort(neighb2mv), ", "), + locations = cells_body(columns = 3, rows = 9) + ) %>% + tab_footnote( + footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")), + locations = cells_column_labels(columns = 2) + ) %>% + tab_footnote( + footnote = paste0("without singletons ",glue_collapse(singeltons.mv, ", ")), + locations = cells_column_labels(columns = 3) + ) %>% + cols_label( + characteristic = md("**Characteristics**"), + val = md("**PA graph**"), + val.mvpa = md("**MVPA graph**")) %>% + cols_align( + align = "right", columns = c("val", "val.mvpa")) + + table2b + + + + ### save table --------------------------------------------------------------- + # table2b %>% gtsave("table2_mvpa.rtf", path = "results/") + # ---------------------------------------------------------------------------- diff --git a/analysis/Table_edgefreq.R b/analysis/Table_edgefreq.R new file mode 100644 index 0000000..f099652 --- /dev/null +++ b/analysis/Table_edgefreq.R @@ -0,0 +1,96 @@ +# ------------------------------------------------------------------------------ +# +# Project: Cohort Causal Graph +# +# Author: R. Foraita +# Date: JUL 2021 +# +# Purpose: Table edge frequencies +# +# ------------------------------------------------------------------------------ +library(gt) +library(gtsummary) + +# load data +boot <- readRDS("data_not_load/boot100_mi1-graph.RDS") +load("data_not_load/06_graph-10trees-pa.RData") + + + + + + +# g.pa to igraph +g <- pc2igraph(g.pa) +g <- feature.igraph(g[[1]]) +V(g)$name <- c("Sex", "Region", "Migrant", "Income", "ISCED", + "Mother's age at birth", "Total breastfeeding", + "Birthweight", "Weeks of pregnancy", "Formula milk", "HH diet", + "Smoking during pregnancy", "Age (B)", "School (B)", "AVM (B)", + "BMI (B)", "Mother's BMI (B)", "Daily family meals (B)", "PA (B)", + "Sleep (B)", "Well-being (B)", "YHEI (B)", "HOMA (B)", + "Age (FU1)", "School (FU1)", "AVM (FU1)", + "BMI (FU1)", "Mother's BMI (FU1)", "Daily family meals (FU1)", + "Income (FU1)", "ISCED (FU1)","PA (FU1)", + "Sleep (FU1)", "Well-being (FU1)", "YHEI (FU1)", "HOMA (FU1)", + "Age (FU2)", "AVM (FU2)", "BMI (FU2)", "Mother's BMI (FU2)", + "Alcohol (FU2)", "Daily family meals (FU2)", "Income (FU2)", "ISCED (FU2)", + "PA (FU2)", "Puberty (FU2)","Sleep (FU2)", "Smoking (FU2)", + "Well-being (FU2)", "YHEI (FU2)", "HOMA (FU2)") + +edges.pa <- as.data.frame(as_edgelist(g)) +names(edges.pa) <- c("from", "to") + + +# make adjacency matrix of all pc-graphs +amat <- lapply(boot, function(x){ + tmp <- wgtMatrix(getGraph(x), transpose = FALSE) + wm2 <- (tmp + t(tmp)) + tmp[which(wm2 > 1)] <- 0.5 + tmp + }) +g.avg <- Reduce('+', amat) + + + +# Build igraph +g.boot <- graph.adjacency(g.avg, weighted = TRUE) +V(g.boot)$name <- c("Sex", "Region", "Migrant", "Income", "ISCED", + "Mother's age at birth", "Total breastfeeding", + "Birthweight", "Weeks of pregnancy", "Formula milk", "HH diet", + "Smoking during pregnancy", "Age (B)", "School (B)", "AVM (B)", + "BMI (B)", "Mother's BMI (B)", "Daily family meals (B)", "PA (B)", + "Sleep (B)", "Well-being (B)", "YHEI (B)", "HOMA (B)", + "Age (FU1)", "School (FU1)", "AVM (FU1)", + "BMI (FU1)", "Mother's BMI (FU1)", "Daily family meals (FU1)", + "Income (FU1)", "ISCED (FU1)","PA (FU1)", + "Sleep (FU1)", "Well-being (FU1)", "YHEI (FU1)", "HOMA (FU1)", + "Age (FU2)", "AVM (FU2)", "BMI (FU2)", "Mother's BMI (FU2)", + "Alcohol (FU2)", "Daily family meals (FU2)", "Income (FU2)", "ISCED (FU2)", + "PA (FU2)", "Puberty (FU2)","Sleep (FU2)", "Smoking (FU2)", + "Well-being (FU2)", "YHEI (FU2)", "HOMA (FU2)") + +# more information about edges +edges <- as.data.frame(as_edgelist(g.boot)) +names(edges) <- c("from", "to") +edges$weight <- E(g.boot)$weight + +# merge two data frames +edges.joint <- left_join(edges.pa, edges, by = c("from", "to")) + + +# median and IQR of selected edges +summary(edges$weight) +IQR(edges$weight) + + +# edges with more than 80% +e80 <- edges[which(edges$weight >= 80),] +e80[order(e80$weight, decreasing = TRUE),] + + +# Table ---------------------------------------------------------------------------------- +ej <- edges.joint[order(edges.joint$weight, decreasing = TRUE),] + +write.csv2(ej, file = "results/tab_edgefreq.csv") + diff --git a/analysis/VisNet.R b/analysis/VisNet.R index d5edda9..d93acd5 100644 --- a/analysis/VisNet.R +++ b/analysis/VisNet.R @@ -1,19 +1,10 @@ -# ------------------------------------------------------------------------------ -# -# Project: Cohort Causal Graph -# -# Author: R. Foraita -# -# Purpose: Make interactive graphs -# -# ------------------------------------------------------------------------------ library(visNetwork) library(glue) # ------------ Main graph ------------------------------------------------------------ -load("data/graph-10trees.RData") +load("data_not_load/06_graph-10trees-pa.RData") g <- pc2igraph(g.pa)[[1]] graph.descriptives(g) @@ -107,6 +98,12 @@ visNetwork(nodes, edges, width = "100%", height = "800px") %>% visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.pa +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.pa, file = "vis-pa.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + + + @@ -114,8 +111,116 @@ vis.pa # # Sensitivity graphs # +# ------------ MVPA ------------------------------------------------------------------ +load("data_not_load/06_graph-10trees-mvpa.RData") +g <- pc2igraph(g.mvpa)[[1]] + + +# --- Prepare edges --- +edges <- as.data.frame(as_edgelist(g)) +names(edges) <- c("from", "to") +edges$from[which(edges$from == "income")] <- "income.0" +edges$from[which(edges$from == "isced")] <- "isced.0" +edges$to[which(edges$to == "income")] <- "income.0" +edges$to[which(edges$to == "isced")] <- "isced.0" + +edges <- data.frame(edges, edges$from, + arrowheads = rep("arrow", nrow(edges)), + arrows.to.enabled = TRUE) + +# ungerichtete Kanten bearbeiten +tmp <- tmp2 <- vector() +for(i in 1:(nrow(edges))){ + tmp <- c(tmp, glue_collapse(edges[i,1:2],"-")) + tmp2 <- c(tmp2, glue_collapse(edges[i,2:3],"-")) +} + +isele <- which(is.element(tmp2, tmp)) +edges[isele,] +edges$arrows.to.enabled[isele] <- FALSE + +# delete doppelte Kanten:c(21, 80, 83, 86, 88, 90, 107, 109, 110) +edges <- edges[-c(21, 80, 83, 86, 88, 90, 107, 109, 110),] + + + +# --- prepre nodes --- +farbpalette <- c(rep("#afa8b3",3), rep("#4F3824",7), + rep("#D1603D",14),rep("#DDB967",14), rep("#D0E37F",16)) + +nodes <- data.frame(id = g.mvpa@graph@nodes, label = g.mvpa@graph@nodes) +nodes <- rbind(nodes[1:3,], nodes[6:12,], nodes[13:18,], nodes[4:5,], nodes[19:54,]) +nodes$id[which(nodes$id == "income")] <- "income.0" +nodes$id[which(nodes$id == "isced")] <- "isced.0" + +nodes$group <- c(rep("background",3), rep("early life",7), + rep("baseline", 14), rep("FU1",14), rep("FU2",16)) +nodes$level <- c(rep(1,3), rep(2,7), rep(3,14), rep(4,14), rep(5,16)) +nodes$color.background <- farbpalette +nodes$color.border <- farbpalette +nodes$color.highlight.background <- rep("#C62F4B",54) +nodes$color.highlight.border <- rep("#C62F4B",54) +row.names(nodes) <- 1:54 +nodes$label <- c("Sex", "Region", "Migrant", "Mother's age\nat birth", "Total\nbreastfeeding", + "Birthweight", "Weeks of\npregnancy", "Formula milk", "HH diet", + "Smoking\nduring pregnancy", "Age", "School", "AVM", + "zBMI", "Mother's BMI", "Daily\nfamily meals", "Income", "ISCED", "MVPA", + "Sedentary", "Sleep", "Well-being", "YHEI", "HOMA", "Age", "School", "AVM", + "zBMI", "Mother's BMI", "Daily\nfamily meals", "Income", "ISCED","MVPA", + "Sedentary", "Sleep", "Well-being", "YHEI", "HOMA", "Age", "AVM", + "zBMI", "Mother's BMI", "Alcohol", "Daily\nfamily meals", "Income", "ISCED", + "MVPA", "Puberty", "Sedentary","Sleep", "Smoking", "Well-being", "YHEI", "HOMA") + + + +# legend nodes +legendNodes <- data.frame( + label = c("Context","Early life", "B", "FU1", "FU2"), + shape = c("square", "square", "dot", "dot", "dot"), + color.background = c(farbpalette[1], farbpalette[4], farbpalette[11], farbpalette[25], farbpalette[39]), + color.border = c(farbpalette[1], farbpalette[4], farbpalette[11], farbpalette[25], farbpalette[39]) +) + + + + +# --- vis network --- +vis.mvpa <- + visNetwork(nodes, edges, width = "100%", height = "800px") %>% + visLayout(randomSeed = 96) %>% + visIgraphLayout(layout = "layout_on_grid") %>% #layout = "layout_on_grid", "layout_with_sugiyama + # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>% + visNodes( + shape = "dot", + label = nodes$label, + group = nodes$group, + font = list(size = 32)) %>% + visEdges( + shadow = FALSE, + arrows = "to", + color = list(color = "#466286", highlight = "#C62F4B"), + width = 2, + selectionWidth = 3 + ) %>% + visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) +vis.mvpa + +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.mvpa, file = "vis-mvpa.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + +nodes.mvpa <- nodes +edges.mvpa <- edges +legendNodes.mvpa <- legendNodes + + + + + + # ------------ Test-wise deletion ---------------------------------------------------- -load("data/graph-twd.RData") +#load("data_not_load/09_twd-pa.RData") +load("data_not_load/09_twd-pa-220718.RData") g <- pc2igraph(g.tw)[[1]] @@ -141,6 +246,12 @@ for(i in 1:(nrow(edges))){ isele <- which(is.element(tmp2, tmp)) edges[isele,] edges$arrows.to.enabled[isele] <- FALSE + +# delete doppelte Kanten OLD:c(32,61,63,80,94,95,98,102,123,136,142,148,150,153) +# edges <- edges[-c(32,61,63,80,94,95,98,102,123,136,142,148,150,153),] + +# 19.07.2022: Fehler bei mixCItwd bereinigt: +# delete doppelte Kanten OLD:c(98, 102, 121, 139. 141) edges <- edges[-c(98, 102, 121, 139, 141),] @@ -206,6 +317,11 @@ vis.twd <- ) %>% visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.twd + +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.twd, file = "vis-twd.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + nodes.twd <- nodes edges.twd <- edges @@ -215,7 +331,7 @@ edges.twd <- edges # ------------ SEM ------------------------------------------------------------- -load("data/graph-sem-hc_bic-cg.RData") +load("data_not_load/10_sem-hc_bic-cg.RData") g.sem <- bnlearn::as.igraph(sem) g.sem <- feature.igraph(g.sem, labels = V(g.sem)$name) @@ -241,6 +357,11 @@ for(i in 1:(nrow(edges))){ isele <- which(is.element(tmp2, tmp)) edges[isele,] +# edges$arrows.to.enabled[isele] <- FALSE + +# delete doppelte Kanten: keine doppelte Kanten + + # --- prepre nodes --- farbpalette <- c(rep("#afa8b3",3), rep("#4F3824",7), @@ -304,6 +425,12 @@ vis.sem <- ) %>% visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.sem + +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.sem, file = "vis-sem.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + + nodes.sem <- nodes edges.sem <- edges @@ -313,10 +440,12 @@ edges.sem <- edges + # ------------ Bootstrap ---------------------------------------------------- # # ------------------------------------------------------------------------------- -load("data/boot100igraph.RData") +#load("data_not_load/07_boot-pa.RData") +load("data_not_load/07_boot100igraph.RData") # verwende alte Knotennamen V(g.boot)$name <- @@ -409,6 +538,11 @@ vis.boot.75 <- visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.boot.75 +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.boot.75, file = "vis-boot_75perc.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + + # --- vis network: Bootstrap 60% --- @@ -425,11 +559,16 @@ isele <- which(is.element(tmp2, tmp)) edges[isele,] edges$arrows.to.enabled[isele] <- FALSE +# delete doppelte Kanten: +# edges <- edges[-c(15, 60, 73, 74, 88, 99, 123, 124, 128),] + vis.boot.60 <- visNetwork(nodes, edges[edges$weight >= 60,], width = "100%", height = "800px") %>% visLayout(randomSeed = 96) %>% + # visOptions(highlightNearest = list(enabled = T, degree = 2, hover = T)) visIgraphLayout(layout = "layout_on_grid") %>% #layout = "layout_on_grid", "layout_with_sugiyama + # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>% visNodes( shape = "dot", label = nodes$label, @@ -445,6 +584,13 @@ vis.boot.60 <- visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.boot.60 +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.boot.60, file = "vis-boot-60perc.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") +# + + + # --- vis network: Bootstrap 44% --- edges <- edges.boot[edges.boot$weight > 44,] @@ -464,7 +610,9 @@ edges$arrows.to.enabled[isele] <- FALSE vis.boot.44 <- visNetwork(nodes, edges[edges$weight >= 44,], width = "100%", height = "800px") %>% visLayout(randomSeed = 756) %>% + # visOptions(highlightNearest = list(enabled = T, degree = 2, hover = T)) visIgraphLayout(layout = "layout_on_grid") %>% #layout = "layout_on_grid", "layout_with_sugiyama + # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>% visNodes( shape = "dot", label = nodes$label, @@ -480,6 +628,12 @@ vis.boot.44 <- visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.boot.44 +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.boot.44, file = "vis-boot_44perc.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + + + @@ -523,11 +677,16 @@ vis.boot.35 <- selectionWidth = 3 ) %>% visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) +vis.boot.35 + +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.boot.35, file = "vis-boot-35perc.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") # ------------ Main graph, alpha = .10 -------------------------------------- -load("data/graph-10trees-alpha.RData") +load("data_not_load/06sens_graph-10trees-pa-alpha.RData") g <- pc2igraph(g.pa.alpha)[[1]] graph.descriptives(g) @@ -620,6 +779,10 @@ vis.pa.alpha <- visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE) vis.pa.alpha +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results") +# visSave(vis.pa.alpha, file = "vis-pa-alpha.html") +# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph") + @@ -627,19 +790,27 @@ vis.pa.alpha save(nodes.pa, nodes.mvpa, nodes.sem, nodes.boot, nodes.twd, nodes.pa.alpha, edges.pa, edges.mvpa, edges.twd, edges.sem, edges.boot, edges.pa.alpha, legendNodes, legendNodes.mvpa, - file = "data/vis_pa.RData") + file = "data_not_load/vis_pa.RData") -visSave(vis.pa, file = "vis-pa.html") -visSave(vis.pa.alpha, file = "vis-pa-alpha.html") -visSave(vis.twd, file = "vis-twd.html") -visSave(vis.sem, file = "vis-sem.html") -visSave(vis.boot.75, file = "vis-boot_75perc.html") -visSave(vis.boot.60, file = "vis-boot-60perc.html") -visSave(vis.boot.44, file = "vis-boot-44perc.html") +#setwd("C:/Users/foraita/Documents/BIPS/software/cohort_causal_graph/results") +visSave(vis.pa, file = "results/vis-pa.html") +visSave(vis.mvpa, file = "results/vis-mvpa.html") +visSave(vis.twd, file = "results/vis-twd.html") +visSave(vis.sem, file = "results/vis-sem.html") +visSave(vis.sem, file = "results/vis-pa-alpha.html") +visSave(vis.boot.75, file = "results/vis-boot_75perc.html") +visSave(vis.boot.60, file = "results/vis-boot-60perc.html") +visSave(vis.boot.35, file = "results/vis-boot-35perc.html") +visSave(vis.boot.44, file = "results/vis-boot-44perc.html") +#setwd("C:/Users/foraita/Documents/BIPS/software/cohort_causal_graph/") +# setEPS() +# postscript("results/vispa.eps") +# vis.pa +# dev.off()