From 47716637a12923e988073d6fbcb8d4abf657d936 Mon Sep 17 00:00:00 2001
From: rforaita <foraita@bips.uni-bremen.de>
Date: Wed, 13 Mar 2024 15:43:31 +0100
Subject: [PATCH] Submisstion Scientific Reports

---
 R/structural.em.rf.R                        |  72 +++
 analysis/03_Imputation.R                    |  39 +-
 analysis/07d_Boot2igrpah.R                  |  18 +-
 analysis/07e_Bootstrap_RMSEU.R              | 101 ++++
 analysis/10_Paths.R                         | 205 +++++++
 analysis/10_RMSEU.R                         |  43 --
 analysis/Paths_for_all_modifiable_factors.R | 327 +++++++++++
 analysis/Table1.R                           | 150 +++++
 analysis/Table1S.R                          | 155 +++++
 analysis/Table4.R                           |  71 +++
 analysis/TableS_networkcharacteristic.R     | 600 ++++++++++++++++++++
 analysis/Table_edgefreq.R                   |  96 ++++
 analysis/VisNet.R                           | 215 ++++++-
 13 files changed, 2013 insertions(+), 79 deletions(-)
 create mode 100644 R/structural.em.rf.R
 create mode 100644 analysis/07e_Bootstrap_RMSEU.R
 create mode 100644 analysis/10_Paths.R
 delete mode 100644 analysis/10_RMSEU.R
 create mode 100644 analysis/Paths_for_all_modifiable_factors.R
 create mode 100644 analysis/Table1.R
 create mode 100644 analysis/Table1S.R
 create mode 100644 analysis/Table4.R
 create mode 100644 analysis/TableS_networkcharacteristic.R
 create mode 100644 analysis/Table_edgefreq.R

diff --git a/R/structural.em.rf.R b/R/structural.em.rf.R
new file mode 100644
index 0000000..91146d4
--- /dev/null
+++ b/R/structural.em.rf.R
@@ -0,0 +1,72 @@
+#' Structure learning from missing data
+#'
+#' The \code{bnlear::\link[bnlearn]{structural.em}} function was adapted
+#' to our data since it could not impute some missing values for bmi_m.2
+#'
+#' @return
+#' @export
+#'
+
+structural.em.rf <- function (x, maximize = "hc", maximize.args = list(), fit = "mle",
+          fit.args = list(), impute, impute.args = list(), return.all = FALSE,
+          start = NULL, max.iter = 5, debug = FALSE)
+{
+  ntests = 0
+  data.info = bnlearn:::check.data(x, allow.levels = TRUE, allow.missing = TRUE,
+                         warn.if.no.missing = TRUE, stop.if.all.missing = !is(start,
+                                                                              "bn.fit"))
+  max.iter = bnlearn:::check.max.iter(max.iter)
+  bnlearn:::check.logical(debug)
+  bnlearn:::check.logical(return.all)
+  bnlearn:::check.learning.algorithm(algorithm = maximize, class = "score")
+  critical.arguments = c("x", "heuristic", "start", "debug")
+  bnlearn:::check.unused.args(intersect(critical.arguments, names(maximize.args)),
+                    character(0))
+  maximize.args[critical.arguments] = list(x = NULL, heuristic = maximize,
+                                           start = NULL, debug = debug)
+  bnlearn:::check.fitting.method(method = fit, data = x)
+  fit.args = bnlearn:::check.fitting.args(method = fit, network = NULL,
+                                data = x, extra.args = fit.args)
+  impute = bnlearn:::check.imputation.method(impute, x)
+  impute.args = bnlearn:::check.imputation.extra.args(impute, impute.args)
+
+  dag = empty.graph(nodes = names(x))
+  fitted = bnlearn:::bn.fit.backend(dag, data = x, method = fit,
+                            extra.args = fit.args, data.info = data.info)
+
+  data.info$complete.nodes[names(x)] = TRUE
+  for (i in seq(max.iter)) {
+    print(i)
+    complete = bnlearn:::impute.backend(fitted = fitted, data = x,
+                              method = impute, extra.args = impute.args, debug = debug)
+
+# new: start: BMI Mother at FU2 could not be estimated. We imputed here the mean
+#            value. The number of missing values were < 10.
+    if(any(is.na(complete))){
+      ml <- which(is.na(complete$bmi_m.2))
+      complete$bmi_m.2[ml] <- mean(complete$bmi_m.2, na.rm = TRUE)
+    }
+# end
+    maximize.args$x = complete
+    maximize.args$start = dag
+    dag = do.call(bnlearn:::greedy.search, maximize.args)
+    fitted.new = bnlearn:::bn.fit.backend(dag, data = complete, method = fit,
+                                extra.args = fit.args, data.info = data.info)
+
+    ntests = ntests + dag$learning$ntests
+    if (isTRUE(all.equal(fitted, fitted.new)))
+      break
+    else fitted = fitted.new
+  }
+  dag$learning$algo = "sem"
+  dag$learning$maximize = maximize
+  dag$learning$impute = impute
+  dag$learning$fit = fit
+  dag$learning$ntests = ntests
+  if (return.all)
+    invisible(list(dag = dag, imputed = complete, fitted = fitted))
+  else invisible(dag)
+}
+
+
+
diff --git a/analysis/03_Imputation.R b/analysis/03_Imputation.R
index 3b562d6..b9ac6c9 100644
--- a/analysis/03_Imputation.R
+++ b/analysis/03_Imputation.R
@@ -8,15 +8,19 @@
 # Purpose: Data imputation
 #
 # ------------------------------------------------------------------------------
-
-analysis_data <- readRDS("data/data2impute.rds")
-
+# --- was bisher geschah: ------------------------------------------------------
+# source("data/01_DataPreparation.R")
+# source("data/02_CreateAnalysisData.R)
+analysis_data <- readRDS("data_not_load/02_data2impute.RDS")
+# ------------------------------------------------------------------------------
 
 # ---------------------   Missing Value Imputation   ---------------------------
 # 1) Patterns & dump variables (such as constants, id_no, etc.)
 outlist1 <- c("id_no", "ifam_no", "family_id")
+#pattern <- md.pattern(analysis_data)
+#saveRDS(pattern, "data/missingness-pattern.RDS")
 ini <- mice(dplyr::select(analysis_data, -all_of(outlist1)), meth = 'rf', maxit = 0)
-table(ini$nmis)
+#table(ini$nmis)
 names(ini$nmis[ini$nmis == 0])
 names(ini$nmis[ini$nmis > nrow(analysis_data)/2])
 
@@ -39,21 +43,34 @@ outlist <- sort(c(outlist1, outlist2, fxout, fxout2, "drink_9_t3.1"))
 tmp <- analysis_data[, !names(analysis_data) %in% outlist3]
 
 fx3 <- flux(tmp)
-# plot(fx3$outflux, fx3$influx)
+plot(fx3$outflux, fx3$influx)
+
+
+
+# 4) Remove variables & quickpred
+# quickpred is not used for graphical model imputation
+# new.data <- tmp[, !names(tmp) %in% outlist3]
+saveRDS(outlist, file = "data_not_load/outlist.RDS")
+
+
+# 2) Check methods for imputation
+#make.method(new.data)
 
-# 4) Imputation (1 ca. 10 min, 50 ca. 30 sec)
+
+
+# 6) Imputation (1 ca. 10 min, 50 ca. 30 sec)
 #system.time(
 imp <- mice(new.data, m = 10, meth = 'rf', seed = 16873, print = TRUE)
 #)
 
+# --- SAVE! --------------------------------------------------------------------
+saveRDS(imp, "data/03_MI-10.RDS")
+saveRDS(new.data, "data_not_load/03_MI-input.RDS")
+# ------------------------------------------------------------------------------
+
 # 7) Check logged events
 as.character(imp$loggedEvents[, "out"])
 
 
-# --- SAVE! --------------------------------------------------------------------
-saveRDS(imp, "data/MI-10.rds")
-saveRDS(new.data, "data_not_load/MI-input.rds")
-# ------------------------------------------------------------------------------
-
 
 
diff --git a/analysis/07d_Boot2igrpah.R b/analysis/07d_Boot2igrpah.R
index 8678cd2..da94731 100644
--- a/analysis/07d_Boot2igrpah.R
+++ b/analysis/07d_Boot2igrpah.R
@@ -8,8 +8,8 @@
 # Purpose: Bootstrap zu einem Graphen basteln
 #
 # ------------------------------------------------------------------------------
-boot <- readRDS("data/boot100_mi1-graph.RDS")
-load("data/graph-10trees.RData")
+boot <- readRDS("data_not_load/boot100_mi1-graph.RDS")
+load("data_not_load/06_graph-10trees-pa.RData")
 
 
 # g.pa to igraph
@@ -60,9 +60,21 @@ V(g.boot)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birt
 edges <- as.data.frame(as_edgelist(g.boot))
 names(edges) <- c("from", "to")
 edges$weight <- E(g.boot)$weight
+# merge two data frames
+edges.joint <- left_join(edges.pa, edges, by = c("from", "to"))
+
+
+
+
+# edges with more than 90%
+e90 <- edges[which(edges$weight >= 90),]
+e90[order(e90$weight, decreasing = TRUE),]
+
+
 
 # ---! SAVE   -------------------------------------------------------
-save(g.boot, edges, file = "data/boot100igraph.RData")
+save(g.boot, edges, file = "data_not_load/07_boot100igraph.RData")
+# -------------------------------------------------------------------
 
 
 
diff --git a/analysis/07e_Bootstrap_RMSEU.R b/analysis/07e_Bootstrap_RMSEU.R
new file mode 100644
index 0000000..92e9da6
--- /dev/null
+++ b/analysis/07e_Bootstrap_RMSEU.R
@@ -0,0 +1,101 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: RMSEU
+#
+# ------------------------------------------------------------------------------
+#boot <- readRDS("data_not_load/boot.RDS")
+boot <- readRDS("data_not_load/boot100_mi1-graph.RDS")
+
+
+# make adjacency matrix of all pc-graphs
+amat <- lapply(boot, function(x){
+  tmp <- wgtMatrix(getGraph(x), transpose = FALSE)
+  wm2 <- (tmp + t(tmp))
+  # undirected edges get 0.5 (in sum they count as 1)
+  tmp[which(wm2 > 1)] <- 0.5
+  tmp
+})
+
+## average edges
+g.avg <- Reduce('+', amat)
+g.avg <- Reduce('+', amat) / length(amat)
+
+
+
+# sum up directed edges
+t1 <- t(g.avg)[lower.tri(t(g.avg))] # untere Hälfte
+t2 <- g.avg[lower.tri(g.avg)]       # obere Hälfte
+t3 <- apply(cbind(t1, t2), 1, sum)  # Summe
+
+# select_all_but_diag <- function(x){
+#   apply(cbind(t(x)[lower.tri(x, diag = F)], x[lower.tri(x, diag = F)]), 1, sum)
+# }
+# a <- select_all_but_diag(g.avg)
+
+
+
+
+t44 <- t50 <- t75 <- t3
+t44[t44 < 0.44] <- 0
+t50[t50 < 0.50] <- 0
+t75[t75 < 0.75] <- 0
+(rmseu0  <-  gum(t3, vertices = nrow(g.avg), threshold = 0.5))
+(rmseu44 <-  gum(t44, vertices = nrow(g.avg), threshold = 0.5))
+(rmseu50 <-  gum(t50, vertices = nrow(g.avg), threshold = 0.5))
+(rmseu75 <-  gum(t75, vertices = nrow(g.avg), threshold = 0.5))
+
+# compare - RMSEU = 0.183
+gum(rep(0.9085, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu
+1-0.9085
+# MEU 0.0443451
+gum(rep(0.005, times = choose(51,2)), vertices = 51, threshold = 0.5)$meu
+1-0.9085
+
+# compare - RMSEU = 0.246
+gum(rep(0.877, times = choose(51,2)), vertices = 51, threshold = 0.5)
+1-0.877
+# compare - MEE = 0.059
+gum(rep(0.0069, times = choose(51,2)), vertices = 51, threshold = 0.5)$mee
+1-0.0069
+
+
+# compare - 0.183 for edges that were at least selected once (here 570)
+gum(rep(0.9085, times = 570), vertices = 34, threshold = 0.5)$rmseu
+gum(rep(0.877, times = 570), vertices = 34, threshold = 0.5)$rmseu
+
+
+
+
+
+# compare - 0.267481
+gum(rep(0.8665, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu
+gum(rep(1-0.8665, times = choose(51,2)), vertices = 51, threshold = 0.5)$rmseu
+gum(matrix(rep(0.0936, times = 51*51), nrow = 51, ncol = 51), threshold = 0.5)$rmseu
+gum(matrix(rep(0.35, times = 51*51), nrow = 51, ncol = 51), threshold = 0.5)
+
+# summary(t3): mean = 0.09917, rmseu = 0.1048
+gum(rep(0.2457, times = choose(51,2)), 51, threshold = 0.75)$rmseu
+gum(rep(0.2457, times = choose(51,2)), 51, threshold = 0.25)$rmseu
+gum(rep(0.09917, times = choose(51,2)), 51, threshold = 0.05)$rmseu
+gum(rep(0.09917, times = choose(51,2)), 51, threshold = 0.05)$mee
+
+# --- save -----------------------------------------------
+save(rmseu0, rmseu44, rmseu50, rmseu75, file = "results/rmseu.RData")
+
+
+mymeu <- function(x){
+  0.0443451 - gum(rep(x, times = choose(51,2)), vertices = 51, threshold = 0.5)$meu
+}
+optim(seq(0.01,0.15, length = 100), mymeu)
+
+
+
+
+# uncertaintz shcie- 10 Knoten -> 45 kanten
+medges <- rep(0.5)
+gum(medges, vertices = 2, threshold = 0.05)$mee
diff --git a/analysis/10_Paths.R b/analysis/10_Paths.R
new file mode 100644
index 0000000..0b54442
--- /dev/null
+++ b/analysis/10_Paths.R
@@ -0,0 +1,205 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    FEB 2023
+#
+# Purpose: Pathway structures between repeated measurements
+#
+# ------------------------------------------------------------------------------
+library(gt)
+
+load("data_not_load/06_graph-10trees-pa.RData")
+
+g <- pc2igraph(g.pa)
+g <- feature.igraph(g[[1]], labels = V(g[[1]])$name)
+
+V(g)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birth",
+                "Breastfeeding", "Birthweight", "Weeks_of_pregnancy", "Formula_milk",
+                "HH_Diet", "Smoking_pregnancy",
+                "Age", "School", "AVM", "BMI", "Mothers_BMI", "Familymeal", "PA",
+                "Sleep", "Well-being", "YHEI", "HOMA",
+                "Age.FU1", "School.FU1", "AVM.FU1", "BMI.FU1", "Mothers_BMI.FU1",
+                "Familymeal.FU1", "Income.FU1", "ISCED.FU1","PA.FU1",
+                "Sleep.FU1", "Well-being.FU1", "YHEI.FU1", "HOMA.FU1",
+                "Age.FU2", "AVM.FU2", "BMI.FU2", "Mothers_BMI.FU2", "Alcohol.FU2",
+                "Familymeal.FU2", "Income.FU2", "ISCED.FU2","PA.FU2", "Puberty.FU2",
+                "Sleep.FU2", "Smoking.FU2", "Well-being.FU2", "YHEI.FU2", "HOMA.FU2")
+
+
+
+### Shortest and simple path outgoing from exposure variables at baseline ------
+# Distanz: number of hops
+search.for.causes <- function(g, variables, mode, minus = NULL){
+  require(gt)
+  distanz <- igraph::distances(g, mode = mode)
+
+  dist.tab <- t(distanz[which(names(V(g)) %in% variables), ])
+  table.dist <- data.frame(nodes = rownames(dist.tab), dist.tab) %>%
+                 gt(rowname_col = "nodes")
+  if(!is.null(minus)){
+    table.dist$'_data' <- table.dist$'_data'[-minus,]
+  }
+  table.dist
+}
+
+bmivariables <- c("BMI", "BMI.FU1", "BMI.FU2")
+exposurevariables <- c("AVM","PA","Sleep","Well-being","YHEI")
+
+table.dist.bmi <- search.for.causes(g, bmivariables, mode = "in")
+table.dist.exp <- search.for.causes(g, exposurevariables, mode = "out")#, minus = c(1:12))
+
+### save table ---------------------------------------------------------------
+table.dist.bmi %>% gtsave("table-dist-bmi.rtf", path = "results/")
+table.dist.exp %>% gtsave("table-dist.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------
+whichexposure <- which(names(V(g)) %in% exposurevariables)
+makepfade <- function(g, exposure, mode = "out"){
+    # shortest paths
+  # browser()
+    ash <- all_shortest_paths(g, from = V(g)[exposure], mode = mode)[[1]]
+    # simpliest paths
+    asp <- all_simple_paths(g, from = V(g)[exposure], mode = mode)
+    sp <- which(asp %in% ash)
+    asp <- lapply(asp, function(x) names(V(g))[as.numeric(x)])
+    asp <- lapply(asp, function(x) toString(x))
+    if(length(asp) > 0){
+     data.frame(SP = c(1:length(asp)) %in% sp, pfad = do.call(c,asp)) %>% gt
+    } else {
+      cat("no simple path found")
+    }
+}
+avm   <- makepfade(g, whichexposure[1])
+pa    <- makepfade(g, whichexposure[2])
+sleep <- makepfade(g, whichexposure[3])
+wb    <- makepfade(g, whichexposure[4])
+yhei  <- makepfade(g, whichexposure[5])
+age   <- all_shortest_paths(g, from = "Age.FU1", to = "BMI.FU2")
+
+table.sp <- rbind(avm$`_data`, pa$`_data`, sleep$`_data`,
+                  wb$`_data`, yhei$`_data`) %>% gt
+
+
+### save table ---------------------------------------------------------------
+table.sp %>% gtsave("table-sp.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+
+### Shortest and simple path incoming to BMI.FU2 and WB.FU2 -------------------
+exposurevariables <- c("BMI.FU2", "Well-being.FU2")
+whichout <- which(names(V(g)) %in% exposurevariables)
+bmi <- makepfade(g, whichout[1], mode = "in")
+wb  <- makepfade(g, whichout[2], mode = "in")
+
+table.sp.out <- rbind(bmi$`_data`, wb$`_data`) %>% gt
+
+### save table ---------------------------------------------------------------
+table.sp.out %>% gtsave("table-sp-out.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+### Shortest and simple path from cultural, social and familial variables -----
+exposurevariables <- c("Country", "Migrant", "Income", "ISCED", "Age_at_birth", "Smoking_pregnancy")
+whichbasic <- which(names(V(g)) %in% exposurevariables)
+country <- makepfade(g, whichbasic[1]) #1593
+migrant <- makepfade(g, whichbasic[2]) #749
+income  <- makepfade(g, whichbasic[3]) #527
+isced   <- makepfade(g, whichbasic[4]) #527
+age     <- makepfade(g, whichbasic[5]) #287
+smok    <- makepfade(g, whichbasic[6]) #527
+
+table.sp.basic <- rbind(country$`_data`, migrant$`_data`, age$`_data`,
+                        income$`_data`, isced$`_data`) %>% gt
+
+### save table ---------------------------------------------------------------
+table.sp.basic %>% gtsave("table-sp-basic.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+distanz <- igraph::distances(g, mode = "out")
+dist.tab <- t(distanz[whichbasic,])
+table.dist <- data.frame(nodes = rownames(dist.tab), dist.tab) %>%
+  gt(rowname_col = "nodes")
+
+
+
+
+
+
+
+## Find possible ascendents of BMI  ------------------------------------------
+which(names(V(g)) %in% bmivariables)
+amat <- as_adj(g, sparse = FALSE)
+isValidGraph(t(amat), type = "cpdag", verbose = TRUE)
+
+## possible ancestors of BMI.FU2
+vorfahren2 <- pcalg::possAn(m = t(amat), x = 39, type = "cpdag")
+vf2 <- rownames(amat)[setdiff(vorfahren2,39)]
+nvf2 <- length(vf2)
+vf2 <- data.frame(vorfahren = setdiff(vorfahren2,39), vf = vf2)
+
+## possible ancestors of BMI.FU1
+vorfahren1 <- pcalg::possAn(m = t(amat), x = 27, type = "cpdag")
+vf1 <- rownames(amat)[setdiff(vorfahren1, 27)]
+vf1 <- data.frame(vorfahren = setdiff(vorfahren1, 27),
+                  vf = vf1)
+
+## possible ancestors of BMI
+vorfahren <- pcalg::possAn(m = t(amat), x = 16, type = "cpdag")
+vf <- rownames(amat)[setdiff(vorfahren,16)]
+vf <- data.frame(vf = vf,
+                 vorfahren = setdiff(vorfahren,16))
+
+
+a <- full_join(vf2, vf1, by = "vorfahren", suffix = c("2","1"))
+posAncestors <- full_join(a,vf, by = "vorfahren")[,c(4:2)]
+
+### save table ---------------------------------------------------------------
+posAncestors %>% gt() %>% gtsave("table-posAn-bmi-pag.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+## Find possible descendents of exposures  -------------------------------------
+which(names(V(g)) %in% exposurevariables)
+
+## AVM
+des_avm <- pcalg::possDe(m = t(amat), x = 15, type = "cpdag")
+des_avm <- data.frame(avm = rownames(amat)[setdiff(des_avm,15)],
+                      nr = setdiff(des_avm,15))
+
+## Physical activity
+des_pa <- pcalg::possDe(m = t(amat), x = 19, type = "cpdag")
+des_pa <- data.frame(pa = rownames(amat)[setdiff(des_pa,19)],
+                      nr = setdiff(des_pa,19))
+
+## Sleep
+des_sl <- pcalg::possDe(m = t(amat), x = 20, type = "cpdag")
+des_sl <- data.frame(sl = rownames(amat)[setdiff(des_sl, 20)],
+                     nr = setdiff(des_sl, 20))
+
+## Well-being
+des_wb <- pcalg::possDe(m = t(amat), x = 21, type = "cpdag")
+des_wb <- data.frame(wb = rownames(amat)[setdiff(des_wb, 21)],
+                     nr = setdiff(des_wb, 21))
+
+##YHEI
+des_yh <- pcalg::possDe(m = t(amat), x = 22, type = "cpdag")
+des_yh <- data.frame(yh = rownames(amat)[setdiff(des_yh, 22)],
+                     nr = setdiff(des_yh, 22))
+
+
+
+a1 <- full_join(des_avm, des_pa, by = "nr", suffix = c("avm","pa"))
+a2 <- full_join(des_sl, des_wb, by = "nr", suffix = c("sl","wb"))
+a3 <- full_join(a1, a2, by = "nr")
+# a4 <- full_join(a3, des_yh, by = "nr")
+posDescen <- full_join(a3, des_yh, by = "nr")[, -c(2)]
+
+
+### save table ---------------------------------------------------------------
+posDescen %>% gt() %>% gtsave("table-posDes-exposure.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
diff --git a/analysis/10_RMSEU.R b/analysis/10_RMSEU.R
deleted file mode 100644
index eaf41c6..0000000
--- a/analysis/10_RMSEU.R
+++ /dev/null
@@ -1,43 +0,0 @@
-# ------------------------------------------------------------------------------
-#
-# Project: Cohort Causal Graph
-#
-# Author:  R. Foraita
-#
-# Purpose: RMSEU
-#
-# ------------------------------------------------------------------------------
-boot <- readRDS("data/boot100_mi1-graph.rds")
-
-
-### make adjacency matrix of all pc-graphs
-amat <- lapply(boot, function(x){
-  tmp <- wgtMatrix(getGraph(x), transpose = FALSE)
-  wm2 <- (tmp + t(tmp))
-  # undirected edges get 0.5 (in sum they count as 1)
-  tmp[which(wm2 > 1)] <- 0.5
-  tmp
-})
-
-### average edges
-g.avg <- Reduce('+', amat)
-g.avg <- Reduce('+', amat) / length(amat)
-
-
-
-### sum up directed edges
-t1 <- t(g.avg)[lower.tri(t(g.avg))] 
-t2 <- g.avg[lower.tri(g.avg)]       
-t3 <- apply(cbind(t1, t2), 1, sum)  
-
-
-t44 <- t75 <- t3
-t44[t44 < 0.44] <- 0
-t75[t75 < 0.75] <- 0
-(rmseu0  <-  gum(t3, vertices = nrow(g.avg), threshold = 0.5))
-(rmseu44 <-  gum(t44, vertices = nrow(g.avg), threshold = 0.5))
-(rmseu75 <-  gum(t75, vertices = nrow(g.avg), threshold = 0.5))
-
-
-# --- save -----------------------------------------------
-save(rmseu0, rmseu44, rmseu75, file = "data/rmseu.RData")
\ No newline at end of file
diff --git a/analysis/Paths_for_all_modifiable_factors.R b/analysis/Paths_for_all_modifiable_factors.R
new file mode 100644
index 0000000..6bb0b60
--- /dev/null
+++ b/analysis/Paths_for_all_modifiable_factors.R
@@ -0,0 +1,327 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    Apr 2023
+#
+# Purpose: Create Pathways
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+
+# load data
+boot <- readRDS("data_not_load/boot100_mi1-graph.RDS")
+load("data_not_load/06_graph-10trees-pa.RData")
+
+delete_undirected_edges <- function(adm){
+  x <- cbind(as.vector(adm), as.vector(t(adm)))
+  y <- apply(x, 1, sum)
+  print(table(y))
+  u <- which(y == 4)
+  print(paste0("Number of undirected edges: ", length(u)))
+
+  adm[u] <- 0
+  adm
+}
+
+# Data management ------------------------------------------------------------------------
+g1 <- pc2igraph(g.pa)[[1]]
+g1 <- feature.igraph(g1, labels = V(g1)$name)
+gb <- lapply(boot, function(x){
+             tmp <- pc2igraph(x)[[1]]
+             feature.igraph(tmp, labels = V(tmp)$name)})
+knoten <- g.pa@graph@nodes
+
+
+# 0: no edge
+# 1: directed edge
+# 2: undirected edge -> set to 0
+g1.undirected <-  wgtMatrix(getGraph(g.pa), transpose = FALSE)
+tmp <- delete_undirected_edges(g1.undirected)
+g1.undirected <- graph_from_adjacency_matrix(tmp)
+
+# # only for test purposes
+# g1.el <- as_edgelist(g1)
+# g1.el <- data.frame(A = paste(g1.el[,1], g1.el[,2], sep="-"))
+# g1.un.el <- as_edgelist(g1.undirected)
+# g1.un.el <- data.frame(A = paste(g1.un.el[,1], g1.un.el[,2], sep="-"), B = 1)
+# gg <- merge(g1.el, g1.un.el, by = "A", all = TRUE)
+# gg[is.na(gg$B),]
+
+# boot[[100]]
+# # 0     1      2   4
+# # 2335  250    4   12
+# table(wgtMatrix(getGraph(boot[[100]]), transpose = FALSE))
+# as_edgelist(gb.undirected[[100]])  #125+4
+
+gb.undirected <- lapply(boot, function(x){
+                    graph_from_adjacency_matrix(
+                      delete_undirected_edges(
+                        wgtMatrix(getGraph(x), transpose = FALSE)
+                        )
+                      )
+                   })
+
+# # Average matrix without undirected edges
+#
+# boot.adm <- lapply(boot, function(x){
+#                     wgtMatrix(getGraph(x),transpose = FALSE)})
+# boot.adm.dir <- boot.adm <- Reduce('+', boot.adm)
+# tt <- t(boot.adm)
+# boot.adm.dir[upper.tri(boot.adm.dir)] <-
+#      boot.adm.dir[upper.tri(boot.adm.dir)] > tt[upper.tri(boot.adm.dir)]
+# boot.adm.dir[lower.tri(boot.adm.dir)] <-
+#      boot.adm.dir[lower.tri(boot.adm.dir)] > tt[lower.tri(boot.adm.dir)]
+# boot.adm.dir[15:25, 15:25]
+# g.adm <- graph_from_adjacency_matrix(boot.adm.dir)
+# a <- as.data.frame(get.edgelist(g.adm))
+# filter(a, V1 %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2") &
+#           V2 %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2"))
+#
+# plot(induced_subgraph(g.adm, vids = which(knoten %in% c("media.0", "media.1", "wb.1", "homa.1", "homa.2", "bmi.2"))))
+
+
+
+
+# set.seed(6514)
+# mat <- list(matrix(sample(1:20, 16), ncol = 4, nrow = 4),
+#               matrix(sample(1:20, 16), ncol = 4, nrow = 4),
+#               matrix(sample(1:20, 16), ncol = 4, nrow = 4))
+# mat.old <- mat <- Reduce('+', mat)
+# diag(mat.old) <- diag(mat) <- 0
+# tt <- t(mat)
+# mat[upper.tri(mat)] <-
+#   mat[upper.tri(mat)] > tt[upper.tri(mat)]
+# mat[lower.tri(mat)] <-
+#   mat[lower.tri(mat)] > tt[lower.tri(mat)]
+
+
+# Original graph -------------------------------------------------------------------------
+makepfade <- function(graph, boot, FROM, TO){
+  if(class(graph) != "igraph") stop("object graph must be an igraph")
+  if(!is.list(boot)) stop("object boot must be a list")
+  if(any(sapply(boot, function(x) class(x) != "igraph"))) stop("all boot list elements must be igraph objects")
+
+  #if(class(graph) == "igraph"){
+     # ---   shortest paths  ---
+     sp <- unlist(shortest_paths(graph, from = FROM, to = TO, mode = "out")$vpath)
+     sp_chr <- paste(sp, collapse="-")
+
+     # ---   all simple paths  ---
+     ap <- all_simple_paths(graph, from = FROM, to = TO, mode = "out")
+     ap_chr <- unlist(map(ap, paste, collapse="-"))
+
+     gp <- list("sp" = sp, "sp_chr" = sp_chr, "ap" = ap, "ap_chr" = ap_chr)
+  # }
+  #
+  # if(is.list(graph)){
+    # ---   shortest paths  ---
+    sp_b <- lapply(boot, function(x)
+            try(shortest_paths(x, from = FROM, to = TO, mode = "out")$vpath,
+            silent = TRUE))
+    sp_b_ul <- lapply(sp_b, unlist)
+    # delete graphs without a path
+    sp_b_short <- discard(sp_b_ul,
+                        function(x) length(x) == 0)
+
+    sp_b_chr <- map(sp_b_short, paste, collapse="-")
+
+    # ---   all simple paths  ---
+    # list of all simple path for all BGs (list length 100)
+    ap_b <- lapply(boot, function(x)
+              try(all_simple_paths(x, from = FROM, to = TO, mode = "out"),
+              silent = TRUE))
+    # list of all paths (no assignment to BG)
+    ap_b_short <- flatten(discard(ap_b, function(x) length(x) == 0))
+    ap_b_chr   <- map(ap_b_short, paste, collapse="-")
+    # vector of all paths (no assignment to BG)
+    ap_b_chr_v <- unlist(ap_b_chr)
+
+
+    bp <- list("sp_b" = sp_b, "sp_b_short" = sp_b_short, "sp_b_chr" = sp_b_chr,
+               "ap_b" = ap_b,  "ap_b_short" = ap_b_short, "ap_b_chr" = ap_b_chr,
+               "ap_b_chr_v" = ap_b_chr_v)
+  # }
+    list("ccg" = gp, "boot" = bp)
+
+}
+
+
+descr_paths <- function(liste){
+ browser()
+  out.mat <- matrix(NA, nrow = 12, ncol = 2)
+  row.names(out.mat) <- c("CCG: Shortest path",
+                          "CCG: Number of all paths",
+                          "Boot: Number of graphs with any path",
+                          "Boot: Number of (unique) paths",
+                          "Boot: Median (IQR) number of paths in each BG",
+                          "Boot: Median (IQR) path length",
+                          "How often is the CCG shortest path in any (shortest) bootstrap?",
+                          "","","","","")
+
+  # Shortest path in original path
+  rownames(out.mat)[1] <- paste("CCG-SP: ",
+                                paste0(
+                                  knoten[as.numeric(unlist(str_split(liste$ccg$sp_chr, "-")))],
+                                  collapse = " > "))
+  out.mat[1,1] <- 1
+
+  # Number of paths in original graph
+  out.mat[2,1] <- length(liste$ccg$ap_chr)
+
+  # Number of BG with any path
+  out.mat[3,1] <- length(liste$boot$sp_b_short)
+  # Number of Bootstrap paths
+  out.mat[4,1] <- length(liste$boot$ap_b_chr_v)
+  # Number of unique Bootstrap paths
+  out.mat[4,2] <- length(unique(liste$boot$ap_b_chr_v))
+  # Median (IQR) number of paths in each BG
+  out.mat[5,1] <- median(sapply(liste$boot$ap_b, length)[sapply(liste$boot$ap_b, length) != 0])
+  out.mat[5,2] <- IQR(sapply(liste$boot$ap_b, length)[sapply(liste$boot$ap_b, length) != 0])
+  # Median path length and IQR in Bootstrap
+  out.mat[6,1] <- median(sapply(liste$boot$ap_b_short, length))
+  out.mat[6,2] <- IQR(sapply(liste$boot$ap_b_short, length))
+
+  ### how often is the shortest path in the any bootstrap path?
+  tab_allpaths <- table(liste$boot$ap_b_chr_v)
+#  out.mat[7,1] <- as.vector(tab_allpaths[which(names(tab_allpaths) == liste$ccg$sp_chr)])
+  ### how often is the shortest path in the bootstrap shortest paths?
+  tab_spaths <-  table(unlist(liste$boot$sp_b_chr))
+#  out.mat[7,2] <- as.vector(tab_spaths[which(names(tab_spaths) == liste$ccg$sp_chr)])
+
+  ### which shortest path is the most frequent one?
+  pfadname <- which(tab_spaths == max(tab_spaths))
+  out.mat[8,1] <- as.vector(tab_spaths[pfadname])[1]
+  tmp <- paste("Boot max% ShoP: ",
+               paste0(
+                 knoten[as.numeric(unlist(str_split(names(pfadname), "-")))],
+                 collapse = " > "))
+  rownames(out.mat)[8] <- str_replace(tmp, "bmi.2 >", "bmi.2;")
+
+  ### which simple path is the most frequent one?
+  pfadname <- which(tab_allpaths == max(tab_allpaths))
+  out.mat[9,1] <- as.vector(tab_allpaths[pfadname])[1]
+  rownames(out.mat)[9] <- paste("Boot max% AnyP: ",
+                                paste0(
+                                knoten[as.numeric(unlist(str_split(names(pfadname), "-")))],
+                                collapse = " > "))
+
+  # 3  most visited nodes on any bootstrap graph
+  # browser()
+  freq_nodes <- sort(table(unlist(liste$boot$ap_b_short)), decreasing = TRUE)
+  freq_nodes <- freq_nodes[3:5]
+  names(freq_nodes) <- knoten[as.numeric(names(freq_nodes))]
+  perc_nodes <- round(freq_nodes * 100 / length(liste$boot$ap_b_chr_v),0)
+  out.mat[10,] <- c(freq_nodes[1], perc_nodes[1])
+  out.mat[11,] <- c(freq_nodes[2], perc_nodes[2])
+  out.mat[12,] <- c(freq_nodes[3], perc_nodes[3])
+  rownames(out.mat)[10:12] <- paste(names(freq_nodes), " N(%)")
+
+  out.mat
+}
+
+
+
+p_avm   <- makepfade(g1, gb, FROM = "media.0", TO = "bmi.2")
+p_avm_un   <- makepfade(g1, gb.undirected, FROM = "media.0", TO = "bmi.2")
+p_pa    <- makepfade(g1, gb, FROM = "pa.0", TO = "bmi.2")
+p_pa_un    <- makepfade(g1, gb.undirected, FROM = "pa.0", TO = "bmi.2")
+p_sleep <- makepfade(g1, gb, FROM = "sleep.0", TO = "bmi.2")
+p_sleep_un <- makepfade(g1, gb.undirected, FROM = "sleep.0", TO = "bmi.2")
+p_wb    <- makepfade(g1, gb, FROM = "wb.0", TO = "bmi.2")
+p_wb_un    <- makepfade(g1, gb.undirected, FROM = "wb.0", TO = "bmi.2")
+p_yhei  <- makepfade(g1, gb, FROM = "yhei.0", TO = "bmi.2")
+p_yhei_un  <- makepfade(g1, gb.undirected, FROM = "yhei.0", TO = "bmi.2")
+
+t_avm  <- descr_paths(p_avm)
+t_avm_un <- descr_paths(p_avm_un)
+t_pa   <- descr_paths(p_pa)
+t_pa_un   <- descr_paths(p_pa_un)
+t_sleep<- descr_paths(p_sleep)
+t_sleep_un<- descr_paths(p_sleep_un)
+t_wb   <- descr_paths(p_wb)
+t_wb_un   <- descr_paths(p_wb_un)
+t_yhei <- descr_paths(p_yhei)
+t_yhei_un <- descr_paths(p_yhei_un)
+
+# save(t_yhei, file = "data_not_load/oslo_pfad_yhei.RData")
+
+# RTF output -----------------------------------------------------------------------------
+data.frame(V0 = rownames(t_avm),
+           as.data.frame(t_avm, row.names = FALSE),
+           V1 = rownames(t_avm_un),
+           as.data.frame(t_avm_un, row.names = FALSE)) %>%
+  as_tibble() %>% gt() %>%
+  cols_label(V0 = md("**AVM**"),
+             V1 = md("**N**"),
+             V2 = md("**N/%**"),
+             V1.1 = md("**directed**"),
+             V1.2 = md("**N**"),
+             V2.1 = md("**N, %**")) %>%
+  sub_missing(columns = c(2,3,5,6), missing_text = "-") %>%
+  gtsave("paths_avm.rtf", path = "results/")
+
+data.frame(V0 = rownames(t_pa),
+           as.data.frame(t_pa, row.names = FALSE),
+           V1 = rownames(t_pa_un),
+           as.data.frame(t_pa_un, row.names = FALSE)) %>%
+  as_tibble() %>% gt() %>%
+  cols_label(V0 = md("**Physical Activity**"),
+             V1 = md("**N**"),
+             V2 = md("**N/%**"),
+             V1.1 = md("**directed**"),
+             V1.2 = md("**N**"),
+             V2.1 = md("**N, %**")) %>%
+  sub_missing(columns = c(2,3,5,6), missing_text = "-") %>%
+  gtsave("paths_pa.rtf", path = "results/")
+
+data.frame(V0 = rownames(t_sleep),
+           as.data.frame(t_sleep, row.names = FALSE),
+           V1 = rownames(t_sleep_un),
+           as.data.frame(t_sleep_un, row.names = FALSE)) %>%
+  as_tibble() %>% gt() %>%
+  cols_label(V0 = md("**Sleep**"),
+             V1 = md("**N**"),
+             V2 = md("**N/%**"),
+             V1.1 = md("**directed**"),
+             V1.2 = md("**N**"),
+             V2.1 = md("**N, %**")) %>%
+  sub_missing(columns = c(2,3,5,6), missing_text = "-") %>%
+  gtsave("paths_sleep.rtf", path = "results/")
+
+data.frame(V0 = rownames(t_wb),
+           as.data.frame(t_wb, row.names = FALSE),
+           V1 = rownames(t_wb_un),
+           as.data.frame(t_wb_un, row.names = FALSE)) %>%
+  as_tibble() %>% gt() %>%
+  cols_label(V0 = md("**Well-being**"),
+             V1 = md("**N**"),
+             V2 = md("**N/%**"),
+             V1.1 = md("**directed**"),
+             V1.2 = md("**N**"),
+             V2.1 = md("**N, %**")) %>%
+  sub_missing(columns = c(2,3,5,6), missing_text = "-") %>%
+  gtsave("paths_wb.rtf", path = "results/")
+
+data.frame(V0 = rownames(t_yhei),
+           as.data.frame(t_yhei, row.names = FALSE),
+           V1 = rownames(t_yhei_un),
+           as.data.frame(t_yhei_un, row.names = FALSE)) %>%
+  as_tibble() %>% gt() %>%
+  cols_label(V0 = md("**YHEI**"),
+             V1 = md("**N**"),
+             V2 = md("**N/%**"),
+             V1.1 = md("**directed**"),
+             V1.2 = md("**N**"),
+             V2.1 = md("**N, %**")) %>%
+  sub_missing(columns = c(2,3,5,6), missing_text = "-") %>%
+  gtsave("paths_yhei.rtf", path = "results/")
+
+
+# ----------------------------------------------------------------------------------------
+
+# how many edges have the BGs on average?
+my.pc.print(g.pa)
+apply(sapply(boot, my.pc.print), 1, mean)
diff --git a/analysis/Table1.R b/analysis/Table1.R
new file mode 100644
index 0000000..a325aaf
--- /dev/null
+++ b/analysis/Table1.R
@@ -0,0 +1,150 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: Create Table 1
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+
+load("data_not_load/06_graph-pa.RData")
+df <- daten.pa.mids$data
+
+
+df1 <- df %>% rename(
+         "sex.iv" = "sex",
+         "country.iv" = "country3",
+         "migrant.iv" = "migrant",
+         "income.0" = "income",
+         "isced.0" = "isced",
+         "bage.el" = "bage",
+         "bf.el" = "bf",
+         "bweight.el" = "bweight",
+         "preg_week.el" = "preg_week",
+         "preg_smoke.el" = "preg_smoke",
+         "formula.el" = "formula",
+         "hdiet.el" = "hdiet") %>%
+        mutate(id = 1:nrow(df))
+
+levels(df1$country.iv) <-  c("1","2","3")   # Central   North   South
+levels(df1$familymeal.0) <-  c("0","1")     # less at least once a day
+levels(df1$familymeal.1) <-  c("0","1")     # less at least once a day
+levels(df1$familymeal.2) <-  c("0","1")     # less at least once a day
+
+df1 <- df1  %>%  mutate_if(is.factor, ~ as.numeric(as.character(.x)))
+
+
+# Review: include BMI categories --------------------------
+analysis_data <- readRDS("data_not_load/02_data2impute.RDS")
+# idnr <- analysis_data$id_no
+# save(idnr, file = "data_not_load/review_id.rda")
+
+# prepare the following dataset on the CDS
+# C:\Users\foraita\Documents\Foraita\ccg\review
+load("data_not_load/bmi_cat.rda")
+
+# merge BMI categories to df
+ad_sub <- subset(analysis_data, select = c(id_no, country, sex, age_t0, bmi_t0))
+ad_sub <- merge(ad_sub, core_out,
+                by.x = c("id_no", "country", "sex", "age_t0", "bmi_t0"),
+                by.y = c("id_no", "country", "sex", "age.0", "bmi.0"),
+                all.x = TRUE, sort = FALSE)
+names(ad_sub)[c(4:5, 10:12)] <- c("age.0", "bmi.0", "age.2", "bmi.2", "bmicat.2")
+
+merkmal <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity")
+
+# ad_sub$bmicat.0 <- factor(ad_sub$bmicat.0, labels = merkmal)
+# ad_sub$bmicat.1 <- factor(ad_sub$bmicat.1, labels = merkmal)
+# ad_sub$bmicat.2 <- factor(ad_sub$bmicat.2, labels = merkmal)
+
+together <- data.frame(df1, subset(ad_sub,
+                                  select = c(bmicat.0, bmicat.1, bmicat.2)))
+
+# review end ----------------------------------------------
+
+
+
+
+df2 <- together %>% pivot_longer(cols = c(1:51, 53:55), #starts_with("age"),
+                            names_to = "group",
+                            values_to = "val")
+
+
+df3 <- df2 %>%
+       separate(group, c("variable","group"), sep="\\.") %>%
+       pivot_wider(names_from = variable,
+                   values_from = val) %>%
+       mutate(bf = exp(bf) - 1,
+              pa = exp(pa) - 1,
+              media = media / 7,
+              hdiet = exp(hdiet) - 1,
+              wb = wb / 48 * 100) %>%
+       mutate_at(c("group", "sex", "migrant", "country", "school", "income", "isced",
+                   "formula", "preg_smoke", "familymeal", "alc", "pub", "smoke", "bmicat"),
+                 ~ as.factor(.x)) %>%
+       relocate(c(id, group,
+                  country, sex, migrant, preg_week,  preg_smoke, bage,
+                  bweight, bf, formula, hdiet,
+                  age, school, bmi, bmicat, wb, media, pa, sleep, yhei, familymeal,
+                  homa, pub, alc, smoke, bmi_m, income, isced))
+
+levels(df3$group) <-  c("Baseline", "FU1", "FU2", "EL", "IV")
+levels(df3$sex) <- c("no", "yes")
+levels(df3$migrant) <- c("yes", "no")
+levels(df3$country) <- c("Central", "North", "South")
+levels(df3$school) <- c("Kindergarden", "School", "neither")
+levels(df3$familymeal) <- c("no", "yes")
+levels(df3$income) <- c("low", "middle", "high")
+levels(df3$isced) <- c("low", "middle", "high")
+levels(df3$formula) <- c("no", "yes")
+levels(df3$preg_smoke) <- c("never", "rarely", "several occasions a week", "daily")
+levels(df3$alc) <- c("no", "yes")
+levels(df3$smoke) <- c("no", "yes")
+levels(df3$pub) <- c("no", "yes")
+levels(df3$bmicat) <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity")
+
+table1 <- df3 %>% select(.,-c(id)) %>%
+  tbl_summary(
+        by = group,
+        statistic = list(all_continuous() ~ "{mean} ({sd})",
+                         all_categorical() ~ "{n} ({p}%)"),
+        digits = list(all_categorical() ~ c(0, 1)),
+        label = list(country ~ "Country",
+                     sex ~ "Female",
+                     migrant ~ "Migration background",
+                     bweight ~ "Birthweight [g]",
+                     bage ~ "Mother's age at birth",
+                     preg_smoke ~ "Tobacco smoking during pregnancy",
+                     formula ~ "Was fed with formula milk",
+                     bf ~ "Total breastfeeding [months]",
+                     preg_week ~ "Completed weeks of pregnancy",
+                     hdiet ~ "Fully integrated into household's diet [month]",
+                     age ~ "Age",
+                     bmi ~ "BMI z-score",
+                     bmicat ~ "BMI",
+                     bmi_m ~ "Mother's BMI",
+                     school ~ "School",
+                     isced ~ "ISCED",
+                     income ~ "Household's income",
+                     media ~ "Audiovisual media consumption [h/day]",
+                     familymeal ~ "Daily family meals",
+                     pa ~ "Physical activity [h/week]",
+                     sleep ~ "Total sleep [h/day]",
+                     wb ~ "Well-being [%]",
+                     yhei ~ "Youth healthy eating index [%]",
+                     homa ~ "Homa index z-score",
+                     alc ~ "Ever alcohol drinking",
+                     smoke ~ "Ever tobacco smoking",
+                     pub ~ "Pubertal"
+                     ),
+        missing_text = "Missing")
+table1
+### save table ---------------------------------------------------------------
+table1 %>% as_gt() %>% gtsave("table1_revision.rtf", path = "results/")
+
+
+
diff --git a/analysis/Table1S.R b/analysis/Table1S.R
new file mode 100644
index 0000000..8f58f42
--- /dev/null
+++ b/analysis/Table1S.R
@@ -0,0 +1,155 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: Create Table 1
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+
+load("data_not_load/06_graph-pa.RData")
+rm(fg.pa, ebenen.pa, V.pa)
+
+df <- mice::complete(daten.pa.mids, include = FALSE, action = "stacked")
+
+
+df1 <- df %>% rename(
+         "sex.iv" = "sex",
+         "country.iv" = "country3",
+         "migrant.iv" = "migrant",
+         "income.0" = "income",
+         "isced.0" = "isced",
+         "bage.el" = "bage",
+         "bf.el" = "bf",
+         "bweight.el" = "bweight",
+         "preg_week.el" = "preg_week",
+         "preg_smoke.el" = "preg_smoke",
+         "formula.el" = "formula",
+         "hdiet.el" = "hdiet") %>%
+        mutate(id = 1:nrow(df))
+
+levels(df1$country.iv) <-  c("1","2","3")   # Central   North   South
+levels(df1$familymeal.0) <-  c("0","1")     # less at least once a day
+levels(df1$familymeal.1) <-  c("0","1")     # less at least once a day
+levels(df1$familymeal.2) <-  c("0","1")     # less at least once a day
+
+df1 <- df1  %>%  mutate_if(., is.factor, ~ as.numeric(as.character(.x)))
+
+
+
+
+# Review: include BMI categories --------------------------
+analysis_data <- readRDS("data_not_load/02_data2impute.RDS")
+# idnr <- analysis_data$id_no
+# save(idnr, file = "data_not_load/review_id.rda")
+
+# prepare the following dataset on the CDS
+# C:\Users\foraita\Documents\Foraita\ccg\review
+load("data_not_load/bmi_cat.rda")
+
+# merge BMI categories to df
+ad_sub <- subset(analysis_data, select = c(id_no, country, sex, age_t0, bmi_t0))
+ad_sub <- merge(ad_sub, core_out,
+                by.x = c("id_no", "country", "sex", "age_t0", "bmi_t0"),
+                by.y = c("id_no", "country", "sex", "age.0", "bmi.0"),
+                all.x = TRUE, sort = FALSE)
+names(ad_sub)[c(4:5, 10:12)] <- c("age.0", "bmi.0", "age.2", "bmi.2", "bmicat.2")
+
+# bind the dataset 10 times
+ad_sub_mult <- data.frame(bmicat.0 = rep(ad_sub$bmicat.0, 10),
+                          bmicat.1 = rep(ad_sub$bmicat.1, 10),
+                          bmicat.2 = rep(ad_sub$bmicat.2, 10))
+
+together <- data.frame(df1, ad_sub_mult)
+
+# review end ----------------------------------------------
+
+
+
+
+df2 <- together %>% pivot_longer(cols = c(1:51, 53:55), #starts_with("age"),
+                            names_to = "group",
+                            values_to = "val")
+
+
+df3 <- df2 %>%
+       separate(group, c("variable","group"), sep="\\.") %>%
+       pivot_wider(names_from = variable,
+                   values_from = val) %>%
+       mutate(bf = exp(bf) - 1,
+              pa = exp(pa) - 1,
+              media = media / 7,
+              hdiet = exp(hdiet) - 1,
+              wb = wb / 48 * 100) %>%
+       mutate_at(c("group", "sex", "migrant", "country", "school", "income", "isced",
+                   "formula", "preg_smoke", "familymeal", "alc", "pub", "smoke", "bmicat"),
+                 ~ as.factor(.x)) %>%
+       relocate(c(id, group,
+                  country, sex, migrant, preg_week,  preg_smoke, bage,
+                  bweight, bf, formula, hdiet,
+                  age, school, bmi, bmicat, wb, media, pa, sleep, yhei, familymeal,
+                  homa, pub, alc, smoke, bmi_m, income, isced))
+
+levels(df3$group) <-  c("Baseline", "FU1", "FU2", "EL", "IV")
+levels(df3$sex) <- c("no", "yes")
+levels(df3$migrant) <- c("yes", "no")
+levels(df3$country) <- c("Central", "North", "South")
+levels(df3$school) <- c("Kindergarden", "School", "neither")
+levels(df3$familymeal) <- c("no", "yes")
+levels(df3$income) <- c("low", "middle", "high")
+levels(df3$isced) <- c("low", "middle", "high")
+levels(df3$formula) <- c("no", "yes")
+levels(df3$preg_smoke) <- c("never", "rarely", "several occasions a week", "daily")
+levels(df3$alc) <- c("no", "yes")
+levels(df3$smoke) <- c("no", "yes")
+levels(df3$pub) <- c("no", "yes")
+levels(df3$bmicat) <- c("underweight", "underweight", "underweight", "normal weight", "overweight", "obesity")
+
+
+
+table1s <- df3 %>% select(.,-c(id)) %>%
+  tbl_summary(
+        by = group,
+        statistic = list(all_continuous() ~ "{mean} ({sd})",
+                         all_categorical() ~ "{n} ({p}%)"),
+        digits = list(all_categorical() ~ c(0, 1)),
+        label = list(country ~ "Country",
+                     sex ~ "Female",
+                     migrant ~ "Migration background",
+                     bweight ~ "Birthweight [g]",
+                     bage ~ "Mother's age at birth",
+                     preg_smoke ~ "Tobacco smoking during pregnancy",
+                     formula ~ "Was fed with formula milk",
+                     bf ~ "Total breastfeeding [months]",
+                     preg_week ~ "Completed weeks of pregnancy",
+                     hdiet ~ "Fully integrated into household's diet [month]",
+                     age ~ "Age",
+                     bmi ~ "BMI z-score",
+                     bmicat ~ "BMI",
+                     bmi_m ~ "Mother's BMI",
+                     school ~ "School",
+                     isced ~ "ISCED",
+                     income ~ "Household's income",
+                     media ~ "Audiovisual media consumption [h/day]",
+                     familymeal ~ "Daily family meals",
+                     pa ~ "Physical activity [h/week]",
+                     sleep ~ "Total sleep [h/day]",
+                     wb ~ "Well-being [%]",
+                     yhei ~ "Youth healthy eating index [%]",
+                     homa ~ "Homa index z-score",
+                     alc ~ "Ever alcohol drinking",
+                     smoke ~ "Ever tobacco smoking",
+                     pub ~ "Pubertal"
+                     ),
+        missing_text = "Missing")
+
+
+### save table ---------------------------------------------------------------
+table1s %>% as_gt() %>% gtsave("table1s_revision.rtf", path = "results/")
+# write_rds(df3, file = "data_not_load/table1s_review.RDS")
+
+
diff --git a/analysis/Table4.R b/analysis/Table4.R
new file mode 100644
index 0000000..3d5f6ff
--- /dev/null
+++ b/analysis/Table4.R
@@ -0,0 +1,71 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: Create Table 4
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+library(glue)
+load("data_not_load/06_graph-10trees-pa.RData")
+# load("data_not_load/07_boot-pa.RData")
+# load("data_not_load/07_boot100igraph.RData")
+load("data_not_load/06_graph-10trees-pa.RData")
+
+g <- pc2igraph(g.pa)
+g1 <- feature.igraph(g[[1]], labels = V(g[[1]])$name)
+
+V(g1)$name <- c("Sex", "Country", "Migrant", "Income", "ISCED", "Age_at_birth",
+                "Breastfeeding", "Birthweight", "Weeks_of_pregnancy", "Formula_milk",
+                "HH_Diet", "Smoking_pregnancy",
+                "Age", "School", "AVM", "BMI", "Mothers_BMI", "Familymeal", "PA",
+                "Sleep", "Well-being", "YHEI", "HOMA",
+                "Age.FU1", "School.FU1", "AVM.FU1", "BMI.FU1", "Mothers_BMI.FU1",
+                "Familymeal.FU1", "Income.FU1", "ISCED.FU1","PA.FU1",
+                "Sleep.FU1", "Well-being.FU1", "YHEI.FU1", "HOMA.FU1",
+                "Age.FU2", "AVM.FU2", "BMI.FU2", "Mothers_BMI.FU2", "Alcohol.FU2",
+                "Familymeal.FU2", "Income.FU2", "ISCED.FU2","PA.FU2", "Puberty.FU2",
+                "Sleep.FU2", "Smoking.FU2", "Well-being.FU2", "YHEI.FU2", "HOMA.FU2")
+
+edges.pa <- as.data.frame(as_edgelist(g1))
+names(edges.pa) <- c("from", "to")
+edges.pa <- edges.pa[order(edges.pa$from, edges.pa$to),]
+
+edges <- edges[order(edges$from, edges$to),]
+
+# merge pa-data and bootstrapped data frames
+edges.joint <- left_join(edges.pa, edges, by = c("from", "to"))
+edges.joint <- edges.joint[order(edges.joint$weight, decreasing = TRUE),]
+
+# summary table
+summary(edges.joint$weight)
+IQR(edges.joint$weight)
+
+# --- Table ---
+table4 <-
+  edges.joint %>% gt %>%
+  tab_spanner(
+      label = md("**Edges**"),
+      columns = c("from", "to")) %>%
+  cols_label(
+    from = md("**from**"),
+    to = md("**to**"),
+    weight = md("**%**")) %>%
+  cols_align(
+    align = "left", columns = c("from", "to"))
+
+table4
+
+### save table ---------------------------------------------------------------
+table4 %>% gtsave("table-edge-freqs.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+Fn <- ecdf(edges.joint$weight)
+# Prob. of edge frequencies larger than x P(f(e) > 0.9) = 1 - P(f(e) <= 0.9)
+1 - Fn(90)
+summary(edges.joint$weight)
diff --git a/analysis/TableS_networkcharacteristic.R b/analysis/TableS_networkcharacteristic.R
new file mode 100644
index 0000000..e41256e
--- /dev/null
+++ b/analysis/TableS_networkcharacteristic.R
@@ -0,0 +1,600 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: Create Table 2
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+library(glue)
+load("data_not_load/06_graph-10trees-pa.RData")
+
+gii <- pc2igraph(g.pa)
+
+gii1 <- feature.igraph(gii[[1]], labels = V(gii[[1]])$name)
+gii2 <- feature.igraph(gii[[2]], labels = V(gii[[2]])$name)
+singeltons <- setdiff(V(gii1)$name, V(gii2)$name)
+
+# articulation points
+ap1 <- articulation_points(gii1)
+ap2 <- articulation_points(gii2)
+
+# diameter
+diam1 <- get_diameter(gii1)
+diam2 <- get_diameter(gii2)
+
+neighb2 <- V(gii2)$name[which(ego_size(gii2, order = 1, V(gii2), mindist = 1) ==
+                        max(ego_size(gii2, order = 1, V(gii2), mindist = 1)))]
+
+
+gd <- graph.descriptives(gii2)
+gdt <- tibble(
+         characteristic = rownames(gd),
+         val = gd[,1])
+
+# table2 <- gdt %>% gt %>%
+#   fmt_number(
+#     columns = 2,
+#     rows = c(4,6:9),
+#     decimals = 0
+#   ) %>%
+#   fmt_number(
+#     columns = 2,
+#     rows = c(5,10:11),
+#     decimals = 1
+#   ) %>%
+#   fmt_number(
+#     columns = 2,
+#     rows = c(1:3,12:15),
+#     decimals = 2
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(ap2), ", "),
+#     locations = cells_body(columns = 1, rows = 6)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam2), " > "),
+#     locations = cells_body(columns = 1, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(neighb2, ", "),
+#     locations = cells_body(columns = 1, rows = 9)
+#   ) %>%
+#   tab_footnote(
+#     footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")),
+#     locations = cells_column_labels(columns = 2)
+#   ) %>%
+#   cols_label(
+#     characteristic = md("**Characteristics**"),
+#     val = md("**Main graph**")) %>%
+#   cols_align(
+#     align = "right", columns = c("val"))
+#
+# table2
+
+### save table ---------------------------------------------------------------
+# table2 %>% gtsave("table2.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+
+# ----------------------------------------------------------------------------
+#       TWD, SEM, Bootstrap, MI0.1
+# ----------------------------------------------------------------------------
+# load("data_not_load/09_twd-pa.RData")
+# g.tw.old <- g.tw
+load("data_not_load/09_twd-pa-220718.RData")
+# load("data_not_load/10_sem-hc.RData")
+# sem.old <- sem
+load("data_not_load/10_sem-hc_bic-cg.RData")
+#load("data_not_load/07_boot-pa.RData")
+load("data_not_load/07_boot100igraph.RData")
+load("data_not_load/06_graph-10trees-pa.RData")
+load("data_not_load/06sens_graph-10trees-pa-alpha.RData")
+
+gii <- pc2igraph(g.pa)
+bn.pa  <- bnlearn::as.bn(gii[[1]])
+
+V(g.boot)$name <- V(gii[[1]])$name
+g.bot <- g.boot %>% delete_edges(which(E(g.boot)$weight < 44))
+g.bot <- feature.igraph(g.bot, labels = V(g.bot)$name)
+(bn.bot <- bnlearn::as.bn(g.bot))
+g.b50 <- g.boot %>% delete_edges(which(E(g.boot)$weight < 50))
+g.b50 <- feature.igraph(g.b50, labels = V(g.b50)$name)
+(bn.b50 <- bnlearn::as.bn(g.b50))
+g.b75 <- g.boot %>% delete_edges(which(E(g.boot)$weight < 75))
+g.b75 <- feature.igraph(g.b75, labels = V(g.b75)$name)
+(bn.b75 <- bnlearn::as.bn(g.b75))
+
+
+# g.sem.old <- bnlearn::as.igraph(sem.old)
+# g.sem.old <- feature.igraph(g.sem.old, labels = V(g.sem.old)$name)
+g.sem <- bnlearn::as.igraph(sem)
+g.sem <- feature.igraph(g.sem, labels = V(g.sem)$name)
+
+
+gtw <- pc2igraph(g.tw)
+gtw <- feature.igraph(gtw[[1]], labels = V(gtw[[1]])$name)
+bn.twd <- bnlearn::as.bn(gtw)
+
+# gtwold <- pc2igraph(g.tw.old)
+# gtwold <- feature.igraph(gtwold[[1]], labels = V(gtwold[[1]])$name)
+# bn.twdold <- bnlearn::as.bn(gtwold)
+
+g.mi <- pc2igraph(g.pa.alpha)
+g.mi <- feature.igraph(g.mi[[1]], labels = V(g.mi[[1]])$name)
+bn.mi <- bnlearn::as.bn(g.mi)
+
+
+# diameter
+diam.twd <- get_diameter(gtw)
+diam.sem <- get_diameter(g.sem)
+diam.bot <- get_diameter(g.bot)
+diam.b50 <- get_diameter(g.b50)
+diam.b75 <- get_diameter(g.b75)
+diam.mi <- get_diameter(g.mi)
+
+neighb.twd <- V(gtw)$name[which(ego_size(gtw, order = 1, V(gtw), mindist = 1) ==
+                            max(ego_size(gtw, order = 1, V(gtw), mindist = 1)))]
+neighb.sem <- V(g.sem)$name[which(ego_size(g.sem, order = 1, V(g.sem), mindist = 1) ==
+                            max(ego_size(g.sem, order = 1, V(g.sem), mindist = 1)))]
+neighb.bot <- V(g.bot)$name[which(ego_size(g.bot, order = 1, V(g.bot), mindist = 1) ==
+                                    max(ego_size(g.bot, order = 1, V(g.bot), mindist = 1)))]
+neighb.b50 <- V(g.b50)$name[which(ego_size(g.b50, order = 1, V(g.b50), mindist = 1) ==
+                                    max(ego_size(g.b50, order = 1, V(g.b50), mindist = 1)))]
+neighb.b75 <- V(g.b75)$name[which(ego_size(g.b75, order = 1, V(g.b75), mindist = 1) ==
+                                    max(ego_size(g.b75, order = 1, V(g.b75), mindist = 1)))]
+neighb.mi <- V(g.mi)$name[which(ego_size(g.mi, order = 1, V(g.mi), mindist = 1) ==
+                                    max(ego_size(g.mi, order = 1, V(g.mi), mindist = 1)))]
+
+
+# Hamming distance and structural Haming distance
+hamming.twd <- bnlearn::hamming(bn.pa, bn.twd)
+# hamming.twd.old <- bnlearn::hamming(bn.pa, bn.twdold)
+# hamming.sem.old <- bnlearn::hamming(bn.pa, sem.old)
+hamming.sem <- bnlearn::hamming(bn.pa, sem)
+hamming.bot <- bnlearn::hamming(bn.pa, bn.bot)
+hamming.b50 <- bnlearn::hamming(bn.pa, bn.b50)
+hamming.b75 <- bnlearn::hamming(bn.pa, bn.b75)
+hamming.mi <- bnlearn::hamming(bn.pa, bn.mi)
+shd.twd <- bnlearn::shd(bn.pa, bn.twd)
+# shd.twd.old <- bnlearn::shd(bn.pa, bn.twdold)
+# shd.sem.old <- bnlearn::shd(bn.pa, sem.old)
+shd.sem <- bnlearn::shd(bn.pa, sem)
+shd.bot <- bnlearn::shd(bn.pa, bn.bot)
+shd.b50 <- bnlearn::shd(bn.pa, bn.b50)
+shd.b75 <- bnlearn::shd(bn.pa, bn.b75)
+shd.mi <- bnlearn::shd(bn.pa, bn.mi)
+
+#twd
+gd <- graph.descriptives(gtw)
+rn <- rownames(gd)
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.twd
+gd2[17,1] <- shd.twd
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.twd <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+#twd.old
+# gd <- graph.descriptives(gtwold)
+# rn <- rownames(gd)
+# gd2 <- matrix(NA, nrow = 17, ncol = 1)
+# gd2[1:15,1] <- gd
+# gd2[16,1] <- hamming.twd
+# gd2[17,1] <- shd.twd
+# rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+# gd.twd.old <- tibble(
+#   characteristic = rownames(gd2),
+#   val = gd2[,1])
+
+# sem
+gd <- graph.descriptives(g.sem)
+gd[7,] <- nrow(sem$arcs)
+gd[8,] <- 0
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.sem
+gd2[17,1] <- shd.sem
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.sem <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+# mi
+gd <- graph.descriptives(g.mi)
+rn <- rownames(gd)
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.mi
+gd2[17,1] <- shd.mi
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.mi <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+
+# boot
+gd <- graph.descriptives(g.bot)
+rn <- rownames(gd)
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.bot
+gd2[17,1] <- shd.bot
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.bot <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+# boot 50
+gd <- graph.descriptives(g.b50)
+rn <- rownames(gd)
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.b50
+gd2[17,1] <- shd.b50
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.b50 <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+# boot 75
+gd <- graph.descriptives(g.b75)
+rn <- rownames(gd)
+gd2 <- matrix(NA, nrow = 17, ncol = 1)
+gd2[1:15,1] <- gd
+gd2[16,1] <- hamming.b75
+gd2[17,1] <- shd.b75
+rownames(gd2) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.b75 <- tibble(
+  characteristic = rownames(gd2),
+  val = gd2[,1])
+
+# Average bootstrap graph
+boot <- readRDS("data_not_load/boot100_mi1-graph.RDS")
+bi <- lapply(boot, function(x){
+              tmp <- pc2igraph(x)[[1]]
+              feature.igraph(tmp)} )
+bn.bi <- lapply(boot, function(x){
+                  bnlearn::as.bn(x@graph, check.cycles = FALSE)})
+
+tmp <- do.call(cbind, lapply(bi, graph.descriptives))
+# tmp <- rbind(tmp, tmp[7,] - tmp[8,])
+# rownames(tmp)[16] <- "Number of directed edges"
+b.mean <- apply(tmp, 1, mean, na.rm = TRUE)
+
+gd.boot <- matrix(NA, nrow = 17, ncol = 1)
+gd.boot[1:15,1] <- b.mean
+gd.boot[16,1] <- mean(sapply(bn.bi, function(x){bnlearn::hamming(bn.pa, x)}))
+gd.boot[17,1] <- mean(sapply(bn.bi, function(x){bnlearn::shd(bn.pa, x)}))
+rownames(gd.boot) <- c(rn, "Hamming distance", "Structural Hamming distance")
+
+gd.boot <- tibble(
+  characteristic = rownames(gd.boot),
+  val = gd.boot[,1])
+
+# gd.alle <- bind_cols(gd.mi, gd.twd[,2], gd.sem[,2], gd.bot[,2], gd.b75[,2])
+# names(gd.alle)[2:6] <- c("val.mi", "val.twd", "val.sem", "val.boot35", "val.boot75")
+#
+#
+#
+# table.alle <-
+#   gd.alle[,1:6] %>% gt %>%
+#   fmt_number(
+#     columns = 2:6,
+#     rows = c(4,6:9,16,17),
+#     decimals = 0
+#   ) %>%
+#   fmt_number(
+#     columns = 2:6,
+#     rows = c(5,10:11),
+#     decimals = 1
+#   ) %>%
+#   fmt_number(
+#     columns = 2:6,
+#     rows = c(1:3,12:15),
+#     decimals = 2
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam2), " > "),
+#     locations = cells_body(columns = 3, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam.twd), " > "),
+#     locations = cells_body(columns = 3, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam.sem), " > "),
+#     locations = cells_body(columns = 4, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam.bot), " > "),
+#     locations = cells_body(columns = 5, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(names(diam.b75), " > "),
+#     locations = cells_body(columns = 5, rows = 4)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(sort(neighb.mi), ", "),
+#     locations = cells_body(columns = 3, rows = 9)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(sort(neighb.twd), ", "),
+#     locations = cells_body(columns = 3, rows = 9)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(sort(neighb.sem), ", "),
+#     locations = cells_body(columns = 4, rows = 9)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(sort(neighb.bot), ", "),
+#     locations = cells_body(columns = 5, rows = 9)
+#   ) %>%
+#   tab_footnote(
+#     footnote = glue_collapse(sort(neighb.b75), ", "),
+#     locations = cells_body(columns = 5, rows = 9)
+#   ) %>%
+#   # tab_footnote(
+#   #   footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")),
+#   #   locations = cells_column_labels(columns = 2)
+#   # ) %>%
+#   # tab_footnote(
+#   #   footnote = paste0("without singletons ",glue_collapse(singeltons.tw, ", ")),
+#   #   locations = cells_column_labels(columns = 3)
+#   # ) %>%
+#   cols_label(
+#     characteristic = md("**Characteristics**"),
+#     val.mi     = md("**MI**"),
+#     val.twd    = md("**TWD**"),
+#     val.sem    = md("**SEM**"),
+#     val.boot35 = md("**BOOT35**"),
+#     val.boot75 = md("**BOOT75**")
+#     ) %>%
+#   cols_align(
+#     align = "right", columns = c("val.mi","val.twd", "val.sem", "val.boot35", "val.boot75"))
+#
+#   table.alle
+#
+#   ### save table ---------------------------------------------------------------
+#   save(gd.alle2, file = "data_not_load/table2_data.RData")
+#   table.alle %>% gtsave("table2_twd-sem-boot.rtf", path = "results/")
+#   # ----------------------------------------------------------------------------
+#
+#
+
+
+## add rmseu
+load(file = "results/rmseu.RData")
+
+  gdt[16:17,1:2] <- NA
+  gdt[16:17,1] <- c("Hamming distance", "Structural Hamming distance")
+  gd.alle2 <- bind_cols(gdt, gd.mi[,2], gd.twd[,2], gd.sem[,2], gd.bot[,2],
+                        gd.b50[,2], gd.b75[,2], gd.boot[,2])
+  names(gd.alle2)[2:9] <- c("val.main","val.mi","val.twd", "val.sem", "val.boot44",
+                            "val.boot50", "val.boot75", "avg.boot")
+
+  gd.alle2[18,1] <- "Mean edge uncertainty"
+  gd.alle2[18,6] <- round(rmseu44$meu * 100, 1)
+  gd.alle2[18,7] <- round(rmseu50$meu * 100, 1)
+  gd.alle2[18,8] <- round(rmseu75$meu * 100, 1)
+  gd.alle2[18,9] <- round(rmseu0$meu * 100, 1)
+
+
+table.alle2 <-
+    gd.alle2[c(7,8,11,9,5,4,16,17,18),] %>% gt %>%
+    fmt_number(
+      columns = 2:9,
+      rows = c(1,2,4,6:8),
+      decimals = 0
+    ) %>%
+    fmt_number(
+      columns = 2:9,
+      rows = c(3,5,9),
+      decimals = 1
+    ) %>%
+    # fmt_number(
+    #   columns = 2:8,
+    #   rows = c(1:3,12:15),
+    #   decimals = 2
+    # ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam2), " > "),
+      locations = cells_body(columns = 2, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.mi), " > "),
+      locations = cells_body(columns = 3, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.twd), " > "),
+      locations = cells_body(columns = 4, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.sem), " > "),
+      locations = cells_body(columns = 5, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.bot), " > "),
+      locations = cells_body(columns = 6, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.b50), " > "),
+      locations = cells_body(columns = 7, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam.b75), " > "),
+      locations = cells_body(columns = 8, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb2), ", "),
+      locations = cells_body(columns = 2, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.mi), ", "),
+      locations = cells_body(columns = 3, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.twd), ", "),
+      locations = cells_body(columns = 4, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.sem), ", "),
+      locations = cells_body(columns = 5, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.bot), ", "),
+      locations = cells_body(columns = 6, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.b50), ", "),
+      locations = cells_body(columns = 7, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb.b75), ", "),
+      locations = cells_body(columns = 8, rows = 4)
+    ) %>%
+    # tab_footnote(
+    #   footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")),
+    #   locations = cells_column_labels(columns = 2)
+    # ) %>%
+    # tab_footnote(
+    #   footnote = paste0("without singletons ",glue_collapse(singeltons.tw, ", ")),
+    #   locations = cells_column_labels(columns = 3)
+    # ) %>%
+    cols_label(
+      characteristic = md("**Characteristics**"),
+      val.main   = md("**Main**"),
+      val.mi     = md("**MI**"),
+      val.twd    = md("**TWD**"),
+      val.sem    = md("**SEM**"),
+      val.boot44 = md("**BOOT44**"),
+      val.boot50 = md("**BOOT50**"),
+      val.boot75 = md("**BOOT75**"),
+      avg.boot = md("**Avg.BOOT**")
+    ) %>%
+    cols_align(
+      align = "right", columns = c("val.main","val.mi","val.twd", "val.sem", "val.boot44",
+                                   "val.boot50", "val.boot75",  "avg.boot"))
+
+table.alle2
+
+
+
+
+### save table ---------------------------------------------------------------
+  save(gd.alle2, file = "data_not_load/table2_data.RData")
+  table.alle2 %>% gtsave("table2_twd-sem-boot.rtf", path = "results/")
+# ----------------------------------------------------------------------------
+
+
+
+
+
+
+
+  # ---   MVPA   --------------------------------------------------------------
+
+  load("data_not_load/06_graph-10trees-mvpa.RData")
+  gmvpa <- pc2igraph(g.mvpa)
+
+  gmvpa1 <- feature.igraph(gmvpa[[1]], labels = V(gmvpa[[1]])$name)
+  gmvpa2 <- feature.igraph(gmvpa[[2]], labels = V(gmvpa[[2]])$name)
+  singeltons.mv <- setdiff(V(gmvpa1)$name, V(gmvpa2)$name)
+
+  # articulation points
+  ap1mv <- articulation_points(gmvpa1)
+  ap2mv <- articulation_points(gmvpa2)
+
+  # diameter
+  diam1mv <- get_diameter(gmvpa1)
+  diam2mv <- get_diameter(gmvpa2)
+
+
+  neighb2mv <- V(gmvpa2)$name[which(ego_size(gmvpa2, order = 1, V(gmvpa2), mindist = 1) ==
+                                      max(ego_size(gmvpa2, order = 1, V(gmvpa2), mindist = 1)))]
+
+
+  gd <- graph.descriptives(gmvpa2)
+  gdt2b <- tibble(
+    characteristic = rownames(gd),
+    val = gd[,1])
+
+  gdt2b <- bind_cols(gdt, gdt2b[,2])
+  names(gdt2b)[2:3] <- c("val", "val.mvpa")
+
+
+  table2b <- gdt2b %>% gt %>%
+    fmt_number(
+      columns = 2:3,
+      rows = c(4,6:9),
+      decimals = 0
+    ) %>%
+    fmt_number(
+      columns = 2:3,
+      rows = c(5,10:11),
+      decimals = 1
+    ) %>%
+    fmt_number(
+      columns = 2:3,
+      rows = c(1:3,12:15),
+      decimals = 2
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam2), " > "),
+      locations = cells_body(columns = 2, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(names(diam2mv), " > "),
+      locations = cells_body(columns = 3, rows = 4)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(names(ap2)), ", "),
+      locations = cells_body(columns = 2, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(names(ap2mv)), ", "),
+      locations = cells_body(columns = 3, rows = 6)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb2), ", "),
+      locations = cells_body(columns = 2, rows = 9)
+    ) %>%
+    tab_footnote(
+      footnote = glue_collapse(sort(neighb2mv), ", "),
+      locations = cells_body(columns = 3, rows = 9)
+    ) %>%
+    tab_footnote(
+      footnote = paste0("without singletons ",glue_collapse(singeltons, ", ")),
+      locations = cells_column_labels(columns = 2)
+    ) %>%
+    tab_footnote(
+      footnote = paste0("without singletons ",glue_collapse(singeltons.mv, ", ")),
+      locations = cells_column_labels(columns = 3)
+    ) %>%
+    cols_label(
+      characteristic = md("**Characteristics**"),
+      val = md("**PA graph**"),
+      val.mvpa = md("**MVPA graph**")) %>%
+    cols_align(
+      align = "right", columns = c("val", "val.mvpa"))
+
+  table2b
+
+
+
+  ### save table ---------------------------------------------------------------
+  # table2b %>% gtsave("table2_mvpa.rtf", path = "results/")
+  # ----------------------------------------------------------------------------
diff --git a/analysis/Table_edgefreq.R b/analysis/Table_edgefreq.R
new file mode 100644
index 0000000..f099652
--- /dev/null
+++ b/analysis/Table_edgefreq.R
@@ -0,0 +1,96 @@
+# ------------------------------------------------------------------------------
+#
+# Project: Cohort Causal Graph
+#
+# Author:  R. Foraita
+# Date:    JUL 2021
+#
+# Purpose: Table edge frequencies
+#
+# ------------------------------------------------------------------------------
+library(gt)
+library(gtsummary)
+
+# load data
+boot <- readRDS("data_not_load/boot100_mi1-graph.RDS")
+load("data_not_load/06_graph-10trees-pa.RData")
+
+
+
+
+
+
+# g.pa to igraph
+g <- pc2igraph(g.pa)
+g <- feature.igraph(g[[1]])
+V(g)$name <- c("Sex", "Region", "Migrant", "Income", "ISCED",
+               "Mother's age at birth", "Total breastfeeding",
+               "Birthweight", "Weeks of pregnancy", "Formula milk", "HH diet",
+               "Smoking during pregnancy", "Age (B)", "School (B)", "AVM (B)",
+               "BMI (B)", "Mother's BMI (B)", "Daily family meals (B)", "PA (B)",
+               "Sleep (B)", "Well-being (B)", "YHEI (B)", "HOMA (B)",
+               "Age (FU1)", "School (FU1)", "AVM (FU1)",
+               "BMI (FU1)", "Mother's BMI (FU1)", "Daily family meals (FU1)",
+               "Income (FU1)", "ISCED (FU1)","PA (FU1)",
+               "Sleep (FU1)", "Well-being (FU1)", "YHEI (FU1)", "HOMA (FU1)",
+               "Age (FU2)", "AVM (FU2)", "BMI (FU2)", "Mother's BMI (FU2)",
+               "Alcohol (FU2)", "Daily family meals (FU2)", "Income (FU2)", "ISCED (FU2)",
+               "PA (FU2)", "Puberty (FU2)","Sleep (FU2)", "Smoking (FU2)",
+               "Well-being (FU2)", "YHEI (FU2)", "HOMA (FU2)")
+
+edges.pa <- as.data.frame(as_edgelist(g))
+names(edges.pa) <- c("from", "to")
+
+
+# make adjacency matrix of all pc-graphs
+amat <- lapply(boot, function(x){
+               tmp <- wgtMatrix(getGraph(x), transpose = FALSE)
+               wm2 <- (tmp + t(tmp))
+               tmp[which(wm2 > 1)] <- 0.5
+               tmp
+          })
+g.avg <- Reduce('+', amat)
+
+
+
+# Build igraph
+g.boot <- graph.adjacency(g.avg, weighted = TRUE)
+V(g.boot)$name <- c("Sex", "Region", "Migrant", "Income", "ISCED",
+                 "Mother's age at birth", "Total breastfeeding",
+                 "Birthweight", "Weeks of pregnancy", "Formula milk", "HH diet",
+                 "Smoking during pregnancy", "Age (B)", "School (B)", "AVM (B)",
+                 "BMI (B)", "Mother's BMI (B)", "Daily family meals (B)", "PA (B)",
+                 "Sleep (B)", "Well-being (B)", "YHEI (B)", "HOMA (B)",
+                 "Age (FU1)", "School (FU1)", "AVM (FU1)",
+                 "BMI (FU1)", "Mother's BMI (FU1)", "Daily family meals (FU1)",
+                 "Income (FU1)", "ISCED (FU1)","PA (FU1)",
+                 "Sleep (FU1)", "Well-being (FU1)", "YHEI (FU1)", "HOMA (FU1)",
+                 "Age (FU2)", "AVM (FU2)", "BMI (FU2)", "Mother's BMI (FU2)",
+                 "Alcohol (FU2)", "Daily family meals (FU2)", "Income (FU2)", "ISCED (FU2)",
+                 "PA (FU2)", "Puberty (FU2)","Sleep (FU2)", "Smoking (FU2)",
+                 "Well-being (FU2)", "YHEI (FU2)", "HOMA (FU2)")
+
+# more information about edges
+edges <- as.data.frame(as_edgelist(g.boot))
+names(edges) <- c("from", "to")
+edges$weight <- E(g.boot)$weight
+
+# merge two data frames
+edges.joint <- left_join(edges.pa, edges, by = c("from", "to"))
+
+
+# median and IQR of selected edges
+summary(edges$weight)
+IQR(edges$weight)
+
+
+# edges with more than 80%
+e80 <- edges[which(edges$weight >= 80),]
+e80[order(e80$weight, decreasing = TRUE),]
+
+
+# Table ----------------------------------------------------------------------------------
+ej <- edges.joint[order(edges.joint$weight, decreasing = TRUE),]
+
+write.csv2(ej, file = "results/tab_edgefreq.csv")
+
diff --git a/analysis/VisNet.R b/analysis/VisNet.R
index d5edda9..d93acd5 100644
--- a/analysis/VisNet.R
+++ b/analysis/VisNet.R
@@ -1,19 +1,10 @@
-# ------------------------------------------------------------------------------
-#
-# Project: Cohort Causal Graph
-#
-# Author:  R. Foraita
-#
-# Purpose: Make interactive graphs
-#
-# ------------------------------------------------------------------------------
 library(visNetwork)
 library(glue)
 
 
 
 # ------------   Main graph   ------------------------------------------------------------
-load("data/graph-10trees.RData")
+load("data_not_load/06_graph-10trees-pa.RData")
 g <- pc2igraph(g.pa)[[1]]
 graph.descriptives(g)
 
@@ -107,6 +98,12 @@ visNetwork(nodes, edges, width = "100%", height = "800px") %>%
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.pa
 
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.pa, file = "vis-pa.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
+
+
 
 
 
@@ -114,8 +111,116 @@ vis.pa
 #
 #                          Sensitivity graphs
 #
+# ------------   MVPA   ------------------------------------------------------------------
+load("data_not_load/06_graph-10trees-mvpa.RData")
+g <- pc2igraph(g.mvpa)[[1]]
+
+
+# --- Prepare edges ---
+edges <- as.data.frame(as_edgelist(g))
+names(edges) <- c("from", "to")
+edges$from[which(edges$from == "income")] <- "income.0"
+edges$from[which(edges$from == "isced")] <- "isced.0"
+edges$to[which(edges$to == "income")] <- "income.0"
+edges$to[which(edges$to == "isced")] <- "isced.0"
+
+edges <- data.frame(edges, edges$from,
+                    arrowheads = rep("arrow", nrow(edges)),
+                    arrows.to.enabled = TRUE)
+
+# ungerichtete Kanten bearbeiten
+tmp <- tmp2 <- vector()
+for(i in 1:(nrow(edges))){
+  tmp  <- c(tmp,  glue_collapse(edges[i,1:2],"-"))
+  tmp2 <- c(tmp2, glue_collapse(edges[i,2:3],"-"))
+}
+
+isele <- which(is.element(tmp2, tmp))
+edges[isele,]
+edges$arrows.to.enabled[isele] <- FALSE
+
+# delete doppelte Kanten:c(21, 80, 83, 86, 88, 90, 107, 109, 110)
+edges <- edges[-c(21, 80, 83, 86, 88, 90, 107, 109, 110),]
+
+
+
+# --- prepre nodes ---
+farbpalette <- c(rep("#afa8b3",3), rep("#4F3824",7),
+                 rep("#D1603D",14),rep("#DDB967",14), rep("#D0E37F",16))
+
+nodes <- data.frame(id = g.mvpa@graph@nodes, label = g.mvpa@graph@nodes)
+nodes <- rbind(nodes[1:3,], nodes[6:12,], nodes[13:18,], nodes[4:5,], nodes[19:54,])
+nodes$id[which(nodes$id == "income")] <- "income.0"
+nodes$id[which(nodes$id == "isced")] <- "isced.0"
+
+nodes$group <- c(rep("background",3), rep("early life",7),
+                 rep("baseline", 14), rep("FU1",14), rep("FU2",16))
+nodes$level <- c(rep(1,3), rep(2,7), rep(3,14), rep(4,14), rep(5,16))
+nodes$color.background <- farbpalette
+nodes$color.border <- farbpalette
+nodes$color.highlight.background <- rep("#C62F4B",54)
+nodes$color.highlight.border <- rep("#C62F4B",54)
+row.names(nodes) <- 1:54
+nodes$label <- c("Sex", "Region", "Migrant", "Mother's age\nat birth", "Total\nbreastfeeding",
+                 "Birthweight", "Weeks of\npregnancy", "Formula milk", "HH diet",
+                 "Smoking\nduring pregnancy", "Age", "School", "AVM",
+                 "zBMI", "Mother's BMI", "Daily\nfamily meals", "Income", "ISCED", "MVPA",
+                 "Sedentary", "Sleep", "Well-being", "YHEI", "HOMA", "Age", "School", "AVM",
+                 "zBMI", "Mother's BMI", "Daily\nfamily meals", "Income", "ISCED","MVPA",
+                 "Sedentary", "Sleep", "Well-being", "YHEI", "HOMA", "Age", "AVM",
+                 "zBMI", "Mother's BMI", "Alcohol", "Daily\nfamily meals", "Income", "ISCED",
+                 "MVPA", "Puberty", "Sedentary","Sleep", "Smoking", "Well-being", "YHEI", "HOMA")
+
+
+
+# legend nodes
+legendNodes <- data.frame(
+  label = c("Context","Early life", "B", "FU1", "FU2"),
+  shape = c("square", "square", "dot", "dot", "dot"),
+  color.background = c(farbpalette[1], farbpalette[4], farbpalette[11], farbpalette[25], farbpalette[39]),
+  color.border = c(farbpalette[1], farbpalette[4], farbpalette[11], farbpalette[25], farbpalette[39])
+)
+
+
+
+
+# --- vis network ---
+vis.mvpa <-
+  visNetwork(nodes, edges, width = "100%", height = "800px") %>%
+  visLayout(randomSeed = 96) %>%
+  visIgraphLayout(layout = "layout_on_grid") %>%   #layout = "layout_on_grid", "layout_with_sugiyama
+  # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>%
+  visNodes(
+    shape = "dot",
+    label = nodes$label,
+    group = nodes$group,
+    font = list(size = 32))  %>%
+  visEdges(
+    shadow = FALSE,
+    arrows = "to",
+    color = list(color = "#466286", highlight = "#C62F4B"),
+    width = 2,
+    selectionWidth = 3
+  ) %>%
+  visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
+vis.mvpa
+
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.mvpa, file = "vis-mvpa.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
+nodes.mvpa <- nodes
+edges.mvpa <- edges
+legendNodes.mvpa <- legendNodes
+
+
+
+
+
+
 # ------------   Test-wise deletion   ----------------------------------------------------
-load("data/graph-twd.RData")
+#load("data_not_load/09_twd-pa.RData")
+load("data_not_load/09_twd-pa-220718.RData")
 g <- pc2igraph(g.tw)[[1]]
 
 
@@ -141,6 +246,12 @@ for(i in 1:(nrow(edges))){
 isele <- which(is.element(tmp2, tmp))
 edges[isele,]
 edges$arrows.to.enabled[isele] <- FALSE
+
+# delete doppelte Kanten OLD:c(32,61,63,80,94,95,98,102,123,136,142,148,150,153)
+# edges <- edges[-c(32,61,63,80,94,95,98,102,123,136,142,148,150,153),]
+
+# 19.07.2022: Fehler bei mixCItwd bereinigt:
+# delete doppelte Kanten OLD:c(98, 102, 121, 139. 141)
 edges <- edges[-c(98, 102, 121, 139, 141),]
 
 
@@ -206,6 +317,11 @@ vis.twd <-
   ) %>%
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.twd
+
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.twd, file = "vis-twd.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
 nodes.twd <- nodes
 edges.twd <- edges
 
@@ -215,7 +331,7 @@ edges.twd <- edges
 
 
 # ------------   SEM   -------------------------------------------------------------
-load("data/graph-sem-hc_bic-cg.RData")
+load("data_not_load/10_sem-hc_bic-cg.RData")
 g.sem <- bnlearn::as.igraph(sem)
 g.sem <- feature.igraph(g.sem, labels = V(g.sem)$name)
 
@@ -241,6 +357,11 @@ for(i in 1:(nrow(edges))){
 
 isele <- which(is.element(tmp2, tmp))
 edges[isele,]
+# edges$arrows.to.enabled[isele] <- FALSE
+
+# delete doppelte Kanten: keine doppelte Kanten
+
+
 
 # --- prepre nodes ---
 farbpalette <- c(rep("#afa8b3",3), rep("#4F3824",7),
@@ -304,6 +425,12 @@ vis.sem <-
   ) %>%
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.sem
+
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.sem, file = "vis-sem.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
+
 nodes.sem <- nodes
 edges.sem <- edges
 
@@ -313,10 +440,12 @@ edges.sem <- edges
 
 
 
+
 # ------------   Bootstrap   ----------------------------------------------------
 #
 # -------------------------------------------------------------------------------
-load("data/boot100igraph.RData")
+#load("data_not_load/07_boot-pa.RData")
+load("data_not_load/07_boot100igraph.RData")
 
 # verwende alte Knotennamen
 V(g.boot)$name <-
@@ -409,6 +538,11 @@ vis.boot.75 <-
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.boot.75
 
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.boot.75, file = "vis-boot_75perc.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
+
 
 
 # --- vis network: Bootstrap 60% ---
@@ -425,11 +559,16 @@ isele <- which(is.element(tmp2, tmp))
 edges[isele,]
 edges$arrows.to.enabled[isele] <- FALSE
 
+# delete doppelte Kanten:
+# edges <- edges[-c(15, 60, 73, 74, 88, 99, 123, 124, 128),]
+
 
 vis.boot.60 <-
   visNetwork(nodes, edges[edges$weight >= 60,], width = "100%", height = "800px") %>%
   visLayout(randomSeed = 96) %>%
+  # visOptions(highlightNearest = list(enabled = T, degree = 2, hover = T))
   visIgraphLayout(layout = "layout_on_grid") %>%   #layout = "layout_on_grid", "layout_with_sugiyama
+  # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>%
   visNodes(
     shape = "dot",
     label = nodes$label,
@@ -445,6 +584,13 @@ vis.boot.60 <-
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.boot.60
 
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.boot.60, file = "vis-boot-60perc.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+#
+
+
+
 
 # --- vis network: Bootstrap 44% ---
 edges <- edges.boot[edges.boot$weight > 44,]
@@ -464,7 +610,9 @@ edges$arrows.to.enabled[isele] <- FALSE
 vis.boot.44 <-
   visNetwork(nodes, edges[edges$weight >= 44,], width = "100%", height = "800px") %>%
   visLayout(randomSeed = 756) %>%
+  # visOptions(highlightNearest = list(enabled = T, degree = 2, hover = T))
   visIgraphLayout(layout = "layout_on_grid") %>%   #layout = "layout_on_grid", "layout_with_sugiyama
+  # visHierarchicalLayout(direction = "LR", levelSeparation = 200) %>%
   visNodes(
     shape = "dot",
     label = nodes$label,
@@ -480,6 +628,12 @@ vis.boot.44 <-
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.boot.44
 
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.boot.44, file = "vis-boot_44perc.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
+
+
 
 
 
@@ -523,11 +677,16 @@ vis.boot.35 <-
     selectionWidth = 3
   ) %>%
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
+vis.boot.35
+
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.boot.35, file = "vis-boot-35perc.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
 
 
 
 # ------------   Main graph, alpha = .10  --------------------------------------
-load("data/graph-10trees-alpha.RData")
+load("data_not_load/06sens_graph-10trees-pa-alpha.RData")
 g <- pc2igraph(g.pa.alpha)[[1]]
 graph.descriptives(g)
 
@@ -620,6 +779,10 @@ vis.pa.alpha <-
   visLegend(width = 0.15, addNodes = legendNodes, useGroups = FALSE)
 vis.pa.alpha
 
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph/results")
+# visSave(vis.pa.alpha, file = "vis-pa-alpha.html")
+# setwd("G:/I-Family/Auswertung/Papers/DAG_overall_model_FORAITA/2020/cohort_causal_graph")
+
 
 
 
@@ -627,19 +790,27 @@ vis.pa.alpha
 save(nodes.pa, nodes.mvpa, nodes.sem, nodes.boot, nodes.twd, nodes.pa.alpha,
      edges.pa, edges.mvpa, edges.twd, edges.sem, edges.boot, edges.pa.alpha,
      legendNodes, legendNodes.mvpa,
-     file = "data/vis_pa.RData")
+     file = "data_not_load/vis_pa.RData")
 
 
-visSave(vis.pa, file = "vis-pa.html")
-visSave(vis.pa.alpha, file = "vis-pa-alpha.html")
-visSave(vis.twd, file = "vis-twd.html")
-visSave(vis.sem, file = "vis-sem.html")
-visSave(vis.boot.75, file = "vis-boot_75perc.html")
-visSave(vis.boot.60, file = "vis-boot-60perc.html")
-visSave(vis.boot.44, file = "vis-boot-44perc.html")
+#setwd("C:/Users/foraita/Documents/BIPS/software/cohort_causal_graph/results")
+visSave(vis.pa, file = "results/vis-pa.html")
+visSave(vis.mvpa, file = "results/vis-mvpa.html")
+visSave(vis.twd, file = "results/vis-twd.html")
+visSave(vis.sem, file = "results/vis-sem.html")
+visSave(vis.sem, file = "results/vis-pa-alpha.html")
+visSave(vis.boot.75, file = "results/vis-boot_75perc.html")
+visSave(vis.boot.60, file = "results/vis-boot-60perc.html")
+visSave(vis.boot.35, file = "results/vis-boot-35perc.html")
+visSave(vis.boot.44, file = "results/vis-boot-44perc.html")
+#setwd("C:/Users/foraita/Documents/BIPS/software/cohort_causal_graph/")
 
 
 
+# setEPS()
+# postscript("results/vispa.eps")
+# vis.pa
+# dev.off()