From 7fc6df3578d77c69f9a51ae5287cfe4b2cff2d42 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Wed, 3 Jan 2024 08:06:43 +0000 Subject: [PATCH 01/25] fixing nesting issues --- config/dualGL_sparse.json | 8 +++++--- .../athomas_jtsampler/script.sh | 4 ++-- .../dualgl/rule.smk | 2 +- .../dualgl/script.sh | 17 ----------------- .../huge_glasso/schema.json | 6 +++--- .../huge_mb/schema.json | 8 ++++---- 6 files changed, 15 insertions(+), 30 deletions(-) delete mode 100644 workflow/rules/structure_learning_algorithms/dualgl/script.sh diff --git a/config/dualGL_sparse.json b/config/dualGL_sparse.json index 9cc7c460..e461fb4e 100644 --- a/config/dualGL_sparse.json +++ b/config/dualGL_sparse.json @@ -118,7 +118,7 @@ { "id": "dualGL-gt13", "timeout": null, - "startalg": "jtsampler_gt13", + "startalg": "jtsampler_gts", "alpha": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.99] } ], @@ -152,13 +152,15 @@ "huge_glasso": [ { "id": "glasso", - "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01] + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": null } ], "huge_mb": [ { "id": "mb", - "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01] + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": null } ] } diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh index daddb38f..ddbc2caa 100644 --- a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh @@ -6,7 +6,7 @@ TEMP_FILENAME=${snakemake_output[seqgraph_full]/fulloutput.tar.gz/fulloutput_tob if [ ${snakemake_wildcards[timeout]} = "None" ]; then if [ ${snakemake_wildcards[full_output]} = "True" ]; then /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ - -r ${snakemake_wildcards[replicate]} \ + -r ${snakemake_wildcards[mcmc_seed]} \ -n 
${snakemake_wildcards[num_samples]} \ -s ${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ @@ -20,7 +20,7 @@ if [ ${snakemake_wildcards[timeout]} = "None" ]; then rm -f $TEMP_FILENAME else /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ - -r ${snakemake_wildcards[replicate]} \ + -r ${snakemake_wildcards[mcmc_seed]} \ -n ${snakemake_wildcards[num_samples]} \ -s ${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ diff --git a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk index 22ea2ac7..6f669207 100644 --- a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk +++ b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk @@ -2,7 +2,7 @@ def fix_none_startalg(wildcards): if wildcards["startalg"] == "None": return [] else: - return "{output_dir}/adjvecs/{data}/algorithm=/"+ wildcards['startalg']+"/seed={replicate}/adjvecs_fulloutput.csv" + return "{output_dir}/adjvecs/{data}/algorithm=/"+ wildcards['startalg']+"/seed={seed}/adjvecs_fulloutput.csv" def extract_filename(filename): return filename.replace("_fulloutput.tar.gz", "fulloutput_tobecompressed.csv") diff --git a/workflow/rules/structure_learning_algorithms/dualgl/script.sh b/workflow/rules/structure_learning_algorithms/dualgl/script.sh deleted file mode 100644 index fa3cf9a5..00000000 --- a/workflow/rules/structure_learning_algorithms/dualgl/script.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -CP=$(pwd)/workflow/rules/structure_learning_algorithms/dualGraphLearner/jtsampler - -if [ ${snakemake_wildcards[timeout]} = "None" ]; then - java -classpath $CP EstimateGM \ - -r ${snakemake_wildcards[replicate]} \ - -n ${snakemake_wildcards[n]} \ - -s ${snakemake_wildcards[s]} \ - -a ${snakemake_wildcards[a]} \ - -c ${snakemake_wildcards[c]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} -else - /usr/bin/time -f "%e" -o 
${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[replicate]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}' -fi - -## Rscript $CP/../screening.R ${snakemake_output[adjmat]} ${snakemake_output[seqgraph]} ${snakemake_output[time]} ${snakemake_input[data]} ${snakemake_output[ntests]} -## java -classpath $CP EstimateGM -r 1 -n 1000 -s 2 -a 10000 < results/data/adjmat\=/bdgraph_graphsim/p\=25/graph\=random/class\=None/size\=None/prob\=0.5/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=100/seed\=1.csv diff --git a/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json b/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json index 470b4f48..7495dcd4 100644 --- a/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json +++ b/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json @@ -16,12 +16,12 @@ }, "lambda": { "description": "A positive number to control the regularization. Typical usage is to leave the input lambda: null and have the program compute its own.", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "nlambda": { "description": "The number of regularization/thresholding parameters. 
The default value is 10", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "select_criterion": { @@ -62,4 +62,4 @@ ] }, "uniqueItems": true -} \ No newline at end of file +} diff --git a/workflow/rules/structure_learning_algorithms/huge_mb/schema.json b/workflow/rules/structure_learning_algorithms/huge_mb/schema.json index cc8dc3a6..1fbbb04e 100644 --- a/workflow/rules/structure_learning_algorithms/huge_mb/schema.json +++ b/workflow/rules/structure_learning_algorithms/huge_mb/schema.json @@ -16,12 +16,12 @@ }, "lambda": { "description": "A positive number to control the regularization. Typical usage is to leave the input lambda: null and have the program compute its own.", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "nlambda": { "description": "The number of regularization/thresholding parameters. 
The default value is 10", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "select_criterion": { @@ -31,7 +31,7 @@ "stars", null ], - "default": "ric" + "default": "stars" } }, "required": [ @@ -59,4 +59,4 @@ ] }, "uniqueItems": true -} \ No newline at end of file +} From 05b9afb7b9ad563f33008c7b9c27a9aca2bf165e Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sat, 6 Jan 2024 10:39:06 +0000 Subject: [PATCH 02/25] updating sparse config --- config/dualGL_sparse.json | 62 ++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/config/dualGL_sparse.json b/config/dualGL_sparse.json index e461fb4e..2ca02112 100644 --- a/config/dualGL_sparse.json +++ b/config/dualGL_sparse.json @@ -7,22 +7,49 @@ "parameters_id": "gwi", "seed_range": [ 1, - 2 + 5 ] }, { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { "data_id": "example1", "graph_id": "lattice", "parameters_id": "gwi", "seed_range": [ 1, - 2 + 5 ] - } + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + } ], "evaluation": { "benchmarks": { - "filename_prefix": "dualGraph_p200/", + "filename_prefix": "dualGraph_p200_sparse/", "show_seed": false, "errorbar": true, "errorbarh": false, @@ -44,8 +71,8 @@ "mcmc_traj_plots": [ { "id": "jtsampler_gts", - "burn_in": 0.5, - "thinning": 10, + "burn_in": 0.0, + "thinning": 100, "functional": [ "score" ], @@ -69,7 +96,14 @@ ] }, "graph": { - "bdgraph_graphsim": [ + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ { "id": "random-1-sparse", "p": 200, @@ -93,6 +127,14 @@ "class": null, "size": null, 
"prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 } ] }, @@ -100,7 +142,7 @@ "bdgraph_rgwish": [ { "id": "gwi", - "b": 5, + "b": 3, "threshold_conv": 0.000001 } ] @@ -129,10 +171,10 @@ "mcmc_estimator": ["threshold"], "timeout": null, "mcmc_seed": 1, - "num_samples": 3000000, + "num_samples": 10000000, "sampler": 1, "edge_penalty": 0.0, - "size_maxclique": 48, + "size_maxclique": 100, "full_output": true, "threshold": [ 0.0, From da57e2e79fde75ebf8e62121559817f910165364 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sat, 6 Jan 2024 10:44:11 +0000 Subject: [PATCH 03/25] some fixes --- workflow/rules/evaluation/benchmarks/combine_ROC_data.R | 4 ++-- .../structure_learning_algorithms/athomas_jtsampler/script.sh | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R index 2b0b058d..26313988 100644 --- a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R +++ b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R @@ -114,8 +114,8 @@ for (algorithm in active_algorithms) { FNR_skel_q3 = quantile(FNR_skel, probs = c(0.95)), time_mean = mean(time), time_median = median(time), - time_q1 = quantile(time, probs = c(0.05)), - time_q3 = quantile(time, probs = c(0.95)), + time_q1 = quantile(time, probs = c(0.05), na.rm = TRUE), + time_q3 = quantile(time, probs = c(0.95), na.rm = TRUE), n_seeds = n(), curve_vals = mean(!!as.symbol(curve_param)) ) diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh index ddbc2caa..e0e21217 100644 --- a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh @@ -11,6 +11,7 @@ if [ ${snakemake_wildcards[timeout]} = "None" ]; then -s 
${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ -c ${snakemake_wildcards[size_maxclique]} \ + -pd 5 \ -F < ${snakemake_input[data]} > $TEMP_FILENAME ## convet to benchpress file ## copying the first 4 lines and every line afterwards that has a successfull move @@ -24,10 +25,11 @@ if [ ${snakemake_wildcards[timeout]} = "None" ]; then -n ${snakemake_wildcards[num_samples]} \ -s ${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ + -pd 5 \ -c ${snakemake_wildcards[size_maxclique]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} fi else - /usr/bin/time -f "%e" -o ${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[replicate]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}' + /usr/bin/time -f "%e" -o ${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[mcmc_seed]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}' fi From 5267f5dbd5fc0a5a43a0d169507f6753bac901bd Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Wed, 14 Feb 2024 06:33:20 +0000 Subject: [PATCH 04/25] Update rule.smk Changing docker. 
--- .../structure_learning_algorithms/athomas_jtsampler/rule.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk index afa0286e..785522d2 100644 --- a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk @@ -13,7 +13,7 @@ rule athomas_jtsampler: time=touch(alg_output_time_path(module_name)), ntests=touch(alg_output_ntests_path(module_name)) container: - "docker://hallawalla/athomas_jtsampler:1.4" + "docker://hallawalla/athomas_jtsampler:1.5" script: "script.sh" From 6b1bbfb27b4be1fa34c8eba7704220cab76ed698 Mon Sep 17 00:00:00 2001 From: Mohamad Date: Sat, 23 Mar 2024 16:24:31 +0000 Subject: [PATCH 05/25] adding random precmat --- .../parameters/random_precmat/bibtex.bib | 0 .../rules/parameters/random_precmat/docs.rst | 3 ++ .../rules/parameters/random_precmat/info.json | 15 ++++++++ .../random_precmat/random_precmat.R | 37 +++++++++++++++++++ .../rules/parameters/random_precmat/rule.smk | 12 ++++++ .../parameters/random_precmat/schema.json | 33 +++++++++++++++++ 6 files changed, 100 insertions(+) create mode 100644 workflow/rules/parameters/random_precmat/bibtex.bib create mode 100644 workflow/rules/parameters/random_precmat/docs.rst create mode 100644 workflow/rules/parameters/random_precmat/info.json create mode 100644 workflow/rules/parameters/random_precmat/random_precmat.R create mode 100644 workflow/rules/parameters/random_precmat/rule.smk create mode 100644 workflow/rules/parameters/random_precmat/schema.json diff --git a/workflow/rules/parameters/random_precmat/bibtex.bib b/workflow/rules/parameters/random_precmat/bibtex.bib new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/parameters/random_precmat/docs.rst b/workflow/rules/parameters/random_precmat/docs.rst new file mode 
100644 index 00000000..23b8c3ef --- /dev/null +++ b/workflow/rules/parameters/random_precmat/docs.rst @@ -0,0 +1,3 @@ +Provides a way to create a precision matrix with entries assigned randomly from a given list. +The genrated precision is then converted to a correlation matrix. + diff --git a/workflow/rules/parameters/random_precmat/info.json b/workflow/rules/parameters/random_precmat/info.json new file mode 100644 index 00000000..df67210f --- /dev/null +++ b/workflow/rules/parameters/random_precmat/info.json @@ -0,0 +1,15 @@ +{ + "title": "Random-precision", + "version": "", + "package": { + "title": "", + "url": "" + }, + "docs_url": "", + "papers": [ + ], + "graph_types": [ + "UG" + ], + "language": "R" +} diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R new file mode 100644 index 00000000..62daf4c1 --- /dev/null +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -0,0 +1,37 @@ +# Samples and inverts a precision matrics from the G-Wishart distribution. 
+ + +seed <- as.integer(snakemake@wildcards[["seed"]]) +set.seed(seed) + +df <- read.csv(snakemake@input[["adjmat"]], header = TRUE, check.names = FALSE) +adjmat <- as.matrix(df) +p <- nrow(adjmat) + +K_values <-as.numeric(snakemake@input[['precision_values']]) + +print("Simulating randam-precision matrix") +if(length(K_values) == 1) +{ + precmat <- 1*(adjmat !=0) * K_values +} + +if(length(K_values) > 1) +{ + M = sum(adjmat !=0) + v = sample.int(length(K_values), M, replace=TRUE) + precmat<- 1*(adjmat !=0) + precmat[which(adjmat!=0)] <- K_values[v] +} + + +print("Inverting the precision matrix") +covmat <- cov2cor(solve(precmat)) +colnames(covmat) <- colnames(df) + +filename <- snakemake@output[["params"]] +write.table(covmat, + file = filename, row.names = FALSE, + quote = FALSE, col.names = TRUE, sep = "," +) + diff --git a/workflow/rules/parameters/random_precmat/rule.smk b/workflow/rules/parameters/random_precmat/rule.smk new file mode 100644 index 00000000..0eca8610 --- /dev/null +++ b/workflow/rules/parameters/random_precmat/rule.smk @@ -0,0 +1,12 @@ +rule bdgraph_rgwish: + input: + adjmat = "{output_dir}/adjmat/{adjmat}.csv" + output: + params = "{output_dir}/parameters/" + \ + pattern_strings["random_precmat"] + "/" \ + "seed={seed}/"+\ + "adjmat=/{adjmat}.csv" + container: + None + script: + "random_precmat.R" \ No newline at end of file diff --git a/workflow/rules/parameters/random_precmat/schema.json b/workflow/rules/parameters/random_precmat/schema.json new file mode 100644 index 00000000..9f382f8f --- /dev/null +++ b/workflow/rules/parameters/random_precmat/schema.json @@ -0,0 +1,33 @@ +{ + "title": "random_precision", + "description": "Generates a random precision matrix", + "type": "array", + "items": { + "title": "random_precision", + "description": "Generates a precision matrix from the given list of values, assigned randomly, then coverts it to a correlation matrix", + "type": "object", + "properties": { + "id": { + "type": "string" + }, + 
"precision_values": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "id", + "precision_values" + ], + "additionalProperties": false, + "examples": [ + { + "id": "rand_K", + "precision_values": [0.5, 0.25] + } + ] + }, + "uniqueItems": true +} From 4d1cc9909aff9d33e04adfc043265c9031860731 Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Tue, 26 Mar 2024 17:13:17 +0100 Subject: [PATCH 06/25] On the way to rand precmat --- workflow/rules/data/iid/rules.smk | 17 ++++++ .../rules/parameters/random_precmat/info.json | 2 - .../random_precmat/random_precmat.R | 59 ++++++++++++------- .../rules/parameters/random_precmat/rule.smk | 4 +- .../parameters/random_precmat/schema.json | 5 +- .../dualgl/rule.smk | 6 +- 6 files changed, 61 insertions(+), 32 deletions(-) diff --git a/workflow/rules/data/iid/rules.smk b/workflow/rules/data/iid/rules.smk index cd27eda6..5aa01c44 100644 --- a/workflow/rules/data/iid/rules.smk +++ b/workflow/rules/data/iid/rules.smk @@ -80,6 +80,23 @@ rule sample_rgwish_data: shell: "python workflow/rules/data/iid/numpy_sample_mvn_data.py {input.cov} {output.data} {wildcards.n} {wildcards.seed}" +rule sample_random_precmat_data: + input: + "workflow/rules/data/iid/numpy_sample_mvn_data.py", + cov="{output_dir}/parameters/random_precmat/{bn}/adjmat=/{adjmat}.csv" # This could probably be relaxed + output: + data="{output_dir}/data" \ + "/adjmat=/{adjmat}"\ + "/parameters=/random_precmat/{bn}"\ + "/data=/"+pattern_strings["iid"]+"/seed={seed}.csv" + wildcard_constraints: + n="[0-9]*" + container: + docker_image("trilearn") + shell: + "python workflow/rules/data/iid/numpy_sample_mvn_data.py {input.cov} {output.data} {wildcards.n} {wildcards.seed}" + + """ TODO: Standardisation should better be done in a separate preprocessing module in the data section in benchmark_setup. 
diff --git a/workflow/rules/parameters/random_precmat/info.json b/workflow/rules/parameters/random_precmat/info.json index df67210f..1dba0813 100644 --- a/workflow/rules/parameters/random_precmat/info.json +++ b/workflow/rules/parameters/random_precmat/info.json @@ -6,8 +6,6 @@ "url": "" }, "docs_url": "", - "papers": [ - ], "graph_types": [ "UG" ], diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 62daf4c1..720bc1a7 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -1,6 +1,6 @@ -# Samples and inverts a precision matrics from the G-Wishart distribution. - - +#install.packages("matrixcalc") +#library(matrixcalc) +library(Matrix) seed <- as.integer(snakemake@wildcards[["seed"]]) set.seed(seed) @@ -8,30 +8,49 @@ df <- read.csv(snakemake@input[["adjmat"]], header = TRUE, check.names = FALSE) adjmat <- as.matrix(df) p <- nrow(adjmat) -K_values <-as.numeric(snakemake@input[['precision_values']]) +K_values <- c() -print("Simulating randam-precision matrix") -if(length(K_values) == 1) -{ - precmat <- 1*(adjmat !=0) * K_values -} +# Store the non null precision values in a vector +print(snakemake@wildcards[["prec_val_3"]]) +print(is.null(snakemake@wildcards[["prec_val_1"]])) -if(length(K_values) > 1) -{ - M = sum(adjmat !=0) - v = sample.int(length(K_values), M, replace=TRUE) - precmat<- 1*(adjmat !=0) - precmat[which(adjmat!=0)] <- K_values[v] +if (snakemake@wildcards[["prec_val_1"]] != "None") { + print("prec_val_1") + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_1"]])) +} +if (snakemake@wildcards[["prec_val_2"]] != "None") { + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_2"]])) +} +if (snakemake@wildcards[["prec_val_3"]] != "None") { + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_3"]])) +} +print(K_values) + +precmat <- matrix(0, nrow = 
p, ncol = p) +# Sample untill we get a positive definite matrix +while (TRUE) { + print("Simulating random precision matrix") + if (length(K_values) == 1) { + precmat <- 1 * (adjmat != 0) * K_values + } + + if (length(K_values) > 1) { + M <- sum(adjmat != 0) + v <- sample.int(length(K_values), M, replace = TRUE) + precmat <- 1 * (adjmat != 0) + precmat[which(adjmat != 0)] <- K_values[v] + } + + eigen_values <- eigen(precmat)$values + is_positive_definite <- all(eigen_values > 0) + if (is_positive_definite) break } - - print("Inverting the precision matrix") covmat <- cov2cor(solve(precmat)) colnames(covmat) <- colnames(df) filename <- snakemake@output[["params"]] write.table(covmat, - file = filename, row.names = FALSE, - quote = FALSE, col.names = TRUE, sep = "," + file = filename, row.names = FALSE, + quote = FALSE, col.names = TRUE, sep = "," ) - diff --git a/workflow/rules/parameters/random_precmat/rule.smk b/workflow/rules/parameters/random_precmat/rule.smk index 0eca8610..1f01b51c 100644 --- a/workflow/rules/parameters/random_precmat/rule.smk +++ b/workflow/rules/parameters/random_precmat/rule.smk @@ -1,4 +1,4 @@ -rule bdgraph_rgwish: +rule random_precmat: input: adjmat = "{output_dir}/adjmat/{adjmat}.csv" output: @@ -7,6 +7,6 @@ rule bdgraph_rgwish: "seed={seed}/"+\ "adjmat=/{adjmat}.csv" container: - None + "docker://bpimages/bdgraph:2.72.0" script: "random_precmat.R" \ No newline at end of file diff --git a/workflow/rules/parameters/random_precmat/schema.json b/workflow/rules/parameters/random_precmat/schema.json index 9f382f8f..2fa60d13 100644 --- a/workflow/rules/parameters/random_precmat/schema.json +++ b/workflow/rules/parameters/random_precmat/schema.json @@ -18,10 +18,9 @@ } }, "required": [ - "id", - "precision_values" + "id" ], - "additionalProperties": false, + "additionalProperties": true, "examples": [ { "id": "rand_K", diff --git a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk 
b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk index 6f669207..7eeab75e 100644 --- a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk +++ b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk @@ -7,9 +7,6 @@ def fix_none_startalg(wildcards): def extract_filename(filename): return filename.replace("_fulloutput.tar.gz", "fulloutput_tobecompressed.csv") - - - rule extract_dualgl: input: "{whatever}/adjvecs_fulloutput.tar.gz" @@ -17,8 +14,7 @@ rule extract_dualgl: temp("{whatever}/adjvecs_fulloutput.csv") shell: "tar -xf {input} && mv {wildcards.whatever}/adjvecs_fulloutput_tobecompressed.csv {output}" - - + rule: name: module_name From ad43840b5a3eef582f2aeb8fcfe6c71f5581c2c4 Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Tue, 26 Mar 2024 17:32:42 +0100 Subject: [PATCH 07/25] Fixing pos def problem by increasing diag elements. --- .../random_precmat/random_precmat.R | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 720bc1a7..527bfaf6 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -28,22 +28,24 @@ print(K_values) precmat <- matrix(0, nrow = p, ncol = p) # Sample untill we get a positive definite matrix -while (TRUE) { - print("Simulating random precision matrix") - if (length(K_values) == 1) { - precmat <- 1 * (adjmat != 0) * K_values - } - - if (length(K_values) > 1) { - M <- sum(adjmat != 0) - v <- sample.int(length(K_values), M, replace = TRUE) - precmat <- 1 * (adjmat != 0) - precmat[which(adjmat != 0)] <- K_values[v] - } +print("Simulating random precision matrix") +if (length(K_values) == 1) { + precmat <- 1 * (adjmat != 0) * K_values +} + +if (length(K_values) > 1) { + M <- sum(adjmat != 0) + v <- sample.int(length(K_values), M, replace = TRUE) + precmat <- 1 * (adjmat != 0) + 
precmat[which(adjmat != 0)] <- K_values[v] +} +while (TRUE) { eigen_values <- eigen(precmat)$values is_positive_definite <- all(eigen_values > 0) if (is_positive_definite) break + print("Adding 0.1 to the diagonal") + precmat <- precmat + 0.1 * diag(p) } print("Inverting the precision matrix") covmat <- cov2cor(solve(precmat)) From f6c39957562813e35b333c40f015367af21ba416 Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Sun, 31 Mar 2024 21:30:35 +0100 Subject: [PATCH 08/25] adding delta to jtsampler --- .../athomas_jtsampler/schema.json | 7 ++++++- .../athomas_jtsampler/script.sh | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json index 25835590..23f7cc57 100644 --- a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json @@ -72,6 +72,10 @@ "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint", "default": 100 }, + "delta": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/nonnegnum", + "default": 1.0 + }, "edge_penalty": { "$ref": "../../../schemas/definitions.schema.json#/definitions/nonnegnum", "default": 0.0 @@ -101,7 +105,8 @@ "num_samples": 10000, "sampler": 0, "edge_penalty": 1.0, - "size_maxclique": 10000 + "size_maxclique": 10000, + "delta": 5 } ] diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh index e0e21217..55b61fc8 100644 --- a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh @@ -11,7 +11,7 @@ if [ ${snakemake_wildcards[timeout]} = "None" ]; then -s ${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ -c 
${snakemake_wildcards[size_maxclique]} \ - -pd 5 \ + -pd ${snakemake_wildcards[delta]} \ -F < ${snakemake_input[data]} > $TEMP_FILENAME ## convet to benchpress file ## copying the first 4 lines and every line afterwards that has a successfull move @@ -25,7 +25,7 @@ if [ ${snakemake_wildcards[timeout]} = "None" ]; then -n ${snakemake_wildcards[num_samples]} \ -s ${snakemake_wildcards[sampler]} \ -a ${snakemake_wildcards[edge_penalty]} \ - -pd 5 \ + -pd ${snakemake_wildcards[delta]} \ -c ${snakemake_wildcards[size_maxclique]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} fi else From 7c7f129e131f97a2e53dd61017f6de6e6f0b32ee Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Mon, 1 Apr 2024 00:45:47 +0100 Subject: [PATCH 09/25] adding athomar/dualgl --- .../athomas_jtsampler/Dockerfile | 14 ++ .../athomas_jtsampler/docker.sh | 4 + .../athomas_jtsampler/docs.rst | 1 + .../athomas_jtsampler/info.json | 23 +++ .../athomas_jtsampler/script.py | 59 +++++++ .../dualgl/Dockerfile | 40 +++++ .../dualgl/docs.rst | 0 .../dualgl/info.json | 16 ++ .../dualgl/rule.smk | 6 +- .../dualgl/schema.json | 32 ++++ .../dualgl/screening.R | 154 ++++++++++++++++++ .../dualgl/script.py | 59 +++++++ .../dualgl/test.sh | 24 +++ 13 files changed, 431 insertions(+), 1 deletion(-) create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/Dockerfile create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/docs.rst create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/info.json create mode 
100644 workflow/rules/structure_learning_algorithms/dualgl/schema.json create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/screening.R create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/script.py create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/test.sh diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile new file mode 100644 index 00000000..5dc0d5aa --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile @@ -0,0 +1,14 @@ +# Source https://github.com/melmasri/jtsampler +FROM ubuntu:20.04 + +RUN apt update -y +RUN apt install default-jre -y +RUN apt install time -y +RUN apt install make -y +RUN apt install default-jdk -y + +COPY jtsampler jtsampler +WORKDIR /jtsampler +RUN make + + diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh new file mode 100644 index 00000000..4af01bcc --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh @@ -0,0 +1,4 @@ +docker build -t athomas_jtsampler . +docker run -it athomas_jtsampler bash +docker build -t hallawalla/athomas_jtsampler:1.4 . --no-cache +docker push hallawalla/athomas_jtsampler:1.4 diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst new file mode 100644 index 00000000..30a83f32 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst @@ -0,0 +1 @@ +Abstract: Full Bayesian computational inference for model determination in undirected graphical models is currently restricted to decomposable graphs or other special cases, except for small-scale problems, say up to 15 variables. 
In this paper we develop new, more efficient methodology for such inference, by making two contributions to the computational geometry of decomposable graphs. The first of these provides sufficient conditions under which it is possible to completely connect two disconnected complete subsets of vertices, or perform the reverse procedure, yet maintain decomposability of the graph. The second is a new Markov chain Monte Carlo sampler for arbitrary positive distributions on decomposable graphs, taking a junction tree representing the graph as its state variable. \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json new file mode 100644 index 00000000..6d7dbf61 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json @@ -0,0 +1,23 @@ +{ + "title": "GT13", + "docker_image": "onceltuca/thomasgreen:1.19-bp", + "version": "1.19-bp", + "package": { + "title": "", + "url": "http" + }, + "docs_url": "", + "papers": [ + { + "title": "P. J. Green and A. Thomas (2013). 
Sampling decomposable graphs using a Markov chain on junction trees.", + "url": "https://www.jstor.org/stable/43304539" + } + ], + "outputs": [ + "graphtraj" + ], + "graph_types": [ + "DG" + ], + "language": "Java" +} \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py new file mode 100644 index 00000000..56cab9a1 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py @@ -0,0 +1,59 @@ + +def alg_shell(algorithm): + if algorithm == "gt13_multipair": + return """if [ {wildcards.datatype} = \"discrete\" ]; then + tail -n +3 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath 
/jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}' ; + fi + fi + fi + if [ {wildcards.datatype} = \"continuous\" ]; then + tail -n +2 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 
-r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + fi + fi + rm -f {output.seqgraph}.noheader + if [ -f {output.seqgraph} ]; then + sleep 1 + else + touch {output.seqgraph} + echo None > {output.time}; + fi + + """ \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile b/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile new file mode 100644 index 00000000..e1232261 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile @@ -0,0 +1,40 @@ +FROM r-base:4.3.0 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + wget \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install CRAN packages + +RUN R -e "install.packages('stringr', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('igraph', 
repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('data.table', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('graph', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('huge', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('ZIM', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('mvtnorm', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('speedglm', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('ncvreg', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('bnlearn', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('doParallel', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('SIS', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('snowfall', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('R.utils', repos='http://cran.us.r-project.org')" + +WORKDIR /tmp + +# Install XMRF +RUN wget https://cran.r-project.org/src/contrib/Archive/XMRF/XMRF_1.0.tar.gz \ + && R -e 'install.packages("XMRF_1.0.tar.gz", repos = NULL, type="source")' \ + && rm XMRF_1.0.tar.gz + +# Install equSA +RUN wget https://cran.r-project.org/src/contrib/Archive/equSA/equSA_1.2.1.tar.gz \ + && R -e 'install.packages("equSA_1.2.1.tar.gz", repos = NULL, type="source")' \ + && rm equSA_1.2.1.tar.gz + +# Cleanup +RUN rm -rf /tmp/* + diff --git a/workflow/rules/structure_learning_algorithms/dualgl/docs.rst b/workflow/rules/structure_learning_algorithms/dualgl/docs.rst new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/structure_learning_algorithms/dualgl/info.json b/workflow/rules/structure_learning_algorithms/dualgl/info.json new file mode 100644 index 00000000..7e901204 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/info.json @@ -0,0 +1,16 @@ +{ + "title": "Chordal-screening", + "version": "0.1", + "package": { + "title": "", + "url": "http" 
+ }, + "docs_url": "", + "outputs": [ + "adjmat" + ], + "graph_types": [ + "UG" + ], + "language": "Java" +} diff --git a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk index 7eeab75e..6f669207 100644 --- a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk +++ b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk @@ -7,6 +7,9 @@ def fix_none_startalg(wildcards): def extract_filename(filename): return filename.replace("_fulloutput.tar.gz", "fulloutput_tobecompressed.csv") + + + rule extract_dualgl: input: "{whatever}/adjvecs_fulloutput.tar.gz" @@ -14,7 +17,8 @@ rule extract_dualgl: temp("{whatever}/adjvecs_fulloutput.csv") shell: "tar -xf {input} && mv {wildcards.whatever}/adjvecs_fulloutput_tobecompressed.csv {output}" - + + rule: name: module_name diff --git a/workflow/rules/structure_learning_algorithms/dualgl/schema.json b/workflow/rules/structure_learning_algorithms/dualgl/schema.json new file mode 100644 index 00000000..c3865bfe --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/schema.json @@ -0,0 +1,32 @@ +{ + "description": "Green & Thomas objects", + "title": "gt13_multipair", + "type": "array", + "items": { + "title": "gt13_multipair item", + "description": "Green & Thomas algorithm for learning decomposable graphs.\n Source: Green, P. J., & Thomas, A. (2013). Sampling decomposable graphs using a Markov chain on junction trees. 
Biometrika, 100(1), 91-110.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier" + }, + "timeout": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull" + } + }, + "additionalProperties": true, + "required": [ + "id", + "timeout" + ], + "examples": [ + { + "id": "jtsampler", + "burnin_frac": 0.5, + "startalg": "jtsampler_gg", + "timeout": null + } + ] + } +} \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/screening.R b/workflow/rules/structure_learning_algorithms/dualgl/screening.R new file mode 100644 index 00000000..156773ee --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/screening.R @@ -0,0 +1,154 @@ +# R.utils is needed for the timeout so make sure this is installed. +source("workflow/scripts/utils/helpers.R") +# source("/path/in/dockerimage/filetosource.R") +## Loading local libraries +library(data.table) +library(stringr) +library(equSA) + +split <- function(x) { + y = str_sub(x,2,-2) + a = str_split(y, '-') + u = as.numeric(a[[1]][1]) + v = as.numeric(a[[1]][2]) + c(u,v) +} + +fix_edges<-function(x, move='add') { + a = split(x) + c(edge=paste(min(a), max(a), sep='-'), orig=min(a), dest=max(a), move=move) +} + +z <-function(logLambda, n, m, delta= 1, g = 1/n) { + ## returns a normalized z + gamma1<-function(x) lgamma((delta + x-1)/2) - lgamma((delta+x - 2)/2) + ## L = lambda/(sqrt(n+1) * gamma1(m+2)) + L = (2/n) * (gamma1(n+m) - gamma1(m) - log(1+1/g) + logLambda) + sL = sqrt(1-pmin(exp(L),1-1e-8)) + z = 0.5*log((1+ sL)/(1-sL)) + z * sqrt(n - m -1) +} + +rho2 <-function(logLambda, n, m, delta= 1, g = 1/n) { + ## returns a normalized z + gamma1<-function(x) lgamma((delta + x-1)/2) - lgamma((delta+x - 2)/2) + ## L = lambda/(sqrt(n+1) * gamma1(m+2)) + L = (2/n) * (gamma1(n+m) - gamma1(m) - log(1+1/g) + logLambda) + 1 - exp(L) +} + +z_tild <-function(z) { + q<-pnorm(-abs(z), log.p=TRUE) + q<-q+log(2.0) + 
s<-qnorm(q,log.p=TRUE) + (-1) * s +} + + + + + +## local testing +## traj_filename = "graphs//adjvec_1_0.1_random.csv" +## data_filename = "graphs//seed_1_0.1_random.csv" +## all.files = dir('graphs/', full.names=TRUE) +## get_graph_filenames('0.1_random') +## data =get_graph_data('0.1_random') +## true_graph = data[[1]] +## dt = data[[2]] + +## traj_filename = "results/adjvecs/adjmat=/bdgraph_graphsim/p=50/graph=random/class=None/size=None/prob=0.5/seed=2/parameters=/bdgraph_rgwish/b=3/threshold_conv=1e-07/seed=2/data=/iid/n=50/standardized=False/algorithm=/athomas_jtsampler/alg_params=/timeout=None/mcmc_seed=1/num_samples=100000/sampler=0/edge_penalty=0.0/size_maxclique=100/full_output=True/mcmc_params/mcmc_estimator=map/threshold=0.0/burnin_frac=0.5/seed=2/adjvecs_fulloutput_tobecompressed.csv" +## data_filename = "results/data/adjmat=/bdgraph_graphsim/p=50/graph=random/class=None/size=None/prob=0.5/seed=2/parameters=/bdgraph_rgwish/b=3/threshold_conv=1e-07/seed=2/data=/iid/n=50/standardized=False/seed=2.csv" + +## traj_filename = "results/adjvecs/adjmat\=/bdgraph_graphsim/p\=50/graph\=random/class\=None/size\=None/prob\=0.1/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=50/standardized\=False/algorithm\=/athomas_jtsampler/alg_params\=/timeout\=None/mcmc_seed\=1/num_samples\=2000000/sampler\=0/edge_penalty\=0.0/size_maxclique\=50/full_output\=True/seed\=1/adjvecs_fulloutput_tobecompressed.csv" + +myalg <- function() { + + output_filename <- snakemake@output[["adjmat"]] + traj_filename <- snakemake@input[["seqgraph"]] + time_filename <- snakemake@output[["time"]] + data_filename <- snakemake@input[["data"]] + ntests_filename <- snakemake@output[["ntests"]] + alpha = as.numeric(snakemake@wildcards[['alpha']]) + ## burnin <- 0.5 + + ## The algorithm should be in this function. 
+ start <- proc.time()[1] + ## extract codes + data <- read.csv(traj_filename, check.names = FALSE, header = TRUE) + data = data.table(data) + codes = data$code[1] + codes = drop(sapply(str_split(str_sub(codes, 2,-2), '-'), as.numeric)) + data = data[-c(1:3), ] + colnames(data)<-c('index', 'score', 'added', 'removed', 'code', 'delta', 'm') + + input_data = read.csv(file = data_filename, header=TRUE, check.names=FALSE) + n = nrow(input_data) + p = ncol(input_data) + + ## data = data[code %in% c(0,9,5)][m>0] + ## 9 failed disconnect, 5 failed connect, 0 success One-pair JT sampler + ## 6 faild connection, 4 fails disconnection, 0 sucess, Guidici Green + data = data[code %in% codes][m>0][m < n-1] + + ## remove burnin + ## data = data[floor(nrow(data)*burnin):nrow(data)] + + ## re-order edge index, where x-y such that x < y. + a = rbind(data[removed=='[]',as.list(fix_edges(added, 'add')), index], + data[added=='[]', as.list(fix_edges(removed, 'remove')), index]) + data = merge(a, data, by ='index') + + + ## inverting Marginal likelihood ratio + data[, B := ifelse(move=='remove', delta,-delta)] + + ## compute the Z-test + data[, z:=z(B, n, m)] + data[, zt:=z_tild(z)] + + emp = data[ ,.(N_p = .N, prop = sum(1*(code=='0') * (move =='remove') + 1*(code!='0')*(move=='add'))/.N), by = c('edge')][order(edge)] + + data = merge(data, emp, by = 'edge') + + + ## converting orig/dest to numeric + data$orig = as.numeric(data$orig) + data$dest = as.numeric(data$dest) + + + data = unique(data[, .(orig, dest, edge, zt, z, prop, N_p)]) + + + data_treat=data[, .( + orig=orig[1], + dest=dest[1], + zt= mean(zt), + z = mean(z), + .N, + N_p = N_p[1], + prop = prop[1]), by = edge] + + data_treat[, pzt := sqrt(N_p) * prop/sqrt(1e-4+prop*(1-prop))] + data_treat + #q = data_treat[, pcorselR(cbind(orig, dest, zt), ALPHA2=0.05, GRID=3, iteration=100)] + q = as.numeric(snakemake@wildcards[['alpha']]) + + data_treat[, est_edge := 1*(pnorm(zt)> 1-q)] + + adjmat <- matrix(0, nrow = p, ncol = p) + ed 
= data_treat[est_edge==1][, cbind(orig+1, dest+1)] + adjmat[ed] <- 1 + + adjmat <- 1 * (adjmat | t(adjmat)) + diag(adjmat) <- 0 + + totaltime <- proc.time()[1] - start + colnames(adjmat) <- colnames(input_data) # Get the labels from the data + write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + write(totaltime, file = time_filename) + # Write the true number of c.i. tests here if possible. + cat("None", file = ntests_filename, sep = "\n") +} + +add_timeout(myalg) diff --git a/workflow/rules/structure_learning_algorithms/dualgl/script.py b/workflow/rules/structure_learning_algorithms/dualgl/script.py new file mode 100644 index 00000000..56cab9a1 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/script.py @@ -0,0 +1,59 @@ + +def alg_shell(algorithm): + if algorithm == "gt13_multipair": + return """if [ {wildcards.datatype} = \"discrete\" ]; then + tail -n +3 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore 
{wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}' ; + fi + fi + fi + if [ {wildcards.datatype} = \"continuous\" ]; then + tail -n +2 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif 
[ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + fi + fi + rm -f {output.seqgraph}.noheader + if [ -f {output.seqgraph} ]; then + sleep 1 + else + touch {output.seqgraph} + echo None > {output.time}; + fi + + """ \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/test.sh b/workflow/rules/structure_learning_algorithms/dualgl/test.sh new file mode 100644 index 00000000..7c411dc2 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/test.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +CP=$(pwd)/workflow/rules/structure_learning_algorithms/athomas_jtsampler/jtsampler + +if [ 
${snakemake_wildcards[timeout]} = "None" ]; then + /usr/bin/time -q -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ + -r ${snakemake_wildcards[replicate]} \ + -n ${snakemake_wildcards[n]} \ + -s ${snakemake_wildcards[s]} \ + -a ${snakemake_wildcards[a]} \ + -c ${snakemake_wildcards[c]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} +else + /usr/bin/time -q -f "%e" -o ${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[replicate]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}' +fi + + +java -classpath $CP EstimateGM -r 1 -n 1000 -s 2 -a 10000 < results/data/adjmat\=/bdgraph_graphsim/p\=25/graph\=random/class\=None/size\=None/prob\=0.5/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=100/seed\=1.csv + + + + +java EstimateGM -r 1 -n 1000 -s 2 -a 0.0 -c 10000 < seed\=1.csv + + From 415a8da701ac239d6ef6386cccaa9bdac898aa39 Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Mon, 1 Apr 2024 00:54:19 +0100 Subject: [PATCH 10/25] updating psi learner --- .../equsa_psilearner/psi_learner.R | 62 +++++++++++++++++-- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R b/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R index ee342051..9d505003 100644 --- a/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R +++ b/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R @@ -2,6 +2,49 @@ source("workflow/scripts/utils/helpers.R") library(equSA) +equSAR_localR<-function(iData, iMaxNei, ALPHA1=0.05, ALPHA2=0.05, GRID=2, iteration=100) { + # Wrote this to avoid the sigfault issue when running the C code in equSA1 + p <- dim(iData)[2] + A <- matrix(0, ncol=p, nrow=p) + U <- psical(iData,iMaxNei, ALPHA1,GRID,iteration) + ## U should be cbind(row, col, psi score) 
+ ## when the q-value is too large or too small + ## the C code returns a segfault memory issue + ## the R code returns a matix of zeroes, therefore, here we check it + ## and returns an empty mattix + if(U[1,1]==0 & U[1,2]==0) + return(list(score = U, Adj = A, sigfault=TRUE)) + + z<-U[,3] + q<-pnorm(-abs(z), log.p=TRUE) + q<-q+log(2.0) + s<-qnorm(q,log.p=TRUE) + s<-(-1)*s + UU<-cbind(U[,1:2],s) + ## subsampling for psi scores ### + N <- length(U[,1]) + ratio<-ceiling(N/100000) + UU<-UU[order(UU[,3]), 1:3] + m<-floor(N/ratio) + m0<-N-m*ratio + s<-sample.int(ratio,m,replace=TRUE) + for(i in 1:length(s)) s[i]<-s[i]+(i-1)*ratio + if(m0>0) { + s0<-sample.int(m0,1)+length(s)*ratio + s<-c(s,s0) + } + Us<-UU[s,] + y <- round(Us,6) + + ## multiple hypothesis tests ### + q = pcorselR(y, ALPHA2, GRID, iteration) + s = y[,3]> q + ij = y[s, c(1,2)] + A[ij] <-1 + A = A + t(A) + list(score = U, Adj = A, sigfault=FALSE) +} + myalg <- function() { output_filename <- snakemake@output[["adjmat"]] time_filename <- snakemake@output[["time"]] @@ -15,15 +58,15 @@ myalg <- function() { ## The algorithm should be in this function. 
start <- proc.time()[1] ## extract codes - input_data <- read.csv(data_filename) + input_data <- read.csv(data_filename, header = TRUE, check.names = FALSE) n <- nrow(input_data) p <- ncol(input_data) - neighborhood <- n / log(n) + neighborhood <- floor(n / log(n)) + 1 if (!is.null(snakemake@wildcards[["neig"]])) { neighborhood <- as.integer(snakemake@wildcards[["neig"]]) } - - res <- equSAR( + + res <- equSAR_localR( iData = input_data, iMaxNei = neighborhood, ALPHA1 = alpha1, @@ -35,11 +78,18 @@ myalg <- function() { adjmat <- res$Adj totaltime <- proc.time()[1] - start - colnames(adjmat) <- names(input_data) # Get the labels from the data - write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + if(res$sigfault) { + file.create(file = output_filename) + totaltime = "None" + }else { + colnames(adjmat) <- names(input_data) # Get the labels from the data + write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + } write(totaltime, file = time_filename) # Write the true number of c.i. tests here if possible. 
cat("None", file = ntests_filename, sep = "\n") } add_timeout(myalg) + + From 914fd14e38a75011dbe4ba1a602000a12667a176 Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Mon, 1 Apr 2024 00:55:29 +0100 Subject: [PATCH 11/25] adding module strings --- workflow/rules/module_strings.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index 86e7a1ab..2e07ccfa 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -11,6 +11,14 @@ from typing import Optional, List, Union, Tuple +def id_to_alg(id): + for key, alg in config["resources"]["structure_learning_algorithms"].items(): + for obj in alg: + if obj["id"] == id: + return key + + return None + def input_algorithm(wildcards): """This algorithm can be use as input field in n algorithm rule to pass the output graph of another algorithm as input. The requirement is that the algorithm JSON object @@ -52,6 +60,7 @@ def idtopath(mylist, json_string): return json_string[str(mylist)] + json_string = {} json_string_mcmc_noest = {} @@ -190,6 +199,29 @@ def gen_data_string_from_conf(data_id, seed, seed_in_path=True): **data) + +if "dualgl" in pattern_strings: + dualgl_list = config["resources"]["structure_learning_algorithms"]["dualgl"] + # The path to the startspace algorithm is extended here + + def local_idtopath(idlist): + + # mylist can either be None, an id, or a list of ids. + # The id may correspond to an MCMC alg, then the estimator parameters should be added too. 
+ alg = id_to_alg(idlist) + vals = config["resources"]["structure_learning_algorithms"][alg][0] + if idlist is None: + return "None" + return expand(pattern_strings[alg], **vals) + + + for items in dualgl_list: + items["startalg"] = local_idtopath(items["startalg"]) + + json_string.update({val["id"]: expand(pattern_strings["dualgl"], **val,) + for val in dualgl_list } ) + + # def path_to_input_algorithm_graph(alg_id): # return "{output_dir}/adjmat_estimate/{data}/"\ # "algorithm=/" + json_string[alg_id][0] + "/" +\ From 47d679ef984d629f0cb60488da5b0736b0f773a0 Mon Sep 17 00:00:00 2001 From: Mohamad Elmasri Date: Mon, 1 Apr 2024 01:18:40 +0100 Subject: [PATCH 12/25] updating the random matrix --- .../parameters/random_precmat/random_precmat.R | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 527bfaf6..5bfd2e1a 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -1,6 +1,5 @@ -#install.packages("matrixcalc") -#library(matrixcalc) library(Matrix) + seed <- as.integer(snakemake@wildcards[["seed"]]) set.seed(seed) @@ -11,20 +10,17 @@ p <- nrow(adjmat) K_values <- c() # Store the non null precision values in a vector -print(snakemake@wildcards[["prec_val_3"]]) -print(is.null(snakemake@wildcards[["prec_val_1"]])) -if (snakemake@wildcards[["prec_val_1"]] != "None") { - print("prec_val_1") +if (!is.null(snakemake@wildcards[["prec_val_1"]])) { K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_1"]])) } -if (snakemake@wildcards[["prec_val_2"]] != "None") { +if (!is.null(snakemake@wildcards[["prec_val_2"]])) { K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_2"]])) } -if (snakemake@wildcards[["prec_val_3"]] != "None") { +if (!is.null(snakemake@wildcards[["prec_val_3"]])) { K_values <- c(K_values, 
as.numeric(snakemake@wildcards[["prec_val_3"]])) } -print(K_values) + precmat <- matrix(0, nrow = p, ncol = p) # Sample untill we get a positive definite matrix @@ -42,7 +38,7 @@ if (length(K_values) > 1) { } while (TRUE) { eigen_values <- eigen(precmat)$values - is_positive_definite <- all(eigen_values > 0) + is_positive_definite <- all(Re(eigen_values) > 0) if (is_positive_definite) break print("Adding 0.1 to the diagonal") precmat <- precmat + 0.1 * diag(p) From b1009efd450eda0700bcc37e4ba15e13f6bdbff4 Mon Sep 17 00:00:00 2001 From: Felix Rios Date: Mon, 15 Apr 2024 08:04:30 +0200 Subject: [PATCH 13/25] Adding missing info.json files. --- .../athomas_jtsampler/info.json | 22 +++++++++++++++++++ .../dualgl/info.json | 22 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json create mode 100644 workflow/rules/structure_learning_algorithms/dualgl/info.json diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json new file mode 100644 index 00000000..0817b02f --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json @@ -0,0 +1,22 @@ +{ + "title": "Alun Thomas samplers", + "version": "", + "package": { + "title": "Dualgl", + "url": "https://github.com/enricogiudice/dualPC" + }, + "docs_url": "", + "papers": [ + { + "title": "", + "url": "" + } + ], + "outputs": [ + "adjmat" + ], + "graph_types": [ + "UG" + ], + "language": "R" +} \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/info.json b/workflow/rules/structure_learning_algorithms/dualgl/info.json new file mode 100644 index 00000000..eccef7ac --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/info.json @@ -0,0 +1,22 @@ +{ + "title": "Dualgl", + "version": "", + "package": { + "title": "Dualgl", + "url": 
"https://github.com/enricogiudice/dualPC" + }, + "docs_url": "", + "papers": [ + { + "title": "", + "url": "" + } + ], + "outputs": [ + "adjmat" + ], + "graph_types": [ + "UG" + ], + "language": "R" +} \ No newline at end of file From ea89e25bb5bb8312f6d2f70dada0bfcf86bfcada Mon Sep 17 00:00:00 2001 From: melmasri Date: Wed, 21 Aug 2024 16:54:37 +0100 Subject: [PATCH 14/25] adding confirg for high dim --- config/dualGL_sparse_randpreci.json | 225 ++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 config/dualGL_sparse_randpreci.json diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json new file mode 100644 index 00000000..58c980b2 --- /dev/null +++ b/config/dualGL_sparse_randpreci.json @@ -0,0 +1,225 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, +{"data_id": "example1", +"graph_id": "lattice", +"parameters_id": "precmat", +"seed_range": [ + 1, + 5 +] +}, +{ +"data_id": "example1", +"graph_id": "ar5", +"parameters_id": "precmat", +"seed_range": [ + 1, + 5 +] +}, +{ +"data_id": "example1", +"graph_id": "circle", +"parameters_id": "precmat", +"seed_range": [ + 1, + 5 +] +} +], +"evaluation": { +"benchmarks": { +"filename_prefix": "dualGraph_p200_sparse_precmat/", +"show_seed": false, +"errorbar": true, +"errorbarh": false, +"scatter": true, +"path": true, +"text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb" +] +}, +"graph_true_plots": true, +"graph_true_stats": false, +"ggally_ggpairs": false, +"graph_plots": [], +"mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", +"size" + ], + "active": false +} +], +"mcmc_heatmaps": [], +"mcmc_autocorr_plots": 
[] +} +}, +"resources": { +"data": { +"iid": [ +{ + "id": "example1", + "standardized": false, + "n": [ + 20, 50, 100 + ] +} +] +}, +"graph": { +"trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 +} +], +"bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 +}, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 +}, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 +}, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 +} +] +}, +"parameters": { +"random_precmat":[ +{ +"id" : "precmat", +"prec_val_1" : 0.5, +"prec_val_2": 0.25 +} +] +,"trilearn_intra-class": [ +{ +"id": "intra-class", +"rho": 0.15, +"sigma2": 1.0 +} +], +"bdgraph_rgwish": [ +{ + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 +} +] +}, +"structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.98] + + }], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.99] + } + ], +"athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": ["threshold"], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 20000000, + "sampler": 1, +"delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] +} +], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": 
null + } + ] +} +} +} \ No newline at end of file From 511ebb7dc6374250a0b8b01d98654cef89ed78b5 Mon Sep 17 00:00:00 2001 From: melmasri Date: Wed, 21 Aug 2024 16:55:35 +0100 Subject: [PATCH 15/25] sparse run --- config/dualGL_sparse_randpreci.json | 471 ++++++++++++++++------------ 1 file changed, 262 insertions(+), 209 deletions(-) diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json index 58c980b2..3954fa4e 100644 --- a/config/dualGL_sparse_randpreci.json +++ b/config/dualGL_sparse_randpreci.json @@ -1,225 +1,278 @@ { "benchmark_setup": { "data": [ - { + { "data_id": "example1", - "graph_id": "random-1-ultra-sparse", - "parameters_id": "precmat", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "precmat", "seed_range": [ - 1, - 5 + 1, + 5 ] - }, - { + }, + { "data_id": "example1", "graph_id": "random-1-sparse", "parameters_id": "precmat", "seed_range": [ - 1, - 5 + 1, + 5 ] - }, -{"data_id": "example1", -"graph_id": "lattice", -"parameters_id": "precmat", -"seed_range": [ - 1, - 5 -] -}, -{ -"data_id": "example1", -"graph_id": "ar5", -"parameters_id": "precmat", -"seed_range": [ - 1, - 5 -] -}, -{ -"data_id": "example1", -"graph_id": "circle", -"parameters_id": "precmat", -"seed_range": [ - 1, - 5 -] -} -], -"evaluation": { -"benchmarks": { -"filename_prefix": "dualGraph_p200_sparse_precmat/", -"show_seed": false, -"errorbar": true, -"errorbarh": false, -"scatter": true, -"path": true, -"text": false, - "ids": [ - "dualGL-gt13", - "jtsampler_gts", - "psi-learn", - "glasso", - "mb" -] -}, -"graph_true_plots": true, -"graph_true_stats": false, -"ggally_ggpairs": false, -"graph_plots": [], -"mcmc_traj_plots": [ - { - "id": "jtsampler_gts", - "burn_in": 0.0, - "thinning": 100, - "functional": [ - "score", -"size" - ], - "active": false -} -], -"mcmc_heatmaps": [], -"mcmc_autocorr_plots": [] -} -}, -"resources": { -"data": { -"iid": [ -{ - "id": "example1", - "standardized": false, - "n": [ - 20, 50, 100 - ] -} -] -}, 
-"graph": { -"trilearn_rand_bandmat": [ - { - "id": "ar5", - "max_bandwidth": 5, - "dim": 200 -} -], -"bdgraph_graphsim": [ - { - "id": "random-1-sparse", - "p": 200, - "graph": "random", - "class": null, - "size": null, - "prob": 0.1 -}, - { - "id": "random-1-ultra-sparse", - "p": 200, - "graph": "random", - "class": null, - "size": null, - "prob": 0.01 -}, - { - "id": "lattice", - "p": 200, - "graph": "lattice", - "class": null, - "size": null, - "prob": 0.1 -}, - { - "id": "circle", - "p": 200, - "graph": "circle", - "class": null, - "size": null, - "prob": 0.2 -} -] -}, -"parameters": { -"random_precmat":[ -{ -"id" : "precmat", -"prec_val_1" : 0.5, -"prec_val_2": 0.25 -} -] -,"trilearn_intra-class": [ -{ -"id": "intra-class", -"rho": 0.15, -"sigma2": 1.0 -} -], -"bdgraph_rgwish": [ -{ - "id": "gwi", - "b": 5, - "threshold_conv": 0.000001 -} -] -}, -"structure_learning_algorithms": { - "equsa_psilearner": [ - { - "id": "psi-learn", - "timeout": null, - "alpha1": 0.3, - "alpha2": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.98] - - }], - "dualgl": [ + }, { - "id": "dualGL-gt13", - "timeout": null, - "startalg": "jtsampler_gts", - "alpha": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.99] - } - ], -"athomas_jtsampler": [ - { - "id": "jtsampler_gts", - "burnin_frac": 0.5, - "mcmc_estimator": ["threshold"], - "timeout": null, - "mcmc_seed": 1, - "num_samples": 20000000, - "sampler": 1, -"delta": 5, - "edge_penalty": 0, - "size_maxclique": 500, - "full_output": true, - "threshold": [ - 0.0, - 0.1, - 0.2, - 0.3, - 0.4, - 0.5, - 0.6, - 0.7, - 0.8, - 0.9, - 1.0 - ] -} -], - "huge_glasso": [ + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, { - "id": "glasso", - "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], - "timeout": null - } - ], - "huge_mb": [ + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] 
+ }, { - "id": "mb", - "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], - "timeout": null + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_precmat/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.1 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.15, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 
0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 20000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } } - ] -} -} } \ No newline at end of file From b0cc220263d1d17a163cee9e50dd6df4c5779246 Mon Sep 17 00:00:00 2001 From: melmasri Date: Wed, 21 Aug 2024 22:25:40 +0100 Subject: [PATCH 16/25] updated the random_precmat.R and dualGL_sparse_randpreci.json files --- config/dualGL_sparse_randpreci.json | 5 +-- .../random_precmat/random_precmat.R | 33 ++++++++++++------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json index 3954fa4e..d4f7f36f 100644 --- a/config/dualGL_sparse_randpreci.json +++ b/config/dualGL_sparse_randpreci.json @@ -145,8 +145,9 @@ "random_precmat": [ { "id": "precmat", - "prec_val_1": 0.5, - "prec_val_2": 0.1 + "prec_val_1": 0.75, + "prec_val_2": 0.25, + "prec_val_3": 0.1 } ], "trilearn_intra-class": [ diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 5bfd2e1a..55921180 100644 --- 
a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -31,18 +31,29 @@ if (length(K_values) == 1) { } if (length(K_values) > 1) { - M <- sum(adjmat != 0) - v <- sample.int(length(K_values), M, replace = TRUE) - precmat <- 1 * (adjmat != 0) - precmat[which(adjmat != 0)] <- K_values[v] -} -while (TRUE) { - eigen_values <- eigen(precmat)$values - is_positive_definite <- all(Re(eigen_values) > 0) - if (is_positive_definite) break - print("Adding 0.1 to the diagonal") - precmat <- precmat + 0.1 * diag(p) + is_positive_definite <- FALSE + k = 0 + while (!is_positive_definite) { + M <- sum(adjmat != 0) + v <- sample.int(length(K_values), M, replace = TRUE) + precmat <- 1 * (adjmat != 0) + precmat[which(adjmat != 0)] <- K_values[v] + eigen_values <- eigen(precmat)$values + is_positive_definite <- all(Re(eigen_values) > 0) + print(paste("Iteration", k, "is_positive_definite", is_positive_definite)) + } + #M <- sum(adjmat != 0) + #v <- sample.int(length(K_values), M, replace = TRUE) + #precmat <- 1 * (adjmat != 0) + #precmat[which(adjmat != 0)] <- K_values[v] } +# while (TRUE) { +# eigen_values <- eigen(precmat)$values +# is_positive_definite <- all(Re(eigen_values) > 0) +# if (is_positive_definite) break +# print("Adding 0.1 to the diagonal") +# precmat <- precmat + 0.1 * diag(p) +# } print("Inverting the precision matrix") covmat <- cov2cor(solve(precmat)) colnames(covmat) <- colnames(df) From 77dad3f11abf9a1ebb6c136adb8ba3f599741876 Mon Sep 17 00:00:00 2001 From: melmasri Date: Fri, 23 Aug 2024 12:51:28 +0100 Subject: [PATCH 17/25] updating config --- config/dualGL_sparse_randpreci.json | 5 ++--- workflow/rules/parameters/random_precmat/random_precmat.R | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json index d4f7f36f..7f6db379 100644 --- a/config/dualGL_sparse_randpreci.json +++ 
b/config/dualGL_sparse_randpreci.json @@ -145,9 +145,8 @@ "random_precmat": [ { "id": "precmat", - "prec_val_1": 0.75, - "prec_val_2": 0.25, - "prec_val_3": 0.1 + "prec_val_1": 0.5, + "prec_val_3": 0.25 } ], "trilearn_intra-class": [ diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 55921180..fc3a0c8b 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -40,6 +40,7 @@ if (length(K_values) > 1) { precmat[which(adjmat != 0)] <- K_values[v] eigen_values <- eigen(precmat)$values is_positive_definite <- all(Re(eigen_values) > 0) + k = k+1 print(paste("Iteration", k, "is_positive_definite", is_positive_definite)) } #M <- sum(adjmat != 0) From bd466dd54b8eb32fd9866d7ec1578d4e1c6c562b Mon Sep 17 00:00:00 2001 From: melmasri Date: Fri, 23 Aug 2024 13:03:04 +0100 Subject: [PATCH 18/25] fixing config --- config/dualGL_sparse_randpreci.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json index 7f6db379..dac81538 100644 --- a/config/dualGL_sparse_randpreci.json +++ b/config/dualGL_sparse_randpreci.json @@ -146,7 +146,7 @@ { "id": "precmat", "prec_val_1": 0.5, - "prec_val_3": 0.25 + "prec_val_2": 0.25 } ], "trilearn_intra-class": [ From e048e977203b0ce4d6f187d10bd229885082a2f4 Mon Sep 17 00:00:00 2001 From: melmasri Date: Fri, 23 Aug 2024 14:10:51 +0100 Subject: [PATCH 19/25] updating the random precision --- workflow/rules/parameters/random_precmat/random_precmat.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index fc3a0c8b..1b1d6ae9 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -38,6 +38,8 @@ if 
(length(K_values) > 1) { v <- sample.int(length(K_values), M, replace = TRUE) precmat <- 1 * (adjmat != 0) precmat[which(adjmat != 0)] <- K_values[v] + + diag(precmat) <- diag(precmat) + 0.1 eigen_values <- eigen(precmat)$values is_positive_definite <- all(Re(eigen_values) > 0) k = k+1 From 2cf9dd1bc8a3812de4f1d30b2aad59108709e13a Mon Sep 17 00:00:00 2001 From: melmasri Date: Fri, 23 Aug 2024 17:18:13 +0100 Subject: [PATCH 20/25] modified precmat to be autoregressive in distance from diag --- .../rules/parameters/random_precmat/random_precmat.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R index 1b1d6ae9..7647b1fc 100644 --- a/workflow/rules/parameters/random_precmat/random_precmat.R +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -37,9 +37,12 @@ if (length(K_values) > 1) { M <- sum(adjmat != 0) v <- sample.int(length(K_values), M, replace = TRUE) precmat <- 1 * (adjmat != 0) - precmat[which(adjmat != 0)] <- K_values[v] - - diag(precmat) <- diag(precmat) + 0.1 + indx = which (adjmat != 0, arr.ind = TRUE) + rho = K_values[1] + distance_to_diag = abs(indx[, 'row'] - indx[, 'col']) + r2 = rho^distance_to_diag + precmat[indx] <- r2 + diag(precmat) <- 1 eigen_values <- eigen(precmat)$values is_positive_definite <- all(Re(eigen_values) > 0) k = k+1 From 725da792e1a8a4fcafbf603955be8ba3af14b6df Mon Sep 17 00:00:00 2001 From: melmasri Date: Sat, 24 Aug 2024 17:41:12 +0100 Subject: [PATCH 21/25] adding interclass config --- config/dualGL_sparse_interclass.json | 310 +++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 config/dualGL_sparse_interclass.json diff --git a/config/dualGL_sparse_interclass.json b/config/dualGL_sparse_interclass.json new file mode 100644 index 00000000..fab7a4cd --- /dev/null +++ b/config/dualGL_sparse_interclass.json @@ -0,0 +1,310 @@ +{ + "benchmark_setup": 
{ + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_intra-class/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb", + "bdgraph" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100, + 200, + 500 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": 
null, + "prob": 0.2 + }, + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.25 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } + } +} \ No newline at end of file From 2c6d302bffb596b83acfda32587f8a4082d3dee2 Mon Sep 17 00:00:00 2001 From: melmasri Date: Sat, 24 Aug 2024 21:27:42 +0100 
Subject: [PATCH 22/25] bug with json file --- config/dualGL_sparse_interclass.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/dualGL_sparse_interclass.json b/config/dualGL_sparse_interclass.json index fab7a4cd..748b826e 100644 --- a/config/dualGL_sparse_interclass.json +++ b/config/dualGL_sparse_interclass.json @@ -141,7 +141,7 @@ "class": null, "size": null, "prob": 0.2 - }, + } ] }, "parameters": { From a66e834d0a82994355ac545b5256a87a818d48b7 Mon Sep 17 00:00:00 2001 From: melmasri Date: Wed, 28 Aug 2024 14:41:11 +0100 Subject: [PATCH 23/25] adding a small config file --- config/dualGL_sparse_interclass_simple.json | 136 ++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 config/dualGL_sparse_interclass_simple.json diff --git a/config/dualGL_sparse_interclass_simple.json b/config/dualGL_sparse_interclass_simple.json new file mode 100644 index 00000000..b9cecf23 --- /dev/null +++ b/config/dualGL_sparse_interclass_simple.json @@ -0,0 +1,136 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 2 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_small/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20 + ] + } + ] + }, + "graph": { + "bdgraph_graphsim": [ + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": 
"random", + "class": null, + "size": null, + "prob": 0.01 + } + ] + }, + "parameters": { + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ] + }, + "structure_learning_algorithms": { + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ] + } + } +} \ No newline at end of file From 3c225c72486bd2b92cc0968b536bbc89532a42a9 Mon Sep 17 00:00:00 2001 From: melmasri Date: Fri, 30 Aug 2024 17:47:23 +0100 Subject: [PATCH 24/25] adding wishart run --- config/dualGL_sparse_wishart.json | 310 ++++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 config/dualGL_sparse_wishart.json diff --git a/config/dualGL_sparse_wishart.json b/config/dualGL_sparse_wishart.json new file mode 100644 index 00000000..1eb38bf2 --- /dev/null +++ b/config/dualGL_sparse_wishart.json @@ -0,0 +1,310 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "gwi", + 
"seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_gwi/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb", + "bdgraph" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100, + 200, + 500 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.25 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + 
"timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } + } +} \ No newline at end of file From 13612a3eb91be0e6fdff585235a3e4778b9f0a6f Mon Sep 17 00:00:00 2001 From: melmasri Date: Tue, 3 Sep 2024 22:50:04 +0100 Subject: [PATCH 25/25] random percent.. 
--- config/dualGL_sparse_randpreci.json | 34 +++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json index dac81538..b49863f6 100644 --- a/config/dualGL_sparse_randpreci.json +++ b/config/dualGL_sparse_randpreci.json @@ -61,7 +61,8 @@ "jtsampler_gts", "psi-learn", "glasso", - "mb" + "mb", + "bdgraph" ] }, "graph_true_plots": true, @@ -187,6 +188,35 @@ ] } ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], "dualgl": [ { "id": "dualGL-gt13", @@ -218,7 +248,7 @@ ], "timeout": null, "mcmc_seed": 1, - "num_samples": 20000000, + "num_samples": 10000000, "sampler": 1, "delta": 5, "edge_penalty": 0,