felixleopoldo · melmasri · Jan 2, 2024 · Jan 3, 2024 · Jan 6, 2024 · Jan 6, 2024
diff --git a/config/dualGL_sparse.json b/config/dualGL_sparse.json
@@ -0,0 +1,212 @@
+{
+  "benchmark_setup": {
+    "data": [
+      {
+        "data_id": "example1",
+        "graph_id": "random-1-ultra-sparse",
+        "parameters_id": "gwi",
+        "seed_range": [
+          1,
+          5
+        ]
+      },
+      {
+        "data_id": "example1",
+        "graph_id": "random-1-sparse",
+        "parameters_id": "gwi",
+        "seed_range": [
+          1,
+          5
+        ]
+      },
+      {
+        "data_id": "example1",
+        "graph_id": "lattice",
+        "parameters_id": "gwi",
+        "seed_range": [
+          1,
+          5
+        ]
+      },
+      {
+        "data_id": "example1",
+        "graph_id": "ar5",
+        "parameters_id": "gwi",
+        "seed_range": [
+          1,
+          5
+        ]
+      },
+      {
+        "data_id": "example1",
+        "graph_id": "circle",
+        "parameters_id": "gwi",
+        "seed_range": [
+          1,
+          5
+        ]
+      }
+    ],
+    "evaluation": {
+      "benchmarks": {
+        "filename_prefix": "dualGraph_p200_sparse/",
+        "show_seed": false,
+        "errorbar": true,
+        "errorbarh": false,
+        "scatter": true,
+        "path": true,
+        "text": false,
+        "ids": [
+          "dualGL-gt13",
+          "jtsampler_gts",
+          "psi-learn",
+          "glasso",
+          "mb"
+        ]
+      },
+      "graph_true_plots": true,
+      "graph_true_stats": false,
+      "ggally_ggpairs": false,
+      "graph_plots": [],
+      "mcmc_traj_plots": [
+        {
+          "id": "jtsampler_gts",
+          "burn_in": 0.0,
+          "thinning": 100,
+          "functional": [
+            "score"
+          ],
+          "active": true
+        }
+      ],
+      "mcmc_heatmaps": [],
+      "mcmc_autocorr_plots": []
+    }
+  },
+  "resources": {
+    "data": {
+      "iid": [
+        {
+          "id": "example1",
+          "standardized": false,
+          "n": [
+            20, 50, 100
+          ]
+        }
+      ]
+    },
+    "graph": {
+      "trilearn_rand_bandmat": [
+        {
+          "id": "ar5",
+          "max_bandwidth": 5,
+          "dim": 200
+        }
+      ],
+      "bdgraph_graphsim": [
+        {
+          "id": "random-1-sparse",
+          "p": 200,
+          "graph": "random",
+          "class": null,
+          "size": null,
+          "prob": 0.1
+        },
+        {
+          "id": "random-1-ultra-sparse",
+          "p": 200,
+          "graph": "random",
+          "class": null,
+          "size": null,
+          "prob": 0.01
+        },
+        {
+          "id": "lattice",
+          "p": 200,
+          "graph": "lattice",
+          "class": null,
+          "size": null,
+          "prob": 0.1
+        },
+        {
+          "id": "circle",
+          "p": 200,
+          "graph": "circle",
+          "class": null,
+          "size": null,
+          "prob": 0.2
+        }
+      ]
+    },
+    "parameters": {
+      "bdgraph_rgwish": [
+        {
+          "id": "gwi",
+          "b": 3,
+          "threshold_conv": 0.000001
+        }
+      ]
+    },
+    "structure_learning_algorithms": {
+      "equsa_psilearner": [
+        {
+          "id": "psi-learn",
+          "timeout": null,
+          "alpha1": 0.3,
+          "alpha2": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.98]
+        }
+      ],
+      "dualgl": [
+        {
+          "id": "dualGL-gt13",
+          "timeout": null,
+          "startalg": "jtsampler_gts",
+          "alpha": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.99]
+        }
+      ],
+      "athomas_jtsampler": [
+        {
+          "id": "jtsampler_gts",
+          "burnin_frac": 0.5,
+          "mcmc_estimator": ["threshold"],
+          "timeout": null,
+          "mcmc_seed": 1,
+          "num_samples": 10000000,
+          "sampler": 1,
+          "edge_penalty": 0.0,
+          "size_maxclique": 100,
+          "full_output": true,
+          "threshold": [
+            0.0,
+            0.1,
+            0.2,
+            0.3,
+            0.4,
+            0.5,
+            0.6,
+            0.7,
+            0.8,
+            0.9,
+            1.0
+          ]
+        }
+      ],
+      "huge_glasso": [
+        {
+          "id": "glasso",
+          "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01],
+          "timeout": null
+        }
+      ],
+      "huge_mb": [
+        {
+          "id": "mb",
+          "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01],
+          "timeout": null
+        }
+      ]
+    }
+  }
+}
+
+
diff --git a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R
@@ -114,8 +114,8 @@ for (algorithm in active_algorithms) {
         FNR_skel_q3 = quantile(FNR_skel, probs = c(0.95)),
         time_mean = mean(time),
         time_median = median(time),
-        time_q1 = quantile(time, probs = c(0.05)),
-        time_q3 = quantile(time, probs = c(0.95)),
+        time_q1 = quantile(time, probs = c(0.05), na.rm = TRUE),
+        time_q3 = quantile(time, probs = c(0.95), na.rm = TRUE),
         n_seeds = n(),
         curve_vals = mean(!!as.symbol(curve_param))
       )

diff --git a/workflow/rules/parameters/random_precmat/bibtex.bib b/workflow/rules/parameters/random_precmat/bibtex.bib
diff --git a/workflow/rules/parameters/random_precmat/docs.rst b/workflow/rules/parameters/random_precmat/docs.rst
@@ -0,0 +1,3 @@
+Provides a way to create a precision matrix with entries assigned randomly from a given list. 
+The generated precision matrix is then converted to a correlation matrix.
+
diff --git a/workflow/rules/parameters/random_precmat/info.json b/workflow/rules/parameters/random_precmat/info.json
@@ -0,0 +1,15 @@
+{
+    "title": "Random-precision",
+    "version": "",
+    "package": {
+        "title": "",
+        "url": ""
+    },
+    "docs_url": "",
+    "papers": [],
+    "graph_types": ["UG"],
+    "language": "R"
+}
diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R
@@ -0,0 +1,37 @@
+# Builds a random precision matrix on the support of a given graph and writes
+# the implied correlation matrix.
+#
+# Each undirected edge of the adjacency matrix gets a precision entry drawn
+# uniformly (with replacement) from the user-supplied list of values. The
+# diagonal is then chosen so that the matrix is strictly diagonally dominant,
+# which makes it symmetric positive definite and therefore invertible. The
+# inverse is rescaled to a correlation matrix and written as CSV.
+
+seed <- as.integer(snakemake@wildcards[["seed"]])
+set.seed(seed)
+
+df <- read.csv(snakemake@input[["adjmat"]], header = TRUE, check.names = FALSE)
+adjmat <- as.matrix(df)
+p <- nrow(adjmat)
+
+# precision_values is a module parameter, not a file: the rule declares only the
+# adjacency matrix as input, so the value has to be read from the wildcards.
+# Every number found in the string is used, so both a single value ("0.5") and
+# a list-like encoding ("[0.5, 0.25]") are accepted.
+# NOTE(review): the exact wildcard encoding is defined by pattern_strings - confirm.
+K_raw <- as.character(snakemake@wildcards[["precision_values"]])
+K_values <- as.numeric(unlist(
+    regmatches(K_raw, gregexpr("-?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?", K_raw))
+))
+if (length(K_values) == 0 || anyNA(K_values)) {
+    stop("precision_values must contain at least one numeric value")
+}
+
+print("Simulating randam-precision matrix")
+# Work on the off-diagonal support only; the diagonal is set separately below.
+support <- (adjmat != 0) | t(adjmat != 0)
+diag(support) <- FALSE
+
+# Draw one value per undirected edge (upper triangle) and mirror it, so that
+# entries (i, j) and (j, i) are equal and the matrix is symmetric.
+upper_edges <- which(support & upper.tri(support))
+drawn <- sample.int(length(K_values), length(upper_edges), replace = TRUE)
+precmat <- matrix(0, nrow = p, ncol = p)
+precmat[upper_edges] <- K_values[drawn]
+precmat <- precmat + t(precmat)
+
+# Strict diagonal dominance with a positive diagonal guarantees positive
+# definiteness; the margin also keeps isolated nodes at a non-zero variance.
+diag_margin <- 1
+diag(precmat) <- rowSums(abs(precmat)) + diag_margin
+
+print("Inverting the precision matrix")
+covmat <- cov2cor(solve(precmat))
+colnames(covmat) <- colnames(df)
+
+filename <- snakemake@output[["params"]]
+write.table(covmat,
+  file = filename, row.names = FALSE,
+  quote = FALSE, col.names = TRUE, sep = ","
+)
+
diff --git a/workflow/rules/parameters/random_precmat/rule.smk b/workflow/rules/parameters/random_precmat/rule.smk
@@ -0,0 +1,12 @@
+rule bdgraph_rgwish:
+    input:
+        adjmat = "{output_dir}/adjmat/{adjmat}.csv" 
+    output:
+        params = "{output_dir}/parameters/" + \
+                pattern_strings["random_precmat"] + "/" \
+                "seed={seed}/"+\
+                "adjmat=/{adjmat}.csv"
+    container:
+        None
+    script:
+        "random_precmat.R"
diff --git a/workflow/rules/parameters/random_precmat/schema.json b/workflow/rules/parameters/random_precmat/schema.json
@@ -0,0 +1,33 @@
+{
+    "title": "random_precision",
+    "description": "Generates a random precision matrix",
+    "type": "array",
+    "items": {
+        "title": "random_precision",
+        "description": "Generates a precision matrix from the given list of values, assigned randomly, then converts it to a correlation matrix",
+        "type": "object",
+        "properties": {
+            "id": {
+                "type": "string"
+            },
+            "precision_values": {
+                "type": "array",
+                "minItems": 1,
+                "items": {
+                    "type": "number"
+                }
+            }
+        },
+        "required": [
+            "id",
+            "precision_values"
+        ],
+        "additionalProperties": false,
+        "examples": [
+            {
+                "id": "rand_K",
+                "precision_values": [0.5, 0.25]
+            }
+        ]
+    },
+    "uniqueItems": true
+}
diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk
@@ -0,0 +1,19 @@
+import os
+
+def change_filename(original_string):
+    return original_string.replace("adjvecs_tobecompressed.csv", "adjvecs_fulloutput.tar.gz")
+
+
+rule athomas_jtsampler:
+    input:
+        data=alg_input_data(),
+    output:
+        seqgraph=touch(alg_output_seqgraph_path(module_name)),
+        seqgraph_full=touch(change_filename(alg_output_seqgraph_path(module_name))),
+        time=touch(alg_output_time_path(module_name)),
+        ntests=touch(alg_output_ntests_path(module_name))
+    container:
+       "docker://hallawalla/athomas_jtsampler:1.5"
+    script:
+        "script.sh"
+
diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+#CP=$(pwd)/workflow/rules/structure_learning_algorithms/athomas_jtsampler/jtsampler
+CP=/jtsampler
+TEMP_FILENAME=${snakemake_output[seqgraph_full]/fulloutput.tar.gz/fulloutput_tobecompressed.csv}
+if [ ${snakemake_wildcards[timeout]} = "None" ]; then
+    if [ ${snakemake_wildcards[full_output]} = "True" ]; then
+        /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \
+                      -r ${snakemake_wildcards[mcmc_seed]} \
+                      -n ${snakemake_wildcards[num_samples]} \
+                      -s ${snakemake_wildcards[sampler]} \
+                      -a ${snakemake_wildcards[edge_penalty]} \
+                      -c ${snakemake_wildcards[size_maxclique]} \
+                      -pd 5 \
+                      -F < ${snakemake_input[data]} > $TEMP_FILENAME
+        ## convet to benchpress file
+        ## copying the first 4 lines and every line afterwards that has a successfull move
+        awk -F, -v OFS=',' 'NR <= 4 || $5 == 0 {print $1, $2, $3, $4}' $TEMP_FILENAME > ${snakemake_output[seqgraph]}
+        ## compressing the files
+        tar -czf ${snakemake_output[seqgraph_full]} $TEMP_FILENAME
+        rm -f $TEMP_FILENAME
+    else
+        /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \
+                      -r ${snakemake_wildcards[mcmc_seed]} \
+                      -n ${snakemake_wildcards[num_samples]} \
+                      -s ${snakemake_wildcards[sampler]} \
+                      -a ${snakemake_wildcards[edge_penalty]} \
+                      -pd 5 \
+                      -c ${snakemake_wildcards[size_maxclique]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}
+   fi
+else
+    /usr/bin/time -f "%e" -o ${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[mcmc_seed]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}'
+fi
+
+
+
+
+
+
+#java -classpath $CP EstimateGM -r 1    -n 1000    -s 2 -a 10000 < results/data/adjmat\=/bdgraph_graphsim/p\=25/graph\=random/class\=None/size\=None/prob\=0.5/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=100/seed\=1.csv
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		Provides a way to create a precision matrix with entries assigned randomly from a given list.
		The generated precision matrix is then converted to a correlation matrix.