diff --git a/config/dualGL_sparse.json b/config/dualGL_sparse.json new file mode 100644 index 00000000..2ca02112 --- /dev/null +++ b/config/dualGL_sparse.json @@ -0,0 +1,212 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score" + ], + "active": true + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, 50, 100 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": 
null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 3, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.98] + + }], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [0.01, 0.025, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.9, 0.99] + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": ["threshold"], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "edge_penalty": 0.0, + "size_maxclique": 100, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [2, 1, 0.8, 0.6, 0.4, 0.2, 0.1, 0.05, 0.01], + "timeout": null + } + ] + } +} +} + + diff --git a/config/dualGL_sparse_interclass.json b/config/dualGL_sparse_interclass.json new file mode 100644 index 00000000..748b826e --- /dev/null +++ b/config/dualGL_sparse_interclass.json @@ -0,0 +1,310 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "intra-class", + 
"seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_intra-class/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb", + "bdgraph" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100, + 200, + 500 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.25 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 0.01, + 0.025, + 0.05, 
+ 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } + } +} \ No newline at end of file diff --git a/config/dualGL_sparse_interclass_simple.json b/config/dualGL_sparse_interclass_simple.json new file mode 100644 index 00000000..b9cecf23 --- /dev/null +++ b/config/dualGL_sparse_interclass_simple.json @@ -0,0 +1,136 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "intra-class", + "seed_range": [ + 1, + 2 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_small/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + 
"ids": [ + "dualGL-gt13", + "jtsampler_gts" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20 + ] + } + ] + }, + "graph": { + "bdgraph_graphsim": [ + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + } + ] + }, + "parameters": { + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ] + }, + "structure_learning_algorithms": { + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ] + } + } +} \ No newline at end of file diff --git a/config/dualGL_sparse_randpreci.json b/config/dualGL_sparse_randpreci.json new file mode 100644 index 00000000..b49863f6 --- /dev/null +++ b/config/dualGL_sparse_randpreci.json @@ -0,0 +1,308 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + 
"graph_id": "lattice", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "precmat", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_precmat/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb", + "bdgraph" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.25 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.15, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", + "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + 
"structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } + } +} \ No newline at end of file diff --git a/config/dualGL_sparse_wishart.json b/config/dualGL_sparse_wishart.json new file mode 100644 index 00000000..1eb38bf2 --- /dev/null +++ b/config/dualGL_sparse_wishart.json @@ -0,0 +1,310 @@ +{ + "benchmark_setup": { + "data": [ + { + "data_id": "example1", + "graph_id": "random-1-ultra-sparse", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "random-1-sparse", + "parameters_id": "gwi", + 
"seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "lattice", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "ar5", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + }, + { + "data_id": "example1", + "graph_id": "circle", + "parameters_id": "gwi", + "seed_range": [ + 1, + 5 + ] + } + ], + "evaluation": { + "benchmarks": { + "filename_prefix": "dualGraph_p200_sparse_gwi/", + "show_seed": false, + "errorbar": true, + "errorbarh": false, + "scatter": true, + "path": true, + "text": false, + "ids": [ + "dualGL-gt13", + "jtsampler_gts", + "psi-learn", + "glasso", + "mb", + "bdgraph" + ] + }, + "graph_true_plots": true, + "graph_true_stats": false, + "ggally_ggpairs": false, + "graph_plots": [], + "mcmc_traj_plots": [ + { + "id": "jtsampler_gts", + "burn_in": 0.0, + "thinning": 100, + "functional": [ + "score", + "size" + ], + "active": false + } + ], + "mcmc_heatmaps": [], + "mcmc_autocorr_plots": [] + } + }, + "resources": { + "data": { + "iid": [ + { + "id": "example1", + "standardized": false, + "n": [ + 20, + 50, + 100, + 200, + 500 + ] + } + ] + }, + "graph": { + "trilearn_rand_bandmat": [ + { + "id": "ar5", + "max_bandwidth": 5, + "dim": 200 + } + ], + "bdgraph_graphsim": [ + { + "id": "random-1-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "random-1-ultra-sparse", + "p": 200, + "graph": "random", + "class": null, + "size": null, + "prob": 0.01 + }, + { + "id": "lattice", + "p": 200, + "graph": "lattice", + "class": null, + "size": null, + "prob": 0.1 + }, + { + "id": "circle", + "p": 200, + "graph": "circle", + "class": null, + "size": null, + "prob": 0.2 + } + ] + }, + "parameters": { + "random_precmat": [ + { + "id": "precmat", + "prec_val_1": 0.5, + "prec_val_2": 0.25 + } + ], + "trilearn_intra-class": [ + { + "id": "intra-class", + "rho": 0.5, + "sigma2": 1.0 + } + ], + "bdgraph_rgwish": [ + { + "id": "gwi", 
+ "b": 5, + "threshold_conv": 0.000001 + } + ] + }, + "structure_learning_algorithms": { + "equsa_psilearner": [ + { + "id": "psi-learn", + "timeout": null, + "alpha1": 0.3, + "alpha2": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.98 + ] + } + ], + "dualgl": [ + { + "id": "dualGL-gt13", + "timeout": null, + "startalg": "jtsampler_gts", + "alpha": [ + 0.01, + 0.025, + 0.05, + 0.1, + 0.15, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.9, + 0.99 + ] + } + ], + "bdgraph": [ + { + "id": "bdgraph", + "method": "ggm", + "algo": ["rjmcmc"], + "iter": 10000, + "gprior": 0.5, + "dfprior": 3, + "gstart": "empty", + "timeout": null, + "mcmc_seed": [1], + "thresh": 0.5, + "mcmc_estimator": ["threshold"], + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ], + "burnin_frac": 0.5 + } + ], + "athomas_jtsampler": [ + { + "id": "jtsampler_gts", + "burnin_frac": 0.5, + "mcmc_estimator": [ + "threshold" + ], + "timeout": null, + "mcmc_seed": 1, + "num_samples": 10000000, + "sampler": 1, + "delta": 5, + "edge_penalty": 0, + "size_maxclique": 500, + "full_output": true, + "threshold": [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, + 1.0 + ] + } + ], + "huge_glasso": [ + { + "id": "glasso", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ], + "huge_mb": [ + { + "id": "mb", + "lambda": [ + 2, + 1, + 0.8, + 0.6, + 0.4, + 0.2, + 0.1, + 0.05, + 0.01 + ], + "timeout": null + } + ] + } + } +} \ No newline at end of file diff --git a/workflow/rules/data/iid/rules.smk b/workflow/rules/data/iid/rules.smk index cd27eda6..5aa01c44 100644 --- a/workflow/rules/data/iid/rules.smk +++ b/workflow/rules/data/iid/rules.smk @@ -80,6 +80,23 @@ rule sample_rgwish_data: shell: "python workflow/rules/data/iid/numpy_sample_mvn_data.py {input.cov} {output.data} {wildcards.n} {wildcards.seed}" +rule sample_random_precmat_data: + input: + 
"workflow/rules/data/iid/numpy_sample_mvn_data.py", + cov="{output_dir}/parameters/random_precmat/{bn}/adjmat=/{adjmat}.csv" # This could probably be relaxed + output: + data="{output_dir}/data" \ + "/adjmat=/{adjmat}"\ + "/parameters=/random_precmat/{bn}"\ + "/data=/"+pattern_strings["iid"]+"/seed={seed}.csv" + wildcard_constraints: + n="[0-9]*" + container: + docker_image("trilearn") + shell: + "python workflow/rules/data/iid/numpy_sample_mvn_data.py {input.cov} {output.data} {wildcards.n} {wildcards.seed}" + + """ TODO: Standardisation should better be done in a separate preprocessing module in the data section in benchmark_setup. diff --git a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R index 2b0b058d..26313988 100644 --- a/workflow/rules/evaluation/benchmarks/combine_ROC_data.R +++ b/workflow/rules/evaluation/benchmarks/combine_ROC_data.R @@ -114,8 +114,8 @@ for (algorithm in active_algorithms) { FNR_skel_q3 = quantile(FNR_skel, probs = c(0.95)), time_mean = mean(time), time_median = median(time), - time_q1 = quantile(time, probs = c(0.05)), - time_q3 = quantile(time, probs = c(0.95)), + time_q1 = quantile(time, probs = c(0.05), na.rm = TRUE), + time_q3 = quantile(time, probs = c(0.95), na.rm = TRUE), n_seeds = n(), curve_vals = mean(!!as.symbol(curve_param)) ) diff --git a/workflow/rules/module_strings.py b/workflow/rules/module_strings.py index 86e7a1ab..2e07ccfa 100644 --- a/workflow/rules/module_strings.py +++ b/workflow/rules/module_strings.py @@ -11,6 +11,14 @@ from typing import Optional, List, Union, Tuple +def id_to_alg(id): + for key, alg in config["resources"]["structure_learning_algorithms"].items(): + for obj in alg: + if obj["id"] == id: + return key + + return None + def input_algorithm(wildcards): """This algorithm can be use as input field in n algorithm rule to pass the output graph of another algorithm as input. 
The requirement is that the algorithm JSON object @@ -52,6 +60,7 @@ def idtopath(mylist, json_string): return json_string[str(mylist)] + json_string = {} json_string_mcmc_noest = {} @@ -190,6 +199,29 @@ def gen_data_string_from_conf(data_id, seed, seed_in_path=True): **data) + +if "dualgl" in pattern_strings: + dualgl_list = config["resources"]["structure_learning_algorithms"]["dualgl"] + # The path to the startspace algorithm is extended here + + def local_idtopath(idlist): + + # idlist is either None or the id of a single algorithm object (lists of ids are not handled here). + # The id may correspond to an MCMC alg, then the estimator parameters should be added too. + if idlist is None: + return "None" + alg = id_to_alg(idlist) + vals = next(obj for obj in config["resources"]["structure_learning_algorithms"][alg] if obj["id"] == idlist) + return expand(pattern_strings[alg], **vals) + + + for items in dualgl_list: + items["startalg"] = local_idtopath(items["startalg"]) + + json_string.update({val["id"]: expand(pattern_strings["dualgl"], **val,) + for val in dualgl_list } ) + + # def path_to_input_algorithm_graph(alg_id): # return "{output_dir}/adjmat_estimate/{data}/"\ # "algorithm=/" + json_string[alg_id][0] + "/" +\ diff --git a/workflow/rules/parameters/random_precmat/bibtex.bib b/workflow/rules/parameters/random_precmat/bibtex.bib new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/parameters/random_precmat/docs.rst b/workflow/rules/parameters/random_precmat/docs.rst new file mode 100644 index 00000000..23b8c3ef --- /dev/null +++ b/workflow/rules/parameters/random_precmat/docs.rst @@ -0,0 +1,3 @@ +Provides a way to create a precision matrix with entries assigned randomly from a given list. +The generated precision is then converted to a correlation matrix. 
+ diff --git a/workflow/rules/parameters/random_precmat/info.json b/workflow/rules/parameters/random_precmat/info.json new file mode 100644 index 00000000..1dba0813 --- /dev/null +++ b/workflow/rules/parameters/random_precmat/info.json @@ -0,0 +1,13 @@ +{ + "title": "Random-precision", + "version": "", + "package": { + "title": "", + "url": "" + }, + "docs_url": "", + "graph_types": [ + "UG" + ], + "language": "R" +} diff --git a/workflow/rules/parameters/random_precmat/random_precmat.R b/workflow/rules/parameters/random_precmat/random_precmat.R new file mode 100644 index 00000000..7647b1fc --- /dev/null +++ b/workflow/rules/parameters/random_precmat/random_precmat.R @@ -0,0 +1,71 @@ +library(Matrix) + +seed <- as.integer(snakemake@wildcards[["seed"]]) +set.seed(seed) + +df <- read.csv(snakemake@input[["adjmat"]], header = TRUE, check.names = FALSE) +adjmat <- as.matrix(df) +p <- nrow(adjmat) + +K_values <- c() + +# Store the non null precision values in a vector + +if (!is.null(snakemake@wildcards[["prec_val_1"]])) { + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_1"]])) +} +if (!is.null(snakemake@wildcards[["prec_val_2"]])) { + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_2"]])) +} +if (!is.null(snakemake@wildcards[["prec_val_3"]])) { + K_values <- c(K_values, as.numeric(snakemake@wildcards[["prec_val_3"]])) +} + + +precmat <- matrix(0, nrow = p, ncol = p) +# Sample until we get a positive definite matrix + +print("Simulating random precision matrix") +if (length(K_values) == 1) { + precmat <- 1 * (adjmat != 0) * K_values +} + +if (length(K_values) > 1) { + is_positive_definite <- FALSE + k = 0 + while (!is_positive_definite) { + M <- sum(adjmat != 0) + v <- sample.int(length(K_values), M, replace = TRUE) + precmat <- 1 * (adjmat != 0) + indx = which (adjmat != 0, arr.ind = TRUE) + rho = K_values[1] + distance_to_diag = abs(indx[, 'row'] - indx[, 'col']) + r2 = rho^distance_to_diag + precmat[indx] <- r2 + diag(precmat) <- 
1 + eigen_values <- eigen(precmat)$values + is_positive_definite <- all(Re(eigen_values) > 0) + k = k+1 + print(paste("Iteration", k, "is_positive_definite", is_positive_definite)); if (!is_positive_definite) stop("random_precmat: the constructed precision matrix is not positive definite") # precmat does not depend on the sampled v, so another iteration would rebuild the same matrix and never terminate + } + #M <- sum(adjmat != 0) + #v <- sample.int(length(K_values), M, replace = TRUE) + #precmat <- 1 * (adjmat != 0) + #precmat[which(adjmat != 0)] <- K_values[v] +} +# while (TRUE) { +# eigen_values <- eigen(precmat)$values +# is_positive_definite <- all(Re(eigen_values) > 0) +# if (is_positive_definite) break +# print("Adding 0.1 to the diagonal") +# precmat <- precmat + 0.1 * diag(p) +# } +print("Inverting the precision matrix") +covmat <- cov2cor(solve(precmat)) +colnames(covmat) <- colnames(df) + +filename <- snakemake@output[["params"]] +write.table(covmat, + file = filename, row.names = FALSE, + quote = FALSE, col.names = TRUE, sep = "," +) diff --git a/workflow/rules/parameters/random_precmat/rule.smk b/workflow/rules/parameters/random_precmat/rule.smk new file mode 100644 index 00000000..1f01b51c --- /dev/null +++ b/workflow/rules/parameters/random_precmat/rule.smk @@ -0,0 +1,12 @@ +rule random_precmat: + input: + adjmat = "{output_dir}/adjmat/{adjmat}.csv" + output: + params = "{output_dir}/parameters/" + \ + pattern_strings["random_precmat"] + "/" \ + "seed={seed}/"+\ + "adjmat=/{adjmat}.csv" + container: + "docker://bpimages/bdgraph:2.72.0" + script: + "random_precmat.R" \ No newline at end of file diff --git a/workflow/rules/parameters/random_precmat/schema.json b/workflow/rules/parameters/random_precmat/schema.json new file mode 100644 index 00000000..2fa60d13 --- /dev/null +++ b/workflow/rules/parameters/random_precmat/schema.json @@ -0,0 +1,32 @@ +{ + "title": "random_precision", + "description": "Generates a random precision matrix", + "type": "array", + "items": { + "title": "random_precision", + "description": "Generates a precision matrix from the given list of values, assigned randomly, then converts it to a correlation matrix", + "type": "object", + "properties": { + 
"id": { + "type": "string" + }, + "precision_values": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "id" + ], + "additionalProperties": true, + "examples": [ + { + "id": "rand_K", + "precision_values": [0.5, 0.25] + } + ] + }, + "uniqueItems": true +} diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile new file mode 100644 index 00000000..5dc0d5aa --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/Dockerfile @@ -0,0 +1,14 @@ +# Source https://github.com/melmasri/jtsampler +FROM ubuntu:20.04 + +RUN apt update -y +RUN apt install default-jre -y +RUN apt install time -y +RUN apt install make -y +RUN apt install default-jdk -y + +COPY jtsampler jtsampler +WORKDIR /jtsampler +RUN make + + diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh new file mode 100644 index 00000000..4af01bcc --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docker.sh @@ -0,0 +1,4 @@ +docker build -t athomas_jtsampler . +docker run -it athomas_jtsampler bash +docker build -t hallawalla/athomas_jtsampler:1.4 . --no-cache +docker push hallawalla/athomas_jtsampler:1.4 diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst new file mode 100644 index 00000000..30a83f32 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/docs.rst @@ -0,0 +1 @@ +Abstract: Full Bayesian computational inference for model determination in undirected graphical models is currently restricted to decomposable graphs or other special cases, except for small-scale problems, say up to 15 variables. 
In this paper we develop new, more efficient methodology for such inference, by making two contributions to the computational geometry of decomposable graphs. The first of these provides sufficient conditions under which it is possible to completely connect two disconnected complete subsets of vertices, or perform the reverse procedure, yet maintain decomposability of the graph. The second is a new Markov chain Monte Carlo sampler for arbitrary positive distributions on decomposable graphs, taking a junction tree representing the graph as its state variable. \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json new file mode 100644 index 00000000..6d7dbf61 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/info.json @@ -0,0 +1,23 @@ +{ + "title": "GT13", + "docker_image": "onceltuca/thomasgreen:1.19-bp", + "version": "1.19-bp", + "package": { + "title": "", + "url": "http" + }, + "docs_url": "", + "papers": [ + { + "title": "P. J. Green and A. Thomas (2013). 
Sampling decomposable graphs using a Markov chain on junction trees.", + "url": "https://www.jstor.org/stable/43304539" + } + ], + "outputs": [ + "graphtraj" + ], + "graph_types": [ + "DG" + ], + "language": "Java" +} \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk new file mode 100644 index 00000000..785522d2 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/rule.smk @@ -0,0 +1,19 @@ +import os + +def change_filename(original_string): + return original_string.replace("adjvecs_tobecompressed.csv", "adjvecs_fulloutput.tar.gz") + + +rule athomas_jtsampler: + input: + data=alg_input_data(), + output: + seqgraph=touch(alg_output_seqgraph_path(module_name)), + seqgraph_full=touch(change_filename(alg_output_seqgraph_path(module_name))), + time=touch(alg_output_time_path(module_name)), + ntests=touch(alg_output_ntests_path(module_name)) + container: + "docker://hallawalla/athomas_jtsampler:1.5" + script: + "script.sh" + diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json new file mode 100644 index 00000000..23f7cc57 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/schema.json @@ -0,0 +1,114 @@ +{ + "description": "Green & Thomas objects", + "title": "gt13_multipair", + "type": "array", + "items": { + "title": "gt13_multipair item", + "description": "Green & Thomas algorithm for learning decomposable graphs.\n Source: Green, P. J., & Thomas, A. (2013). Sampling decomposable graphs using a Markov chain on junction trees. 
Biometrika, 100(1), 91-110.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier" + }, + "n": { + "description": "Set the number of iterations to int.", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "s": { + "description": "Set the sampler to the one indexed by: 0 = Giudici & Green (1999) sampler. This is the default. 1 = Green & Thomas (2013) single edge junction tree sampler. 2 = Green & Thomas (2013) multiple edge junction tree sampler.", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "a": { + "description": "Set the edge penalty in the prior.", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum" + }, + "c": { + "description": "Set the maximum clique size.", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "timeout": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull" + }, + "mcmc_seed": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "threshold": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "mcmc_estimator": { + "anyOf": [ + { + "type": "string", + "enum": [ + "map", + "threshold" + ] + }, + { + "type": "array", + "items": { + "type": "string", + "enum": [ + "map", + "threshold" + ] + } + } + ] + }, + "burnin_frac": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexprob" + }, + "num_samples": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint" + }, + "sampler": { + "type": "integer", + "enum": [0, 1, 2], + "default": 0 + }, + "size_maxclique": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegint", + "default": 100 + }, + "delta": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/nonnegnum", + "default": 1.0 + }, + "edge_penalty": { + "$ref": 
"../../../schemas/definitions.schema.json#/definitions/nonnegnum", + "default": 0.0 + }, + "full_output": { + "type": "boolean", + "default": false + } + }, + "additionalProperties": true, + "required": [ + "id", + "timeout", + "burnin_frac", + "threshold", + "mcmc_estimator", + "num_samples" + ], + "examples": [ + { + "id": "jtsampler", + "burnin_frac": 0.5, + "mcmc_estimator": "map", + "timeout": null, + "threshold":0.5, + "mcmc_seed": 1, + "num_samples": 10000, + "sampler": 0, + "edge_penalty": 1.0, + "size_maxclique": 10000, + "delta": 5 + } + + ] + } +} diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py new file mode 100644 index 00000000..56cab9a1 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.py @@ -0,0 +1,59 @@ + +def alg_shell(algorithm): + if algorithm == "gt13_multipair": + return """if [ {wildcards.datatype} = \"discrete\" ]; then + tail -n +3 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n 
{wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}' ; + fi + fi + fi + if [ {wildcards.datatype} = \"continuous\" ]; then + tail -n +2 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep 
{wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + fi + fi + rm -f {output.seqgraph}.noheader + if [ -f {output.seqgraph} ]; then + sleep 1 + else + touch {output.seqgraph} + echo None > {output.time}; + fi + + """ \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh new file mode 100644 index 00000000..55b61fc8 --- /dev/null +++ 
b/workflow/rules/structure_learning_algorithms/athomas_jtsampler/script.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +#CP=$(pwd)/workflow/rules/structure_learning_algorithms/athomas_jtsampler/jtsampler +CP=/jtsampler +TEMP_FILENAME=${snakemake_output[seqgraph_full]/fulloutput.tar.gz/fulloutput_tobecompressed.csv} +if [ ${snakemake_wildcards[timeout]} = "None" ]; then + if [ ${snakemake_wildcards[full_output]} = "True" ]; then + /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ + -r ${snakemake_wildcards[mcmc_seed]} \ + -n ${snakemake_wildcards[num_samples]} \ + -s ${snakemake_wildcards[sampler]} \ + -a ${snakemake_wildcards[edge_penalty]} \ + -c ${snakemake_wildcards[size_maxclique]} \ + -pd ${snakemake_wildcards[delta]} \ + -F < ${snakemake_input[data]} > $TEMP_FILENAME + ## convert the full output to the benchpress trajectory format: + ## copy the first 4 lines and every later line that records a successful move (code 0), keeping columns 1-4 + awk -F, -v OFS=',' 'NR <= 4 || $5 == 0 {print $1, $2, $3, $4}' $TEMP_FILENAME > ${snakemake_output[seqgraph]} + ## compressing the files + tar -czf ${snakemake_output[seqgraph_full]} $TEMP_FILENAME + rm -f $TEMP_FILENAME + else + /usr/bin/time -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ + -r ${snakemake_wildcards[mcmc_seed]} \ + -n ${snakemake_wildcards[num_samples]} \ + -s ${snakemake_wildcards[sampler]} \ + -a ${snakemake_wildcards[edge_penalty]} \ + -pd ${snakemake_wildcards[delta]} \ + -c ${snakemake_wildcards[size_maxclique]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} + fi +else # timeout set: the command string is double-quoted so this shell expands it; the child bash sees neither CP nor the snakemake arrays + /usr/bin/time -f "%e" -o ${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c "java -classpath $CP EstimateGM -r ${snakemake_wildcards[mcmc_seed]} -n ${snakemake_wildcards[num_samples]} -s ${snakemake_wildcards[sampler]} -a ${snakemake_wildcards[edge_penalty]} -pd ${snakemake_wildcards[delta]} -c ${snakemake_wildcards[size_maxclique]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}" +fi + + + + + + +#java -classpath $CP EstimateGM -r 1 -n 1000 -s 2 -a 10000 <
results/data/adjmat\=/bdgraph_graphsim/p\=25/graph\=random/class\=None/size\=None/prob\=0.5/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=100/seed\=1.csv diff --git a/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile b/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile new file mode 100644 index 00000000..e1232261 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/Dockerfile @@ -0,0 +1,40 @@ +FROM r-base:4.3.0 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + wget \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install CRAN packages + +RUN R -e "install.packages('stringr', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('igraph', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('data.table', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('graph', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('huge', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('ZIM', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('mvtnorm', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('speedglm', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('ncvreg', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('bnlearn', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('doParallel', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('SIS', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('snowfall', repos='http://cran.us.r-project.org')" +RUN R -e "install.packages('R.utils', repos='http://cran.us.r-project.org')" + +WORKDIR /tmp + +# Install XMRF +RUN wget https://cran.r-project.org/src/contrib/Archive/XMRF/XMRF_1.0.tar.gz \ + && R -e 'install.packages("XMRF_1.0.tar.gz", repos = NULL, type="source")' \ + && rm XMRF_1.0.tar.gz + +# Install equSA +RUN wget 
https://cran.r-project.org/src/contrib/Archive/equSA/equSA_1.2.1.tar.gz \ + && R -e 'install.packages("equSA_1.2.1.tar.gz", repos = NULL, type="source")' \ + && rm equSA_1.2.1.tar.gz + +# Cleanup +RUN rm -rf /tmp/* + diff --git a/workflow/rules/structure_learning_algorithms/dualgl/docs.rst b/workflow/rules/structure_learning_algorithms/dualgl/docs.rst new file mode 100644 index 00000000..e69de29b diff --git a/workflow/rules/structure_learning_algorithms/dualgl/info.json b/workflow/rules/structure_learning_algorithms/dualgl/info.json new file mode 100644 index 00000000..7e901204 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/info.json @@ -0,0 +1,16 @@ +{ + "title": "Chordal-screening", + "version": "0.1", + "package": { + "title": "", + "url": "http" + }, + "docs_url": "", + "outputs": [ + "adjmat" + ], + "graph_types": [ + "UG" + ], + "language": "Java" +} diff --git a/workflow/rules/structure_learning_algorithms/dualgl/rule.smk b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk new file mode 100644 index 00000000..6f669207 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/rule.smk @@ -0,0 +1,37 @@ +def fix_none_startalg(wildcards): + if wildcards["startalg"] == "None": + return [] + else: + return "{output_dir}/adjvecs/{data}/algorithm=/"+ wildcards['startalg']+"/seed={seed}/adjvecs_fulloutput.csv" + +def extract_filename(filename): + return filename.replace("_fulloutput.tar.gz", "fulloutput_tobecompressed.csv") + + + + +rule extract_dualgl: + input: + "{whatever}/adjvecs_fulloutput.tar.gz" + output: + temp("{whatever}/adjvecs_fulloutput.csv") + shell: + "tar -xf {input} && mv {wildcards.whatever}/adjvecs_fulloutput_tobecompressed.csv {output}" + + +rule: + name: + module_name + input: + data=alg_input_data(), + seqgraph=fix_none_startalg + output: + adjmat=alg_output_adjmat_path(module_name), + time=touch(alg_output_time_path(module_name)), + ntests=touch(alg_output_ntests_path(module_name)) + container: 
+ "docker://hallawalla/dualgl:1.4" + script: + "screening.R" + + diff --git a/workflow/rules/structure_learning_algorithms/dualgl/schema.json b/workflow/rules/structure_learning_algorithms/dualgl/schema.json new file mode 100644 index 00000000..c3865bfe --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/schema.json @@ -0,0 +1,32 @@ +{ + "description": "Green & Thomas objects", + "title": "gt13_multipair", + "type": "array", + "items": { + "title": "gt13_multipair item", + "description": "Green & Thomas algorithm for learning decomposable graphs.\n Source: Green, P. J., & Thomas, A. (2013). Sampling decomposable graphs using a Markov chain on junction trees. Biometrika, 100(1), 91-110.", + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier" + }, + "timeout": { + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull" + } + }, + "additionalProperties": true, + "required": [ + "id", + "timeout" + ], + "examples": [ + { + "id": "jtsampler", + "burnin_frac": 0.5, + "startalg": "jtsampler_gg", + "timeout": null + } + ] + } +} \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/screening.R b/workflow/rules/structure_learning_algorithms/dualgl/screening.R new file mode 100644 index 00000000..156773ee --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/screening.R @@ -0,0 +1,154 @@ +# R.utils is needed for the timeout so make sure this is installed.
+source("workflow/scripts/utils/helpers.R") +# source("/path/in/dockerimage/filetosource.R") +## Loading local libraries +library(data.table) +library(stringr) +library(equSA) + +split <- function(x) { + y = str_sub(x,2,-2) + a = str_split(y, '-') + u = as.numeric(a[[1]][1]) + v = as.numeric(a[[1]][2]) + c(u,v) +} + +fix_edges<-function(x, move='add') { + a = split(x) + c(edge=paste(min(a), max(a), sep='-'), orig=min(a), dest=max(a), move=move) +} + +z <-function(logLambda, n, m, delta= 1, g = 1/n) { + ## returns a normalized z + gamma1<-function(x) lgamma((delta + x-1)/2) - lgamma((delta+x - 2)/2) + ## L = lambda/(sqrt(n+1) * gamma1(m+2)) + L = (2/n) * (gamma1(n+m) - gamma1(m) - log(1+1/g) + logLambda) + sL = sqrt(1-pmin(exp(L),1-1e-8)) + z = 0.5*log((1+ sL)/(1-sL)) + z * sqrt(n - m -1) +} + +rho2 <-function(logLambda, n, m, delta= 1, g = 1/n) { + ## returns a normalized z + gamma1<-function(x) lgamma((delta + x-1)/2) - lgamma((delta+x - 2)/2) + ## L = lambda/(sqrt(n+1) * gamma1(m+2)) + L = (2/n) * (gamma1(n+m) - gamma1(m) - log(1+1/g) + logLambda) + 1 - exp(L) +} + +z_tild <-function(z) { + q<-pnorm(-abs(z), log.p=TRUE) + q<-q+log(2.0) + s<-qnorm(q,log.p=TRUE) + (-1) * s +} + + + + + +## local testing +## traj_filename = "graphs//adjvec_1_0.1_random.csv" +## data_filename = "graphs//seed_1_0.1_random.csv" +## all.files = dir('graphs/', full.names=TRUE) +## get_graph_filenames('0.1_random') +## data =get_graph_data('0.1_random') +## true_graph = data[[1]] +## dt = data[[2]] + +## traj_filename = "results/adjvecs/adjmat=/bdgraph_graphsim/p=50/graph=random/class=None/size=None/prob=0.5/seed=2/parameters=/bdgraph_rgwish/b=3/threshold_conv=1e-07/seed=2/data=/iid/n=50/standardized=False/algorithm=/athomas_jtsampler/alg_params=/timeout=None/mcmc_seed=1/num_samples=100000/sampler=0/edge_penalty=0.0/size_maxclique=100/full_output=True/mcmc_params/mcmc_estimator=map/threshold=0.0/burnin_frac=0.5/seed=2/adjvecs_fulloutput_tobecompressed.csv" +## data_filename = 
"results/data/adjmat=/bdgraph_graphsim/p=50/graph=random/class=None/size=None/prob=0.5/seed=2/parameters=/bdgraph_rgwish/b=3/threshold_conv=1e-07/seed=2/data=/iid/n=50/standardized=False/seed=2.csv" + +## traj_filename = "results/adjvecs/adjmat\=/bdgraph_graphsim/p\=50/graph\=random/class\=None/size\=None/prob\=0.1/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=50/standardized\=False/algorithm\=/athomas_jtsampler/alg_params\=/timeout\=None/mcmc_seed\=1/num_samples\=2000000/sampler\=0/edge_penalty\=0.0/size_maxclique\=50/full_output\=True/seed\=1/adjvecs_fulloutput_tobecompressed.csv" + +myalg <- function() { + + output_filename <- snakemake@output[["adjmat"]] + traj_filename <- snakemake@input[["seqgraph"]] + time_filename <- snakemake@output[["time"]] + data_filename <- snakemake@input[["data"]] + ntests_filename <- snakemake@output[["ntests"]] + alpha = as.numeric(snakemake@wildcards[['alpha']]) + ## burnin <- 0.5 + + ## The algorithm should be in this function. + start <- proc.time()[1] + ## extract codes + data <- read.csv(traj_filename, check.names = FALSE, header = TRUE) + data = data.table(data) + codes = data$code[1] + codes = drop(sapply(str_split(str_sub(codes, 2,-2), '-'), as.numeric)) + data = data[-c(1:3), ] + colnames(data)<-c('index', 'score', 'added', 'removed', 'code', 'delta', 'm') + + input_data = read.csv(file = data_filename, header=TRUE, check.names=FALSE) + n = nrow(input_data) + p = ncol(input_data) + + ## data = data[code %in% c(0,9,5)][m>0] + ## 9 failed disconnect, 5 failed connect, 0 success One-pair JT sampler + ## 6 failed connection, 4 failed disconnection, 0 success, Giudici & Green sampler + data = data[code %in% codes][m>0][m < n-1] + + ## remove burnin + ## data = data[floor(nrow(data)*burnin):nrow(data)] + + ## re-order edge index, where x-y such that x < y.
+ a = rbind(data[removed=='[]',as.list(fix_edges(added, 'add')), index], + data[added=='[]', as.list(fix_edges(removed, 'remove')), index]) + data = merge(a, data, by ='index') + + + ## inverting Marginal likelihood ratio + data[, B := ifelse(move=='remove', delta,-delta)] + + ## compute the Z-test + data[, z:=z(B, n, m)] + data[, zt:=z_tild(z)] + + emp = data[ ,.(N_p = .N, prop = sum(1*(code=='0') * (move =='remove') + 1*(code!='0')*(move=='add'))/.N), by = c('edge')][order(edge)] + + data = merge(data, emp, by = 'edge') + + + ## converting orig/dest to numeric + data$orig = as.numeric(data$orig) + data$dest = as.numeric(data$dest) + + + data = unique(data[, .(orig, dest, edge, zt, z, prop, N_p)]) + + + data_treat=data[, .( + orig=orig[1], + dest=dest[1], + zt= mean(zt), + z = mean(z), + .N, + N_p = N_p[1], + prop = prop[1]), by = edge] + + data_treat[, pzt := sqrt(N_p) * prop/sqrt(1e-4+prop*(1-prop))] + data_treat + #q = data_treat[, pcorselR(cbind(orig, dest, zt), ALPHA2=0.05, GRID=3, iteration=100)] + q = as.numeric(snakemake@wildcards[['alpha']]) + + data_treat[, est_edge := 1*(pnorm(zt)> 1-q)] + + adjmat <- matrix(0, nrow = p, ncol = p) + ed = data_treat[est_edge==1][, cbind(orig+1, dest+1)] + adjmat[ed] <- 1 + + adjmat <- 1 * (adjmat | t(adjmat)) + diag(adjmat) <- 0 + + totaltime <- proc.time()[1] - start + colnames(adjmat) <- colnames(input_data) # Get the labels from the data + write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + write(totaltime, file = time_filename) + # Write the true number of c.i. tests here if possible. 
+ cat("None", file = ntests_filename, sep = "\n") +} + +add_timeout(myalg) diff --git a/workflow/rules/structure_learning_algorithms/dualgl/script.py b/workflow/rules/structure_learning_algorithms/dualgl/script.py new file mode 100644 index 00000000..56cab9a1 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/script.py @@ -0,0 +1,59 @@ + +def alg_shell(algorithm): + if algorithm == "gt13_multipair": + return """if [ {wildcards.datatype} = \"discrete\" ]; then + tail -n +3 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > 
{output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGM -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}' ; + fi + fi + fi + if [ {wildcards.datatype} = \"continuous\" ]; then + tail -n +2 {input.data} > {output.seqgraph}.noheader + sed --in-place 's/,/\ /g' {output.seqgraph}.noheader + if [ {wildcards.prior} = \"mbc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior mbc -clq {wildcards.clq} -sep {wildcards.sep} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"bc\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}; + else + /usr/bin/time -q -f \"%e\" 
-o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior bc -ascore {wildcards.ascore} -bscore {wildcards.bscore} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + elif [ {wildcards.prior} = \"ep\" ]; then + if [ {wildcards.timeout} = \"None\" ]; then + /usr/bin/time -q -f \"%e\" -o {output.time} java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph} ; + else + /usr/bin/time -q -f \"%e\" -o {output.time} timeout -s SIGINT {wildcards.timeout} bash -c 'java -classpath /jtsampler/classes FitGaussianGM -v -n {wildcards.n_samples} -s 2 -r {wildcards.randomits} -prior ep -pen {wildcards.penalty} -seed {wildcards.mcmc_seed} < {output.seqgraph}.noheader > {output.seqgraph}'; + fi + fi + fi + rm -f {output.seqgraph}.noheader + if [ -f {output.seqgraph} ]; then + sleep 1 + else + touch {output.seqgraph} + echo None > {output.time}; + fi + + """ \ No newline at end of file diff --git a/workflow/rules/structure_learning_algorithms/dualgl/test.sh b/workflow/rules/structure_learning_algorithms/dualgl/test.sh new file mode 100644 index 00000000..7c411dc2 --- /dev/null +++ b/workflow/rules/structure_learning_algorithms/dualgl/test.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +CP=$(pwd)/workflow/rules/structure_learning_algorithms/athomas_jtsampler/jtsampler + +if [ ${snakemake_wildcards[timeout]} = "None" ]; then + /usr/bin/time -q -f "%e" -o ${snakemake_output[time]} java -classpath $CP EstimateGM \ + -r ${snakemake_wildcards[replicate]} \ + -n ${snakemake_wildcards[n]} \ + -s ${snakemake_wildcards[s]} \ + -a ${snakemake_wildcards[a]} \ + -c ${snakemake_wildcards[c]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]} +else + /usr/bin/time -q -f "%e" -o 
${snakemake_output[time]} timeout -s SIGINT ${snakemake_wildcards[timeout]} bash -c 'java -classpath $CP EstimateGM -r ${snakemake_wildcards[replicate]} < ${snakemake_input[data]} > ${snakemake_output[seqgraph]}' +fi + + +java -classpath $CP EstimateGM -r 1 -n 1000 -s 2 -a 10000 < results/data/adjmat\=/bdgraph_graphsim/p\=25/graph\=random/class\=None/size\=None/prob\=0.5/seed\=1/parameters\=/bdgraph_rgwish/b\=3/threshold_conv\=1e-07/seed\=1/data\=/iid/n\=100/seed\=1.csv + + + + +java EstimateGM -r 1 -n 1000 -s 2 -a 0.0 -c 10000 < seed\=1.csv + + diff --git a/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R b/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R index ee342051..9d505003 100644 --- a/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R +++ b/workflow/rules/structure_learning_algorithms/equsa_psilearner/psi_learner.R @@ -2,6 +2,49 @@ source("workflow/scripts/utils/helpers.R") library(equSA) +equSAR_localR<-function(iData, iMaxNei, ALPHA1=0.05, ALPHA2=0.05, GRID=2, iteration=100) { + # Wrote this to avoid the segfault issue when running the C code in equSA1 + p <- dim(iData)[2] + A <- matrix(0, ncol=p, nrow=p) + U <- psical(iData,iMaxNei, ALPHA1,GRID,iteration) + ## U should be cbind(row, col, psi score) + ## when the q-value is too large or too small + ## the C code returns a segfault memory issue + ## the R code returns a matrix of zeroes, therefore, here we check it + ## and return an empty matrix + if(U[1,1]==0 & U[1,2]==0) + return(list(score = U, Adj = A, sigfault=TRUE)) + + z<-U[,3] + q<-pnorm(-abs(z), log.p=TRUE) + q<-q+log(2.0) + s<-qnorm(q,log.p=TRUE) + s<-(-1)*s + UU<-cbind(U[,1:2],s) + ## subsampling for psi scores ### + N <- length(U[,1]) + ratio<-ceiling(N/100000) + UU<-UU[order(UU[,3]), 1:3] + m<-floor(N/ratio) + m0<-N-m*ratio + s<-sample.int(ratio,m,replace=TRUE) + for(i in 1:length(s)) s[i]<-s[i]+(i-1)*ratio + if(m0>0) { + s0<-sample.int(m0,1)+length(s)*ratio +
s<-c(s,s0) + } + Us<-UU[s,] + y <- round(Us,6) + + ## multiple hypothesis tests ### + q = pcorselR(y, ALPHA2, GRID, iteration) + s = y[,3]> q + ij = y[s, c(1,2)] + A[ij] <-1 + A = A + t(A) + list(score = U, Adj = A, sigfault=FALSE) +} + myalg <- function() { output_filename <- snakemake@output[["adjmat"]] time_filename <- snakemake@output[["time"]] @@ -15,15 +58,15 @@ myalg <- function() { ## The algorithm should be in this function. start <- proc.time()[1] ## extract codes - input_data <- read.csv(data_filename) + input_data <- read.csv(data_filename, header = TRUE, check.names = FALSE) n <- nrow(input_data) p <- ncol(input_data) - neighborhood <- n / log(n) + neighborhood <- floor(n / log(n)) + 1 if (!is.null(snakemake@wildcards[["neig"]])) { neighborhood <- as.integer(snakemake@wildcards[["neig"]]) } - - res <- equSAR( + + res <- equSAR_localR( iData = input_data, iMaxNei = neighborhood, ALPHA1 = alpha1, @@ -35,11 +78,18 @@ myalg <- function() { adjmat <- res$Adj totaltime <- proc.time()[1] - start - colnames(adjmat) <- names(input_data) # Get the labels from the data - write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + if(res$sigfault) { + file.create(file = output_filename) + totaltime = "None" + }else { + colnames(adjmat) <- names(input_data) # Get the labels from the data + write.csv(adjmat, file = output_filename, row.names = FALSE, quote = FALSE) + } write(totaltime, file = time_filename) # Write the true number of c.i. tests here if possible. 
cat("None", file = ntests_filename, sep = "\n") } add_timeout(myalg) + + diff --git a/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json b/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json index 470b4f48..7495dcd4 100644 --- a/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json +++ b/workflow/rules/structure_learning_algorithms/huge_glasso/schema.json @@ -16,12 +16,12 @@ }, "lambda": { "description": "A positive number to control the regularization. Typical usage is to leave the input lambda: null and have the program compute its own.", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "nlambda": { "description": "The number of regularization/thresholding parameters. The default value is 10", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "select_criterion": { @@ -62,4 +62,4 @@ ] }, "uniqueItems": true -} \ No newline at end of file +} diff --git a/workflow/rules/structure_learning_algorithms/huge_mb/schema.json b/workflow/rules/structure_learning_algorithms/huge_mb/schema.json index cc8dc3a6..1fbbb04e 100644 --- a/workflow/rules/structure_learning_algorithms/huge_mb/schema.json +++ b/workflow/rules/structure_learning_algorithms/huge_mb/schema.json @@ -16,12 +16,12 @@ }, "lambda": { "description": "A positive number to control the regularization. Typical usage is to leave the input lambda: null and have the program compute its own.", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "nlambda": { "description": "The number of regularization/thresholding parameters. 
The default value is 10", - "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnum", + "$ref": "../../../schemas/definitions.schema.json#/definitions/flexnonnegnumnull", "default": null }, "select_criterion": { @@ -31,7 +31,7 @@ "stars", null ], - "default": "ric" + "default": "stars" } }, "required": [ @@ -59,4 +59,4 @@ ] }, "uniqueItems": true -} \ No newline at end of file +}