From dbc5cdd51e460b319800d4f58973a2e22a5ae7df Mon Sep 17 00:00:00 2001 From: wkusmirek Date: Fri, 14 Jul 2017 11:48:50 +0200 Subject: [PATCH] callers wrapper (#33) * first, not tested, not working version * run_CODEXCOV.R file removed * CODEXCOV_wrapper.R file done * bugfix in CODEXCOV_wrapper.R * run_cnvcaller.R divided into functions * bugfix * changed parameters tables * bugfixes in run_cnvcaller.R * proper driver management * coverage table name from parameters * option for only Y chr * bugfix --- R/CNVCALLER.RUNNER/R/CODEXCOV_wrapper.R | 45 ++++++++ R/CNVCALLER.RUNNER/inst/run_cnvcaller.R | 139 +++++++++++++++++------- R/CODEXCOV/inst/run_CODEXCOV.R | 83 -------------- sql/parameters_calls_model.sql | 2 + sql/parameters_calls_test_model.sql | 42 +++---- 5 files changed, 168 insertions(+), 143 deletions(-) mode change 100644 => 100755 R/CNVCALLER.RUNNER/inst/run_cnvcaller.R delete mode 100755 R/CODEXCOV/inst/run_CODEXCOV.R diff --git a/R/CNVCALLER.RUNNER/R/CODEXCOV_wrapper.R b/R/CNVCALLER.RUNNER/R/CODEXCOV_wrapper.R index e69de29..20a72bf 100644 --- a/R/CNVCALLER.RUNNER/R/CODEXCOV_wrapper.R +++ b/R/CNVCALLER.RUNNER/R/CODEXCOV_wrapper.R @@ -0,0 +1,45 @@ +library('CODEXCOV') + +#' Function Description +#' +#' Function description.
+#' @param mapp_thresh +#' @param cov_thresh_from +#' @param cov_thresh_to +#' @param length_thresh_from +#' @param length_thresh_to +#' @param gc_thresh_from +#' @param gc_thresh_to +#' @param K_from +#' @param K_to +#' @param ds +#' @param lmax +#' @keywords +#' @export +#' @examples +#' run_wrapper_CODEXCOV +run_wrapper_CODEXCOV <- function(mapp_thresh, + cov_thresh_from, + cov_thresh_to, + length_thresh_from, + length_thresh_to, + gc_thresh_from, + gc_thresh_to, + K_from, + K_to, + lmax, + cov_table){ + calls <- run_CODEXCOV(as.double(mapp_thresh), + strtoi(cov_thresh_from), + strtoi(cov_thresh_to), + strtoi(length_thresh_from), + strtoi(length_thresh_to), + strtoi(gc_thresh_from), + strtoi(gc_thresh_to), + strtoi(K_from), + strtoi(K_to), + strtoi(lmax), + cov_table + ) + calls +} diff --git a/R/CNVCALLER.RUNNER/inst/run_cnvcaller.R b/R/CNVCALLER.RUNNER/inst/run_cnvcaller.R old mode 100644 new mode 100755 index c27d95d..66ee125 --- a/R/CNVCALLER.RUNNER/inst/run_cnvcaller.R +++ b/R/CNVCALLER.RUNNER/inst/run_cnvcaller.R @@ -1,48 +1,107 @@ #!/usr/bin/env Rscript library(devtools) -#install('CODEXCOV') -library('CODEXCOV') +install('CNVCALLER.RUNNER') ### zakomentować!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +library('CNVCALLER.RUNNER') library(optparse) - +#install.packages("RJDBC",dep=TRUE) +library(RJDBC) option_list <- list( -make_option("--id", default=1, -help="Id of parameters [default %default]"), -make_option("--mapp_thresh", default=0.9, -help="Mapping threshold for quality checking. [default %default]"), -make_option("--cov_thresh_from", default=20, -help="Coverage threshold (begin of interval) for quality checking. [default %default]"), -make_option("--cov_thresh_to", default=4000, -help="Coverage threshold (end of interval) for quality checking. [default %default]"), -make_option("--length_thresh_from", default=20, -help="Length threshold (begin of interval) for quality checking. 
[default %default]"), -make_option("--length_thresh_to", default=2000, -help="Length threshold (end of interval) for quality checking. [default %default]"), -make_option("--gc_thresh_from", default=20, -help="GC threshold (begin of interval) for quality checking. [default %default]"), -make_option("--gc_thresh_to", default=80, -help="GC threshold (end of interval) for quality checking. [default %default]"), -make_option("--K_from", default=1, -help="K value (begin of interval). [default %default]"), -make_option("--K_to", default=9, -help="K value (end of interval). [default %default]"), -make_option("--lmax", default=200, -help="Maximum CNV length in number of exons returned. [default %default]"), -make_option("--cov_table", default="ds", -help="Coverage table. [default %default]") + make_option("--tabName", default="public.test_parameters", + help="Parameters table. [default %default]"), + make_option("--id", default="1", + help="Parameters id. [default %default]") ) - opt <- parse_args(OptionParser(option_list=option_list)) -calls <- run_CODEXCOV(opt$mapp_thresh, -opt$cov_thresh_from, -opt$cov_thresh_to, -opt$length_thresh_from, -opt$length_thresh_to, -opt$gc_thresh_from, -opt$gc_thresh_to, -opt$K_from, -opt$K_to, -opt$lmax, -opt$cov_table -) +read_parameters <- function(tabName, id, conn){ + query <- paste("Select * from ", tabName, " where id = ", id, ";", sep="") + parameters <- dbGetQuery(conn, query) + caller <- parameters[1,'caller'] + cov_table <- parameters[1,'cov_table'] + mapp_thresh <- parameters[1,'mapp_thresh'] + cov_thresh_from <- parameters[1,'cov_thresh_from'] + cov_thresh_to <- parameters[1,'cov_thresh_to'] + length_thresh_from <- parameters[1,'length_thresh_from'] + length_thresh_to <- parameters[1,'length_thresh_to'] + gc_thresh_from <- parameters[1,'gc_thresh_from'] + gc_thresh_to <- parameters[1,'gc_thresh_to'] + K_from <- parameters[1,'k_from'] + K_to <- parameters[1,'k_to'] + lmax <- parameters[1,'lmax'] + return(list(caller=caller, + 
cov_table=cov_table, + mapp_thresh=mapp_thresh, + cov_thresh_from=cov_thresh_from, + cov_thresh_to=cov_thresh_to, + length_thresh_from=length_thresh_from, + length_thresh_to=length_thresh_to, + gc_thresh_from=gc_thresh_from, + gc_thresh_to=gc_thresh_to, + K_from=K_from, + K_to=K_to, + lmax=lmax)) +} + +save_calls <- function(calls, conn){ + if (nrow(calls) != 0) { + for(i in 1:nrow(calls)) { + call <- calls[i,] + query <- paste("INSERT INTO TEST_CALLS (parameters_id, sample_name, chr, cnv, st_bp, ed_bp, length_kb, st_exon, ed_exon, raw_cov, norm_cov, copy_no, lratio, mBIC) VALUES ('", opt$id, "','", call[1], "','", call[2], "','", call[3], "','", call[4], "','", call[5], "','", call[6], "','", call[7], "','", call[8], "','", call[9], "','", call[10], "','", call[11], "','", call[12], "','", call[13], "');", sep="") + dbSendUpdate(conn, query) + } + } +} + +read_coverage_table <- function(cov_table, conn){ + #query <- paste("select * from ", cov_table, sep="") + query <- paste("select * from ", cov_table, " where chr='Y'", sep="") + ds <- dbGetQuery(conn, query) + colnames(ds) <- c("sample_name", "target_id", "chr", "pos_min", "pos_max", "cov_avg") + ds +} + +run_caller <- function(parameters, cov_table){ + if (parameters$caller == "codex"){ + calls <- run_wrapper_CODEXCOV(parameters$mapp_thresh, + parameters$cov_thresh_from, + parameters$cov_thresh_to, + parameters$length_thresh_from, + parameters$length_thresh_to, + parameters$gc_thresh_from, + parameters$gc_thresh_to, + parameters$K_from, + parameters$K_to, + parameters$lmax, + cov_table + ) + calls + } else if(parameters$caller == "xhmm") { + } +} + +if (!file.exists("zsi-bio-cdh-hive-jdbc_2.11-0.1-assembly.jar")) { + download.file("http://zsibio.ii.pw.edu.pl:50007/repository/maven-releases/pl/edu/pw/ii/zsibio/zsi-bio-cdh-hive-jdbc_2.11/0.1/zsi-bio-cdh-hive-jdbc_2.11-0.1-assembly.jar",destfile="zsi-bio-cdh-hive-jdbc_2.11-0.1-assembly.jar") +} +drv_hive <- 
JDBC("com.cloudera.hiveserver2.hive.core.Hive2JDBCDriver", "./zsi-bio-cdh-hive-jdbc_2.11-0.1-assembly.jar",identifier.quote="`") +conn_hive <- dbConnect(drv_hive, "jdbc:hive2://cdh01.ii.pw.edu.pl:10000", "mwiewior", "") + +if (!file.exists("postgresql-42.1.1.jar")) { + download.file("http://zsibio.ii.pw.edu.pl:50007/repository/zsi-bio-raw/common/jdbc/postgresql-42.1.1.jar",destfile="postgresql-42.1.1.jar") +} +drv_psql <- JDBC("org.postgresql.Driver", "./postgresql-42.1.1.jar",identifier.quote="`") +conn_psql <- dbConnect(drv_psql, "jdbc:postgresql://cdh00.ii.pw.edu.pl:15432/cnv-opt", "cnv-opt", "zsibio321") + +parameters <- read_parameters(opt$tabName, opt$id, conn_psql) +#print(parameters) +cov_table <- read_coverage_table(parameters$cov_table, conn_hive) +#print(cov_table) +calls <- run_caller(parameters, cov_table) +#print(calls) +save_calls(calls, conn_psql) + +dbDisconnect(conn_hive) +dbUnloadDriver(drv_hive) + +dbDisconnect(conn_psql) +dbUnloadDriver(drv_psql) diff --git a/R/CODEXCOV/inst/run_CODEXCOV.R b/R/CODEXCOV/inst/run_CODEXCOV.R deleted file mode 100755 index 561b426..0000000 --- a/R/CODEXCOV/inst/run_CODEXCOV.R +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env Rscript -library(devtools) -#install('CODEXCOV') -library('CODEXCOV') -library(optparse) - - -option_list <- list( - make_option("--id", default=1, - help="Id of parameters [default %default]"), - make_option("--mapp_thresh", default=0.9, - help="Mapping threshold for quality checking. [default %default]"), - make_option("--cov_thresh_from", default=20, - help="Coverage threshold (begin of interval) for quality checking. [default %default]"), - make_option("--cov_thresh_to", default=4000, - help="Coverage threshold (end of interval) for quality checking. [default %default]"), - make_option("--length_thresh_from", default=20, - help="Length threshold (begin of interval) for quality checking. 
[default %default]"), - make_option("--length_thresh_to", default=2000, - help="Length threshold (end of interval) for quality checking. [default %default]"), - make_option("--gc_thresh_from", default=20, - help="GC threshold (begin of interval) for quality checking. [default %default]"), - make_option("--gc_thresh_to", default=80, - help="GC threshold (end of interval) for quality checking. [default %default]"), - make_option("--K_from", default=1, - help="K value (begin of interval). [default %default]"), - make_option("--K_to", default=9, - help="K value (end of interval). [default %default]"), - make_option("--lmax", default=200, - help="Maximum CNV length in number of exons returned. [default %default]"), - make_option("--cov_table", default="ds", - help="Coverage table. [default %default]") -) - -opt <- parse_args(OptionParser(option_list=option_list)) - -calls <- run_CODEXCOV(opt$mapp_thresh, - opt$cov_thresh_from, - opt$cov_thresh_to, - opt$length_thresh_from, - opt$length_thresh_to, - opt$gc_thresh_from, - opt$gc_thresh_to, - opt$K_from, - opt$K_to, - opt$lmax, - opt$cov_table - ) - - - - - - -#Y_qc <- qcObjDemo$Y_qc -#Yhat <- normObjDemo$Yhat -#BIC <- normObjDemo$BIC -#K <- normObjDemo$K -#sampname_qc <- qcObjDemo$sampname_qc -#ref_qc <- qcObjDemo$ref_qc -#chr <- bambedObjDemo$chr -#calls <- segment1(Y_qc, Yhat, optK = 2, K = K, sampname_qc, -# ref_qc, chr, lmax = 200, mode = "integer") -#print(calls) - - - -#install.packages("RJDBC",dep=TRUE) -#library(RJDBC) -#download.file("http://zsibio.ii.pw.edu.pl:50007/repository/zsi-bio-raw/common/jdbc/postgresql-42.1.1.jar",destfile="postgresql-42.1.1.jar") -#drv <- JDBC("org.postgresql.Driver", "./postgresql-42.1.1.jar",identifier.quote="`") -#conn <- dbConnect(drv, "jdbc:postgresql://cdh00.ii.pw.edu.pl:15432/cnv-opt", "cnv-opt", "zsibio321") -#for(call in calls) { -# query <- paste("INSERT INTO TEST_CALLS (parameters_id, sample_name, chr, cnv, st_bp, ed_bp, length_kb, st_exon, ed_exon, raw_cov, norm_cov, 
copy_no, lratio, mBIC) VALUES ('", opt$id, "','", call[1], "','", call[2], "','", call[3], "','", call[4], "','", call[5], "','", call[6], "','", call[7], "','", call[8], "','", call[9], "','", call[10], "','", call[11], "','", call[12], "','", call[13], "');", sep="") -# print(query) -# dbSendQuery(conn, query) -#} -#dbGetQuery(conn, "Select * from test_calls;") -#dbDisconnect(conn) - - - - \ No newline at end of file diff --git a/sql/parameters_calls_model.sql b/sql/parameters_calls_model.sql index b3ee42b..f701c18 100644 --- a/sql/parameters_calls_model.sql +++ b/sql/parameters_calls_model.sql @@ -4,6 +4,8 @@ CREATE SCHEMA IF NOT EXISTS CNV; CREATE TABLE IF NOT EXISTS CNV.PARAMETERS ( id SERIAL PRIMARY KEY, + caller TEXT, + cov_table TEXT, mapp_thresh TEXT, cov_thresh_from TEXT, cov_thresh_to TEXT, diff --git a/sql/parameters_calls_test_model.sql b/sql/parameters_calls_test_model.sql index 6b87ed4..62dea1b 100644 --- a/sql/parameters_calls_test_model.sql +++ b/sql/parameters_calls_test_model.sql @@ -1,5 +1,7 @@ CREATE TABLE IF NOT EXISTS TEST_PARAMETERS ( id SERIAL PRIMARY KEY, + caller TEXT, + cov_table TEXT, mapp_thresh TEXT, cov_thresh_from TEXT, cov_thresh_to TEXT, @@ -12,26 +14,26 @@ CREATE TABLE IF NOT EXISTS TEST_PARAMETERS ( lmax TEXT ); -INSERT INTO TEST_PARAMETERS VALUES (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); -INSERT INTO TEST_PARAMETERS VALUES (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); -INSERT INTO TEST_PARAMETERS VALUES (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); -INSERT INTO TEST_PARAMETERS VALUES (4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); -INSERT INTO TEST_PARAMETERS VALUES (5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); -INSERT INTO TEST_PARAMETERS VALUES (6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); -INSERT INTO TEST_PARAMETERS VALUES (7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); -INSERT INTO TEST_PARAMETERS VALUES (8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18); -INSERT INTO TEST_PARAMETERS VALUES (9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); -INSERT INTO TEST_PARAMETERS VALUES 
(10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); -INSERT INTO TEST_PARAMETERS VALUES (11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); -INSERT INTO TEST_PARAMETERS VALUES (12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); -INSERT INTO TEST_PARAMETERS VALUES (13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23); -INSERT INTO TEST_PARAMETERS VALUES (14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24); -INSERT INTO TEST_PARAMETERS VALUES (15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25); -INSERT INTO TEST_PARAMETERS VALUES (16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26); -INSERT INTO TEST_PARAMETERS VALUES (17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27); -INSERT INTO TEST_PARAMETERS VALUES (18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28); -INSERT INTO TEST_PARAMETERS VALUES (19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29); -INSERT INTO TEST_PARAMETERS VALUES (20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30); +INSERT INTO TEST_PARAMETERS VALUES (1, 'codex', 'cnv.coverage_target', 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +INSERT INTO TEST_PARAMETERS VALUES (2, 'codex', 'cnv.coverage_target', 3, 4, 5, 6, 7, 8, 9, 10, 11, 12); +INSERT INTO TEST_PARAMETERS VALUES (3, 'codex', 'cnv.coverage_target', 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); +INSERT INTO TEST_PARAMETERS VALUES (4, 'codex', 'cnv.coverage_target', 5, 6, 7, 8, 9, 10, 11, 12, 13, 14); +INSERT INTO TEST_PARAMETERS VALUES (5, 'codex', 'cnv.coverage_target', 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +INSERT INTO TEST_PARAMETERS VALUES (6, 'codex', 'cnv.coverage_target', 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); +INSERT INTO TEST_PARAMETERS VALUES (7, 'codex', 'cnv.coverage_target', 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); +INSERT INTO TEST_PARAMETERS VALUES (8, 'codex', 'cnv.coverage_target', 9, 10, 11, 12, 13, 14, 15, 16, 17, 18); +INSERT INTO TEST_PARAMETERS VALUES (9, 'codex', 'cnv.coverage_target', 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); +INSERT INTO TEST_PARAMETERS VALUES (10, 'xhmm', 'cnv.coverage_target', 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); +INSERT INTO TEST_PARAMETERS VALUES 
(11, 'xhmm', 'cnv.coverage_target', 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); +INSERT INTO TEST_PARAMETERS VALUES (12, 'xhmm', 'cnv.coverage_target', 13, 14, 15, 16, 17, 18, 19, 20, 21, 22); +INSERT INTO TEST_PARAMETERS VALUES (13, 'xhmm', 'cnv.coverage_target', 14, 15, 16, 17, 18, 19, 20, 21, 22, 23); +INSERT INTO TEST_PARAMETERS VALUES (14, 'xhmm', 'cnv.coverage_target', 15, 16, 17, 18, 19, 20, 21, 22, 23, 24); +INSERT INTO TEST_PARAMETERS VALUES (15, 'xhmm', 'cnv.coverage_target', 16, 17, 18, 19, 20, 21, 22, 23, 24, 25); +INSERT INTO TEST_PARAMETERS VALUES (16, 'xhmm', 'cnv.coverage_target', 17, 18, 19, 20, 21, 22, 23, 24, 25, 26); +INSERT INTO TEST_PARAMETERS VALUES (17, 'xhmm', 'cnv.coverage_target', 18, 19, 20, 21, 22, 23, 24, 25, 26, 27); +INSERT INTO TEST_PARAMETERS VALUES (18, 'xhmm', 'cnv.coverage_target', 19, 20, 21, 22, 23, 24, 25, 26, 27, 28); +INSERT INTO TEST_PARAMETERS VALUES (19, 'xhmm', 'cnv.coverage_target', 20, 21, 22, 23, 24, 25, 26, 27, 28, 29); +INSERT INTO TEST_PARAMETERS VALUES (20, 'xhmm', 'cnv.coverage_target', 21, 22, 23, 24, 25, 26, 27, 28, 29, 30); CREATE TABLE IF NOT EXISTS TEST_CALLS ( id SERIAL PRIMARY KEY,