# Patch summary: makes coverage subsampling opt-in and moves it earlier in the workflow.
#
# cloudgene.yaml
#   - The "subsampling" input's description changes from "Subsample to 2000x" to
#     "Coverage Subsampling", a "default: off" entry is added, and the value labels become
#     "No subsampling" (off) and "Subsample to 2000x" (on), with "off" now listed first.
#   - NOTE(review): the unchanged context still declares "type: number" for this on/off
#     input -- confirm that this is intended.
#   - NOTE(review): confirm that "default" is the key the Cloudgene parser reads for a
#     parameter's initial value, and that the bare on/off scalars are kept as strings.
#
# modules/local/subsampling.nf (process SUBSAMPLING)
#   - "fraction" is now coverage / mean_cov (bc, scale=4) instead of 1 + coverage / mean_cov.
#     Per the removed comment, the leading 1 was the seed that travelled together with the
#     fraction through "samtools view -s".
#   - The seed is now passed on its own via "--subsample-seed 1" and the fraction via
#     "--subsample".
#   - Inside the "mean_cov_int > coverage" branch, the subsampled BAM is now moved over
#     ${bam_file}, so that name holds the subsampled reads afterwards.
#
# nextflow.config
#   - params.subsampling changes from "on" to "off".
#
# workflows/mtdna_server_2.nf (workflow MTDNA_SERVER_2)
#   - The SUBSAMPLING call guarded by params.subsampling.equals("on") is removed from the
#     place where it consumed validated_files and is added just before CALCULATE_STATISTICS,
#     where it consumes bams_ch and reassigns bams_ch to SUBSAMPLING.out.subsampled_bam_ch.
diff --git a/cloudgene.yaml b/cloudgene.yaml index 139267a..6e882af 100644 --- a/cloudgene.yaml +++ b/cloudgene.yaml @@ -109,12 +109,14 @@ workflow: off: Off - id: subsampling - description: Subsample to 2000x + description: Coverage Subsampling type: number visible: true + default: off values: - on: On - off: Off + off: No subsampling + on: Subsample to 2000x + - id: myseparator0 type: separator diff --git a/modules/local/subsampling.nf b/modules/local/subsampling.nf index 52ead1a..04698bb 100644 --- a/modules/local/subsampling.nf +++ b/modules/local/subsampling.nf @@ -16,12 +16,16 @@ process SUBSAMPLING { # convert to integer mean_cov_int=\$(printf "%.0f" "\$mean_cov") - # set seed to 1 (for reproducbility) and FRAC to coverage/mean_cov (e.g. 1.2) - fraction=\$(echo "scale=4; 1+(${coverage} / \${mean_cov})" | bc) + fraction=\$(echo "scale=4; ${coverage} / \${mean_cov}" | bc) if [ \${mean_cov_int} -gt ${coverage} ] then - samtools view -s \$fraction -b -o ${bam_file.baseName}.subsampled.bam ${bam_file} + samtools view \ + --subsample-seed 1 \ + --subsample \$fraction \ + -b \ + -o ${bam_file.baseName}.subsampled.bam ${bam_file} + mv ${bam_file.baseName}.subsampled.bam ${bam_file} fi """ diff --git a/nextflow.config b/nextflow.config index fe65cdc..15465d9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -23,7 +23,7 @@ params { alignQ = 30 coverage_estimation = "on" max_samples = 0 - subsampling = "on" + subsampling = "off" subsampling_coverage = 2000 diff --git a/workflows/mtdna_server_2.nf b/workflows/mtdna_server_2.nf index 8b8bf68..a0f2bcf 100644 --- a/workflows/mtdna_server_2.nf +++ b/workflows/mtdna_server_2.nf @@ -62,6 +62,14 @@ workflow MTDNA_SERVER_2 { ref_file_mutect2 ) + if(params.subsampling.equals("on") ) { + SUBSAMPLING ( + bams_ch, + params.subsampling_coverage + ) + bams_ch = SUBSAMPLING.out.subsampled_bam_ch + } + CALCULATE_STATISTICS( bams_ch ) @@ -84,14 +92,6 @@ workflow MTDNA_SERVER_2 { validated_files = 
INPUT_VALIDATION.out.validated_files.flatten() - if(params.subsampling.equals("on") ) { - SUBSAMPLING ( - validated_files, - params.subsampling_coverage - ) - validated_files = SUBSAMPLING.out.subsampled_bam_ch - } - if (params.mode == 'mutserve') { MUTSERVE(