diff --git a/modules/local/filter_variants.nf b/modules/local/filter_variants.nf index 4c6ebf6..aaa0cfc 100644 --- a/modules/local/filter_variants.nf +++ b/modules/local/filter_variants.nf @@ -3,7 +3,7 @@ process FILTER_VARIANTS { //publishDir "${params.output}", mode: 'copy' input: - tuple path(vcf_file), val(method) + tuple path(vcf_file), path(vcf_file_idx), val(method) output: path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch @@ -12,27 +12,38 @@ process FILTER_VARIANTS { def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '') """ - echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tGT" \ > ${vcf_file.baseName}.${method}.txt + + bcftools query \ + -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \ + ${vcf_file} >> ${vcf_file.baseName}.${method}.txt if [[ ${method} == "mutserve_fusion" ]] then - bcftools query \ - -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \ - ${vcf_file} >> ${vcf_file.baseName}.${method}.txt - awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \ - ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.tmp.txt elif [[ ${method} == "mutect2_fusion" ]] then - bcftools query \ - -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \ - ${vcf_file} >> ${vcf_file.baseName}.${method}.txt - awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \ - ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.tmp.txt + else + mv ${vcf_file.baseName}.${method}.txt ${vcf_file.baseName}.${method}.filtered.tmp.txt fi + + ## annotating SNVS and INDELs for reporting + awk 'BEGIN {OFS="\t"} { + if (NR == 1) { print \$0, "Type"; next } + if ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5)) { \$9="3" } + else if (\$8 == "1") { \$9="1" } + else if (\$8 == "0/1" || \$8 == "1/0" || \$8 == "0|1" || \$8 == "1|0" ) { \$9="2" } + else { \$9="UNKNOWN" } + print + }' ${vcf_file.baseName}.${method}.filtered.tmp.txt > ${vcf_file.baseName}.${method}.filtered.txt + + rm ${vcf_file.baseName}.${method}.filtered.tmp.txt + """ } diff --git a/modules/local/merging_variants.nf b/modules/local/merging_variants.nf index d13e8c9..f58a2f4 100644 --- a/modules/local/merging_variants.nf +++ b/modules/local/merging_variants.nf @@ -14,7 +14,7 @@ process MERGING_VARIANTS { csvtk sort \ -t variants.concat.txt \ - -k ID:N -k Pos:n -k Type:r \ + -k ID:N -k Pos:n -k Type:nr \ -T -o variants.sorted.txt if [[ ${mode} == "fusion" ]] diff --git a/modules/local/mutect2.nf b/modules/local/mutect2.nf index cc95b22..47964e9 100644 --- a/modules/local/mutect2.nf +++ b/modules/local/mutect2.nf @@ -5,12 +5,10 @@ process MUTECT2 { path reference path fasta_index_files val detected_contig + val method output: - path("${bam_file.baseName}.txt"), emit: mutect2_txt_ch - tuple path("${bam_file.baseName}.vcf.gz"), val('mutect2_fusion'), emit: mutect2_fusion_vcf_ch - path("${bam_file.baseName}.vcf.gz"), emit: mutect2_vcf_ch - path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutect2_vcf_idx_ch + tuple path("${bam_file.baseName}.vcf.gz"), path("${bam_file.baseName}.vcf.gz.tbi"), val(method), emit: mutect2_ch """ samtools index ${bam_file} @@ -38,14 +36,6 @@ process MUTECT2 { mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz tabix -f ${bam_file.baseName}.vcf.gz - #required for mutect2-only mode! - echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ - > ${bam_file.baseName}.txt - - bcftools query \ - -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' \ - ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt - rm raw.vcf.gz """ } \ No newline at end of file diff --git a/modules/local/mutserve.nf b/modules/local/mutserve.nf index 99c8431..c82034b 100644 --- a/modules/local/mutserve.nf +++ b/modules/local/mutserve.nf @@ -3,12 +3,10 @@ process MUTSERVE { input: path bam_file path reference + val method output: - path("${bam_file.simpleName}.txt"), emit: mutserve_txt_ch - tuple path("${bam_file.baseName}.vcf.gz"), val('mutserve_fusion'), emit: mutserve_fusion_vcf_ch - path("${bam_file.baseName}.vcf.gz"), emit: mutserve_vcf_ch - path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutserve_vcf_idx_ch + tuple path("${bam_file.baseName}.vcf.gz"), path("${bam_file.baseName}.vcf.gz.tbi"), val(method), emit: mutserve_ch script: def avail_mem = 1024 diff --git a/tests/mitocalling.nf.test.snap b/tests/mitocalling.nf.test.snap index 6d9e99b..65bb23e 100644 --- a/tests/mitocalling.nf.test.snap +++ b/tests/mitocalling.nf.test.snap @@ -1,31 +1,31 @@ { "Runs with single BAM file and mutect2": { "content": [ - "variants.annotated.txt:md5,94fa4b4d0613f61410618a7eba4c5c97" + "variants.annotated.txt:md5,52dc3737a73bb80cf3a1c8946990c3e4" ], - "timestamp": "2024-02-26T23:09:58.251860914" + "timestamp": "2024-03-02T13:11:34.901277407" }, "Runs with single BAM file and mutserve": { "content": [ - "variants.annotated.txt:md5,aac0aee87ce1a91f8c35264be248b52e" + "variants.annotated.txt:md5,ee00b39fc49e1f34e6affab00368181f" ], - "timestamp": "2024-02-27T15:19:15.934593072" + "timestamp": "2024-03-02T13:09:45.954385792" }, "Runs with BAM file including different header contigs": { "content": [ - "variants.annotated.txt:md5,d68e6fed173c766228164438a24ecf5a" + "variants.annotated.txt:md5,a5345c7718757ded27246ae342b5f25b" ], - "timestamp": "2024-02-26T23:03:07.470112898" + "timestamp": "2024-03-02T13:10:28.843431233" }, "Runs with two files and one goes through QC": { "content": [ - "variants.annotated.txt:md5,3946a3bd26ceafe8f61903499bfcbb35", + "variants.annotated.txt:md5,ef402762b03356a208583e434d96dfa3", { "tasksFailed": 0, "tasksCount": 10, "tasksSucceeded": 10 } ], - "timestamp": "2024-02-28T12:32:40.624776" + "timestamp": "2024-03-02T13:12:29.64633988" } } \ No newline at end of file diff --git a/workflows/mtdna_server_2.nf b/workflows/mtdna_server_2.nf index b11588c..09f62b9 100644 --- a/workflows/mtdna_server_2.nf +++ b/workflows/mtdna_server_2.nf @@ -72,18 +72,12 @@ workflow MTDNA_SERVER_2 { MUTSERVE( validated_files, - ref_file_mutserve + ref_file_mutserve, + "mutserve_single" ) - MERGING_VARIANTS( - MUTSERVE.out.mutserve_txt_ch.collect(), - params.mode - ) - - variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch - variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect() - variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect() - file_count = MUTSERVE.out.mutserve_vcf_ch.count() + vcf_ch = MUTSERVE.out.mutserve_ch + file_count = MUTSERVE.out.mutserve_ch.count() } @@ -93,69 +87,68 @@ workflow MTDNA_SERVER_2 { validated_files, ref_file_mutect2, INDEX.out.fasta_index_ch, - detected_contig + detected_contig, + "mutect2_single" ) - MERGING_VARIANTS( - MUTECT2.out.mutect2_txt_ch.collect(), - params.mode - ) - - variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch - variants_vcf_ch = MUTECT2.out.mutect2_vcf_ch.collect() - variants_vcf_idx_ch = MUTECT2.out.mutect2_vcf_idx_ch.collect() - file_count = MUTECT2.out.mutect2_vcf_ch.count() + vcf_ch = MUTECT2.out.mutect2_ch + file_count = MUTECT2.out.mutect2_ch.count() } else if (params.mode == 'fusion') { MUTSERVE( validated_files, - ref_file_mutserve + ref_file_mutserve, + "mutserve_fusion" ) MUTECT2( validated_files, ref_file_mutect2, INDEX.out.fasta_index_ch, - detected_contig + detected_contig, + "mutect2_fusion" ) - merged_ch = MUTSERVE.out.mutserve_fusion_vcf_ch.concat(MUTECT2.out.mutect2_fusion_vcf_ch) + vcf_ch = MUTSERVE.out.mutserve_ch.concat(MUTECT2.out.mutect2_ch) + file_count = MUTSERVE.out.mutserve_ch.count() + } - FILTER_VARIANTS ( - merged_ch - ) + FILTER_VARIANTS ( + vcf_ch + ) - MERGING_VARIANTS( - FILTER_VARIANTS.out.combined_methods_ch.collect(), - params.mode - ) + MERGING_VARIANTS( + FILTER_VARIANTS.out.combined_methods_ch.collect(), + params.mode + ) + + variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch - variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch - // only use mutserve calls for haplogroup and contamination detection - variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect() - variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect() - file_count = MUTSERVE.out.mutserve_vcf_ch.count() - } + if (params.mode != 'mutect2') { + + variants_vcf_ch = vcf_ch + .filter { it[2].contains("mutserve") } + .collect { it[0]} - VCF_MERGE ( - variants_vcf_ch, - variants_vcf_idx_ch, - file_count + variants_vcf_idx_ch = vcf_ch + .filter { it[2].contains("mutserve") } + .collect { it[1]} + + VCF_MERGE ( + variants_vcf_ch, + variants_vcf_idx_ch, + file_count ) - if (params.mode != 'mutect2') { HAPLOGROUPS_CONTAMINATION ( VCF_MERGE.out.vcf_merged_ch ) haplogrep_ch = HAPLOGROUPS_CONTAMINATION.out.haplogroups_ch contamination_ch = HAPLOGROUPS_CONTAMINATION.out.contamination_txt_ch - } - - ANNOTATE( variants_txt_ch, ref_file_mutserve, @@ -207,4 +200,4 @@ workflow.onComplete { } else { } -} +} \ No newline at end of file