diff --git a/modules/local/annotate.nf b/modules/local/annotate.nf index 796e375..342086a 100644 --- a/modules/local/annotate.nf +++ b/modules/local/annotate.nf @@ -8,9 +8,13 @@ process ANNOTATE { path annotation output: - path ("${variants_ch.baseName}_ann.txt"), emit: variants_ann_ch + path ("${variants_ch.baseName}.annotated.txt"), emit: variants_ann_ch """ - java -jar /opt/mutserve/mutserve.jar annotate --input ${variants_ch} --output ${variants_ch.baseName}_ann.txt --annotation ${annotation} + java -jar /opt/mutserve/mutserve.jar \ + annotate \ + --input ${variants_ch} \ + --output ${variants_ch.baseName}.annotated.txt \ + --annotation ${annotation} """ } diff --git a/modules/local/filter_variants.nf b/modules/local/filter_variants.nf index 71bbe4f..4c6ebf6 100644 --- a/modules/local/filter_variants.nf +++ b/modules/local/filter_variants.nf @@ -12,16 +12,26 @@ process FILTER_VARIANTS { def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '') """ - echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${vcf_file.baseName}.${method}.txt + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ + > ${vcf_file.baseName}.${method}.txt if [[ ${method} == "mutserve_fusion" ]] then - bcftools query -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' ${vcf_file} >> ${vcf_file.baseName}.${method}.txt - awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + bcftools query \ + -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \ + ${vcf_file} >> ${vcf_file.baseName}.${method}.txt + + awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \ + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + elif [[ ${method} == "mutect2_fusion" ]] then - bcftools query -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' ${vcf_file} >> ${vcf_file.baseName}.${method}.txt - awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + bcftools query \ + -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \ + ${vcf_file} >> ${vcf_file.baseName}.${method}.txt + + awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \ + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt fi """ } diff --git a/modules/local/index.nf b/modules/local/index.nf index 6d4fabb..148c651 100644 --- a/modules/local/index.nf +++ b/modules/local/index.nf @@ -7,6 +7,7 @@ process INDEX { """ samtools faidx $reference - samtools dict $reference -o ${reference.baseName}.dict + samtools dict $reference \ + -o ${reference.baseName}.dict """ } \ No newline at end of file diff --git a/modules/local/input_validation.nf b/modules/local/input_validation.nf index ab50c45..9ab4139 100644 --- a/modules/local/input_validation.nf +++ b/modules/local/input_validation.nf @@ -13,17 +13,23 @@ process INPUT_VALIDATION { path("contig.txt"), emit: contig_ch """ - csvtk concat -t ${statistics} -T -o sample_statistics.txt - csvtk concat -t ${mapping} -T -o sample_mappings.txt + csvtk concat \ + -t ${statistics} \ + -T -o sample_statistics.txt + + csvtk concat \ + -t ${mapping} \ + -T -o sample_mappings.txt + java -jar /opt/mutserve/mutserve.jar stats \ - --input sample_statistics.txt \ - --detection-limit ${params.detection_limit} \ - --reference ${params.reference} \ - --baseQ ${params.baseQ}\ - --mapQ ${params.mapQ} \ - --alignQ ${params.alignQ} \ - --output-excluded-samples excluded_samples.txt \ - --output-contig contig.txt \ - --tool ${params.mode} + --input sample_statistics.txt \ + --detection-limit ${params.detection_limit} \ + --reference ${params.reference} \ + --baseQ ${params.baseQ}\ + --mapQ ${params.mapQ} \ + --alignQ ${params.alignQ} \ + --output-excluded-samples excluded_samples.txt \ + --output-contig contig.txt \ + --tool ${params.mode} """ } \ No newline at end of file diff --git a/modules/local/merging_variants.nf b/modules/local/merging_variants.nf index d4b6121..d13e8c9 100644 --- a/modules/local/merging_variants.nf +++ b/modules/local/merging_variants.nf @@ -5,20 +5,25 @@ process MERGING_VARIANTS { val mode output: - path("variants.merged.txt"), emit: txt_summarized_ch + path("variants.txt"), emit: txt_summarized_ch """ - csvtk concat -t ${variants_txt} -T -o variants.concat.txt + csvtk concat \ + -t ${variants_txt} \ + -T -o variants.concat.txt - csvtk sort -t variants.concat.txt -k ID:N -k Pos:n -k Type:r -T -o variants.sorted.txt + csvtk sort \ + -t variants.concat.txt \ + -k ID:N -k Pos:n -k Type:r \ + -T -o variants.sorted.txt if [[ ${mode} == "fusion" ]] then java -jar /opt/VariantMerger.jar \ variants.sorted.txt \ - --output variants.merged.txt + --output variants.txt else - mv variants.sorted.txt variants.merged.txt + mv variants.sorted.txt variants.txt fi """ } \ No newline at end of file diff --git a/modules/local/mutect2.nf b/modules/local/mutect2.nf index a73ea38..ea6c9d1 100644 --- a/modules/local/mutect2.nf +++ b/modules/local/mutect2.nf @@ -17,25 +17,35 @@ process MUTECT2 { samtools index ${bam_file} gatk Mutect2 \ - -R ${reference} \ - -L ${detected_contig} \ - --min-base-quality-score ${params.baseQ} \ - -callable-depth 6 --max-reads-per-alignment-start 0 \ - -I ${bam_file} \ - -O raw.vcf.gz + -R ${reference} \ + -L ${detected_contig} \ + --min-base-quality-score ${params.baseQ} \ + -callable-depth 6 --max-reads-per-alignment-start 0 \ + -I ${bam_file} \ + -O raw.vcf.gz gatk FilterMutectCalls \ - -R ${reference} \ - --min-reads-per-strand 2 \ - -V raw.vcf.gz \ - -O ${bam_file.baseName}.vcf.gz + -R ${reference} \ + --min-reads-per-strand 2 \ + -V raw.vcf.gz \ + -O ${bam_file.baseName}.vcf.gz - rm raw.vcf.gz - bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.norm.vcf.gz -Oz + bcftools norm \ + -m-any \ + -f ${reference} \ + -o ${bam_file.baseName}.norm.vcf.gz -Oz \ + ${bam_file.baseName}.vcf.gz + mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz #required for mutect2-only mode! - echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.txt - bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ + > ${bam_file.baseName}.txt + + bcftools query \ + -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' \ + ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt + + rm raw.vcf.gz """ } \ No newline at end of file diff --git a/modules/local/mutserve.nf b/modules/local/mutserve.nf index 710728a..82203b4 100644 --- a/modules/local/mutserve.nf +++ b/modules/local/mutserve.nf @@ -6,7 +6,7 @@ process MUTSERVE { path excluded_samples output: - path("${bam_file.baseName}.txt"), emit: mutserve_txt_ch + path("${bam_file.simpleName}.txt"), emit: mutserve_txt_ch tuple path("${bam_file.baseName}.vcf.gz"), val('mutserve_fusion'), emit: mutserve_fusion_vcf_ch path("${bam_file.baseName}.vcf.gz"), emit: mutserve_vcf_ch path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutserve_vcf_idx_ch @@ -30,7 +30,12 @@ process MUTSERVE { --write-raw \ ${bam_file} - bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.norm.vcf.gz -Oz + bcftools norm \ + -m-any \ + -f ${reference} \ + -o ${bam_file.baseName}.norm.vcf.gz -Oz \ + ${bam_file.baseName}.vcf.gz + mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz tabix ${bam_file.baseName}.vcf.gz """ diff --git a/modules/local/quality_control.nf b/modules/local/quality_control.nf index 9a813b0..a084c5d 100644 --- a/modules/local/quality_control.nf +++ b/modules/local/quality_control.nf @@ -1,12 +1,15 @@ process QUALITY_CONTROL { + publishDir "${params.output_reports}", mode: "copy", pattern: '*.html' + input: path excluded_samples path zip + output: path "*.html" """ - multiqc . + multiqc . """ } \ No newline at end of file diff --git a/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam b/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam deleted file mode 100644 index 5584359..0000000 Binary files a/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam and /dev/null differ diff --git a/tests/mitocalling.nf.test b/tests/mitocalling.nf.test index 4f9409e..0e62264 100644 --- a/tests/mitocalling.nf.test +++ b/tests/mitocalling.nf.test @@ -18,7 +18,7 @@ nextflow_pipeline { then { assert workflow.success - assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } @@ -37,7 +37,7 @@ nextflow_pipeline { } then { - assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } @@ -57,8 +57,7 @@ nextflow_pipeline { then { assert workflow.success - //TODO: sort variants file by sample after merging to get a deterministic order - //assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } diff --git a/tests/mitocalling.nf.test.snap b/tests/mitocalling.nf.test.snap index 5efd4e5..d2c86c3 100644 --- a/tests/mitocalling.nf.test.snap +++ b/tests/mitocalling.nf.test.snap @@ -1,14 +1,20 @@ { + "Runs with single BAM file and mutect2": { + "content": [ + "variants.annotated.txt:md5,94fa4b4d0613f61410618a7eba4c5c97" + ], + "timestamp": "2024-02-26T23:09:58.251860914" + }, "Runs with single BAM file and mutserve": { "content": [ - "variants_ann.txt:md5,1d8423fc3e89a12e226e5e4b89f60150" + "variants.annotated.txt:md5,21b561f9b05098cdc137f43b4988570e" ], - "timestamp": "2024-01-10T14:27:36.082310248" + "timestamp": "2024-02-26T23:02:15.242706827" }, "Runs with BAM file including different header contigs": { "content": [ - "variants_ann.txt:md5,c2aa6c45193a921f4a20238fe9bf6cd8" + "variants.annotated.txt:md5,d68e6fed173c766228164438a24ecf5a" ], - "timestamp": "2024-01-10T14:28:13.798945731" + "timestamp": "2024-02-26T23:03:07.470112898" } } \ No newline at end of file