diff --git a/Dockerfile b/Dockerfile index c141760..3b67a71 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,4 +38,14 @@ RUN wget https://github.com/genepi/haplogrep3/releases/download/v3.2.1/haplogrep rm haplogrep3-3.2.1-linux.zip ENV PATH="/opt/haplogrep:${PATH}" +WORKDIR "/opt" +RUN wget https://github.com/jbangdev/jbang/releases/download/v0.91.0/jbang-0.91.0.zip && \ + unzip -q jbang-*.zip && \ + mv jbang-0.91.0 jbang && \ + rm jbang*.zip + +ENV PATH="/opt/jbang/bin:${PATH}" +WORKDIR "/opt" +COPY ./bin/VariantMerger.java ./ +RUN jbang export portable -O=VariantMerger.jar VariantMerger.java diff --git a/bin/VariantMerger.java b/bin/VariantMerger.java new file mode 100644 index 0000000..1d3287a --- /dev/null +++ b/bin/VariantMerger.java @@ -0,0 +1,86 @@ +//usr/bin/env jbang "$0" "$@" ; exit $? +//REPOS jcenter,genepi-maven=https://genepi.i-med.ac.at/maven +//DEPS info.picocli:picocli:4.6.1 +//DEPS genepi:genepi-io:1.1.1 + +import java.io.File; +import java.util.concurrent.Callable; +import genepi.io.table.reader.CsvTableReader; +import genepi.io.table.writer.CsvTableWriter; +import picocli.CommandLine; +import picocli.CommandLine.Option; +import picocli.CommandLine.Parameters; + +public class VariantMerger implements Callable { + + @Parameters(description = "Combined variants file") + private String file; + + @Option(names = "--output", description = "Output files", required = true) + private String output; + + @Option(names = "--indel-tag", description = "Detect indels by this tag", required = false) + private String tag = "INDEL"; + + public Integer call() throws Exception { + + assert (file != null); + assert (output != null); + + CsvTableWriter writer = new CsvTableWriter(new File(output).getAbsolutePath(), '\t', false); + CsvTableReader reader = new CsvTableReader(file, '\t'); + + int lastPos = 0; + String[] lastRow = null; + int diff = 0; + writer.setColumns(reader.getColumns()); + + while (reader.next()) { + + int pos = reader.getInteger("Pos"); + int refLength = reader.getString("Ref").length(); + int variantlength = reader.getString("Variant").length(); + + // init new position + if (lastPos == 0 || pos == 3107 ) { + diff = refLength - variantlength; + lastPos = pos; + lastRow = reader.getRow(); + continue; + } + + if (comparePositions(lastPos, pos, diff)) { + writer.setRow(lastRow); + writer.next(); + lastPos = 0; + } else { + // since no hit, write previous row. + writer.setRow(lastRow); + writer.next(); + diff = refLength - variantlength; + lastPos = pos; + lastRow = reader.getRow(); + } + + } + writer.setRow(lastRow); + writer.next(); + reader.close(); + writer.close(); + return 0; + } + + public static void main(String... args) { + int exitCode = new CommandLine(new VariantMerger()).execute(args); + System.exit(exitCode); + } + + public boolean comparePositions(int lastPos, int pos, int diff) { + for (int i = 0; i <= Math.abs(diff); i++) { + if ((lastPos + i) == pos) { + return true; + } + } + return false; + } +} diff --git a/files/mutserve.zip b/files/mutserve.zip index 62c597c..c209cd8 100644 Binary files a/files/mutserve.zip and b/files/mutserve.zip differ diff --git a/modules/local/annotate.nf b/modules/local/annotate.nf index 796e375..342086a 100644 --- a/modules/local/annotate.nf +++ b/modules/local/annotate.nf @@ -8,9 +8,13 @@ process ANNOTATE { path annotation output: - path ("${variants_ch.baseName}_ann.txt"), emit: variants_ann_ch + path ("${variants_ch.baseName}.annotated.txt"), emit: variants_ann_ch """ - java -jar /opt/mutserve/mutserve.jar annotate --input ${variants_ch} --output ${variants_ch.baseName}_ann.txt --annotation ${annotation} + java -jar /opt/mutserve/mutserve.jar \ + annotate \ + --input ${variants_ch} \ + --output ${variants_ch.baseName}.annotated.txt \ + --annotation ${annotation} """ } diff --git a/modules/local/contamination_detection.nf b/modules/local/contamination_detection.nf deleted file mode 100644 index bd012f4..0000000 --- a/modules/local/contamination_detection.nf +++ /dev/null @@ -1,12 +0,0 @@ -process CONTAMINATION_DETECTION { - - input: - path variants_ch - - output: - path ("haplocheck.html"), emit: contamination_report_ch - path ("haplocheck.txt"), emit: contamination_txt_ch - """ - java -jar /opt/haplocheck/haplocheck.jar --out haplocheck.txt --raw ${variants_ch} - """ -} \ No newline at end of file diff --git a/modules/local/filter_variants.nf b/modules/local/filter_variants.nf new file mode 100644 index 0000000..4c6ebf6 --- /dev/null +++ b/modules/local/filter_variants.nf @@ -0,0 +1,38 @@ +process FILTER_VARIANTS { + + //publishDir "${params.output}", mode: 'copy' + + input: + tuple path(vcf_file), val(method) + + output: + path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch + + script: + def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '') + + """ + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ + > ${vcf_file.baseName}.${method}.txt + + if [[ ${method} == "mutserve_fusion" ]] + then + bcftools query \ + -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \ + ${vcf_file} >> ${vcf_file.baseName}.${method}.txt + + awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \ + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + + elif [[ ${method} == "mutect2_fusion" ]] + then + bcftools query \ + -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \ + ${vcf_file} >> ${vcf_file.baseName}.${method}.txt + + awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \ + ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt + fi + """ +} + diff --git a/modules/local/haplogroup_detection.nf b/modules/local/haplogroup_detection.nf deleted file mode 100644 index 2bf2e2b..0000000 --- a/modules/local/haplogroup_detection.nf +++ /dev/null @@ -1,14 +0,0 @@ -process HAPLOGROUP_DETECTION { - - publishDir "${params.output_auxiliary}", mode: 'copy' - - input: - path variants_ch - - output: - path ("haplogroups.txt"), emit: haplogroups_ch - - """ - java -jar /opt/haplogrep/haplogrep3.jar classify --tree phylotree-fu-rcrs@1.2 --in ${variants_ch} --out haplogroups.txt --extend-report - """ -} \ No newline at end of file diff --git a/modules/local/haplogroups_contamination.nf b/modules/local/haplogroups_contamination.nf new file mode 100644 index 0000000..1b687d0 --- /dev/null +++ b/modules/local/haplogroups_contamination.nf @@ -0,0 +1,26 @@ +process HAPLOGROUPS_CONTAMINATION { + + publishDir "${params.output_auxiliary}", mode: 'copy' + + input: + path(merged_vcf_ch) + + output: + path ("haplogroups.txt"), emit: haplogroups_ch + path ("haplocheck.html"), emit: contamination_report_ch + path ("haplocheck.txt"), emit: contamination_txt_ch + + """ + java -jar /opt/haplogrep/haplogrep3.jar \ + classify \ + --tree phylotree-fu-rcrs@1.2 \ + --in ${merged_vcf_ch} \ + --out haplogroups.txt \ + --extend-report + + java -jar /opt/haplocheck/haplocheck.jar \ + --out haplocheck.txt \ + --raw \ + ${merged_vcf_ch} + """ +} \ No newline at end of file diff --git a/modules/local/index.nf b/modules/local/index.nf index 6d4fabb..148c651 100644 --- a/modules/local/index.nf +++ b/modules/local/index.nf @@ -7,6 +7,7 @@ process INDEX { """ samtools faidx $reference - samtools dict $reference -o ${reference.baseName}.dict + samtools dict $reference \ + -o ${reference.baseName}.dict """ } \ No newline at end of file diff --git a/modules/local/input_validation.nf b/modules/local/input_validation.nf index ab50c45..9ab4139 100644 --- a/modules/local/input_validation.nf +++ b/modules/local/input_validation.nf @@ -13,17 +13,23 @@ process INPUT_VALIDATION { path("contig.txt"), emit: contig_ch """ - csvtk concat -t ${statistics} -T -o sample_statistics.txt - csvtk concat -t ${mapping} -T -o sample_mappings.txt + csvtk concat \ + -t ${statistics} \ + -T -o sample_statistics.txt + + csvtk concat \ + -t ${mapping} \ + -T -o sample_mappings.txt + java -jar /opt/mutserve/mutserve.jar stats \ - --input sample_statistics.txt \ - --detection-limit ${params.detection_limit} \ - --reference ${params.reference} \ - --baseQ ${params.baseQ}\ - --mapQ ${params.mapQ} \ - --alignQ ${params.alignQ} \ - --output-excluded-samples excluded_samples.txt \ - --output-contig contig.txt \ - --tool ${params.mode} + --input sample_statistics.txt \ + --detection-limit ${params.detection_limit} \ + --reference ${params.reference} \ + --baseQ ${params.baseQ}\ + --mapQ ${params.mapQ} \ + --alignQ ${params.alignQ} \ + --output-excluded-samples excluded_samples.txt \ + --output-contig contig.txt \ + --tool ${params.mode} """ } \ No newline at end of file diff --git a/modules/local/merge/strategy_mutect2.nf b/modules/local/merge/strategy_mutect2.nf new file mode 100644 index 0000000..918c229 --- /dev/null +++ b/modules/local/merge/strategy_mutect2.nf @@ -0,0 +1,51 @@ +process STRATEGY_MUTECT2 { + + //publishDir "${params.output}", mode: 'copy' + + input: + path bam_file + path reference + path excluded_samples + path fasta_index_files + val detected_contig + + output: + path("${bam_file.baseName}.mutect2.filtered.txt"), emit: results + + """ + samtools index ${bam_file} + + gatk Mutect2 \ + -R ${reference} \ + -L ${detected_contig} \ + --min-base-quality-score ${params.baseQ} \ + -callable-depth 6 --max-reads-per-alignment-start 0 \ + -I ${bam_file} \ + -O raw.vcf.gz + + gatk FilterMutectCalls \ + -R ${reference} \ + --min-reads-per-strand 2 \ + -V raw.vcf.gz \ + -O ${bam_file.baseName}.vcf.gz + + bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.filtered.vcf.gz -Oz + +tabix ${bam_file.baseName}.filtered.vcf.gz + gatk LeftAlignAndTrimVariants \ + -R ${reference} \ + -V ${bam_file.baseName}.vcf.gz \ + -O ${bam_file.baseName}.filtered2.vcf.gz \ + --max-indel-length 208 + + rm raw.vcf.gz + + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.mutect2.txt + + bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.filtered2.vcf.gz >> ${bam_file.baseName}.mutect2.txt + + awk -F'\t' 'NR == 1 || length(\$4) > 1 || length(\$5) > 1' ${bam_file.baseName}.mutect2.txt > ${bam_file.baseName}.mutect2.filtered.txt + + + """ +} \ No newline at end of file diff --git a/modules/local/merging_variants.nf b/modules/local/merging_variants.nf new file mode 100644 index 0000000..d13e8c9 --- /dev/null +++ b/modules/local/merging_variants.nf @@ -0,0 +1,29 @@ +process MERGING_VARIANTS { + + input: + path variants_txt + val mode + + output: + path("variants.txt"), emit: txt_summarized_ch + + """ + csvtk concat \ + -t ${variants_txt} \ + -T -o variants.concat.txt + + csvtk sort \ + -t variants.concat.txt \ + -k ID:N -k Pos:n -k Type:r \ + -T -o variants.sorted.txt + + if [[ ${mode} == "fusion" ]] + then + java -jar /opt/VariantMerger.jar \ + variants.sorted.txt \ + --output variants.txt + else + mv variants.sorted.txt variants.txt + fi + """ +} \ No newline at end of file diff --git a/modules/local/mutect2.nf b/modules/local/mutect2.nf index 0e0b690..c51f4f7 100644 --- a/modules/local/mutect2.nf +++ b/modules/local/mutect2.nf @@ -1,7 +1,5 @@ process MUTECT2 { - publishDir "${params.output}", mode: 'copy' - input: path bam_file path reference @@ -10,33 +8,45 @@ process MUTECT2 { val detected_contig output: - path("${bam_file.baseName}.vcf.gz"), emit: vcf_ch - path("${bam_file.baseName}.txt"), emit: txt_ch + path("${bam_file.baseName}.txt"), emit: mutect2_txt_ch + tuple path("${bam_file.baseName}.vcf.gz"), val('mutect2_fusion'), emit: mutect2_fusion_vcf_ch + path("${bam_file.baseName}.vcf.gz"), emit: mutect2_vcf_ch + path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutect2_vcf_idx_ch """ samtools index ${bam_file} gatk Mutect2 \ - --mitochondria-mode \ - -R ${reference} \ - -L ${detected_contig} \ - --min-base-quality-score ${params.baseQ} \ - -I ${bam_file} \ - -O raw.vcf.gz + -R ${reference} \ + -L ${detected_contig} \ + --min-base-quality-score ${params.baseQ} \ + -callable-depth 6 --max-reads-per-alignment-start 0 \ + -I ${bam_file} \ + -O raw.vcf.gz gatk FilterMutectCalls \ - --mitochondria-mode \ - --max-alt-allele-count 10 \ - -R ${reference} \ - --min-allele-fraction ${params.detection_limit} \ - -V raw.vcf.gz \ - -O ${bam_file.baseName}.vcf.gz + -R ${reference} \ + --min-reads-per-strand 2 \ + -V raw.vcf.gz \ + -O ${bam_file.baseName}.vcf.gz + + bcftools norm \ + -m-any \ + -f ${reference} \ + -o ${bam_file.baseName}.norm.vcf.gz -Oz \ + ${bam_file.baseName}.vcf.gz + + mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz + tabix -f ${bam_file.baseName}.vcf.gz + #required for mutect2-only mode! + echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \ + > ${bam_file.baseName}.txt + + bcftools query \ + -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' \ + ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt + rm raw.vcf.gz - - echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.txt - - bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\t0\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt - """ } \ No newline at end of file diff --git a/modules/local/mutect2_summarize.nf b/modules/local/mutect2_summarize.nf deleted file mode 100644 index dcd4316..0000000 --- a/modules/local/mutect2_summarize.nf +++ /dev/null @@ -1,12 +0,0 @@ -process MUTECT2_SUMMARIZE { - - input: - path variants_txt - - output: - path("variants.txt"), emit: txt_summarized_ch - - """ - csvtk concat -t ${variants_txt} -T -o variants.txt - """ -} \ No newline at end of file diff --git a/modules/local/mutserve.nf b/modules/local/mutserve.nf index fa8a048..b9d6724 100644 --- a/modules/local/mutserve.nf +++ b/modules/local/mutserve.nf @@ -1,34 +1,42 @@ process MUTSERVE { - publishDir "${params.output}", mode: 'copy',pattern: '*.vcf.gz' - input: - path bam_files_ch + path bam_file path reference path excluded_samples output: - path("variants.vcf.gz"), emit: vcf_ch - path("variants.txt"), emit: txt_ch - + path("${bam_file.simpleName}.txt"), emit: mutserve_txt_ch + tuple path("${bam_file.baseName}.vcf.gz"), val('mutserve_fusion'), emit: mutserve_fusion_vcf_ch + path("${bam_file.baseName}.vcf.gz"), emit: mutserve_vcf_ch + path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutserve_vcf_idx_ch + script: def avail_mem = 1024 - if (!task.memory) { - log.info '[MUTSERVE] Available memory not known - defaulting to 1GB. Specify process memory requirements to change this.' - } else { + if (task.memory) { avail_mem = (task.memory.mega*0.8).intValue() } """ - java -Xmx${avail_mem}M -jar /opt/mutserve/mutserve.jar call \ - --level ${params.detection_limit} \ - --reference ${reference} \ - --mapQ ${params.mapQ} \ - --baseQ ${params.baseQ} \ - --deletions --output variants.vcf.gz \ - --no-ansi ${bam_files_ch} \ - --excluded-samples ${excluded_samples} \ - --threads ${task.cpus} \ - --write-raw + java -Xmx${avail_mem}M -jar /opt/mutserve/mutserve.jar \ + call \ + --level ${params.detection_limit} \ + --reference ${reference} \ + --mapQ ${params.mapQ} \ + --baseQ ${params.baseQ} \ + --output ${bam_file.baseName}.vcf.gz \ + --no-ansi \ + --excluded-samples ${excluded_samples} \ + --write-raw \ + ${bam_file} + + bcftools norm \ + -m-any \ + -f ${reference} \ + -o ${bam_file.baseName}.norm.vcf.gz -Oz \ + ${bam_file.baseName}.vcf.gz + + mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz + tabix -f ${bam_file.baseName}.vcf.gz """ } \ No newline at end of file diff --git a/modules/local/quality_control.nf b/modules/local/quality_control.nf index 9a813b0..a084c5d 100644 --- a/modules/local/quality_control.nf +++ b/modules/local/quality_control.nf @@ -1,12 +1,15 @@ process QUALITY_CONTROL { + publishDir "${params.output_reports}", mode: "copy", pattern: '*.html' + input: path excluded_samples path zip + output: path "*.html" """ - multiqc . + multiqc . """ } \ No newline at end of file diff --git a/modules/local/vcf_merge.nf b/modules/local/vcf_merge.nf new file mode 100644 index 0000000..577e963 --- /dev/null +++ b/modules/local/vcf_merge.nf @@ -0,0 +1,19 @@ +process VCF_MERGE { + + input: + path(variants_ch) + path(variants_idx_ch) + val count + + output: + path ("merged.vcf.gz"), emit: vcf_merged_ch + + """ + if [[ ${count} > 1 ]] + then + bcftools merge -Oz -o merged.vcf.gz ${variants_ch} + else + mv ${variants_ch} merged.vcf.gz + fi + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index cac9dcf..b5ff855 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,7 +16,7 @@ params { project_date = "`date`" files = null reference = "rcrs" - mode = "mutserve" + mode = "fusion" detection_limit = 0.01 mapQ = 20 baseQ = 20 diff --git a/reports/report.Rmd b/reports/report.Rmd index b55b532..349262d 100644 --- a/reports/report.Rmd +++ b/reports/report.Rmd @@ -230,12 +230,21 @@ Row ```{r echo=FALSE, results='asis'} +variants <- variants %>% + mutate(Type = case_when( + Type == "0/1" ~ "2", + Type == "1/0" ~ "2", + Type == "INDEL" ~ "3", + TRUE ~ as.character(Type) + )) + summary_data <- aggregate( Sample_Label ~ Pos + Type , data = variants, FUN = length) -summary_data <- summary_data %>% filter(Sample_Label >= 2) +summary_data <- summary_data %>% filter(Sample_Label >= 1) summary_data <- summary_data %>% mutate( label = case_when( Type == 1 ~ "Variant", Type == 2 ~ "Heteroplasmy", + Type == 3 ~ "INDEL", TRUE ~ "Unknown" ) ) diff --git a/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam b/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam deleted file mode 100644 index 5584359..0000000 Binary files a/tests/data/bam/HG00096.mapped.ILLUMINA.bwa.GBR.low_coverage.20101123_copy.bam and /dev/null differ diff --git a/tests/mitocalling.nf.test b/tests/mitocalling.nf.test index 4f9409e..0e62264 100644 --- a/tests/mitocalling.nf.test +++ b/tests/mitocalling.nf.test @@ -18,7 +18,7 @@ nextflow_pipeline { then { assert workflow.success - assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } @@ -37,7 +37,7 @@ nextflow_pipeline { } then { - assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } @@ -57,8 +57,7 @@ nextflow_pipeline { then { assert workflow.success - //TODO: sort variants file by sample after merging to get a deterministic order - //assert snapshot(path("${launchDir}/out/variants_ann.txt")).match() + assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match() } } diff --git a/tests/mitocalling.nf.test.snap b/tests/mitocalling.nf.test.snap index 5efd4e5..d2c86c3 100644 --- a/tests/mitocalling.nf.test.snap +++ b/tests/mitocalling.nf.test.snap @@ -1,14 +1,20 @@ { + "Runs with single BAM file and mutect2": { + "content": [ + "variants.annotated.txt:md5,94fa4b4d0613f61410618a7eba4c5c97" + ], + "timestamp": "2024-02-26T23:09:58.251860914" + }, "Runs with single BAM file and mutserve": { "content": [ - "variants_ann.txt:md5,1d8423fc3e89a12e226e5e4b89f60150" + "variants.annotated.txt:md5,21b561f9b05098cdc137f43b4988570e" ], - "timestamp": "2024-01-10T14:27:36.082310248" + "timestamp": "2024-02-26T23:02:15.242706827" }, "Runs with BAM file including different header contigs": { "content": [ - "variants_ann.txt:md5,c2aa6c45193a921f4a20238fe9bf6cd8" + "variants.annotated.txt:md5,d68e6fed173c766228164438a24ecf5a" ], - "timestamp": "2024-01-10T14:28:13.798945731" + "timestamp": "2024-02-26T23:03:07.470112898" } } \ No newline at end of file diff --git a/tests/data/test_single_bam.config b/tests/test_mitohpc_fusion.config similarity index 75% rename from tests/data/test_single_bam.config rename to tests/test_mitohpc_fusion.config index f2f23c1..433f8a8 100644 --- a/tests/data/test_single_bam.config +++ b/tests/test_mitohpc_fusion.config @@ -10,7 +10,9 @@ params { project = "test-job" - files = "$projectDir/tests/data/bam/*.bam" - output = "out" + files = "$projectDir/tests/data/bam/mitohpc/*.bam" + output = "output/mitohpc_fusion" + detection_limit = 0.03 + mode = "fusion" } diff --git a/tests/data/mutect2_test_single_bam.config b/tests/test_mitohpc_mutect2.config similarity index 75% rename from tests/data/mutect2_test_single_bam.config rename to tests/test_mitohpc_mutect2.config index 42b6371..8948f23 100644 --- a/tests/data/mutect2_test_single_bam.config +++ b/tests/test_mitohpc_mutect2.config @@ -9,9 +9,10 @@ */ params { - project = "test-job-mutect2" - files = "$projectDir/tests/data/bam/*.bam" - output = "out" + project = "test-job" + files = "$projectDir/tests/data/bam/mitohpc/*.bam" + output = "output/mitohpc_mutect2" + detection_limit = 0.03 mode = "mutect2" } diff --git a/tests/test_mitohpc_mutserve.config b/tests/test_mitohpc_mutserve.config new file mode 100644 index 0000000..6c93272 --- /dev/null +++ b/tests/test_mitohpc_mutserve.config @@ -0,0 +1,18 @@ +/* +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. + Use as follows: + nextflow run main.nf -profile test,development +---------------------------------------------------------------------------------------- +*/ + +params { + project = "test-job" + files = "$projectDir/tests/data/bam/mitohpc/*.bam" + output = "output/mitohpc_mutserve" + detection_limit = 0.03 + mode = "mutserve" +} + diff --git a/workflows/mtdna_server_2.nf b/workflows/mtdna_server_2.nf index 16cc366..a05525c 100644 --- a/workflows/mtdna_server_2.nf +++ b/workflows/mtdna_server_2.nf @@ -21,14 +21,14 @@ include { INPUT_VALIDATION } from '../modules/local/input_validation' include { QUALITY_CONTROL } from '../modules/local/quality_control' include { MUTSERVE } from '../modules/local/mutserve' include { MUTECT2 } from '../modules/local/mutect2' -include { MUTECT2_SUMMARIZE } from '../modules/local/mutect2_summarize' +include { FILTER_VARIANTS } from '../modules/local/filter_variants' +include { MERGING_VARIANTS } from '../modules/local/merging_variants' +include { VCF_MERGE } from '../modules/local/vcf_merge' include { ANNOTATE } from '../modules/local/annotate' -include { HAPLOGROUP_DETECTION } from '../modules/local/haplogroup_detection' -include { CONTAMINATION_DETECTION } from '../modules/local/contamination_detection' +include { HAPLOGROUPS_CONTAMINATION } from '../modules/local/haplogroups_contamination' include { REPORT } from '../modules/local/report' - workflow MTDNA_SERVER_2 { report_file_ch = file("$projectDir/reports/report.Rmd") @@ -65,45 +65,98 @@ workflow MTDNA_SERVER_2 { haplogrep_ch = file("$projectDir/files/haplogroups.txt") contamination_ch = file("$projectDir/files/haplocheck.txt") - if (params.mode == 'mutect2') { + + if (params.mode == 'mutserve') { + + MUTSERVE( + bams_ch, + ref_file_mutserve, + INPUT_VALIDATION.out.excluded_ch + ) + + MERGING_VARIANTS( + MUTSERVE.out.mutserve_txt_ch.collect(), + params.mode + ) + + variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch + variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect() + variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect() + file_count = MUTSERVE.out.mutserve_vcf_ch.count() + + } + + else if (params.mode == 'mutect2') { MUTECT2( - bams_ch, - ref_file_mutect2, - INPUT_VALIDATION.out.excluded_ch, - INDEX.out.fasta_index_ch, - detected_contig + bams_ch, + ref_file_mutect2, + INPUT_VALIDATION.out.excluded_ch, + INDEX.out.fasta_index_ch, + detected_contig ) - MUTECT2_SUMMARIZE( - MUTECT2.out.txt_ch.collect() + MERGING_VARIANTS( + MUTECT2.out.mutect2_txt_ch.collect(), + params.mode ) - variants_txt_ch = MUTECT2_SUMMARIZE.out.txt_summarized_ch + variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch + variants_vcf_ch = MUTECT2.out.mutect2_vcf_ch.collect() + variants_vcf_idx_ch = MUTECT2.out.mutect2_vcf_idx_ch.collect() + file_count = MUTECT2.out.mutect2_vcf_ch.count() } - else { + else if (params.mode == 'fusion') { MUTSERVE( - bams_ch.collect(), + bams_ch, ref_file_mutserve, INPUT_VALIDATION.out.excluded_ch ) - variants_txt_ch = MUTSERVE.out.txt_ch + MUTECT2( + bams_ch, + ref_file_mutect2, + INPUT_VALIDATION.out.excluded_ch, + INDEX.out.fasta_index_ch, + detected_contig + ) + + merged_ch = MUTSERVE.out.mutserve_fusion_vcf_ch.concat(MUTECT2.out.mutect2_fusion_vcf_ch) - HAPLOGROUP_DETECTION( - MUTSERVE.out.vcf_ch - ) + FILTER_VARIANTS ( + merged_ch + ) - haplogrep_ch = HAPLOGROUP_DETECTION.out.haplogroups_ch - - CONTAMINATION_DETECTION( - MUTSERVE.out.vcf_ch + MERGING_VARIANTS( + FILTER_VARIANTS.out.combined_methods_ch.collect(), + params.mode ) - contamination_ch = CONTAMINATION_DETECTION.out.contamination_txt_ch - } + variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch + // only use mutserve calls for haplogroup and contamination detection + variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect() + variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect() + file_count = MUTSERVE.out.mutserve_vcf_ch.count() + } + + VCF_MERGE ( + variants_vcf_ch, + variants_vcf_idx_ch, + file_count + ) + + if (params.mode != 'mutect2') { + HAPLOGROUPS_CONTAMINATION ( + VCF_MERGE.out.vcf_merged_ch + ) + haplogrep_ch = HAPLOGROUPS_CONTAMINATION.out.haplogroups_ch + contamination_ch = HAPLOGROUPS_CONTAMINATION.out.contamination_txt_ch + + } + + ANNOTATE( variants_txt_ch, @@ -124,10 +177,7 @@ workflow MTDNA_SERVER_2 { } workflow.onComplete { - //TODO: use templates - //TODO: move in EmailHelper class - //see https://www.nextflow.io/docs/latest/mail.html for configuration etc... - + def report = new CloudgeneReport() //job failed