Skip to content

Commit

Permalink
Features/uniform calling (#13)
Browse files Browse the repository at this point in the history
* Use VCF files for all modes

* Remove duplicate output channels

* Remove duplicate code

* Fix snapshots due to new output column
  • Loading branch information
seppinho authored Mar 2, 2024
1 parent 5fb5701 commit 316134d
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 82 deletions.
35 changes: 23 additions & 12 deletions modules/local/filter_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process FILTER_VARIANTS {
//publishDir "${params.output}", mode: 'copy'

input:
tuple path(vcf_file), val(method)
tuple path(vcf_file), path(vcf_file_idx), val(method)

output:
path("${vcf_file.baseName}.${method}.filtered.txt"), emit: combined_methods_ch
Expand All @@ -12,27 +12,38 @@ process FILTER_VARIANTS {
def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '')

"""
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tGT" \
> ${vcf_file.baseName}.${method}.txt
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
if [[ ${method} == "mutserve_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.tmp.txt
elif [[ ${method} == "mutect2_fusion" ]]
then
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.tmp.txt
else
mv ${vcf_file.baseName}.${method}.txt ${vcf_file.baseName}.${method}.filtered.tmp.txt
fi
## annotating SNVS and INDELs for reporting
awk 'BEGIN {OFS="\t"} {
if (NR == 1) { print \$0, "Type"; next }
if ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5)) { \$9="3" }
else if (\$8 == "1") { \$9="1" }
else if (\$8 == "0/1" || \$8 == "1/0" || \$8 == "0|1" || \$8 == "1|0" ) { \$9="2" }
else { \$9="UNKNOWN" }
print
}' ${vcf_file.baseName}.${method}.filtered.tmp.txt > ${vcf_file.baseName}.${method}.filtered.txt
rm ${vcf_file.baseName}.${method}.filtered.tmp.txt
"""
}

2 changes: 1 addition & 1 deletion modules/local/merging_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process MERGING_VARIANTS {
csvtk sort \
-t variants.concat.txt \
-k ID:N -k Pos:n -k Type:r \
-k ID:N -k Pos:n -k Type:nr \
-T -o variants.sorted.txt
if [[ ${mode} == "fusion" ]]
Expand Down
14 changes: 2 additions & 12 deletions modules/local/mutect2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@ process MUTECT2 {
path reference
path fasta_index_files
val detected_contig
val method

output:
path("${bam_file.baseName}.txt"), emit: mutect2_txt_ch
tuple path("${bam_file.baseName}.vcf.gz"), val('mutect2_fusion'), emit: mutect2_fusion_vcf_ch
path("${bam_file.baseName}.vcf.gz"), emit: mutect2_vcf_ch
path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutect2_vcf_idx_ch
tuple path("${bam_file.baseName}.vcf.gz"), path("${bam_file.baseName}.vcf.gz.tbi"), val(method), emit: mutect2_ch

"""
samtools index ${bam_file}
Expand Down Expand Up @@ -38,14 +36,6 @@ process MUTECT2 {
mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz
tabix -f ${bam_file.baseName}.vcf.gz
#required for mutect2-only mode!
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
> ${bam_file.baseName}.txt
bcftools query \
-f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' \
${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt
rm raw.vcf.gz
"""
}
6 changes: 2 additions & 4 deletions modules/local/mutserve.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@ process MUTSERVE {
input:
path bam_file
path reference
val method

output:
path("${bam_file.simpleName}.txt"), emit: mutserve_txt_ch
tuple path("${bam_file.baseName}.vcf.gz"), val('mutserve_fusion'), emit: mutserve_fusion_vcf_ch
path("${bam_file.baseName}.vcf.gz"), emit: mutserve_vcf_ch
path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutserve_vcf_idx_ch
tuple path("${bam_file.baseName}.vcf.gz"), path("${bam_file.baseName}.vcf.gz.tbi"), val(method), emit: mutserve_ch

script:
def avail_mem = 1024
Expand Down
16 changes: 8 additions & 8 deletions tests/mitocalling.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
{
"Runs with single BAM file and mutect2": {
"content": [
"variants.annotated.txt:md5,94fa4b4d0613f61410618a7eba4c5c97"
"variants.annotated.txt:md5,52dc3737a73bb80cf3a1c8946990c3e4"
],
"timestamp": "2024-02-26T23:09:58.251860914"
"timestamp": "2024-03-02T13:11:34.901277407"
},
"Runs with single BAM file and mutserve": {
"content": [
"variants.annotated.txt:md5,aac0aee87ce1a91f8c35264be248b52e"
"variants.annotated.txt:md5,ee00b39fc49e1f34e6affab00368181f"
],
"timestamp": "2024-02-27T15:19:15.934593072"
"timestamp": "2024-03-02T13:09:45.954385792"
},
"Runs with BAM file including different header contigs": {
"content": [
"variants.annotated.txt:md5,d68e6fed173c766228164438a24ecf5a"
"variants.annotated.txt:md5,a5345c7718757ded27246ae342b5f25b"
],
"timestamp": "2024-02-26T23:03:07.470112898"
"timestamp": "2024-03-02T13:10:28.843431233"
},
"Runs with two files and one goes through QC": {
"content": [
"variants.annotated.txt:md5,3946a3bd26ceafe8f61903499bfcbb35",
"variants.annotated.txt:md5,ef402762b03356a208583e434d96dfa3",
{
"tasksFailed": 0,
"tasksCount": 10,
"tasksSucceeded": 10
}
],
"timestamp": "2024-02-28T12:32:40.624776"
"timestamp": "2024-03-02T13:12:29.64633988"
}
}
83 changes: 38 additions & 45 deletions workflows/mtdna_server_2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,12 @@ workflow MTDNA_SERVER_2 {

MUTSERVE(
validated_files,
ref_file_mutserve
ref_file_mutserve,
"mutserve_single"
)

MERGING_VARIANTS(
MUTSERVE.out.mutserve_txt_ch.collect(),
params.mode
)

variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch
variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect()
variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect()
file_count = MUTSERVE.out.mutserve_vcf_ch.count()
vcf_ch = MUTSERVE.out.mutserve_ch
file_count = MUTSERVE.out.mutserve_ch.count()

}

Expand All @@ -93,69 +87,68 @@ workflow MTDNA_SERVER_2 {
validated_files,
ref_file_mutect2,
INDEX.out.fasta_index_ch,
detected_contig
detected_contig,
"mutect2_single"
)

MERGING_VARIANTS(
MUTECT2.out.mutect2_txt_ch.collect(),
params.mode
)

variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch
variants_vcf_ch = MUTECT2.out.mutect2_vcf_ch.collect()
variants_vcf_idx_ch = MUTECT2.out.mutect2_vcf_idx_ch.collect()
file_count = MUTECT2.out.mutect2_vcf_ch.count()
vcf_ch = MUTECT2.out.mutect2_ch
file_count = MUTECT2.out.mutect2_ch.count()
}

else if (params.mode == 'fusion') {

MUTSERVE(
validated_files,
ref_file_mutserve
ref_file_mutserve,
"mutserve_fusion"
)

MUTECT2(
validated_files,
ref_file_mutect2,
INDEX.out.fasta_index_ch,
detected_contig
detected_contig,
"mutect2_fusion"
)

merged_ch = MUTSERVE.out.mutserve_fusion_vcf_ch.concat(MUTECT2.out.mutect2_fusion_vcf_ch)
vcf_ch = MUTSERVE.out.mutserve_ch.concat(MUTECT2.out.mutect2_ch)
file_count = MUTSERVE.out.mutserve_ch.count()
}

FILTER_VARIANTS (
merged_ch
)
FILTER_VARIANTS (
vcf_ch
)

MERGING_VARIANTS(
FILTER_VARIANTS.out.combined_methods_ch.collect(),
params.mode
)
MERGING_VARIANTS(
FILTER_VARIANTS.out.combined_methods_ch.collect(),
params.mode
)

variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch

variants_txt_ch = MERGING_VARIANTS.out.txt_summarized_ch
// only use mutserve calls for haplogroup and contamination detection
variants_vcf_ch = MUTSERVE.out.mutserve_vcf_ch.collect()
variants_vcf_idx_ch = MUTSERVE.out.mutserve_vcf_idx_ch.collect()
file_count = MUTSERVE.out.mutserve_vcf_ch.count()
}
if (params.mode != 'mutect2') {

variants_vcf_ch = vcf_ch
.filter { it[2].contains("mutserve") }
.collect { it[0]}

VCF_MERGE (
variants_vcf_ch,
variants_vcf_idx_ch,
file_count
variants_vcf_idx_ch = vcf_ch
.filter { it[2].contains("mutserve") }
.collect { it[1]}

VCF_MERGE (
variants_vcf_ch,
variants_vcf_idx_ch,
file_count
)

if (params.mode != 'mutect2') {
HAPLOGROUPS_CONTAMINATION (
VCF_MERGE.out.vcf_merged_ch
)
haplogrep_ch = HAPLOGROUPS_CONTAMINATION.out.haplogroups_ch
contamination_ch = HAPLOGROUPS_CONTAMINATION.out.contamination_txt_ch

}



ANNOTATE(
variants_txt_ch,
ref_file_mutserve,
Expand Down Expand Up @@ -207,4 +200,4 @@ workflow.onComplete {
} else {

}
}
}

0 comments on commit 316134d

Please sign in to comment.