Skip to content

Commit

Permalink
Fix testcases and format modules
Browse files: browse the repository at this point in the history
  • Loading branch information
seppinho committed Feb 26, 2024
1 parent ad817d1 commit cb57a35
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 49 deletions.
8 changes: 6 additions & 2 deletions modules/local/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ process ANNOTATE {
path annotation

output:
path ("${variants_ch.baseName}_ann.txt"), emit: variants_ann_ch
path ("${variants_ch.baseName}.annotated.txt"), emit: variants_ann_ch

"""
java -jar /opt/mutserve/mutserve.jar annotate --input ${variants_ch} --output ${variants_ch.baseName}_ann.txt --annotation ${annotation}
java -jar /opt/mutserve/mutserve.jar \
annotate \
--input ${variants_ch} \
--output ${variants_ch.baseName}.annotated.txt \
--annotation ${annotation}
"""
}
20 changes: 15 additions & 5 deletions modules/local/filter_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,26 @@ process FILTER_VARIANTS {
def vcf_name = "${vcf_file}".replaceAll('.vcf.gz', '')

"""
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${vcf_file.baseName}.${method}.txt
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
> ${vcf_file.baseName}.${method}.txt
if [[ ${method} == "mutserve_fusion" ]]
then
bcftools query -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' ${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP\t%GT]\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || (length(\$4) == 1 && length(\$5) == 1)' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
elif [[ ${method} == "mutect2_fusion" ]]
then
bcftools query -f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' ${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' ${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
bcftools query \
-f '${vcf_name}.bam\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tINDEL\n' \
${vcf_file} >> ${vcf_file.baseName}.${method}.txt
awk -F'\t' 'NR == 1 || ((length(\$4) > 1 || length(\$5) > 1) && length(\$4) != length(\$5))' \
${vcf_file.baseName}.${method}.txt > ${vcf_file.baseName}.${method}.filtered.txt
fi
"""
}
Expand Down
3 changes: 2 additions & 1 deletion modules/local/index.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ process INDEX {

"""
samtools faidx $reference
samtools dict $reference -o ${reference.baseName}.dict
samtools dict $reference \
-o ${reference.baseName}.dict
"""
}
28 changes: 17 additions & 11 deletions modules/local/input_validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,23 @@ process INPUT_VALIDATION {
path("contig.txt"), emit: contig_ch

"""
csvtk concat -t ${statistics} -T -o sample_statistics.txt
csvtk concat -t ${mapping} -T -o sample_mappings.txt
csvtk concat \
-t ${statistics} \
-T -o sample_statistics.txt
csvtk concat \
-t ${mapping} \
-T -o sample_mappings.txt
java -jar /opt/mutserve/mutserve.jar stats \
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
--input sample_statistics.txt \
--detection-limit ${params.detection_limit} \
--reference ${params.reference} \
--baseQ ${params.baseQ}\
--mapQ ${params.mapQ} \
--alignQ ${params.alignQ} \
--output-excluded-samples excluded_samples.txt \
--output-contig contig.txt \
--tool ${params.mode}
"""
}
15 changes: 10 additions & 5 deletions modules/local/merging_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@ process MERGING_VARIANTS {
val mode

output:
path("variants.merged.txt"), emit: txt_summarized_ch
path("variants.txt"), emit: txt_summarized_ch

"""
csvtk concat -t ${variants_txt} -T -o variants.concat.txt
csvtk concat \
-t ${variants_txt} \
-T -o variants.concat.txt
csvtk sort -t variants.concat.txt -k ID:N -k Pos:n -k Type:r -T -o variants.sorted.txt
csvtk sort \
-t variants.concat.txt \
-k ID:N -k Pos:n -k Type:r \
-T -o variants.sorted.txt
if [[ ${mode} == "fusion" ]]
then
java -jar /opt/VariantMerger.jar \
variants.sorted.txt \
--output variants.merged.txt
--output variants.txt
else
mv variants.sorted.txt variants.merged.txt
mv variants.sorted.txt variants.txt
fi
"""
}
38 changes: 24 additions & 14 deletions modules/local/mutect2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,35 @@ process MUTECT2 {
samtools index ${bam_file}
gatk Mutect2 \
-R ${reference} \
-L ${detected_contig} \
--min-base-quality-score ${params.baseQ} \
-callable-depth 6 --max-reads-per-alignment-start 0 \
-I ${bam_file} \
-O raw.vcf.gz
-R ${reference} \
-L ${detected_contig} \
--min-base-quality-score ${params.baseQ} \
-callable-depth 6 --max-reads-per-alignment-start 0 \
-I ${bam_file} \
-O raw.vcf.gz
gatk FilterMutectCalls \
-R ${reference} \
--min-reads-per-strand 2 \
-V raw.vcf.gz \
-O ${bam_file.baseName}.vcf.gz
-R ${reference} \
--min-reads-per-strand 2 \
-V raw.vcf.gz \
-O ${bam_file.baseName}.vcf.gz
rm raw.vcf.gz
bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.norm.vcf.gz -Oz
bcftools norm \
-m-any \
-f ${reference} \
-o ${bam_file.baseName}.norm.vcf.gz -Oz \
${bam_file.baseName}.vcf.gz
mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz
#required for mutect2-only mode!
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" > ${bam_file.baseName}.txt
bcftools query -f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' ${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt
echo -e "ID\tFilter\tPos\tRef\tVariant\tVariantLevel\tCoverage\tType" \
> ${bam_file.baseName}.txt
bcftools query \
-f '${bam_file}\t%FILTER\t%POS\t%REF\t%ALT\t[%AF\t%DP]\tMUTECT2\n' \
${bam_file.baseName}.vcf.gz >> ${bam_file.baseName}.txt
rm raw.vcf.gz
"""
}
9 changes: 7 additions & 2 deletions modules/local/mutserve.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ process MUTSERVE {
path excluded_samples

output:
path("${bam_file.baseName}.txt"), emit: mutserve_txt_ch
path("${bam_file.simpleName}.txt"), emit: mutserve_txt_ch
tuple path("${bam_file.baseName}.vcf.gz"), val('mutserve_fusion'), emit: mutserve_fusion_vcf_ch
path("${bam_file.baseName}.vcf.gz"), emit: mutserve_vcf_ch
path("${bam_file.baseName}.vcf.gz.tbi"), emit: mutserve_vcf_idx_ch
Expand All @@ -30,7 +30,12 @@ process MUTSERVE {
--write-raw \
${bam_file}
bcftools norm -m-any -f ${reference} ${bam_file.baseName}.vcf.gz -o ${bam_file.baseName}.norm.vcf.gz -Oz
bcftools norm \
-m-any \
-f ${reference} \
-o ${bam_file.baseName}.norm.vcf.gz -Oz \
${bam_file.baseName}.vcf.gz
mv ${bam_file.baseName}.norm.vcf.gz ${bam_file.baseName}.vcf.gz
tabix ${bam_file.baseName}.vcf.gz
"""
Expand Down
5 changes: 4 additions & 1 deletion modules/local/quality_control.nf
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
// QUALITY_CONTROL: runs `multiqc .` in the task directory and emits the resulting HTML file(s).
process QUALITY_CONTROL {

// Copy every output matching '*.html' into the directory given by params.output_reports.
publishDir "${params.output_reports}", mode: "copy", pattern: '*.html'

input:
// Neither input is referenced by name in the script; the command is simply given "." as its target.
path excluded_samples
path zip

output:
// Any file ending in .html that exists once the script has finished.
path "*.html"

// NOTE(review): the command appears twice below; in this diff view that looks like the removed
// and added side of a whitespace-only change rather than two real invocations — confirm against
// the actual file.
"""
multiqc .
multiqc .
"""
}
Binary file not shown.
7 changes: 3 additions & 4 deletions tests/mitocalling.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ nextflow_pipeline {

then {
assert workflow.success
assert snapshot(path("${launchDir}/out/variants_ann.txt")).match()
assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match()
}

}
Expand All @@ -37,7 +37,7 @@ nextflow_pipeline {
}

then {
assert snapshot(path("${launchDir}/out/variants_ann.txt")).match()
assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match()
}

}
Expand All @@ -57,8 +57,7 @@ nextflow_pipeline {

then {
assert workflow.success
//TODO: sort variants file by sample after merging to get a deterministic order
//assert snapshot(path("${launchDir}/out/variants_ann.txt")).match()
assert snapshot(path("${launchDir}/out/variants.annotated.txt")).match()
}

}
Expand Down
14 changes: 10 additions & 4 deletions tests/mitocalling.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
{
"Runs with single BAM file and mutect2": {
"content": [
"variants.annotated.txt:md5,94fa4b4d0613f61410618a7eba4c5c97"
],
"timestamp": "2024-02-26T23:09:58.251860914"
},
"Runs with single BAM file and mutserve": {
"content": [
"variants_ann.txt:md5,1d8423fc3e89a12e226e5e4b89f60150"
"variants.annotated.txt:md5,21b561f9b05098cdc137f43b4988570e"
],
"timestamp": "2024-01-10T14:27:36.082310248"
"timestamp": "2024-02-26T23:02:15.242706827"
},
"Runs with BAM file including different header contigs": {
"content": [
"variants_ann.txt:md5,c2aa6c45193a921f4a20238fe9bf6cd8"
"variants.annotated.txt:md5,d68e6fed173c766228164438a24ecf5a"
],
"timestamp": "2024-01-10T14:28:13.798945731"
"timestamp": "2024-02-26T23:03:07.470112898"
}
}

0 comments on commit cb57a35

Please sign in to comment.