Skip to content

Commit

Permalink
Add basic QC calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
seppinho committed Dec 12, 2023
1 parent d07f0f7 commit 977d484
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 1 deletion.
2 changes: 2 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ dependencies:
- r-rmdformats=1.0.3
- r-knitr=1.37
- pandoc=2.14.2
- samtools=1.18
- csvtk=0.26.0
34 changes: 34 additions & 0 deletions modules/local/calculate_statistics.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Computes per-sample QC statistics for the mitochondrial contig of one BAM file.
//
// Input : bam_file - a single alignment file.
// Output: stats_ch - "<bam baseName>.summary.txt", a tab-separated long-format
//         table with the columns Sample, Parameter and Value. Consumers must
//         parse it as TSV (e.g. csvtk -t).
process CALCULATE_STATISTICS {

    input:
    path bam_file

    output:
    path("*summary.txt"), emit: stats_ch


    script:
    def sample_name = bam_file.baseName
    def coverage_file = "samtools_coverage_${sample_name}.txt"
    def output_name = "${sample_name}.summary.txt"

    // NOTE: inside this Groovy string a single backslash escape (e.g. \t) is
    // expanded by Groovy before Bash ever sees it. The printf formats below
    // therefore use doubled backslashes so that printf itself renders the
    // tab/newline characters.
    """
    ## a failing csvtk call inside a pipeline must fail the task
    set -o pipefail

    ## calculate summary statistics for every contig
    samtools coverage "${bam_file}" > "${coverage_file}"

    ## keep the record whose third column (end position) matches 16569
    csvtk grep -t -f3 -p 16569 "${coverage_file}" -T -o mtdna.txt

    ## csvtk passes the first row through as a header, so a column can hold
    ## more than one line; the selected record is always the last one
    field() {
        csvtk cut -t -f "\$1" mtdna.txt | tail -n 1
    }

    contig=\$(field 1)
    numreads=\$(field 4)
    covered_bases=\$(field 5)
    covered_bases_percentage=\$(field 6)
    mean_depth=\$(field 7)
    mean_base_quality=\$(field 8)
    mean_map_quality=\$(field 9)

    ## printf with quoted arguments keeps real tab separators; an unquoted
    ## echo would word-split on the tabs and emit single spaces instead
    {
        printf 'Sample\\tParameter\\tValue\\n'
        printf '%s\\tContig\\t%s\\n' "${sample_name}" "\${contig}"
        printf '%s\\tNumberofReads\\t%s\\n' "${sample_name}" "\${numreads}"
        printf '%s\\tCoveredBases\\t%s\\n' "${sample_name}" "\${covered_bases}"
        printf '%s\\tCoveragePercentage\\t%s\\n' "${sample_name}" "\${covered_bases_percentage}"
        printf '%s\\tMeanDepth\\t%s\\n' "${sample_name}" "\${mean_depth}"
        printf '%s\\tMeanBaseQuality\\t%s\\n' "${sample_name}" "\${mean_base_quality}"
        printf '%s\\tMeanMapQuality\\t%s\\n' "${sample_name}" "\${mean_map_quality}"
    } > "${output_name}"
    """
}
15 changes: 15 additions & 0 deletions modules/local/summarize_statistics.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Concatenates the per-sample statistics tables into a single table and
// publishes it (copy mode) to "<params.output>/statistics".
//
// Input : statistics            - one or more per-sample statistics files
//                                 (Sample / Parameter / Value tables).
// Output: stats_summarized_ch   - "sample_statistics.txt", tab-delimited.
process SUMMARIZE_STATISTICS {

    publishDir "${params.output}/statistics", mode: 'copy'

    input:
    path statistics

    output:
    path("sample_statistics.txt"), emit: stats_summarized_ch


    script:
    // -t: parse the inputs as tab-delimited (without it csvtk assumes CSV and
    //     treats every row as one opaque field); -T: write tab-delimited output.
    // ${statistics} is left unquoted on purpose: it expands to a
    // space-separated list of staged file names.
    """
    csvtk concat -t ${statistics} -T -o sample_statistics.txt
    """
}
Binary file not shown.
11 changes: 10 additions & 1 deletion workflows/mitocalling.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@


include { CALCULATE_STATISTICS } from '../modules/local/calculate_statistics'
include { SUMMARIZE_STATISTICS } from '../modules/local/summarize_statistics'
include { MUTSERVE } from '../modules/local/mutserve'
include { ANNOTATE } from '../modules/local/annotate'
include { HAPLOGROUP_DETECTION } from '../modules/local/haplogroup_detection'
Expand Down Expand Up @@ -29,6 +30,14 @@ workflow MITOCALLING {
exit 1, "Reference " + params.reference + "not supported"
}

CALCULATE_STATISTICS(
bams_ch
)

SUMMARIZE_STATISTICS(
CALCULATE_STATISTICS.out.stats_ch.collect()
)

MUTSERVE(
bams_ch.collect(),
ref_file
Expand Down

0 comments on commit 977d484

Please sign in to comment.