Skip to content

Commit

Permalink
Add subsampling module (#17)
Browse files Browse the repository at this point in the history
* Add subsampling module

* Add comments

* Update base config

* Improve naming
  • Loading branch information
seppinho authored Mar 6, 2024
1 parent cff6e28 commit 0f05320
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 7 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ RUN apt-get update && \
zlib1g-dev \
libgomp1 \
procps \
libx11-6
libx11-6 \
bc

RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Install mutserve (not as conda package available)
Expand Down
15 changes: 12 additions & 3 deletions cloudgene.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ workflow:
label: "Estimating required VAF Coverage"
view: progressbar
- process: "MTDNA_SERVER_2:HAPLOGROUPS_CONTAMINATION"
label: "Haplogroup % Contamination Detection"
label: "Haplogroups & Contamination Detection"
view: progressbar
- process: "MTDNA_SERVER_2:VCF_MERGE"
label: "VCF Merge"
view: progressbar
- process: "MTDNA_SERVER_2:SAMPLE_REPORT"
label: "Sample Report Generation"
view: progressbar
- process: "MTDNA_SERVER_2:REPORT"
label: "Report Generation"
label: "Dashboard Generation"
view: progressbar
inputs:

Expand Down Expand Up @@ -98,13 +101,19 @@ workflow:
0.1: 0.1

- id: coverage_estimation
description: Apply Coverage Estimation for Diagnostic NGS
description: Apply Coverage Estimate
type: list
value: off
values:
on: On
off: Off

- id: subsampling
description: Coverage Subsampling (0 means deactivated)
type: number
visible: true
value: 0

- id: myseparator0
type: separator

Expand Down
6 changes: 3 additions & 3 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process {

withName: 'CALCULATE_STATISTICS' {
cpus = { 1 * task.attempt }
memory = { 1.GB * task.attempt }
memory = { 2.GB * task.attempt }
}

withName: 'INPUT_VALIDATION' {
Expand All @@ -27,7 +27,7 @@ process {

withName: 'MUTECT2' {
cpus = { 1 * task.attempt }
memory = { 2.GB * task.attempt }
memory = { 4.GB * task.attempt }
}

withName: 'FILTER_VARIANTS' {
Expand All @@ -42,7 +42,7 @@ process {

withName: 'VCF_MERGE' {
cpus = { 1 * task.attempt }
memory = { 1.GB * task.attempt }
memory = { 2.GB * task.attempt }
}

withName: 'HAPLOGROUPS_CONTAMINATION' {
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::ncurses
- unzip=6.0
- openjdk=17
- r-base=4.3.2
Expand Down
25 changes: 25 additions & 0 deletions modules/local/subsampling.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
 * SUBSAMPLING
 *
 * Down-samples a BAM file when its mean depth exceeds the requested coverage.
 *
 * Inputs:
 *   bam_file  - BAM file to inspect and, if necessary, down-sample
 *   coverage  - target mean coverage; compared with the shell's integer `-gt` test
 *
 * Output:
 *   subsampled_bam_ch - a file with the same name as the input. It is the
 *                       down-sampled BAM when subsampling happened, otherwise
 *                       the untouched input (hence `includeInputs: true`).
 */
process SUBSAMPLING {

    input:
    path bam_file
    val coverage

    output:
    path "${bam_file}", includeInputs: true, emit: subsampled_bam_ch

    script:
    def coverage_report = "samtools_coverage_${bam_file.baseName}.txt"
    def subsampled_bam  = "${bam_file.baseName}.subsampled.bam"
    """
    # calculate mean coverage: keep the row whose third column equals 16569
    # and read its seventh column (meandepth in the samtools coverage table)
    samtools coverage "${bam_file}" > "${coverage_report}"
    mean_cov=\$(csvtk grep -t -f3 -p 16569 -C '\$' "${coverage_report}" | csvtk cut -t -f 7)

    # convert to integer so it can be compared with the shell's -gt operator
    mean_cov_int=\$(printf "%.0f" "\$mean_cov")

    # only subsample when the observed mean coverage exceeds the requested one;
    # the fraction is computed inside the branch so that a zero or missing
    # mean coverage never reaches bc as a divisor
    if [ "\${mean_cov_int}" -gt ${coverage} ]
    then
        # samtools view -s takes SEED.FRACTION: the integer part 1 fixes the seed
        # (for reproducibility), the fractional part is coverage/mean_cov (e.g. 1.2)
        fraction=\$(echo "scale=4; 1+(${coverage} / \${mean_cov})" | bc)
        samtools view -s "\$fraction" -b -o "${subsampled_bam}" "${bam_file}"
        # replace the staged input so the declared output name stays unchanged
        mv "${subsampled_bam}" "${bam_file}"
    fi
    """
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
alignQ = 30
coverage_estimation = "on"
max_samples = 0
subsampling = 0


service = [
Expand Down
9 changes: 9 additions & 0 deletions workflows/mtdna_server_2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ include { ANNOTATE } from '../modules/local/annotate'
include { HAPLOGROUPS_CONTAMINATION } from '../modules/local/haplogroups_contamination'
include { COVERAGE_ESTIMATION } from '../modules/local/coverage_estimation'
include { REPORT } from '../modules/local/report'
include { SUBSAMPLING } from '../modules/local/subsampling'
include { SAMPLE_REPORT } from '../modules/local/sample_report'


Expand Down Expand Up @@ -83,6 +84,14 @@ workflow MTDNA_SERVER_2 {

validated_files = INPUT_VALIDATION.out.validated_files.flatten()

// Optional coverage subsampling: runs only when params.subsampling is non-zero.
// The SUBSAMPLING process receives every validated file together with the
// requested coverage, and its output channel then replaces validated_files
// so that the steps below operate on the subsampled BAM files.
if(params.subsampling != 0) {
SUBSAMPLING (
validated_files,
params.subsampling
)
validated_files = SUBSAMPLING.out.subsampled_bam_ch
}

if (params.mode == 'mutserve') {

MUTSERVE(
Expand Down

0 comments on commit 0f05320

Please sign in to comment.