Skip to content

Commit

Permalink
Add subsampling module
Browse files Browse the repository at this point in the history
  • Loading branch information
seppinho committed Mar 6, 2024
1 parent 35d0e4b commit bb711a6
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 1 deletion.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ RUN apt-get update && \
zlib1g-dev \
libgomp1 \
procps \
libx11-6
libx11-6 \
bc

RUN apt-get clean && rm -rf /var/lib/apt/lists/*

# Install mutserve (not as conda package available)
Expand Down
6 changes: 6 additions & 0 deletions cloudgene.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ workflow:
on: On
off: Off

- id: subsampling
description: Subsample to specified coverage (Deactivated with value 0)
type: number
visible: true
value: 0

- id: myseparator0
type: separator

Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- conda-forge::ncurses
- unzip=6.0
- openjdk=17
- r-base=4.3.2
Expand Down
28 changes: 28 additions & 0 deletions modules/local/subsampling.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
 * SUBSAMPLING
 *
 * Down-samples a BAM file when its mean depth exceeds a target coverage.
 *
 * Inputs:
 *   bam_file - BAM file to inspect and, if needed, subsample.
 *   coverage - target mean coverage; compared as an integer against the
 *              rounded mean depth reported by `samtools coverage`.
 *
 * Output:
 *   subsampled_bam_ch - a file with the same name as the input. It is the
 *                       subsampled BAM when subsampling took place, otherwise
 *                       the untouched input (hence `includeInputs: true`).
 *
 * Only the contig whose end position is 16569 is considered.
 */
process SUBSAMPLING {

    input:
    path bam_file
    val coverage

    output:
    path "${bam_file}", includeInputs: true, emit: subsampled_bam_ch

    script:
    def coverage_table = "samtools_coverage_${bam_file.baseName}.txt"
    def subsampled_bam = "${bam_file.baseName}.subsampled.bam"

    """
    samtools coverage ${bam_file} > ${coverage_table}

    # Column 3 is the end position and column 7 the mean depth of each contig.
    # The header line never matches, and only the first matching row is used,
    # so mean_cov is either a single number or empty.
    mean_cov=\$(awk -F'\\t' '\$3 == 16569 { print \$7; exit }' ${coverage_table})

    # Treat a missing contig as zero depth so the guard below simply skips it.
    mean_cov=\${mean_cov:-0}
    mean_cov_int=\$(printf "%.0f" "\${mean_cov}")

    if [ "\${mean_cov_int}" -gt "${coverage}" ]
    then
        # samtools view -s takes SEED.FRACTION: the leading 1 is the RNG seed,
        # the decimals are the share of reads to keep. Computed only inside the
        # guard so a zero mean depth can never reach the division.
        fraction=\$(echo "scale=4; 1+(${coverage} / \${mean_cov})" | bc)
        samtools view -s "\${fraction}" -b -o ${subsampled_bam} ${bam_file}

        # Replace the staged input so the declared output name stays the same.
        mv ${subsampled_bam} ${bam_file}
    fi
    """
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
alignQ = 30
coverage_estimation = "on"
max_samples = 0
subsampling = 0


service = [
Expand Down
9 changes: 9 additions & 0 deletions workflows/mtdna_server_2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ include { ANNOTATE } from '../modules/local/annotate'
include { HAPLOGROUPS_CONTAMINATION } from '../modules/local/haplogroups_contamination'
include { COVERAGE_ESTIMATION } from '../modules/local/coverage_estimation'
include { REPORT } from '../modules/local/report'
include { SUBSAMPLING } from '../modules/local/subsampling'
include { SAMPLE_REPORT } from '../modules/local/sample_report'


Expand Down Expand Up @@ -83,6 +84,14 @@ workflow MTDNA_SERVER_2 {

validated_files = INPUT_VALIDATION.out.validated_files.flatten()

if(params.subsampling != 0) {
SUBSAMPLING (
validated_files,
params.subsampling
)
validated_files = SUBSAMPLING.out.subsampled_bam_ch
}

if (params.mode == 'mutserve') {

MUTSERVE(
Expand Down

0 comments on commit bb711a6

Please sign in to comment.