Skip to content

Commit

Permalink
better merging of INS
Browse files Browse the repository at this point in the history
  • Loading branch information
ASLeonard committed Dec 6, 2023
1 parent 3440671 commit 0ff5a22
Showing 1 changed file with 30 additions and 6 deletions.
36 changes: 30 additions & 6 deletions snakepit/short_reads.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import PurePath

rule all:
input:
'SR_SV/SVs.vcf.gz',
Expand All @@ -13,7 +15,8 @@ rule picard_add_MQ:
'picard/3.1.1'
threads: 1
resources:
mem_mb = 7500
mem_mb = 15000,
walltime = '24h'
shell:
'''
picard FixMateInformation -I {input} -O {output[0]}
Expand All @@ -28,13 +31,30 @@ rule insurveyor:
vcf = 'SR_SV/{sample}_insurveyor/out.pass.vcf.gz'
threads: 4
resources:
mem_mb = 5000
mem_mb = 7000
shell:
'''
mkdir -p {output._dir}
python /cluster/work/pausch/alex/software/INSurVeyor/insurveyor.py --threads {threads} --samplename {wildcards.sample} {input[0]} {output._dir} {config[reference]}
'''

rule SurVClusterer:
input:
expand(rules.insurveyor.output['vcf'],sample=config['HiFi'])
output:
multiext('SR_SV/cohort.insurveyor','.sv','.vcf.gz','.vcf.gz.tbi')
params:
prefix = lambda wildcards, output: PurePath(output[0]).with_suffix('')
threads: 6
resources:
mem_mb = 4000
shell:
'''
awk '$1=="-" {{print $2, "SR_SV/"$2"_insurveyor/out.pass.vcf.gz"}}' config/SR_SV.yaml > $TMPDIR/samples.fofn
/cluster/work/pausch/alex/software/SurVClusterer/clusterer $TMPDIR/samples.fofn {config[reference]} -t {threads} --min-overlap-precise 0.95 --max-dist-precise 25 --overlap-for-ins -o {params.prefix}
tabix -fp vcf {output[1]}
'''

rule delly_call_denovo:
input:
'/cluster/work/pausch/inputs/bam/BTA_eQTL/{sample}.bam'
Expand All @@ -50,7 +70,7 @@ rule delly_call_denovo:
shell:
'''
/cluster/work/pausch/alex/software/delly/src/delly call -g {config[reference]} -o {output} {input}
bcftools index {output}
#bcftools index {output}
'''

rule delly_merge:
Expand Down Expand Up @@ -95,6 +115,8 @@ rule bcftools_merge:
lambda wildcards: expand(rules.delly_call_forced.output,sample=config['HiFi']) if wildcards.caller == 'delly.forced' else (expand(rules.survtyper.output['vcf'],sample=config['HiFi']) if wildcards.caller == 'insurveyor.forced' else expand(rules.insurveyor.output['vcf'],sample=config['HiFi']))
output:
'SR_SV/cohort.{caller}.vcf.gz'
resources:
mem_mb = 10000
shell:
'''
bcftools merge -m id -o {output} {input}
Expand All @@ -120,16 +142,18 @@ rule delly_filter:
tabix -fp vcf {output}
'''

#awk '$1=="-" {print $2, "SR_SV/"$2"_insurveyor/out.pass.vcf.gz"}' ../config/SR_SV.yaml > ../insurveyor.fofn
rule survtyper:
input:
vcf = expand(rules.bcftools_merge.output,caller='insurveyor'),
vcf = rules.SurVClusterer.output[1],
bam = '/cluster/work/pausch/inputs/bam/BTA_eQTL/{sample}.bam'
output:
_dir = directory('SR_SV/{sample}_insurveyor_forced'),
vcf = 'SR_SV/{sample}_insurveyor_forced/genotyped.vcf.gz'
threads: 4
threads: 6
resources:
mem_mb = 5000
mem_mb = 8000,
walltime = '4h'
shell:
'''
python /cluster/work/pausch/alex/software/SurVTyper/survtyper.py --threads {threads} --samplename {wildcards.sample} {input.vcf} {input.bam} {output._dir} {config[reference]}
Expand Down

0 comments on commit 0ff5a22

Please sign in to comment.