-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
174 lines (155 loc) · 9.29 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
include: "snakefiles/snakefile-reads-assembly-spades"
include: "snakefiles/snakefile-reads-assembly-megahit"
include: "snakefiles/snakefile-reads-assembly-spades-qc"
include: "snakefiles/snakefile-reads-assembly-megahit-qc"
include: "snakefiles/snakefile-reads-assembly-qc"
include: "snakefiles/snakefile-reads-preprocess-qc"
include: "snakefiles/snakefile-pb-assembly"
include: "snakefiles/snakefile-reads-rnaseq-preprocess-qc"
include: "snakefiles/snakefile-reads-rnaseq-assembly"
rule annotation_hifiasm_comb:
    # Target rule for the hifiasm combined assemblies of LmagMAC and LmagMIC.
    # Requests the assembly scaffolds plus the trf "no_overlap" GFF3 files at
    # minimum lengths 100 and 1000. The mapping targets are currently disabled.
    input:
        expand(
            "assembly/hifiasm-comb_{spref}/scaffolds.fasta",
            spref=["LmagMAC", "LmagMIC"],
        ),
        # Disabled: minimap2 pb-ccs mappings against each reference
        # expand("annotation/hifiasm-comb_{spref}/mapping/minimap2.{spmap}_pb-ccs_vs_hifiasm-comb_{spref}.sort.bam",
        #     spref=['LmagMAC','LmagMIC'], spmap=['LmagMAC','LmagMIC']),
        # Disabled: hisat2 RNAseq mapping indexes
        # expand("annotation/hifiasm-comb_{spref}/mapping/hisat2.{lib}_q28_nochlamy.hifiasm-comb_{spref}.sort.bam.bai",
        #     spref=['LmagMAC','LmagMIC'],lib=config["libraries_rnaseq"]),
        expand(
            "annotation/hifiasm-comb_{spref}/trf/hifiasm-comb_{spref}.trf.no_overlap.min{minlen}.gff3",
            spref=["LmagMAC", "LmagMIC"],
            minlen=[100, 1000],
        ),
rule annotation_falcon_comb:
    # Target rule for annotation outputs of the Falcon combined assemblies.
    # The Falcon assembly itself is run outside Snakemake because it requires
    # manual interventions.
    input:
        # Reference scaffold for Falcon: step 2 p_ctg, polished by Racon
        expand(
            "annotation/falcon-comb_{spref}/mapping/minimap2.{spmap}_pb-ccs_vs_falcon-comb_{spref}.sort.bam",
            spref=["LmagMAC", "LmagMIC"],
            spmap=["LmagMAC", "LmagMIC"],
        ),
        expand(
            "annotation/falcon-comb_{spref}/mapping/hisat2.{lib}_q28_nochlamy.falcon-comb_{spref}.sort.bam.bai",
            spref=["LmagMAC", "LmagMIC"],
            lib=config["libraries_rnaseq"],
        ),
        expand(
            "annotation/falcon-comb_{spref}/falcon-comb_{spref}.gcstats",
            spref=["LmagMAC", "LmagMIC"],
        ),
        expand(
            "annotation/falcon-comb_{spref}/falcon-comb_{spref}.minimap2.bedcov",
            spref=["LmagMAC", "LmagMIC"],
        ),
        expand(
            "annotation/falcon-comb_{spref}/trf/falcon-comb_{spref}.trf.no_overlap.min{minlen}.gff3",
            spref=["LmagMAC", "LmagMIC"],
            minlen=[100, 1000],
        ),
        # Disabled: bbmap coverage statistics
        # expand("annotation/falcon-comb_{spref}/mapping/bbmap.{sp}_R12_ktrim_qtrim28_vs_falcon-comb_{spref}.covstats",
        #     spref=['LmagMAC','LmagMIC'], sp=['LmagMAC','LmagMIC']),
        # bowtie2 mappings are requested against the LmagMAC reference only
        expand(
            "annotation/falcon-comb_{spref}/mapping/bowtie2.{sp}_R12_ktrim_qtrim28_vs_falcon-comb_{spref}.sort.bam",
            spref=["LmagMAC"],
            sp=["LmagMAC", "LmagMIC"],
        ),
rule annotation_flye_comb:
    # Target rule for annotation outputs of the Flye combined assemblies of
    # LmagMAC and LmagMIC: gbtquick blobplot/covstats, gcstats, minimap2 bedcov
    # and trf GFF3 files. All read-mapping targets are currently disabled.
    input:
        expand(
            "annotation/flye-comb_{sp}/flye-comb_{sp}.{output_type}",
            sp=["LmagMAC", "LmagMIC"],
            output_type=["gbtquick.blobplot.png", "gbtquick.covstats.tsv"],
        ),
        # Disabled: minimap2 pb-ccs mappings against each reference
        # expand("annotation/flye-comb_{spref}/mapping/minimap2.{spmap}_pb-ccs_vs_flye-comb_{spref}.sort.bam",
        #     spref=['LmagMAC','LmagMIC'], spmap=['LmagMAC','LmagMIC']),
        # Disabled: hisat2 RNAseq mapping indexes
        # expand("annotation/flye-comb_{spref}/mapping/hisat2.{lib}_q28_nochlamy.flye-comb_{spref}.sort.bam.bai",
        #     spref=['LmagMAC','LmagMIC'],lib=config["libraries_rnaseq"]),
        expand(
            "annotation/flye-comb_{spref}/flye-comb_{spref}.gcstats",
            spref=["LmagMAC", "LmagMIC"],
        ),
        expand(
            "annotation/flye-comb_{spref}/flye-comb_{spref}.minimap2.bedcov",
            spref=["LmagMAC", "LmagMIC"],
        ),
        expand(
            "annotation/flye-comb_{spref}/trf/flye-comb_{spref}.trf.no_overlap.min{minlen}.gff3",
            spref=["LmagMAC", "LmagMIC"],
            minlen=[100, 1000],
        ),
        # Disabled: bbmap coverage statistics
        # expand("annotation/flye-comb_{spref}/mapping/bbmap.{sp}_R12_ktrim_qtrim28_vs_flye-comb_{spref}.covstats",
        #     spref=['LmagMAC','LmagMIC'], sp=['LmagMAC','LmagMIC']),
        # Disabled: bowtie2 mappings against the LmagMAC reference
        # expand("annotation/flye-comb_{spref}/mapping/bowtie2.{sp}_R12_ktrim_qtrim28_vs_flye-comb_{spref}.sort.bam",
        #     spref=['LmagMAC'], sp=['LmagMAC','LmagMIC']),
rule annotation_megahit_comb:
    # Target rule for annotation outputs of the megahit combined assemblies:
    # one set per species in config['species'], at quality-trim value 28 only.
    input:
        # Binned fasta plus gbtquick blobplot and coverage statistics
        expand(
            "annotation/megahit-comb_{sp}_q{qtrimvals}/megahit-comb_{sp}_q{qtrimvals}.{output_type}",
            sp=config["species"],
            qtrimvals=[28],
            output_type=[
                "bin_cds50_gc40.fasta",
                "gbtquick.blobplot.png",
                "gbtquick.covstats.tsv",
            ],
        ),
        # barrnap GFF, one per kingdom listed in config['barrnap_kingdoms']
        expand(
            "annotation/megahit-comb_{sp}_q{qtrimvals}/megahit-comb_{sp}_q{qtrimvals}.barrnap.{kingdom}.gff",
            sp=config["species"],
            qtrimvals=[28],
            kingdom=config["barrnap_kingdoms"],
        ),
rule annotation_spades_comb:
    # Target rule: annotate each combined metagenomic SPAdes assembly, one per
    # species in config['species'], at quality-trim value 28 only.
    input:
        # Binned fasta plus gbtquick blobplot and coverage statistics
        expand(
            "annotation/spades-comb_{sp}_q{qtrimvals}/spades-comb_{sp}_q{qtrimvals}.{output_type}",
            sp=config["species"],
            qtrimvals=[28],
            output_type=[
                "bin_cds50_gc40.fasta",
                "gbtquick.blobplot.png",
                "gbtquick.covstats.tsv",
            ],
        ),
        # barrnap GFF, one per kingdom listed in config['barrnap_kingdoms']
        expand(
            "annotation/spades-comb_{sp}_q{qtrimvals}/spades-comb_{sp}_q{qtrimvals}.barrnap.{kingdom}.gff",
            sp=config["species"],
            qtrimvals=[28],
            kingdom=config["barrnap_kingdoms"],
        ),
rule annotation_spades_sc:
    # Target rule: annotate each single-cell MDA assembly, for every library in
    # config['libraries_sc'] and every value in config['qtrimvals'].
    input:
        expand(
            "annotation/spades-sc_{lib}_q{qtrimvals}/spades-sc_{lib}_q{qtrimvals}.{output_type}",
            lib=config["libraries_sc"],
            qtrimvals=config["qtrimvals"],
            output_type=[
                "gbtquick.blobplot.png",
                "gbtquick.covstats.tsv",
                "bin_cds50_gc40.fasta",
            ],
        ),
rule latest_assemblies:
    # Target rule: request the assembly/latest/ entries, which symlink the
    # latest versions of all the target assemblies.
    input:
        # Single-cell assemblies
        expand(
            "assembly/latest/spades-sc_{lib_sc}_q{qtrimvals}.{output_type}",
            lib_sc=config["libraries_sc"],
            qtrimvals=config["qtrimvals"],
            output_type=["scaffolds.fasta", "assembly_graph.fastg"],
        ),
        # Bulk metagenomic libraries, combined per species
        expand(
            "assembly/latest/{assembler}-comb_{sp}_q{qtrimvals}.{output_type}",
            assembler=["spades", "megahit"],
            sp=config["species"],
            qtrimvals=[28],
            output_type=["scaffolds.fasta", "assembly_graph.fastg"],
        ),
        # PacBio libraries, combined per species
        expand(
            "assembly/latest/flye-comb_{sp}.{output_type}",
            sp=["LmagMAC", "LmagMIC"],
            output_type=[
                "assembly.fasta",
                "assembly_graph.gfa",
                "assembly_graph.gv",
                "assembly_info.txt",
            ],
        ),
        # RNAseq libraries, combined by experiment
        expand(
            "assembly/latest/trinity_rnaseq_{experiment}_nochlamy_comb.{output_type}",
            experiment=["exp146"],
            output_type=["Trinity.fasta", "Trinity.fasta.gene_trans_map"],
        ),
        # Disabled: RNAseq assembly of reads mapped to a reference
        # expand("assembly/latest/trinity_rnaseq_{experiment}_nochlamy_comb.mapped_{ref_params}.{output_type}",
        #     experiment=["exp146"],
        #     ref_params=['spades-comb_LmagMAC_q28'],
        #     output_type=['Trinity.fasta','Trinity.fasta.gene_trans_map']),
        # RNAseq "gg_" assemblies against the flye-comb_LmagMAC reference
        expand(
            "assembly/latest/trinity_rnaseq_{experiment}_nochlamy_comb.gg_{ref_params}.{output_type}",
            experiment=["exp146"],
            ref_params=["flye-comb_LmagMAC"],
            output_type=["Trinity.fasta", "Trinity.fasta.gene_trans_map"],
        ),
rule assembly_rnaseq:
    # Target rule for the Trinity RNAseq assemblies of experiment exp146.
    input:
        # RNAseq libraries combined by experiment
        expand(
            "assembly/trinity_rnaseq_{experiment}_nochlamy_comb/trinity_outdir/Trinity.fasta",
            experiment=["exp146"],
        ),
        # Disabled: assembly of reads mapped to a reference
        # expand("assembly/trinity_rnaseq_{experiment}_nochlamy_comb.mapped_{ref_params}/trinity_outdir/Trinity.fasta",
        #     experiment=["exp146"],
        #     ref_params=['spades-comb_LmagMAC_q28']),
        # Genome-guided assembly of RNAseq libraries, mapped to the reference genome
        expand(
            "assembly/trinity_rnaseq_{experiment}_nochlamy_comb.gg_{ref_params}/trinity_outdir/Trinity.fasta",
            experiment=["exp146"],
            ref_params=["flye-comb_LmagMAC"],
        ),
rule assembly_illumina_sc:  # todel
    # Target rule: one SPAdes assembly per single-cell MDA library, for every
    # library in config["libraries_sc"] and every value in config["qtrimvals"].
    input:
        expand(
            "assembly/spades-sc_{lib_sc}_q{qtrimvals}/scaffolds.fasta",
            lib_sc=config["libraries_sc"],
            qtrimvals=config["qtrimvals"],
        ),
rule assembly_illumina_comb:  # todel
    # Target rule: combined assemblies of all bulk metagenomic libraries per
    # species, for both spades and megahit at quality-trim value 28.
    input:
        # Scaffold sequences
        expand(
            "assembly/{assembler}-comb_{sp}_q{qtrimvals}/scaffolds.fasta",
            assembler=["spades", "megahit"],
            sp=config["species"],
            qtrimvals=[28],
        ),
        # Assembly graphs
        expand(
            "assembly/{assembler}-comb_{sp}_q{qtrimvals}/assembly_graph.fastg",
            assembler=["spades", "megahit"],
            sp=config["species"],
            qtrimvals=[28],
        )
rule assembly_flye_comb:  # todel
    # Target rule: Flye combined assembly fasta for LmagMAC and LmagMIC.
    input:
        expand(
            "assembly/flye-comb_{sp}/assembly.fasta",
            sp=["LmagMAC", "LmagMIC"],
        )
rule prepare_falcon_assembly_files:
    # Target rule: the .fofn file for each Falcon combined assembly
    # (LmagMAC and LmagMIC).
    input:
        expand(
            "assembly/falcon-comb_{sp}/falcon-comb_{sp}.fofn",
            sp=["LmagMAC", "LmagMIC"],
        )
rule qc:
    # Target rule: phyloFlash archive for every library in config["libraries"].
    input:
        expand(
            "qc/phyloFlash/{lib}.phyloFlash.tar.gz",
            lib=config["libraries"],
        )
rule rnaseq_qc:
    # Target rule: phyloFlash archive for every RNAseq library in
    # config["libraries_rnaseq"].
    input:
        expand(
            "qc/phyloFlash_rnaseq/{lib}.phyloFlash.tar.gz",
            lib=config["libraries_rnaseq"],
        )
rule rnaseq_filter:
    # Target rule: the "bbmap_nochlamy" filtered read file for every RNAseq
    # library in config["libraries_rnaseq"], at quality-trim value 28.
    input:
        expand(
            "data/reads-rnaseq-filter/{lib}_R12_ktrim_qtrim{qtrimvals}_bbmap_nochlamy.R12.fq.gz",
            lib=config["libraries_rnaseq"],
            qtrimvals=[28],
        )