-
Notifications
You must be signed in to change notification settings - Fork 5
/
Preprocessing_Snakefile
50 lines (38 loc) · 1.71 KB
/
Preprocessing_Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import re
import pandas as pd
from snakemake.utils import validate, min_version
# Require Snakemake 5.3.1 or newer.
min_version("5.3.1")

# Values taken from the workflow configuration.
ADAPTERS = config["adapter"]
INDEXPATH = config["genomeindexpath"]
UORFANNOTATIONPATH = config["uorfannotationpath"]
CODONS = config["alternativestartcodons"]

# Sample sheet: every column is read as a string and the table is indexed by
# (method, condition, replicate); drop=False keeps those three columns
# available as ordinary columns as well.
samples = pd.read_table(
    config["samples"],
    dtype=str,
).set_index(
    ["method", "condition", "replicate"],
    drop=False,
)

# Rebuild the index with each of its levels cast to str.
samples.index = samples.index.set_levels(
    [level.astype(str) for level in samples.index.levels]
)
# Print a warning on stdout when the 'replicate' column of the sample table
# holds fewer than two distinct values.
# NOTE(review): indentation was lost in this copy of the file, so it cannot be
# determined here whether the full-table print below belongs to the `if`
# branch, to the function body, or to module level -- confirm against the
# repository before re-indenting.
def replicate_check(samples):
if(samples['replicate'].nunique() < 2):
print("Warning: Please make sure your experiment contains replicates!")
# Lift the pandas row/column display limits so the whole table is printed.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print(samples)
# Validate the sample table against the schema in schemas/samples.schema.yaml.
validate(samples, schema="schemas/samples.schema.yaml")
# Start-up handler: create the "logs" directory if it does not exist yet.
# NOTE(review): indentation was lost in this copy of the file;
# replicate_check(samples) is presumably part of this handler rather than a
# module-level call -- confirm against the repository before re-indenting.
onstart:
if not os.path.exists("logs"):
os.makedirs("logs")
replicate_check(samples)
# reStructuredText file supplying the workflow description for the report.
report: "report/extended_workflow.rst"
# Aggregate target: one output per row of the sample table for each stage.
# Every pattern is filled from the method/condition/replicate fields of the
# tuples produced by samples.itertuples().
rule all:
    input:
        # FastQC HTML reports under fastqc/raw
        expand(
            "fastqc/raw/{sample.method}-{sample.condition}-{sample.replicate}-raw.html",
            sample=samples.itertuples(),
        ),
        # FastQC HTML reports under fastqc/trimmed
        expand(
            "fastqc/trimmed/{sample.method}-{sample.condition}-{sample.replicate}-trimmed.html",
            sample=samples.itertuples(),
        ),
        # FastQC HTML reports under fastqc/norRNA
        expand(
            "fastqc/norRNA/{sample.method}-{sample.condition}-{sample.replicate}-norRNA.html",
            sample=samples.itertuples(),
        ),
        # BAM files under maplink
        expand(
            "maplink/{sample.method}-{sample.condition}-{sample.replicate}.bam",
            sample=samples.itertuples(),
        ),
# Success handler: announce on stdout that the run finished without errors.
onsuccess:
    print("Done, no error")
# Copying of the user-provided genome annotation
include: "rules/preprocessing.smk"
# Adapter removal and quality control
include: "rules/trimming.smk"
# Removal of reads mapping to ribosomal RNA (rRNA) genes
include: "rules/rrnafiltering.smk"
# Mapping
include: "rules/mapping.smk"