forked from maxsonBraunLab/atac_seq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
62 lines (50 loc) · 2.33 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#ATAC_seq2 pipeline rebooted
#Garth Kong and Rowan Callahan
#Maxson Lab 09/09/2020
import os
import pandas as pd
import plotly as plt
import plotly.graph_objects as go
configfile: "config.yaml"
SAMPLE, dir = glob_wildcards("samples/raw/{sample}_{dir,R1|R2}.fastq.gz")
SAMPLE = sorted(set(SAMPLE))
DIR = sorted(set(dir)) # R1, R2
# sample will glob cond+replicate information like 'MOLM24D_1' but not R1 or R2.
for i in SAMPLE:
print("--- samples to process: {}".format(i))
essential_report = []
if config["gen_report"] == True: essential_report.append("data/essential_report.html"); print("Generating essential report")
# snakemake -j 64 --use-conda --rerun-incomplete --latency-wait 60 --keep-going --profile ./slurm --cluster-config cluster.yaml
DB, CELL_LINE, FILE, = glob_wildcards("/home/groups/MaxsonLab/kongg/chip_seq/data/beds/{db}/{cell_line}/{file}.bed.gz")
localrules: fragment_length_plot, frip_plot, counts_table, align_stats, essential_report
rule all:
input:
# trim, align reads, get fragment length distribution
"samples/sample_stats/align_stats.txt",
expand("samples/align/sorted/{sample}_sorted.bam", sample = SAMPLE),
"data/fragment_length_dist.html",
# fastqc and fastq_screen
expand("samples/fastqc/{sample}_{dir}_paired_fastqc.{ext}",
sample = SAMPLE, dir = DIR, ext = ["html", "zip"]),
expand("samples/fastq_screen/{sample}/{sample}_{dir}_paired_screen.{ext}",
sample = SAMPLE, dir = DIR, ext = ["png", "txt", "html"]),
# filtered bamfiles, bigwigs, frip, counts table, consensus peaks
"samples/macs/consensus_stats.txt",
"data/frip.html",
"data/counts_table.txt",
expand("samples/bamfiles/filtered/{sample}_rmChrM_dedup_quality_shiftedReads_sorted.bam", sample = SAMPLE),
# expand("data/bigwigs/{sample}_tracks.bw", sample = SAMPLE),
# deseq2 for PCA and differential peak calc
directory("data/de"),
"data/de/norm_counts.txt",
"data/de/log_norm_counts.txt",
"data/de/sample_PCA.png",
"data/de/de_stats.txt",
essential_report,
# chip screen - intersect consensus peaks with public chip data
expand("data/chip_screen/{db}/{cell_line}/consensus_peaks_{file}.{ext}",
zip, db = DB, cell_line = CELL_LINE, file = FILE, ext = ['bed.gz', 'out'] )
include: "rules/quality_and_align.smk"
include: "rules/filter_shift.smk"
include: "rules/peak_catalog_no_downsample.smk"
include: "rules/deseq2_report.smk"