-
Notifications
You must be signed in to change notification settings - Fork 0
/
alternative_splicing.py
executable file
·86 lines (65 loc) · 2.59 KB
/
alternative_splicing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import subprocess
import os
import argparse
from mylog import get_logger
logger = get_logger(__file__, __name__)
# For starter only work with single reads
def get_gdir(gfas):
"""
Return the path to the gdir folder for star
"""
gdir = os.path.join(os.path.split(gfas)[0],
'star_index_' + os.path.basename(gfas))
return gdir
def star_index(gfas, nthreads):
"""
Produce the star index if not present already in a folder in the same
folder as gfas.
"""
gdir = get_gdir(gfas)
os.makedirs(gdir)
# This is a syntax to concatenate strings (can add '+' for verbose)
cmd = ('STAR '
'--runMode genomeGenerate '
'--genomeDir {0} '
'--genomeFastaFiles {1} '
'--runThreadN {2}').format(gdir, gfas, nthreads)
logger.info('Executing cmd:' + cmd)
p = subprocess.check_output(cmd, shell=True)
print(p)
def star_align_1pass(gfas, gtf, read_list1, nthreads):
"""
Produce the read alignment with STAR
"""
gdir = get_gdir(gfas)
if not os.path.exists(gdir):
star_index(gfas, nthreads)
else:
logger.info("Using index already present in {}".format(gdir))
for rd in read_list1:
outdir = 'star_out'
if os.path.exists(outdir):
raise IOError("The directory {} already exists. To not overwrite it, Star alignment is canceled.".format(outdir))
os.makedirs(outdir)
prefix = os.path.join(outdir, os.path.splitext(os.path.basename(rd))[0])
logger.info("Starting mapping read:{}".format(rd))
cmd = ('STAR '
'--genomeDir {0} '
'--sjdbGTFfile {1} '
'--readFilesIn {2} '
'--outFileNamePrefix {3} '
'--outSAMtype BAM SortedByCoordinate '
'--runThreadN {4}').format(gdir, gtf, rd, prefix, nthreads)
logger.info('Executing cmd:' + cmd)
p = subprocess.check_output(cmd, shell=True)
logger.info(p)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("genome_fas")
parser.add_argument("gtf")
parser.add_argument("read_file", nargs="+")
parser.add_argument("--threads", '-t', default=1)
args = parser.parse_args()
star_align_1pass(args.genome_fas, args.gtf, args.read_file, args.threads)
# Usage example:
# python alternative_splicing.py ~/data/genomes/c_elegans/Caenorhabditis_elegans.WBcel235.dna_sm.toplevel.fa ~/data/genomes/c_elegans/Caenorhabditis_elegans.WBcel235.86.gtf ~/data/demo_data/c_elegans/illumina/SRR019721.fastq -t 20