-
Notifications
You must be signed in to change notification settings - Fork 1
/
mutation_type.py
66 lines (50 loc) · 2.12 KB
/
mutation_type.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import subprocess
import sys
if len(sys.argv) < 2:
print("Please provide a vcf file to proceed; The format should be python mutation_type.py xxx.vcf.gz xxx.vcf.gz "
"......export_file_name")
sys.exit(0)
else:
filename = sys.argv[1:]
#store export file name
export_file = filename[-1]
filename = filename[:-1]
for file in filename:
if '.vcf.gz' not in file or '.vcf' not in file:
print("Please provide vcf files only")
sys.exit(0)
mutation_change_dict = dict()
exportfile = open(export_file + '.txt', 'w')
# iterate through each file
for file in filename:
exportfile.write(file[:file.index('.')])
exportfile.write('\n')
#write header for file
header = subprocess.check_output('gunzip -c ' + file + '| grep \'#CHROM\'', shell=True).splitlines()
header = header[0].decode('utf-8').split('\t')[:5]
exportfile.write(','.join(header))
exportfile.write('\n')
print(file[:file.index('.')])
# extract element with novoPP >= 0.9 only from vcf files
if '.vcf.gz' in file:
#only use subprocess if grep returns a value
if len(os.popen('gunzip -c ' + file + '| grep \'novoPP=0.9\'').read()) > 0:
mutation_list = subprocess.check_output('gunzip -c ' + file + '| grep \'novoPP=0.9\'', shell=True, stderr=None).splitlines()
# iterate through each mutation obtained from grep
for mutation in mutation_list:
# decode the into string and extract the mutation elements
mutation = mutation.decode('utf-8').split('\t')
mutation = mutation[:5]
print(mutation)
exportfile.write(','.join(mutation))
exportfile.write('\n')
mutation_change = ' > '.join(mutation[-2:])
if mutation_change in mutation_change_dict:
mutation_change_dict[mutation_change] += 1
else:
mutation_change_dict[mutation_change] = 1
exportfile.write('\n')
exportfile.write(str(mutation_change_dict))
exportfile.close()
print(mutation_change_dict)