-
Notifications
You must be signed in to change notification settings - Fork 0
/
stat_newform.py
36 lines (34 loc) · 1.15 KB
/
stat_newform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
# Summary statistics for a tab-separated ".newform" file of paired records.
#
# Columns used (0-based), as read by the code below:
#   0     record name
#   1, 3  integer flag of each mate; only bit 4 (mask 0x10) is inspected
#         (presumably a SAM-style FLAG, where 0x10 marks a reverse-strand
#         alignment -- TODO confirm against the tool that writes .newform)
#   2, 4  compared for equality only (presumably the reference/chromosome
#         name of each mate -- TODO confirm)
#   5..8  integer coordinates used to compute the pair length

NEWFORM_PATH = './data_test7/gu_yu18_mmi_q60F2048.newform'

# Bit index of the strand bit inside the flag columns (mask 0x10).
STRAND_BIT = 4

# Inclusive range of accepted pair lengths. The values equal 38000 -/+ 30 %;
# presumably 38000 is the expected insert size -- TODO confirm.
MIN_LENGTH = 26600
MAX_LENGTH = 49400


def strand_of(flag):
    """Return the strand bit (bit 4, mask 0x10) of *flag* as 0 or 1.

    *flag* may be an int or its decimal string representation.
    """
    return (int(flag) >> STRAND_BIT) & 1


def collect_pair_stats(lines, min_length=MIN_LENGTH, max_length=MAX_LENGTH):
    """Classify every record in *lines* and collect the accepted lengths.

    Args:
        lines: iterable of tab-separated text lines (an open file works).
        min_length: smallest accepted pair length, inclusive.
        max_length: largest accepted pair length, inclusive.

    Returns:
        A tuple ``(n, inverted, spacing_errors, lengths)`` where
        ``n`` is the number of non-blank records read,
        ``inverted`` lists the names of records whose two strand bits differ,
        ``spacing_errors`` lists ``(name, length)`` for same-strand records
        whose length falls outside ``[min_length, max_length]``, and
        ``lengths`` lists the lengths that fall inside that range.

    Raises:
        IndexError: a non-blank record has too few columns.
        ValueError: a flag or coordinate column is not an integer.
    """
    n = 0
    inverted = []
    spacing_errors = []
    lengths = []
    for raw in lines:
        # A blank (e.g. trailing) line is not a record; skip it instead of
        # failing with IndexError on the column lookups below.
        if not raw.strip():
            continue
        n += 1
        fields = raw.strip('\n').split('\t')
        # Records whose columns 2 and 4 differ are counted in n but are not
        # classified any further.
        if fields[2] != fields[4]:
            continue
        strand1 = strand_of(fields[1])
        if strand1 != strand_of(fields[3]):
            inverted.append(fields[0])
            continue
        # The coordinate columns that delimit the pair depend on the strand.
        if strand1 == 0:
            length = int(fields[8]) - int(fields[5])
        else:
            length = int(fields[6]) - int(fields[7])
        if min_length <= length <= max_length:
            lengths.append(length)
        else:
            spacing_errors.append((fields[0], length))
    return n, inverted, spacing_errors, lengths


def summarize(n, lengths):
    """Return ``(percent, mean, std)`` for the accepted *lengths*.

    ``percent`` is ``len(lengths)`` as a percentage of *n*, rounded to two
    decimals (0.0 when *n* is 0). ``mean`` and ``std`` are rounded to two
    decimals; ``std`` is the population standard deviation (NumPy default).
    Both are NaN when *lengths* is empty.
    """
    # Scale before rounding: rounding the ratio first and multiplying by 100
    # afterwards re-introduces float noise such as 56.99999999999999.
    percent = round(100 * len(lengths) / n, 2) if n else 0.0
    if not lengths:
        return percent, float('nan'), float('nan')
    values = np.array(lengths)
    return percent, round(values.mean(), 2), round(values.std(), 2)


def main(path=NEWFORM_PATH):
    """Read the .newform file at *path* and print its statistics.

    Prints, one per line: the record count, the inverted record names, the
    out-of-range ``(name, length)`` pairs, the percentage of records with an
    accepted length, and the mean and standard deviation of those lengths.
    """
    # The context manager closes the file even if a malformed line raises.
    with open(path, 'r') as f_newform:
        n, inverted, spacing_errors, lengths = collect_pair_stats(f_newform)
    percent, mean, std = summarize(n, lengths)
    print(n)
    print(inverted)
    print(spacing_errors)
    print(percent)
    print(mean)
    print(std)


if __name__ == '__main__':
    main()