-
Notifications
You must be signed in to change notification settings - Fork 0
/
gaussFit.py
110 lines (87 loc) · 2.8 KB
/
gaussFit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import matplotlib.pyplot as plt
import numpy as np
import sys, pymzml
from scipy.stats.stats import pearsonr
x = np.arange(1000)/100
y = np.ones(1000)
shape = [
[-5.99675, 0.1756],
[-3.99906, 0.0593],
[-2.99841, 0.4044],
[-2.00013, 0.3994],
[-0.99877, 0.5407],
[0, 1],
[2.00108, 0.5902],
]
def readSpectra(mzml_file, msLevel = None):
msrun = pymzml.run.Reader(str(mzml_file), obo_version = '3.71.0')
for n, spectrum in enumerate(msrun):
if msLevel:
if spectrum['ms level'] != msLevel: continue
lvl = spectrum['ms level']
try:
time = spectrum['scan time']
except:
try:
time = spectrum['scan start time']
except Exception, e:
#print 'Warning, skipping spectrum %s' %n
#print 'Stack trace:'
#print str(e)
continue
try:
mzs = np.array(spectrum.mz, dtype = "float32")
ints = np.array(spectrum.i, dtype = 'float32')
assert mzs.shape == ints.shape
yield time, mzs, ints, lvl
except Exception, e:
#print 'Warning, skipping spectrum %s' %n
#print 'Stack trace:'
#print str(e)
continue
def getShapeDimensions(shape):
offsets = [float(s[0]) for s in shape]
return min(offsets), max(offsets)
def makeModel(m, msubset, maxAmp, shape, x, isubset):
wid = 0.08
ymodel = 0
for s in shape:
cent = s[0] + m
amp = s[1] * maxAmp
ymodel += amp / (np.sqrt(2*np.pi*(wid)**2 )) * np.exp(-(msubset-cent)**2 / (2*(wid)**2))
#index = np.argmin(np.abs(mzsubset - cent))
#targetmz = msubset[index]
#targetint = isubset[index]
return ymodel
def fit(x, mzs, ints):
i, m = ints[x], mzs[x]
# git points in shape boundaries +/- tolerance
mask = np.where(
(mzs > mzs[x] + SHAPE_TARGET_LOWER_OFFSET - tolerance)
&
(mzs < mzs[x] + SHAPE_TARGET_UPPER_OFFSET + tolerance)
)
if mask[0].shape[0] < 100: return
msubset = mzs[mask]
isubset = ints[mask]
ifit = makeModel(m, msubset, i, shape, x, isubset)
pearsonC, p_val = pearsonr(ifit,isubset)
# print (fit)
# plt.plot(msubset, ifit)
# plt.show()
return pearsonC
MINIMUM_POINT_INTENSITY = 1000
tolerance = 0.5
SHAPE_TARGET_LOWER_OFFSET, SHAPE_TARGET_UPPER_OFFSET = getShapeDimensions(shape)
spectra = readSpectra(sys.argv[1])
of1 = open('%s_out' %(sys.argv[1]), 'wt')
for spectrum in spectra:
time, mzs, ints, lvl = spectrum
for x in range(len(mzs)):
i = ints[x]
if i < MINIMUM_POINT_INTENSITY: continue
m = mzs[x]
pearsonC = fit(x, mzs, ints)
if pearsonC:
of1.write('%s, %s, %s, %s\n'%(time, m,i,pearsonC))
of1.close()