-
Notifications
You must be signed in to change notification settings - Fork 0
/
SyntheticRVData.py
306 lines (258 loc) · 11 KB
/
SyntheticRVData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 7 16:11:10 2019
@author: ZhexingLi
"""
# Script that produces synthetic RV data at any epoch for any time baseline, with
# some randomness in time of observation and rv scatter to mimic real data.
# import modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import root
import random
# Input options to toggle
# First element as first planet, second element as second planet etc.
P = [29.8] # orbital period of the planet(s) (unit of days), 29.8
Tp = [2455000] # time of periastron, in unit of days (JD), in the past (Tp < ref1)
e = [0.0] # eccentricity
K = [1] # semi amplitude, in unit of m/s, 0.0024
w = [0] # argument of periastron of the STAR, in unit of radians
V_0 = 0.0 #systematic velocity, in unit of m/s, set here universally as 0 m/s
rms = 1 # rms from original fitting , 0.00279
unc = 1 # rv data uncertainty
time_base = 2453000 # number to subtract for display purpose
# type of time stamp, either 'complete', i.e. provided by user input, or 'reference', i.e.
# only start and end time stamps provided by user
time_stamp = 'complete'
if time_stamp == 'complete':
# path to the dataset containing the needed timestamp
path = '/Users/zhexingli/Desktop/UCR/01-RESEARCH/PROJECTS/AAT/Targets/08-HD134606/HD134606AATbinned.txt'
data = pd.read_csv(path,header=None,skiprows=1,delim_whitespace=True,usecols=[0],names=['time'])
stamps = list(data['time'])
num = len(stamps)
elif time_stamp == 'reference':
# reference time 1, all new data starts after this
# reference time 2, all new data ends before this. If set to 0, then end time determined by outer period
ref1 = 2452984.7100400003
ref2 = 2459483.121358
stamps = [ref1,ref2]
num = 66 # number of data points
else:
print("Please provide a valid time stamp type.")
# path to store created data
savepath = '/Users/zhexingli/Desktop/UCR/01-RESEARCH/PROJECTS/AAT/Targets/08-HD134606/AATActivity/'
name = 'HD134606'
#######################################################################################
# compute the ending time if ref2 given is 0
if stamps[1] == 0:
start = stamps[0]
end = start + np.max(P)
stamps = [start,end]
else:
pass
def Synth_Data (error,scatter,number):
'''
Function that calculates required number of synthetic data points at the required
time specified above. Also randomizes and scatters the calculated RV points.
Input: 1) error: Error estimate for each data point
2) scatter: RMS from the original fitting as gaussian sigma to simulate scatter
3) number: Number of data points for each run
Output: One text file that stores time of data, rv, and err in the same directory
as other data files and setup file.
'''
startT, endT = stamps[0],stamps[-1]
length = int(endT - startT)
# plot out individual and total rv curves
datapoint(stamps,int(length*2))
if number > 2 and time_stamp == 'reference':
# get individual data points
# add extra number of points so some could be randomly excluded for the purpose
# of adding more randomness in the time of each data point
add = int(number*1)
number = number + add
vel, time_random, vel_ind = datapoint(stamps,number,data = 'Yes')
'''
# individual planetary components (noiseless)
pl_b = list(vel_ind[0])
pl_c = list(vel_ind[1])
pl_d = list(vel_ind[2])
'''
vel = list(vel)
time_random = list(time_random)
# randomly exclude number of data points that are added earlier to create more
# randomness in the time
point = np.arange(1,len(vel)-1,1) # number indices to be randomly selected except the first and the last data points
exclude = random.sample(list(point),add) # list containing the indices of the data points to be excluded
for item in exclude:
ind = item
vel[ind] = 'NaN'
time_random[ind] = 'NaN'
#pl_b[ind] = 'NaN' # individual planetary components are noiseless
#pl_c[ind] = 'NaN'
#pl_d[ind] = 'NaN'
vel = [x for x in vel if x != 'NaN']
time_random = [x for x in time_random if x != 'NaN']
#pl_b = [x for x in pl_b if x != 'NaN']
#pl_c = [x for x in pl_c if x != 'NaN']
#pl_d = [x for x in pl_d if x != 'NaN']
vel = np.array(vel)
time_random = np.array(time_random)
else:
vel, time_random, vel_ind = datapoint(stamps,number,data = 'Yes')
# Randonmize calculated total velocity data with the error provided using a Gaussian
# filter.
vel_random = []
for i in range (len(vel)):
new_vel = random.gauss(vel[i],scatter)
vel_random.append(new_vel)
vel_random = np.array(vel_random)
#pl_b = np.array(pl_b) # again, individual component is noiseless
#pl_c = np.array(pl_c)
#pl_d = np.array(pl_d)
plt.errorbar(time_random - time_base,vel_random,yerr=unc,marker = 'o', mfc = 'k',\
mec = 'cornflowerblue',markersize=5,ls='none',label='Total Randomized')
#plt.legend(loc='upper left',prop={'size':5},fontsize='large')
plt.legend(loc='upper left',fontsize='small')
plt.xlabel('Time - {base} (JD)'.format(base=time_base))
plt.ylabel('RV (m/s)')
plt.show()
df = pd.DataFrame()
df['time'] = time_random
df['rv'] = vel_random
df['err'] = error
'''
df1 = pd.DataFrame()
df1['time'] = time_random
df1['rv'] = pl_b
df1['err'] = error
df2 = pd.DataFrame()
df2['time'] = time_random
df2['rv'] = pl_c
df2['err'] = error
df3 = pd.DataFrame()
df3['time'] = time_random
df3['rv'] = pl_d
df3['err'] = error
'''
path0 = savepath + name + '_SynthRV.txt'
df.to_csv(path0,sep='\t',index=False, header=True)
'''
path0 = path + 'rvdata_noiseless_b.txt'
df1.to_csv(path0,sep='\t',index=False, header=True)
path0 = path + 'rvdata_noiseless_c.txt'
df2.to_csv(path0,sep='\t',index=False, header=True)
path0 = path + 'rvdata_noiseless_d.txt'
df3.to_csv(path0,sep='\t',index=False, header=True)
'''
figpath = savepath + name + '_SynthRV.png'
#plt.savefig(figpath,dpi=500)
#######################################################################################
def datapoint(stamps, rv_num, data = None):
'''
Function that produce theoretical RV data points for individual planet and the
entire system at the specified time stamps.
Input: 1) stamps: full time stamps provided; or just the start and end points
2) rv_num: number of rv data points to be produced
Output: Non-randomized non-scattered RV data points at time stamps provided for
the whole system as well as for the individual component
'''
# Determine the actual times to create data points based on the starting time and
# number of data points. Actual time depends on the one full period of the outer
# most planet. Or just take time stamps provided.
newtime = [] # Synthetic data points time in JD
if time_stamp == 'reference' or data is None:
start, end = stamps[0], stamps[-1]
if rv_num > 1:
time_step = (end - start)/(rv_num-1)
for i in range (rv_num):
t = start + time_step*i
newtime.append(t)
else:
t = start # number = 1 case, put data at the start
newtime.append(t)
elif time_stamp == 'complete':
newtime = stamps
else:
print("Invalid type of time stamps.")
# If just plotting individual rv curves, don't randomize time, otherwise,
# slightly randomize the time of observation to avoid false periodicity (aliases)
# to be picked up by periodogram
if time_stamp == 'complete' or data is None:
t_random = np.array(newtime)
else:
t_random = []
for i in range (len(newtime)):
new_time = random.gauss(newtime[i],3) # sigma of 3 day
t_random.append(new_time)
t_random.sort()
t_random = np.array(t_random)
# Determine the total velocity values for all planets
rv = 0 # total combined rv
rv_ind = [] # individual planetary rv signals stored in lists
for p in range (len(P)):
# Determine orbital phase for each planet
phi = []
for t in t_random:
ratio = (t-Tp[p])/P[p]
if ratio > 1.0: # when t is more than one period bigger than tp
phase = ratio - int(ratio)
phi.append(phase)
elif 0 <= ratio <= 1: # when t is within one period bigger than tp
phi.append(ratio)
elif -1 <= ratio < 0: # when t is within one period less than tp
phase = 1 + ratio
phi.append(phase)
else: # when t is more than one period less than tp
phase = 1 + (ratio - int(ratio))
phi.append(phase)
phi = np.array(phi)
# Calculate the corresponding mean anomaly according to orbital phase
m = 2*(np.pi)*phi/1.0 # mean anomaly
# Calculate the eccentric anomaly according to mean anomaly
eps = []
for i in range(len(phi)):
def func(x):
return m[i] - x + e[p]*(np.sin(x))
sol = root(func,1)
eps.append(sol.x[0])
eps = np.array(eps)
# Calculate the true anomaly according to eccentric anomaly
nu = []
for i in range(len(phi)):
v = np.arccos(((np.cos(eps[i])) - e[p])/(1 - e[p]*(np.cos(eps[i]))))
if eps[i] <= np.pi:
nu.append(v)
else:
v = np.pi + abs(np.pi - v)
nu.append(v)
nu = np.array(nu)
# Calculate the velocity according to true anomaly
V = V_0 + K[p]*(np.cos(w[p]+nu)+e[p]*(np.cos(w[p])))
rv_ind.append(V)
if data is None:
if p == 0:
plt.plot(t_random - time_base,V,'m-',alpha=0.7,label='Planet b')
elif p == 1:
plt.plot(t_random - time_base,V,'r-',alpha=0.7,label='Planet c')
elif p == 2:
plt.plot(t_random - time_base,V,'g-',alpha=0.7,label='Planet d')
else:
pass
rv = rv + V
else:
rv = rv + V
if data is None:
plt.plot(t_random - time_base,rv,'k-',label='Total')
else:
pass
rv = np.array(rv)
# store unrandomized rv curve data
df = pd.DataFrame()
df['time'] = t_random
df['rv'] = rv
path0 = savepath + name + '_SynthRV.txt'
#df.to_csv(path0,sep='\t',index=False, header=True)
return rv, t_random, rv_ind
Synth_Data(unc,rms,num)