-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_generator_learning.py
executable file
·95 lines (78 loc) · 3.32 KB
/
data_generator_learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Module metadata.
__author__ = 'dipanjan'
import numpy as np
# NOTE(review): `sc` is not referenced anywhere in the visible code, and `plt`
# only appears in commented-out scatter-plot snippets further down.
import scipy as sc
import matplotlib.pyplot as plt
# Version tag of this script; nothing in the visible code reads it.
version = '0.1'
class Config:
    """Constants shared by the synthetic-data generation script."""

    # Sample size: main() passes this to generate_and_store_data().
    sample_dim = 1000

    # Intercept and slope of the linear target `Config.b + Config.w * X`;
    # that formula is currently only present as a commented-out alternative.
    b = 0.5
    w = 0.3

    # Bounds used by the commented-out discrete-uniform sampler
    # (np.random.randint) in generate_and_store_data().
    startpoint = 1
    endpoint = 100
def generate_and_store_data(dim):
    """Draw ``dim`` random inputs, compute their targets, save both as CSV.

    The inputs come from ``np.random.normal(0, 0.4, dim)``, i.e. NumPy's
    global random state, which is not seeded here.  The inputs are written
    to ``RawData.csv`` and the targets to ``LearningSet.csv`` (paths are
    relative to the current working directory).  Each file holds one value
    per line formatted with ten decimal places, preceded by a ``# ``-prefixed
    header line and followed by a ``# end of file`` footer line.

    Args:
        dim: Number of samples; forwarded to ``np.random.normal`` as the
            output size.

    Returns:
        None.  The two files on disk are the only result.
    """
    # Alternative samplers that are left disabled:
    #   np.random.seed(0)
    #   X = np.random.randint(Config.startpoint, Config.endpoint, size=dim)  # discrete uniform
    #   X = np.random.uniform(-10, 10, size=(dim))
    inputs = np.random.normal(0, 0.4, dim)

    # Target = cubic polynomial plus small sin/cos/tan terms.
    # Simpler targets that are left disabled:
    #   y = Config.b + Config.w * X
    #   y = 0.1 * X + 0.5 * np.power(X, 2)
    linear = 0.1 * inputs
    quadratic = 0.5 * np.power(inputs, 2)
    cubic = 0.2 * np.power(inputs, 3)
    sine = 0.1 * np.sin(inputs)
    cosine = 0.01 * np.cos(inputs)
    tangent = 0.022 * np.tan(inputs)
    # Summed left to right, in the order the terms are listed above.
    targets = linear + quadratic + cubic + sine + cosine + tangent

    # Formatting options common to both output files.
    shared_format = dict(
        fmt='%.10f',            # ten digits after the decimal point
        delimiter=',',          # column delimiter
        newline='\n',           # line terminator
        footer='end of file',   # trailing line, written after `comments`
        comments='# ',          # prefix put in front of header and footer
    )
    outputs = (
        ('RawData.csv', inputs, 'Input Data'),
        ('LearningSet.csv', targets, 'Output Data'),
    )
    for file_name, values, title in outputs:
        np.savetxt(file_name, values, header=title, **shared_format)

    # Disabled visual check of the generated data:
    #   colors = np.random.rand(dim)
    #   area = np.pi * (3 * np.random.rand(dim))**2
    #   plt.scatter(X, y, s=area, c=colors, alpha=0.5)
    #   plt.show()
def read_data():
    """Load the inputs and targets back from the two CSV files.

    Reads column 0 of ``RawData.csv`` and of ``LearningSet.csv`` (paths are
    relative to the current working directory) with ``np.genfromtxt``.
    Because a column name is supplied, each result is a structured array
    with a single field called ``'first'``; use ``arr['first']`` to get the
    plain values.  Missing entries are filled with 0.

    NOTE(review): the inputs are parsed as ``'f8'`` while the targets are
    parsed as ``'float32'``; confirm whether that mismatch is intentional.

    Returns:
        A ``(X, Y)`` tuple: the parsed inputs and the parsed targets.
    """
    # Parsing options common to both files.
    parse_options = dict(
        skip_header=0,      # lines to skip at the top
        skip_footer=0,      # lines to skip at the bottom
        delimiter=',',      # column delimiter
        filling_values=0,   # substitute for missing values
        usecols=0,          # only the first column is read
        names=['first'],    # field name given to that column
    )
    inputs = np.genfromtxt('RawData.csv', dtype='f8', **parse_options)
    targets = np.genfromtxt('LearningSet.csv', dtype='float32', **parse_options)

    # Disabled debugging / conversion experiments:
    #   print(X); print(X.shape)
    #   X = np.array(X).astype('float')
    #   X = X * 1
    # Disabled visual check:
    #   colors = np.random.rand(Config.sample_dim)
    #   area = np.pi * (3 * np.random.rand(Config.sample_dim))**2
    #   plt.scatter(X, y, s=area, c=colors, alpha=0.5)
    #   plt.show()
    return inputs, targets
def main():
    """Write a fresh data set to disk, then read it back.

    The sample count is taken from ``Config.sample_dim``.  The arrays
    returned by ``read_data`` are discarded.
    """
    sample_count = Config.sample_dim
    generate_and_store_data(sample_count)
    read_data()
# Run the generate-then-read cycle only when this file is executed as a
# script; importing it as a module has no such side effect.
if __name__ == "__main__":
    main()