-
Notifications
You must be signed in to change notification settings - Fork 4
/
asac_NEW.py
221 lines (170 loc) · 8.79 KB
/
asac_NEW.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import os
import numpy as np
import pandas as pd
import tushare as ts
import backtrader as bt
from ASAC_Trader import ASAC_Trader_wrapper
import matplotlib.pyplot as plt
import copy
############################### training ####################################
KEEP_DAY = 6
TIME_STEPS = 55
STATE_DIM = 5
trader = ASAC_Trader_wrapper(state_dim=STATE_DIM, action_dim=1, time_steps=TIME_STEPS,eval_mode=True)
# Create a Stratey
class TestStrategy(bt.Strategy):
params = dict(maperiod=55)
def log(self, txt, dt=None):
''' Logging function for this strategy'''
dt = dt or self.datas[0].datetime.date(0)
print('%s, %s' % (dt.isoformat(), txt))
def __init__(self):
volume_ema = bt.indicators.EMA(self.data.volume, period=self.p.maperiod, plot=False)
self.close_ema = bt.indicators.EMA(self.data.close, period=self.p.maperiod, plot=False)
self.volume_ratio = self.data.volume / (volume_ema)
self.close_ratio = self.data.close / (self.close_ema)
self.open_ratio = self.data.open / (self.close_ema)
self.high_ratio = self.data.high / (self.close_ema)
self.low_ratio = self.data.low / (self.close_ema)
self.state_list = []
self.last_state_value = 0
self.last_state = np.array([[]])
self.time_steps = TIME_STEPS
self.order = None
self.data_store = []
self.current_cold_day = 0
self.last_stock_size = 0
self.last_money = 0
self.current_stock_size = 0
self.avg_stock_cost = self.data.close[0]
def notify_order(self, order):
if order.status in [order.Submitted, order.Accepted]:
return
if order.status in [order.Completed]:
if order.isbuy():
self.buyprice = order.executed.price
self.buycomm = order.executed.comm
else:
pass
elif order.status in [order.Canceled, order.Margin, order.Rejected]:
self.log(order.status)
self.order = None
# def notify_cashvalue(self, cash, value):
# self.log('Cash %s Value %s' % (cash, value))
# def notify_trade(self, trade):
# if not trade.isclosed:
# return
# self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
# (trade.pnl, trade.pnlcomm))
def store_data(self, current_state, current_action, current_moneyRatio, chance_return):
sg_data = dict(
current_state= current_state,
current_moneyRatio = current_moneyRatio,
current_action = current_action,
chance_return = chance_return
)
self.data_store.append(sg_data)
if len(self.data_store) == 2:
trader.total_step = trader.total_step + 1 # the trader add experience
last_experience = self.data_store[0]
next_experience = self.data_store[1]
sg_return = 10 * next_experience['chance_return'] + 0.03 * np.abs(last_experience['current_action']) ## a single reward for ASAC
trader.add_experience( state=last_experience['current_state'],
moneyRatio=last_experience['current_moneyRatio'],
action=last_experience['current_action'],
reward=sg_return, ## this is shifted
next_state=next_experience['current_state'],
next_moneyRatio=next_experience['current_moneyRatio'],
done=[False])
# delete first-line data after a transition-tuple is generated
self.data_store = self.data_store[1:]
def collect_status(self):
self.last_state_value = self.broker.getvalue()
self.last_money = self.broker.getcash()
self.last_stock_size = self.position.size
if self.position.size > 0:
self.avg_stock_cost = (self.broker.getvalue() - self.broker.getcash() ) / self.position.size
def next(self):
self.current_cold_day = self.current_cold_day - 1
if self.order:
return
average_cost_ratio = self.avg_stock_cost / self.close_ema[0]
current_money = self.broker.getcash()
current_value = self.broker.getvalue()
money_ratio = current_money * 1.0 / (current_value)
current_moneyRatio = np.array([ average_cost_ratio, money_ratio ])
current_status = [self.close_ratio[0],self.open_ratio[0],self.volume_ratio[0], \
self.high_ratio[0],self.low_ratio[0]]
self.state_list.append(current_status)
self.state_list = self.state_list[-self.time_steps:]
current_state = np.array(self.state_list).reshape(1,-1,STATE_DIM)
source_action, invest_action = trader.choose_action(current_state, current_moneyRatio)
source_action, invest_action = source_action.item(), invest_action.item()
if self.last_state.shape[1] < self.time_steps:
self.last_state_value = self.broker.getvalue()
self.last_state = copy.deepcopy(current_state)
self.last_money = self.broker.get_cash()
return # making no action if data is not enough
else:
if self.current_cold_day > 0:
return # no action is made when cold
else:
self.current_cold_day = KEEP_DAY
last_return = current_value * 1.0 / (self.last_state_value) - 1
if_no_action_value = self.last_stock_size * self.data.close[0] + self.last_money
no_action_return = if_no_action_value * 1.0 / (self.last_state_value) - 1
chance_return = last_return - no_action_return
# the following status processing should be before the action is excuted
self.store_data(current_state, source_action, current_moneyRatio, chance_return)
self.collect_status()
trader.trader_learn()
# finally the actions for current_state is excuted
if invest_action > 0:
buy_size = int(invest_action * current_value / (self.data.close[0]) )
buy_size = int(buy_size * 0.90/100) * 100 ## in case of next open price > close_price
self.order = self.buy(size=buy_size)
elif invest_action < 0:
sell_size = int( - invest_action * (current_value ) / (self.data.close[0]) )
sell_size = int(sell_size/100 ) * 100
self.order = self.sell(size=sell_size)
final_value_list = []
def run_train():
data_folder = 'data'
file_name_list = os.listdir(data_folder)
np.random.shuffle(file_name_list)
for file_index in range(len(file_name_list)):
try:
file_name = file_name_list[file_index]
file_path = os.path.join(data_folder,file_name)
dataframe = pd.read_csv(file_path)
dataframe.index = dataframe['date'].apply(lambda x: pd.Timestamp(x))
dataframe = dataframe.where(dataframe != 0)
dataframe = dataframe.fillna(method='ffill')
dataframe['openinterest'] = 0
if dataframe.shape[0] < 1200:
continue
cerebro = bt.Cerebro()
cerebro.addstrategy(TestStrategy)
data = bt.feeds.PandasData(dataname=dataframe,
fromdate=dataframe.index[-300].to_pydatetime(),
todate=dataframe.index[-1].to_pydatetime()
)
cerebro.adddata(data)
cerebro.broker.setcash(800000)
cerebro.broker.set_shortcash(shortcash=False) ## this is important
cerebro.broker.setcommission(commission=0.001)
print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
cerebro.run()
print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
reward = cerebro.broker.getvalue() * 1.0 / 800000 -1
final_value_list.append(reward)
except:
print(file_name_list[file_index])
continue
cerebro.plot()
run_train()
plt.plot(final_value_list)
plt.show()
print(np.mean(final_value_list))