main.py

import tensorflow as tf
from collections import deque
from data import actListCalls, optListCalls, lossListCalls
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import random
import numpy as np
GAMMA = 0.85                # discount factor applied to future value
LEARNING_RATE = 0.10        # how strongly new information overrides the existing estimate
ALPHA = LEARNING_RATE
MEMORY_SIZE = 100_000
BATCH_SIZE = 100
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.995
EPOCHS = 10
LEARNING_SIZE = 100000000000000  # far larger than MEMORY_SIZE, so learn() always uses the whole memory
INDIVIDUAL_LEARNING = False      # toggles the commented-out per-transition fit in learn()
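
# For reference, these hyperparameters plug into the standard Q-learning update
# (the same rule sketched in the commented-out lines of Teacher_Agent.learn()):
#
#     Q(s, a) <- (1 - ALPHA) * Q(s, a) + ALPHA * (reward + GAMMA * max_a' Q(s', a'))
#
# ALPHA weights how much new information overrides the old estimate, and GAMMA
# discounts the estimated future value.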
class Teacher_Agent:
    '''
    An agent that uses Q-learning to create a "smart" agent.
    '''
    def __init__(self):
        """
        Training parameters (things that need to be trained):
        Activations - numpy array of activation choices, one per layer
            Type: index into actListCalls.keys() (0 to len(actListCalls.keys()) - 1)
            Length: number of layers
        Optimizer - integer value
            Type: index into optListCalls.keys() (0 to len(optListCalls.keys()) - 1)
        Losses - integer value
            Type: index into lossListCalls.keys() (0 to len(lossListCalls.keys()) - 1)
        """
        # local copies of the module-level hyperparameters (same values)
        GAMMA = 0.85              # discount factor applied to future value
        LEARNING_RATE = 0.10      # how strongly new information overrides the existing estimate
        ALPHA = LEARNING_RATE
        MEMORY_SIZE = 100_000
        BATCH_SIZE = 100
        EXPLORATION_MAX = 1.0
        EXPLORATION_MIN = 0.01
        EXPLORATION_DECAY = 0.995
        POSSIBLE_LAYERS = 3
        self.exploration_rate = EXPLORATION_MAX
        MSE = list(lossListCalls.keys()).index("mse")
        RELU = list(actListCalls.keys()).index("relu")
        ADAM = list(optListCalls.keys()).index("Adam")
        input_size = 5
        initial_layers = 2
        initial_epochs = 5
        initial_blank_layers = [[RELU, 0] for i in range(POSSIBLE_LAYERS)]
        blank_layers = [item for sublist in initial_blank_layers for item in sublist]
        initial_state = [GAMMA, ALPHA, EXPLORATION_MAX, EXPLORATION_MIN, EXPLORATION_DECAY, 5, MSE, ADAM, RELU, input_size, RELU, 24, RELU, 12] + blank_layers
        initial_state_tensor = tf.convert_to_tensor([initial_state])
        self.state = initial_state_tensor
        self.q_values = initial_state
        self.state_next = self.state
        self.observation_space = len(initial_state)
        self.action_space = self.observation_space
        self.reward = 0
        # create the space to store "training" steps
        self.memory = deque(maxlen=MEMORY_SIZE)
        # create the neural network model - adjust as desired
        self.model = Sequential()
        self.model.add(Dense(input_size, input_shape=(self.observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="tanh"))
        self.model.add(Dense(24, activation="tanh"))
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
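        # With this layout the Q-network maps a state batch of shape
        # (1, self.observation_space) to an output of the same width; learn()
        # reads that prediction back as both the updated q_values and the next state.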
    def remember(self):
        #print(self.model.predict(state))
        # save the current transition (state, reward, next state) for learning
        myState = self.state.numpy()
        myNext_state = self.state_next.numpy()
        #print(flat_state)
        #self.memory.append((np.asarray(flat_state).T, action, reward, np.asarray(flat_next_state).T))
        self.memory.append((myState, self.reward, myNext_state))
    def learn(self):
        self.state = self.state_next
        if len(self.memory) < LEARNING_SIZE:
            # LEARNING_SIZE is set far above the memory capacity, so this branch
            # always runs and every saved transition is used
            batch = self.memory
        else:
            # pick random data from all saved data to use to improve the model
            batch = random.sample(self.memory, LEARNING_SIZE)
        states_batch = []
        q_values_batch = []
        # use each saved transition to update the Q values
        for state, reward, state_next in batch:
            self.q_values = list(self.model.predict(state_next)[0])
            #print("stuff")
            #q_update = (reward + GAMMA * np.amax(self.model.predict(state_next)))
            #self.q_values = self.model.predict(state)
            #q_values_old = self.q_values#[0][self.actionList.index(action[0])]
            #self.q_values = (1-ALPHA)*q_values_old + ALPHA*q_update #[0][self.actionList.index(action[0])]
            #if INDIVIDUAL_LEARNING:
            #    self.model.fit(state, self.q_values, epochs=EPOCHS, verbose=0)
            #else:
            #    states_batch.append(state[0])
            #    q_values_batch.append(self.q_values[0])
        #self.q_values = self.q_values[0]
        # clamp negative predictions to zero, keep the first five entries as floats
        # (gamma, alpha, exploration parameters) and cast the rest to ints
        # (epochs, loss/optimizer/activation indices, layer sizes)
        self.q_values = [max(0, valueVal) for valueVal in self.q_values]
        self.q_values = self.q_values[:5] + [int(y) for y in self.q_values[5:]]
        #self.model.fit(self.state, tf.convert_to_tensor([self.q_values]), epochs=100, verbose=0)
        #self.q_values = [int(val) for val in self.model.predict(self.state)[0]]
        self.state_next = tf.convert_to_tensor([self.q_values])
        if self.q_values[5] == 0:
            print("Network Returned 0 Epochs")
            #return
        print(self.q_values)
        print("Gamma:", self.q_values[0])
        print("Alpha:", self.q_values[1])
        print("Exp. Max:", self.q_values[2])
        print("Exp. Min:", self.q_values[3])
        print("Exp. Decay:", self.q_values[4])
        print("Epochs:", self.q_values[5])
        print("Loss:", list(lossListCalls.keys())[self.q_values[6]])
        print("Optimizer:", list(optListCalls.keys())[self.q_values[7]])
        # entries from index 10 on are (activation index, node count) pairs
        q_val_layers = self.q_values[10:]
        q_layers = []
        i = 0
        for i in range(0, len(q_val_layers), 2):
            q_layers.append((q_val_layers[i], q_val_layers[i+1]))
        for layer, density in q_layers:
            if density == 0: continue
            print(density, "nodes", list(actListCalls.keys())[layer])
teacher = Teacher_Agent()
for i in range(1, 11):
    print("Training stage", i)
    teacher.learn()
    teacher.remember()
    print("------------------------------")