mpcnn_train.py
# References:
# https://dl.acm.org/doi/pdf/10.1145/2576768.2598358
# https://www.jmlr.org/papers/volume9/gomez08a/gomez08a.pdf
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image  # used by rgb2hsv below
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
# import colorsys
###hyperparams
thresh = 5
N = 583
ps = 100
mutR = 0.8
gens = 3
k = 100
#
## evolve MPCNN (max-pooling CNN feature extractor)
class MPCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 2)
        self.pool = nn.MaxPool2d(3, 3)
        self.conv2 = nn.Conv2d(10, 10, 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(10, 3, 2)
        self.pool3 = nn.MaxPool2d(2)
        self.f = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.conv1(x))
        # conv2/pool2 is applied twice with the same (shared) weights
        x = self.pool2(self.f(self.conv2(x)))
        x = self.pool2(self.f(self.conv2(x)))
        x = self.pool3(self.f(self.conv3(x)))
        return x
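# Note: with a 64x64 single-channel input, the layer sizes above reduce each
# frame to a (3, 1, 1) feature map, i.e. a 3-dimensional feature vector per frame.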
def rgb2hsv(im):
    # Convert an HxWx3 uint8 RGB array to an HSV array via PIL.
    return np.asarray(Image.fromarray(im).convert('HSV'))
env = UE(file_name='/home/whale/CarUnity/RachelCar/car.x86_64', seed=1, side_channels=[])
env.reset()
behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]
data_sample = torch.zeros(k, 64, 64, 1)
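# Roll out a few episodes with random actions and collect up to k visual
# observations; only the V (value) channel of each HSV frame is kept.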
i = 0  # index of the next frame to store in data_sample
for episode in range(3):
    env.reset()
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    tracked_agent = -1
    done = False
    while not done:
        if tracked_agent == -1 and len(decision_steps) >= 1:
            tracked_agent = decision_steps.agent_id[0]
        # In recent mlagents_envs releases the random action comes from the
        # behavior's ActionSpec rather than from the BehaviorSpec itself.
        action = spec.action_spec.random_action(len(decision_steps))
        env.set_actions(behavior_name, action)
        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        if tracked_agent in terminal_steps:
            done = True
        if len(decision_steps) >= 1 and i < k:
            # Assumes the first visual observation is an (n_agents, 64, 64, 3)
            # float array in [0, 1]; convert to uint8 RGB, then to HSV, and
            # keep only the V channel.
            frame = (decision_steps.obs[0][0] * 255).astype(np.uint8)
            obs = rgb2hsv(frame)
            data_sample[i, :, :, 0] = torch.from_numpy(obs[:, :, 2].astype(np.float32))
            i += 1
data_sample = data_sample.permute(0, 3, 1, 2)  # NHWC -> NCHW for the conv net
# data_sample = torch.ones(100, 1, 64, 64)
p = torch.zeros(N, ps, ps)  # population tensor; see the layout note below
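# Layout of p, as the loop below appears to use it: p[:, i, 0] holds the
# flattened genome (all N network weights) of individual i, and after
# evaluation the scalar fitness of individual i is broadcast into p[:, :, i],
# so evals = p[0, 0, :] reads back one fitness value per individual.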
for g in range(gens):
    # Evaluate every individual in the population.
    for i in range(p.shape[1]):
        mpcnn = MPCNN()
        if g == 0:
            # Seed the population with the randomly initialised weights.
            p[:, i, 0] = torch.cat([a.flatten() for a in mpcnn.parameters()])
        elif g > 0:
            # Load individual i's genome back into the network, slicing the
            # flat genome vector to match each parameter tensor.
            wi = 0
            with torch.no_grad():
                for param in mpcnn.parameters():
                    n = param.numel()
                    param.copy_(p[wi:wi + n, i, 0].view(param.shape))
                    wi += n
        # Fitness: min + mean pairwise distance between the normalised feature
        # maps and a rolled copy of them (larger spread = better).
        feature_out = mpcnn(data_sample)
        Fnorm = nn.functional.normalize(feature_out)
        Fnorm_shift = torch.roll(Fnorm, shifts=1)
        Fdif = nn.functional.pairwise_distance(Fnorm, Fnorm_shift).abs()
        p[:, :, i] = torch.min(Fdif) + torch.mean(Fdif)
    evals = p[0, 0, :]  # bigger = better, "maximize"
    print('gen:', g, 'max fitness:', torch.max(evals))
    # Selection: sort along the last axis and slice out a 20% block as the parent pool.
    p_sort = torch.sort(p, 2)[0]
    p_top = p_sort[:, :int(ps * .2), :int(ps * .2)]
    # Gene indices used for crossover swaps and for mutation below.
    inds = torch.multinomial(p_top[0, 0, :], int(p_top.shape[1] * mutR))
    n_genes, n_parents, _ = p_top.shape
    # Crossover: sample parent pairs and mix their genes at the chosen indices.
    combinations = [[p1, p2] for p1 in range(n_parents - 1, 0, -1) for p2 in range(p1 - 1, -1, -1)]
    comb_sample = list(np.random.choice(len(combinations), size=(n_parents), replace=False))
    combinations = [comb for i, comb in enumerate(combinations) if i in comb_sample]
    children = torch.zeros(p_top.shape)
    for i_comb, comb in enumerate(combinations):
        parent1 = p_top[:, comb[0], 0]
        parent2 = p_top[:, comb[1], 0]
        child = parent1.clone()
        child[inds] = parent2[inds]
        children[:, i_comb, 0] = child
    # Mutation: randomise the selected genes of the children.
    children[:, inds, 0] = torch.rand(len(inds))
    # Shuffle the gene order within each individual, then overwrite the last
    # 20% block of the population with the children.
    for i in range(p.shape[1]):
        p[:, i, 0] = p[torch.randperm(len(p)), i, 0]
    p[:, -int(ps * .2):, -int(ps * .2):] = children