# best_lunar_lander copy.py
import torch
import numpy as np
import gymnasium as gym
from functools import partial
from multiprocessing import Pool

from classes.NEAT import *

#174
#386
# Load the saved species checkpoint from the NEAT training run (generation 18).
species = torch.load('runs/lunar/2023-12-03 15:15:59.070545/species_18.pt')
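# Assumption from usage below: the checkpoint holds a list of Species
# objects, each exposing a .genotypes list of evolved network genotypes.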

def lunar_fitness(genotype_and_env, inputs, targets):
    """Return the mean episode reward of a genotype over several rollouts.

    `inputs` and `targets` are unused here; they appear to be kept for
    interface compatibility with other fitness functions.
    """
    genotype, env = genotype_and_env
    network = NeuralNetwork(genotype)
    fitness = 0
    num_tries = 4
    for _ in range(num_tries):
        observation, info = env.reset()
        terminated, truncated = False, False
        while not terminated and not truncated:
            # Node 0 is the bias input; nodes 1-8 receive the 8-dimensional
            # LunarLander observation.
            net_input = {
                0: torch.tensor([1.0]),  # bias
                1: torch.tensor([observation[0]]),
                2: torch.tensor([observation[1]]),
                3: torch.tensor([observation[2]]),
                4: torch.tensor([observation[3]]),
                5: torch.tensor([observation[4]]),
                6: torch.tensor([observation[5]]),
                7: torch.tensor([observation[6]]),
                8: torch.tensor([observation[7]]),
            }
            actions = network.forward(net_input)
            actions = torch.tensor(actions)
            # Greedy discrete action: pick the output node with the largest value.
            action = torch.argmax(actions).item()
            observation, reward, terminated, truncated, info = env.step(action)
            fitness += reward
    fitness /= num_tries
    return fitness
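
# Evaluate every genotype of the first species in parallel and keep the best.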
best_genotype = None
n_workers = 8
# One environment per genotype so the parallel workers don't share state.
gymnasium_env = [gym.make("LunarLander-v2") for _ in range(150)]
genotypes = species[0].genotypes
with Pool(n_workers) as p:
    fitnesses = p.map(
        partial(lunar_fitness, inputs=None, targets=None),
        zip(genotypes, gymnasium_env[:len(genotypes)]),
    )
best_genotype = genotypes[np.argmax(fitnesses)]
print(np.max(fitnesses))
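
# Replay the best genotype indefinitely with on-screen rendering
# (interrupt with Ctrl+C to stop).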
env = gym.make("LunarLander-v2", render_mode='human')
env.reset()
while True:
    fitness = lunar_fitness((best_genotype, env), None, None)
    print(fitness)