Skip to content

Commit

Permalink
edit model and training options
Browse files (browse the repository at this point in the history)
  • Loading branch information
dnddnjs committed Jul 18, 2018
1 parent af12df1 commit 919086f
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 21 deletions.
4 changes: 3 additions & 1 deletion unity/agent/ppo2.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ def train_model(actor, critic, memory, actor_optim, critic_optim):

values = critic(inputs)
clipped_values = oldvalue_samples + \
torch.clamp(values - oldvalue_samples, -0.2, 0.2)
torch.clamp(values - oldvalue_samples,
-hp.clip_param,
hp.clip_param)
critic_loss1 = criterion(clipped_values, returns_samples)
critic_loss2 = criterion(values, returns_samples)
critic_loss = torch.max(critic_loss1, critic_loss2).mean()
Expand Down
2 changes: 1 addition & 1 deletion unity/env/unity-environment.log
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
7/15/2018 2:07:23 PM
7/18/2018 11:17:24 PM

14 changes: 7 additions & 7 deletions unity/hparams.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
class HyperParams:
gamma = 0.99
lamda = 0.98
hidden = 64
critic_lr = 0.0003
actor_lr = 0.0003
batch_size = 64
gamma = 0.995
lamda = 0.95
hidden = 512
critic_lr = 0.0001
actor_lr = 0.0001
batch_size = 1024
l2_rate = 0.001
max_kl = 0.01
clip_param = 0.2
clip_param = 0.1
11 changes: 9 additions & 2 deletions unity/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

parser = argparse.ArgumentParser()
parser.add_argument('--render', default=False)
parser.add_argument('--load_model', default=None)
args = parser.parse_args()


Expand All @@ -23,6 +24,7 @@

env = UnityEnvironment(file_name=env_name)

# settings for Unity ML-Agents
default_brain = env.brain_names[0]
brain = env.brains[default_brain]

Expand All @@ -34,13 +36,17 @@

actor = Actor(num_inputs, num_actions)
critic = Critic(num_inputs)
# actor = torch.load('save_model/actor')
# critic = torch.load('save_model/critic')

if args.load_model is not None:
model_path = args.load_model
actor = torch.load(model_path + '/actor')
critic = torch.load(model_path + '/critic')

actor_optim = optim.Adam(actor.parameters(), lr=hp.actor_lr)
critic_optim = optim.Adam(critic.parameters(), lr=hp.critic_lr,
weight_decay=hp.l2_rate)

# running average of state
running_state = ZFilter((num_inputs,), clip=5)
episodes = 0
for iter in range(10000):
Expand All @@ -55,6 +61,7 @@
state = env_info.vector_observations[0]
state = running_state(state)
score = 0

for _ in range(10000):
steps += 1
mu, std, _ = actor(torch.Tensor(state).unsqueeze(0))
Expand Down
21 changes: 11 additions & 10 deletions unity/model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from hparams import HyperParams as hp
from mujoco.hparams import HyperParams as hp


class Actor(nn.Module):
Expand All @@ -11,20 +11,21 @@ def __init__(self, num_inputs, num_outputs):
super(Actor, self).__init__()
self.fc1 = nn.Linear(num_inputs, hp.hidden)
self.fc2 = nn.Linear(hp.hidden, hp.hidden)
self.fc3 = nn.Linear(hp.hidden, num_outputs)
self.fc3 = nn.Linear(hp.hidden, hp.hidden)
self.fc4 = nn.Linear(hp.hidden, num_outputs)

self.fc3.weight.data.mul_(0.1)
self.fc3.bias.data.mul_(0.0)

self.fc4.weight.data.mul_(0.0)
self.fc4.weight.data.mul_(0.1)
self.fc4.bias.data.mul_(0.0)

def forward(self, x):
x = F.tanh(self.fc1(x))
x = F.tanh(self.fc2(x))
mu = self.fc3(x)
logstd = self.fc4(x)
x = self.fc1(x)
x = x * F.sigmoid(x)
x = self.fc2(x)
x = x * F.sigmoid(x)
x = self.fc3(x)
x = x * F.sigmoid(x)
mu = self.fc4(x)
logstd = torch.zeros_like(mu)
std = torch.exp(logstd)
return mu, std, logstd

Expand Down
Binary file modified unity/save_model/actor1
Binary file not shown.
Binary file modified unity/save_model/critic1
Binary file not shown.

0 comments on commit 919086f

Please sign in to comment.