Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Adding MultiAgent Utilities #323

Open
wants to merge 46 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
1d49049
Single actor critic shared params
hades-rp2010 Sep 1, 2020
ef4a179
Shared layers for multi ACs
hades-rp2010 Sep 1, 2020
2ecd086
Merge branch 'master' of https://github.com/SforAiDl/genrl
hades-rp2010 Sep 1, 2020
53450a8
Fix lint errors (1)
hades-rp2010 Sep 1, 2020
274aff9
Fixed tests
hades-rp2010 Sep 1, 2020
38f95f0
Changes to dicstrings and classes
hades-rp2010 Sep 2, 2020
0927001
adding MultiAgentBuffer
AdityaKapoor74 Sep 3, 2020
daa8b2a
shared mlp
AdityaKapoor74 Sep 3, 2020
44db72e
adding changes
AdityaKapoor74 Sep 3, 2020
4ef8f48
new mlp for maddpg
AdityaKapoor74 Sep 3, 2020
d8cf1a9
adding environment loader
AdityaKapoor74 Sep 3, 2020
8d2cf06
Adding Actor and Critic classes
AdityaKapoor74 Sep 3, 2020
1365585
adding new functionalities
AdityaKapoor74 Sep 3, 2020
5067e42
minor changes
AdityaKapoor74 Sep 3, 2020
6f0563e
added return statement to mlp_
AdityaKapoor74 Sep 3, 2020
5061abe
rectifying
AdityaKapoor74 Sep 4, 2020
e6a378c
rectifying 2
AdityaKapoor74 Sep 4, 2020
915d19d
rectifying 3
AdityaKapoor74 Sep 4, 2020
b0b5025
adding test for mlp_concat
AdityaKapoor74 Sep 4, 2020
8cc732b
adding test for mlp_concat
AdityaKapoor74 Sep 4, 2020
b8f7f6a
fixing errors
AdityaKapoor74 Sep 4, 2020
e50e230
adding docstring
AdityaKapoor74 Sep 4, 2020
835819e
Renaming Multi -> Two and comments
hades-rp2010 Sep 4, 2020
793c045
changing names
AdityaKapoor74 Sep 5, 2020
2635fd5
changing names
AdityaKapoor74 Sep 5, 2020
cd87506
Merge branch 'master' of https://github.com/AdityaKapoor74/genrl into…
hades-rp2010 Sep 5, 2020
65b6520
Shared params for single ACs
hades-rp2010 Sep 5, 2020
3d01b85
Merge branch 'multiagentutils' into shared
hades-rp2010 Sep 5, 2020
a62c100
Merge pull request #1 from hades-rp2010/shared
AdityaKapoor74 Oct 4, 2020
841ff66
Merge branch 'master' into multiagentutils
AdityaKapoor74 Oct 4, 2020
2be8df5
rollout buffer for MA
AdityaKapoor74 Oct 4, 2020
ac9b5a8
Merge branch 'multiagentutils' of https://github.com/AdityaKapoor74/g…
AdityaKapoor74 Oct 4, 2020
10282f0
Update genrl/utils/utils.py
AdityaKapoor74 Oct 4, 2020
79b531b
Update genrl/agents/deep/ppo1/ppo1.py
AdityaKapoor74 Oct 4, 2020
a3885a0
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
e3dc677
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
4c2ad51
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
43554e4
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
6828e93
removing SharedAC class
AdityaKapoor74 Oct 4, 2020
eac920c
removing SharedAC class
AdityaKapoor74 Oct 4, 2020
194065f
rectify
AdityaKapoor74 Oct 4, 2020
c0198bc
rectify
AdityaKapoor74 Oct 4, 2020
fe40835
rectify
AdityaKapoor74 Oct 4, 2020
a50204a
rectifying
AdityaKapoor74 Oct 4, 2020
4a3cd74
removing unecessary code
AdityaKapoor74 Oct 4, 2020
602a7b5
removing unecessary code
AdityaKapoor74 Oct 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.4
rev: v2.2.0
hooks:
- id: seed-isort-config
args: [--exclude=^((examples|docs)/.*)$]

- repo: https://github.com/timothycrosley/isort

rev: 5.4.2
hooks:
- id: isort
Expand All @@ -14,7 +15,7 @@ repos:
rev: 20.8b1
hooks:
- id: black
language_version: python3.7
language_version: python3

- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
Expand Down
2 changes: 2 additions & 0 deletions genrl/agents/deep/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ def _create_model(self) -> None:
state_dim, action_dim, discrete, action_lim = get_env_properties(
self.env, self.network
)

if isinstance(self.network, str):
arch_type = self.network
if self.shared_layers is not None:
arch_type += "s"

self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
Expand Down
3 changes: 3 additions & 0 deletions genrl/agents/deep/base/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
create_model: bool = True,
batch_size: int = 64,
gamma: float = 0.99,

shared_layers=None,
policy_layers: Tuple = (64, 64),
value_layers: Tuple = (64, 64),
Expand All @@ -52,6 +53,8 @@ def __init__(
self.value_layers = value_layers
self.lr_policy = lr_policy
self.lr_value = lr_value
self.actor_prev = actor_prev
self.critic_prev = critic_prev

self.seed = kwargs["seed"] if "seed" in kwargs else None
self.render = kwargs["render"] if "render" in kwargs else False
Expand Down
13 changes: 13 additions & 0 deletions genrl/agents/deep/ddpg/ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def _create_model(self) -> None:
arch_type = self.network
if self.shared_layers is not None:
arch_type += "s"

self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
Expand All @@ -75,6 +76,18 @@ def _create_model(self) -> None:
"Qsa",
False,
).to(self.device)
elif isinstance(self.network, str) and self.shared_layers is not None:
arch_type = self.network + "s"
self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
critic_prev=self.critic_prev,
actor_prev=self.actor_prev,
shared_layers=self.shared_layers,
critic_post=self.value_layers,
actor_post=self.policy_layers,
val_type="Qsa",
).to(self.device)
else:
self.ac = self.network

Expand Down
12 changes: 12 additions & 0 deletions genrl/agents/deep/ppo1/ppo1.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _create_model(self):
arch = self.network
if self.shared_layers is not None:
arch += "s"

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved
self.ac = get_model("ac", arch)(
state_dim,
action_dim,
Expand All @@ -85,6 +86,17 @@ def _create_model(self):
action_lim=action_lim,
activation=self.activation,
).to(self.device)
elif isinstance(self.network, str) and self.shared_layers is not None:
arch_type = self.network + "s"
self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
critic_prev=self.critic_prev,
actor_prev=self.actor_prev,
shared_layers=self.shared_layers,
critic_post=self.value_layers,
actor_post=self.policy_layers,
).to(self.device)
else:
self.ac = self.network.to(self.device)

Expand Down
141 changes: 140 additions & 1 deletion genrl/core/actor_critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@

import torch # noqa
import torch.nn as nn # noqa
import torch.nn.functional as F
from gym import spaces
from torch.distributions import Categorical, Normal

from genrl.core.base import BaseActorCritic
from genrl.core.policies import MlpPolicy
from genrl.core.values import MlpValue

from genrl.utils.utils import cnn, mlp

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved


class MlpActorCritic(BaseActorCritic):
"""MLP Actor Critic

Expand Down Expand Up @@ -47,6 +50,7 @@ def get_params(self):
return actor_params, critic_params

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved


class MlpSharedActorCritic(BaseActorCritic):
"""MLP Shared Actor Critic

Expand Down Expand Up @@ -162,7 +166,6 @@ def get_value(self, state: torch.Tensor):
value = self.critic(shared_features)
return value

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved

class MlpSingleActorTwoCritic(BaseActorCritic):
"""MLP Actor Critic

Expand Down Expand Up @@ -469,6 +472,142 @@ def get_value(self, inp: torch.Tensor) -> torch.Tensor:
return value


class SharedActorCritic(BaseActorCritic):
    """Actor-critic whose actor and critic share a trunk of MLP layers.

    When both ``actor_prev`` and ``critic_prev`` are non-empty, each head gets
    its own input stack (prefixed with the state dimension) in front of the
    shared trunk; otherwise the shared trunk itself is prefixed with the state
    dimension.

    :param state_dim: size of the state vector
    :param action_dim: size of the action space
    :param shared_layers: layer sizes of the shared trunk
    :param critic_post: critic-only layer sizes after the trunk
    :param actor_post: actor-only layer sizes after the trunk
    :param val_type: "V" (state value), "Qsa" (state-action value) or
        "Qs" (one value per action); anything else raises NotImplementedError
    :param weight_init: weight-initialisation scheme forwarded to ``shared_mlp``
    :param critic_prev: critic-only layer sizes before the trunk
    :param actor_prev: actor-only layer sizes before the trunk
    """

    def __init__(
        self,
        state_dim,
        action_dim,
        shared_layers,
        critic_post,
        actor_post,
        val_type="V",
        weight_init="xavier_uniform",
        activation_func="relu",
        critic_prev=None,
        actor_prev=None,
    ):
        super(SharedActorCritic, self).__init__()
        # Avoid mutable default arguments (original used `[]`); also wrap in
        # list() so tuples are accepted uniformly (the original missed the
        # wrap on one branch: `[state_dim] + critic_prev`).
        critic_prev = [] if critic_prev is None else list(critic_prev)
        actor_prev = [] if actor_prev is None else list(actor_prev)

        if actor_prev and critic_prev:
            actor_prev = [state_dim] + actor_prev
            if val_type == "Qsa":
                # Q(s, a) critics consume the state concatenated with the action.
                critic_prev = [state_dim + action_dim] + critic_prev
            else:
                critic_prev = [state_dim] + critic_prev
        else:
            shared_layers = [state_dim] + list(shared_layers)

        if val_type in ("V", "Qsa"):
            critic_post = list(critic_post) + [1]
        elif val_type == "Qs":
            critic_post = list(critic_post) + [action_dim]
        else:
            raise NotImplementedError

        actor_post = list(actor_post) + [action_dim]
        # NOTE(review): `shared_mlp` is not among this module's visible imports
        # (only `cnn` and `mlp` are imported from genrl.utils.utils) — confirm
        # it is exported there and imported at the top of this file.
        self.critic, self.actor = shared_mlp(
            critic_prev,
            actor_prev,
            shared_layers,
            critic_post,
            actor_post,
            weight_init,
            activation_func,
            False,
        )
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def get_params(self):
        """Return ``(actor_params, critic_params)`` parameter iterators."""
        return self.actor.parameters(), self.critic.parameters()

    def forward(self, state_critic, state_action):
        """Run the critic when ``state_critic`` is given, else the actor.

        Returns ``None`` when both inputs are ``None``.
        """
        if state_critic is not None:
            return self.critic(state_critic)
        if state_action is not None:
            return self.actor(state_action)

    def get_action(self, state, deterministic=False):
        """Choose a discrete action from the actor's logits.

        :param state: input state tensor
        :param deterministic: take the argmax instead of sampling
        :returns: ``(action_index, Categorical distribution)``
        """
        logits = self.forward(None, state)
        probs = nn.Softmax(dim=-1)(logits)
        dist = Categorical(probs)
        if deterministic:
            # NOTE(review): argmax path returns a float tensor while the
            # sampled path returns a long tensor — confirm callers accept both.
            index = torch.argmax(probs, dim=-1).unsqueeze(-1).float()
        else:
            index = dist.sample()
        # Removed stray debug statement `print(index.shape)` from the original.
        return index, dist

    def get_value(self, state):
        """Return the critic's value estimate for ``state``."""
        return self.forward(state, None)


class MultiAgentActor(MlpPolicy):
    """MLP policy network for a single agent in a multi-agent setup.

    :param state_dim: observation space of the agent
    :param action_dim: action space of the agent
    :param hidden: sizes of the hidden layers
    :param discrete: whether the action space is discrete
    """

    def __init__(
        self,
        state_dim: spaces.Space,
        action_dim: spaces.Space,
        hidden: Tuple = (32, 32),
        discrete: bool = True,
        **kwargs,
    ):
        # BUGFIX: the original passed `discrete ** kwargs` (missing comma),
        # which raises a TypeError (bool ** dict) instead of forwarding kwargs.
        super(MultiAgentActor, self).__init__(
            state_dim, action_dim, hidden, discrete, **kwargs
        )

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, state):
        """Return raw action logits for ``state``."""
        return self.model(state)

    def get_action(self, state, deterministic=False):
        """Choose an action: argmax of the probabilities when deterministic,
        otherwise a sample from the categorical distribution.

        BUGFIX: the original named the softmax output `dist` and the
        Categorical `probs`, then called `torch.argmax(probs)` on the
        distribution object, which fails on the deterministic path.

        NOTE(review): softmax over dim=0 assumes an unbatched 1-D logits
        vector — confirm for batched callers.
        """
        logits = self.forward(state)

        probs = F.softmax(logits, dim=0)
        dist = Categorical(probs)
        if deterministic:
            index = torch.argmax(probs)
        else:
            index = dist.sample().cpu().detach().item()
        return index


class MultiAgentCritic(MlpValue):
    """MLP value network for a single agent in a multi-agent setup.

    :param state_dim: observation space of the agent
    :param action_dim: action space of the agent
    :param fc_layers: sizes of the hidden layers
    :param val_type: value-function type ("V", "Qs", "Qsa", ...)
    """

    def __init__(
        self,
        state_dim: spaces.Space,
        action_dim: spaces.Space,
        fc_layers: Tuple = (32, 32),
        val_type: str = "V",
        **kwargs,
    ):
        super(MultiAgentCritic, self).__init__(
            state_dim, action_dim, fc_layers, val_type, **kwargs
        )
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, state):
        """Map ``state`` through the underlying MLP to a value estimate."""
        return self.model(state)

    def get_value(self, state):
        """Alias for :meth:`forward`: return the value of ``state``."""
        return self.forward(state)


actor_critic_registry = {
"mlp": MlpActorCritic,
"cnn": CNNActorCritic,
Expand Down
Loading