Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Adding MultiAgent Utilities #323

Open
wants to merge 46 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
1d49049
Single actor critic shared params
hades-rp2010 Sep 1, 2020
ef4a179
Shared layers for multi ACs
hades-rp2010 Sep 1, 2020
2ecd086
Merge branch 'master' of https://github.com/SforAiDl/genrl
hades-rp2010 Sep 1, 2020
53450a8
Fix lint errors (1)
hades-rp2010 Sep 1, 2020
274aff9
Fixed tests
hades-rp2010 Sep 1, 2020
38f95f0
Changes to dicstrings and classes
hades-rp2010 Sep 2, 2020
0927001
adding MultiAgentBuffer
AdityaKapoor74 Sep 3, 2020
daa8b2a
shared mlp
AdityaKapoor74 Sep 3, 2020
44db72e
adding changes
AdityaKapoor74 Sep 3, 2020
4ef8f48
new mlp for maddpg
AdityaKapoor74 Sep 3, 2020
d8cf1a9
adding environment loader
AdityaKapoor74 Sep 3, 2020
8d2cf06
Adding Actor and Critic classes
AdityaKapoor74 Sep 3, 2020
1365585
adding new functionalities
AdityaKapoor74 Sep 3, 2020
5067e42
minor changes
AdityaKapoor74 Sep 3, 2020
6f0563e
added return statement to mlp_
AdityaKapoor74 Sep 3, 2020
5061abe
rectifying
AdityaKapoor74 Sep 4, 2020
e6a378c
rectifying 2
AdityaKapoor74 Sep 4, 2020
915d19d
rectifying 3
AdityaKapoor74 Sep 4, 2020
b0b5025
adding test for mlp_concat
AdityaKapoor74 Sep 4, 2020
8cc732b
adding test for mlp_concat
AdityaKapoor74 Sep 4, 2020
b8f7f6a
fixing errors
AdityaKapoor74 Sep 4, 2020
e50e230
adding docstring
AdityaKapoor74 Sep 4, 2020
835819e
Renaming Multi -> Two and comments
hades-rp2010 Sep 4, 2020
793c045
changing names
AdityaKapoor74 Sep 5, 2020
2635fd5
changing names
AdityaKapoor74 Sep 5, 2020
cd87506
Merge branch 'master' of https://github.com/AdityaKapoor74/genrl into…
hades-rp2010 Sep 5, 2020
65b6520
Shared params for single ACs
hades-rp2010 Sep 5, 2020
3d01b85
Merge branch 'multiagentutils' into shared
hades-rp2010 Sep 5, 2020
a62c100
Merge pull request #1 from hades-rp2010/shared
AdityaKapoor74 Oct 4, 2020
841ff66
Merge branch 'master' into multiagentutils
AdityaKapoor74 Oct 4, 2020
2be8df5
rollout buffer for MA
AdityaKapoor74 Oct 4, 2020
ac9b5a8
Merge branch 'multiagentutils' of https://github.com/AdityaKapoor74/g…
AdityaKapoor74 Oct 4, 2020
10282f0
Update genrl/utils/utils.py
AdityaKapoor74 Oct 4, 2020
79b531b
Update genrl/agents/deep/ppo1/ppo1.py
AdityaKapoor74 Oct 4, 2020
a3885a0
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
e3dc677
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
4c2ad51
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
43554e4
Update genrl/core/actor_critic.py
AdityaKapoor74 Oct 4, 2020
6828e93
removing SharedAC class
AdityaKapoor74 Oct 4, 2020
eac920c
removing SharedAC class
AdityaKapoor74 Oct 4, 2020
194065f
rectify
AdityaKapoor74 Oct 4, 2020
c0198bc
rectify
AdityaKapoor74 Oct 4, 2020
fe40835
rectify
AdityaKapoor74 Oct 4, 2020
a50204a
rectifying
AdityaKapoor74 Oct 4, 2020
4a3cd74
removing unecessary code
AdityaKapoor74 Oct 4, 2020
602a7b5
removing unecessary code
AdityaKapoor74 Oct 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
repos:
- repo: https://github.com/asottile/seed-isort-config
rev: v1.9.4
rev: v2.2.0
hooks:
- id: seed-isort-config
args: [--exclude=^((examples|docs)/.*)$]

- repo: https://github.com/timothycrosley/isort

rev: 5.4.2
hooks:
- id: isort
Expand All @@ -14,7 +15,7 @@ repos:
rev: 20.8b1
hooks:
- id: black
language_version: python3.7
language_version: python3

- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
Expand Down
2 changes: 2 additions & 0 deletions genrl/agents/deep/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,12 @@ def _create_model(self) -> None:
state_dim, action_dim, discrete, action_lim = get_env_properties(
self.env, self.network
)

if isinstance(self.network, str):
arch_type = self.network
if self.shared_layers is not None:
arch_type += "s"

self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
Expand Down
3 changes: 3 additions & 0 deletions genrl/agents/deep/base/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
create_model: bool = True,
batch_size: int = 64,
gamma: float = 0.99,

shared_layers=None,
policy_layers: Tuple = (64, 64),
value_layers: Tuple = (64, 64),
Expand All @@ -52,6 +53,8 @@ def __init__(
self.value_layers = value_layers
self.lr_policy = lr_policy
self.lr_value = lr_value
self.actor_prev = actor_prev
self.critic_prev = critic_prev

self.seed = kwargs["seed"] if "seed" in kwargs else None
self.render = kwargs["render"] if "render" in kwargs else False
Expand Down
13 changes: 13 additions & 0 deletions genrl/agents/deep/ddpg/ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def _create_model(self) -> None:
arch_type = self.network
if self.shared_layers is not None:
arch_type += "s"

self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
Expand All @@ -75,6 +76,18 @@ def _create_model(self) -> None:
"Qsa",
False,
).to(self.device)
elif isinstance(self.network, str) and self.shared_layers is not None:
arch_type = self.network + "s"
self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
critic_prev=self.critic_prev,
actor_prev=self.actor_prev,
shared_layers=self.shared_layers,
critic_post=self.value_layers,
actor_post=self.policy_layers,
val_type="Qsa",
).to(self.device)
else:
self.ac = self.network

Expand Down
12 changes: 12 additions & 0 deletions genrl/agents/deep/ppo1/ppo1.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _create_model(self):
arch = self.network
if self.shared_layers is not None:
arch += "s"

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved
self.ac = get_model("ac", arch)(
state_dim,
action_dim,
Expand All @@ -85,6 +86,17 @@ def _create_model(self):
action_lim=action_lim,
activation=self.activation,
).to(self.device)
elif isinstance(self.network, str) and self.shared_layers is not None:
arch_type = self.network + "s"
self.ac = get_model("ac", arch_type)(
state_dim,
action_dim,
critic_prev=self.critic_prev,
actor_prev=self.actor_prev,
shared_layers=self.shared_layers,
critic_post=self.value_layers,
actor_post=self.policy_layers,
).to(self.device)
else:
self.ac = self.network.to(self.device)

Expand Down
141 changes: 140 additions & 1 deletion genrl/core/actor_critic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@

import torch # noqa
import torch.nn as nn # noqa
import torch.nn.functional as F
from gym import spaces
from torch.distributions import Categorical, Normal

from genrl.core.base import BaseActorCritic
from genrl.core.policies import MlpPolicy
from genrl.core.values import MlpValue

from genrl.utils.utils import cnn, mlp

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved


class MlpActorCritic(BaseActorCritic):
"""MLP Actor Critic

Expand Down Expand Up @@ -47,6 +50,7 @@ def get_params(self):
return actor_params, critic_params

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved


class MlpSharedActorCritic(BaseActorCritic):
"""MLP Shared Actor Critic

Expand Down Expand Up @@ -162,7 +166,6 @@ def get_value(self, state: torch.Tensor):
value = self.critic(shared_features)
return value

AdityaKapoor74 marked this conversation as resolved.
Show resolved Hide resolved

class MlpSingleActorTwoCritic(BaseActorCritic):
"""MLP Actor Critic

Expand Down Expand Up @@ -469,6 +472,142 @@ def get_value(self, inp: torch.Tensor) -> torch.Tensor:
return value


class SharedActorCritic(BaseActorCritic):
    """Actor-critic whose actor and critic share a trunk of MLP layers.

    When both ``actor_prev`` and ``critic_prev`` are non-empty, each head gets
    its own input stack (prefixed with the state dimension) in front of the
    shared trunk; otherwise the shared trunk itself is prefixed with the state
    dimension.

    :param state_dim: size of the state vector
    :param action_dim: size of the action space
    :param shared_layers: layer sizes of the shared trunk
    :param critic_post: critic-only layer sizes after the trunk
    :param actor_post: actor-only layer sizes after the trunk
    :param val_type: "V" (state value), "Qsa" (state-action value) or
        "Qs" (one value per action); anything else raises NotImplementedError
    :param weight_init: weight-initialisation scheme forwarded to ``shared_mlp``
    :param critic_prev: critic-only layer sizes before the trunk
    :param actor_prev: actor-only layer sizes before the trunk
    """

    def __init__(
        self,
        state_dim,
        action_dim,
        shared_layers,
        critic_post,
        actor_post,
        val_type="V",
        weight_init="xavier_uniform",
        activation_func="relu",
        critic_prev=None,
        actor_prev=None,
    ):
        super(SharedActorCritic, self).__init__()
        # Avoid mutable default arguments (original used `[]`); also wrap in
        # list() so tuples are accepted uniformly (the original missed the
        # wrap on one branch: `[state_dim] + critic_prev`).
        critic_prev = [] if critic_prev is None else list(critic_prev)
        actor_prev = [] if actor_prev is None else list(actor_prev)

        if actor_prev and critic_prev:
            actor_prev = [state_dim] + actor_prev
            if val_type == "Qsa":
                # Q(s, a) critics consume the state concatenated with the action.
                critic_prev = [state_dim + action_dim] + critic_prev
            else:
                critic_prev = [state_dim] + critic_prev
        else:
            shared_layers = [state_dim] + list(shared_layers)

        if val_type in ("V", "Qsa"):
            critic_post = list(critic_post) + [1]
        elif val_type == "Qs":
            critic_post = list(critic_post) + [action_dim]
        else:
            raise NotImplementedError

        actor_post = list(actor_post) + [action_dim]
        # NOTE(review): `shared_mlp` is not among this module's visible imports
        # (only `cnn` and `mlp` are imported from genrl.utils.utils) — confirm
        # it is exported there and imported at the top of this file.
        self.critic, self.actor = shared_mlp(
            critic_prev,
            actor_prev,
            shared_layers,
            critic_post,
            actor_post,
            weight_init,
            activation_func,
            False,
        )
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def get_params(self):
        """Return ``(actor_params, critic_params)`` parameter iterators."""
        return self.actor.parameters(), self.critic.parameters()

    def forward(self, state_critic, state_action):
        """Run the critic when ``state_critic`` is given, else the actor.

        Returns ``None`` when both inputs are ``None``.
        """
        if state_critic is not None:
            return self.critic(state_critic)
        if state_action is not None:
            return self.actor(state_action)

    def get_action(self, state, deterministic=False):
        """Choose a discrete action from the actor's logits.

        :param state: input state tensor
        :param deterministic: take the argmax instead of sampling
        :returns: ``(action_index, Categorical distribution)``
        """
        logits = self.forward(None, state)
        probs = nn.Softmax(dim=-1)(logits)
        dist = Categorical(probs)
        if deterministic:
            # NOTE(review): argmax path returns a float tensor while the
            # sampled path returns a long tensor — confirm callers accept both.
            index = torch.argmax(probs, dim=-1).unsqueeze(-1).float()
        else:
            index = dist.sample()
        # Removed stray debug statement `print(index.shape)` from the original.
        return index, dist

    def get_value(self, state):
        """Return the critic's value estimate for ``state``."""
        return self.forward(state, None)


class MultiAgentActor(MlpPolicy):
    """MLP policy network for a single agent in a multi-agent setup.

    :param state_dim: observation space of the agent
    :param action_dim: action space of the agent
    :param hidden: sizes of the hidden layers
    :param discrete: whether the action space is discrete
    """

    def __init__(
        self,
        state_dim: spaces.Space,
        action_dim: spaces.Space,
        hidden: Tuple = (32, 32),
        discrete: bool = True,
        **kwargs,
    ):
        # BUGFIX: the original passed `discrete ** kwargs` (missing comma),
        # which raises a TypeError (bool ** dict) instead of forwarding kwargs.
        super(MultiAgentActor, self).__init__(
            state_dim, action_dim, hidden, discrete, **kwargs
        )

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, state):
        """Return raw action logits for ``state``."""
        return self.model(state)

    def get_action(self, state, deterministic=False):
        """Choose an action: argmax of the probabilities when deterministic,
        otherwise a sample from the categorical distribution.

        BUGFIX: the original named the softmax output `dist` and the
        Categorical `probs`, then called `torch.argmax(probs)` on the
        distribution object, which fails on the deterministic path.

        NOTE(review): softmax over dim=0 assumes an unbatched 1-D logits
        vector — confirm for batched callers.
        """
        logits = self.forward(state)

        probs = F.softmax(logits, dim=0)
        dist = Categorical(probs)
        if deterministic:
            index = torch.argmax(probs)
        else:
            index = dist.sample().cpu().detach().item()
        return index


class MultiAgentCritic(MlpValue):
    """MLP value network for a single agent in a multi-agent setup.

    :param state_dim: observation space of the agent
    :param action_dim: action space of the agent
    :param fc_layers: sizes of the hidden layers
    :param val_type: value-function type ("V", "Qs", "Qsa", ...)
    """

    def __init__(
        self,
        state_dim: spaces.Space,
        action_dim: spaces.Space,
        fc_layers: Tuple = (32, 32),
        val_type: str = "V",
        **kwargs,
    ):
        super(MultiAgentCritic, self).__init__(
            state_dim, action_dim, fc_layers, val_type, **kwargs
        )
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def forward(self, state):
        """Map ``state`` through the underlying MLP to a value estimate."""
        return self.model(state)

    def get_value(self, state):
        """Alias for :meth:`forward`: return the value of ``state``."""
        return self.forward(state)


actor_critic_registry = {
"mlp": MlpActorCritic,
"cnn": CNNActorCritic,
Expand Down
Loading