[WIP] Adding MultiAgent Utilities #323

Status: Open. Wants to merge 46 commits into base: master.

Commits (46):
1d49049  Single actor critic shared params (hades-rp2010, Sep 1, 2020)
ef4a179  Shared layers for multi ACs (hades-rp2010, Sep 1, 2020)
2ecd086  Merge branch 'master' of https://github.com/SforAiDl/genrl (hades-rp2010, Sep 1, 2020)
53450a8  Fix lint errors (1) (hades-rp2010, Sep 1, 2020)
274aff9  Fixed tests (hades-rp2010, Sep 1, 2020)
38f95f0  Changes to dicstrings and classes (hades-rp2010, Sep 2, 2020)
0927001  adding MultiAgentBuffer (AdityaKapoor74, Sep 3, 2020)
daa8b2a  shared mlp (AdityaKapoor74, Sep 3, 2020)
44db72e  adding changes (AdityaKapoor74, Sep 3, 2020)
4ef8f48  new mlp for maddpg (AdityaKapoor74, Sep 3, 2020)
d8cf1a9  adding environment loader (AdityaKapoor74, Sep 3, 2020)
8d2cf06  Adding Actor and Critic classes (AdityaKapoor74, Sep 3, 2020)
1365585  adding new functionalities (AdityaKapoor74, Sep 3, 2020)
5067e42  minor changes (AdityaKapoor74, Sep 3, 2020)
6f0563e  added return statement to mlp_ (AdityaKapoor74, Sep 3, 2020)
5061abe  rectifying (AdityaKapoor74, Sep 4, 2020)
e6a378c  rectifying 2 (AdityaKapoor74, Sep 4, 2020)
915d19d  rectifying 3 (AdityaKapoor74, Sep 4, 2020)
b0b5025  adding test for mlp_concat (AdityaKapoor74, Sep 4, 2020)
8cc732b  adding test for mlp_concat (AdityaKapoor74, Sep 4, 2020)
b8f7f6a  fixing errors (AdityaKapoor74, Sep 4, 2020)
e50e230  adding docstring (AdityaKapoor74, Sep 4, 2020)
835819e  Renaming Multi -> Two and comments (hades-rp2010, Sep 4, 2020)
793c045  changing names (AdityaKapoor74, Sep 5, 2020)
2635fd5  changing names (AdityaKapoor74, Sep 5, 2020)
cd87506  Merge branch 'master' of https://github.com/AdityaKapoor74/genrl into… (hades-rp2010, Sep 5, 2020)
65b6520  Shared params for single ACs (hades-rp2010, Sep 5, 2020)
3d01b85  Merge branch 'multiagentutils' into shared (hades-rp2010, Sep 5, 2020)
a62c100  Merge pull request #1 from hades-rp2010/shared (AdityaKapoor74, Oct 4, 2020)
841ff66  Merge branch 'master' into multiagentutils (AdityaKapoor74, Oct 4, 2020)
2be8df5  rollout buffer for MA (AdityaKapoor74, Oct 4, 2020)
ac9b5a8  Merge branch 'multiagentutils' of https://github.com/AdityaKapoor74/g… (AdityaKapoor74, Oct 4, 2020)
10282f0  Update genrl/utils/utils.py (AdityaKapoor74, Oct 4, 2020)
79b531b  Update genrl/agents/deep/ppo1/ppo1.py (AdityaKapoor74, Oct 4, 2020)
a3885a0  Update genrl/core/actor_critic.py (AdityaKapoor74, Oct 4, 2020)
e3dc677  Update genrl/core/actor_critic.py (AdityaKapoor74, Oct 4, 2020)
4c2ad51  Update genrl/core/actor_critic.py (AdityaKapoor74, Oct 4, 2020)
43554e4  Update genrl/core/actor_critic.py (AdityaKapoor74, Oct 4, 2020)
6828e93  removing SharedAC class (AdityaKapoor74, Oct 4, 2020)
eac920c  removing SharedAC class (AdityaKapoor74, Oct 4, 2020)
194065f  rectify (AdityaKapoor74, Oct 4, 2020)
c0198bc  rectify (AdityaKapoor74, Oct 4, 2020)
fe40835  rectify (AdityaKapoor74, Oct 4, 2020)
a50204a  rectifying (AdityaKapoor74, Oct 4, 2020)
4a3cd74  removing unecessary code (AdityaKapoor74, Oct 4, 2020)
602a7b5  removing unecessary code (AdityaKapoor74, Oct 4, 2020)
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
@@ -1,11 +1,12 @@
 repos:
   - repo: https://github.com/asottile/seed-isort-config
-    rev: v1.9.4
+    rev: v2.2.0
     hooks:
       - id: seed-isort-config
         args: [--exclude=^((examples|docs)/.*)$]
 
   - repo: https://github.com/timothycrosley/isort
     rev: 5.4.2
     hooks:
       - id: isort
@@ -14,7 +15,7 @@ repos:
     rev: 20.8b1
     hooks:
       - id: black
-        language_version: python3.7
+        language_version: python3
 
   - repo: https://gitlab.com/pycqa/flake8
     rev: 3.8.3
2 changes: 2 additions & 0 deletions genrl/agents/deep/a2c/a2c.py
@@ -70,10 +70,12 @@ def _create_model(self) -> None:
         state_dim, action_dim, discrete, action_lim = get_env_properties(
             self.env, self.network
         )
+
         if isinstance(self.network, str):
             arch_type = self.network
             if self.shared_layers is not None:
                 arch_type += "s"
+
             self.ac = get_model("ac", arch_type)(
                 state_dim,
                 action_dim,
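The hunk itself only adds spacing, but it sits on the mechanism this PR extends: a string network type plus a non-None shared_layers appends "s" to the architecture key, so get_model("ac", "mlps") resolves to the shared-parameter actor-critic. A minimal usage sketch (an assumption, not code from this PR; it presumes shared_layers is accepted as an agent keyword argument, as the diff's self.shared_layers suggests):

from genrl.agents import A2C
from genrl.environments import VectorEnv

env = VectorEnv("CartPole-v0")

# "mlp" + shared_layers -> arch_type "mlps": the actor and critic share
# their first layers instead of being two fully separate networks.
agent = A2C("mlp", env, shared_layers=(32, 32))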
13 changes: 13 additions & 0 deletions genrl/agents/deep/ddpg/ddpg.py
@@ -66,6 +66,7 @@ def _create_model(self) -> None:
             arch_type = self.network
             if self.shared_layers is not None:
                 arch_type += "s"
+
             self.ac = get_model("ac", arch_type)(
                 state_dim,
                 action_dim,
@@ -75,6 +76,18 @@ def _create_model(self) -> None:
                 "Qsa",
                 False,
             ).to(self.device)
+        elif isinstance(self.network, str) and self.shared_layers is not None:
+            arch_type = self.network + "s"
+            self.ac = get_model("ac", arch_type)(
+                state_dim,
+                action_dim,
+                critic_prev=self.critic_prev,
+                actor_prev=self.actor_prev,
+                shared_layers=self.shared_layers,
+                critic_post=self.value_layers,
+                actor_post=self.policy_layers,
+                val_type="Qsa",
+            ).to(self.device)
         else:
             self.ac = self.network
 
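The keyword arguments in the new branch (critic_prev, actor_prev, shared_layers, critic_post, actor_post) imply a three-stage layout: per-branch input layers, a shared trunk, and separate output heads. A hedged sketch of that structure, with illustrative names rather than genrl's actual classes:

import torch
import torch.nn as nn


def mlp(sizes):
    # ReLU MLP over consecutive layer sizes, e.g. mlp([4, 32, 32]).
    layers = []
    for i in range(len(sizes) - 1):
        layers += [nn.Linear(sizes[i], sizes[i + 1]), nn.ReLU()]
    return nn.Sequential(*layers)


class SharedActorCritic(nn.Module):
    # Illustrative only: "prev" layers embed each branch's input, a common
    # trunk processes the embedding, and "post" heads emit the action and the
    # Q-value (val_type="Qsa": the critic sees state and action together).
    # Assumes actor_prev[-1] == critic_prev[-1] == shared_layers[0].
    def __init__(self, state_dim, action_dim, actor_prev, critic_prev,
                 shared_layers, actor_post, critic_post):
        super().__init__()
        self.actor_prev = mlp([state_dim] + list(actor_prev))
        self.critic_prev = mlp([state_dim + action_dim] + list(critic_prev))
        self.shared = mlp(list(shared_layers))
        self.actor_post = nn.Sequential(
            mlp([shared_layers[-1]] + list(actor_post)),
            nn.Linear(actor_post[-1], action_dim),
        )
        self.critic_post = nn.Sequential(
            mlp([shared_layers[-1]] + list(critic_post)),
            nn.Linear(critic_post[-1], 1),
        )

    def get_action(self, state):
        return self.actor_post(self.shared(self.actor_prev(state)))

    def get_value(self, state, action):
        # Q(s, a): concatenate state and action before the critic branch.
        return self.critic_post(
            self.shared(self.critic_prev(torch.cat([state, action], dim=-1)))
        )


# Dimension check: actor branch 4 -> 32 -> trunk (32, 64) -> 32 -> 2 actions.
ac = SharedActorCritic(
    state_dim=4, action_dim=2,
    actor_prev=[32], critic_prev=[32],
    shared_layers=[32, 64], actor_post=[32], critic_post=[32],
)
print(ac.get_action(torch.randn(4)).shape)                 # torch.Size([2])
print(ac.get_value(torch.randn(4), torch.randn(2)).shape)  # torch.Size([1])

The dimension contract is the main design constraint here: each branch's "prev" stack must end at the trunk's input width.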
11 changes: 11 additions & 0 deletions genrl/agents/deep/ppo1/ppo1.py
@@ -85,6 +85,17 @@ def _create_model(self):
             action_lim=action_lim,
             activation=self.activation,
         ).to(self.device)
+        elif isinstance(self.network, str) and self.shared_layers is not None:
+            arch_type = self.network + "s"
+            self.ac = get_model("ac", arch_type)(
+                state_dim,
+                action_dim,
+                critic_prev=self.critic_prev,
+                actor_prev=self.actor_prev,
+                shared_layers=self.shared_layers,
+                critic_post=self.value_layers,
+                actor_post=self.policy_layers,
+            ).to(self.device)
         else:
             self.ac = self.network.to(self.device)
 
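The PPO1 branch mirrors the DDPG one, minus val_type="Qsa": PPO1's shared critic estimates a state value rather than a state-action value. In terms of the illustrative SharedActorCritic sketch above (again an assumption, not genrl's code), the only structural difference is the critic's input:

class SharedActorCriticV(SharedActorCritic):
    # Illustrative state-value variant for PPO1: V(s) instead of Q(s, a).
    def __init__(self, state_dim, action_dim, actor_prev, critic_prev,
                 shared_layers, actor_post, critic_post):
        super().__init__(state_dim, action_dim, actor_prev, critic_prev,
                         shared_layers, actor_post, critic_post)
        # Re-point the critic's input layers at the state alone.
        self.critic_prev = mlp([state_dim] + list(critic_prev))

    def get_value(self, state):
        return self.critic_post(self.shared(self.critic_prev(state)))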
127 changes: 121 additions & 6 deletions genrl/core/buffers.py
@@ -117,15 +117,15 @@ def sample(
         ]
     ):
         """
         (Returns randomly sampled memories from replay memory along with their
         respective indices and weights)
 
         :param batch_size: Number of samples per batch
         :param beta: (Bias exponent used to correct
             Importance Sampling (IS) weights)
         :type batch_size: int
         :type beta: float
         :returns: (Tuple containing `states`, `actions`, `next_states`,
             `rewards`, `dones`, `indices` and `weights`)
         """
         if beta is None:
@@ -181,3 +181,118 @@ def __len__(self) -> int:
     @property
     def pos(self):
         return len(self.buffer)


+class MultiAgentReplayBuffer:
+    """
+    Implements the basic Experience Replay Mechanism for MultiAgents
+    by feeding in global states, global actions, global rewards,
+    global next_states, global dones
+
+    :param capacity: Size of the replay buffer
+    :type capacity: int
+    :param num_agents: Number of agents in the environment
+    :type num_agents: int
+    """
+
+    def __init__(self, num_agents: int, capacity: int):
+        """
+        Initialising the buffer
+
+        :param num_agents: number of agents in the environment
+        :type num_agents: int
+        :param capacity: Max buffer size
+        :type capacity: int
+        """
+        self.capacity = capacity
+        self.num_agents = num_agents
+        self.buffer = deque(maxlen=self.capacity)
+
+    def push(self, inp: Tuple) -> None:
+        """
+        Adds new experience to buffer
+
+        :param inp: (Tuple containing `state`, `action`, `reward`,
+            `next_state` and `done`)
+        :type inp: tuple
+        :returns: None
+        """
+        self.buffer.append(inp)
+
+    def sample(self, batch_size):
+        """
+        Returns randomly sampled experiences from replay memory
+
+        :param batch_size: Number of samples per batch
+        :type batch_size: int
+        :returns: (Tuple composing of `indiv_obs_batch`,
+            `indiv_action_batch`, `indiv_reward_batch`, `indiv_next_obs_batch`,
+            `global_state_batch`, `global_actions_batch`, `global_next_state_batch`,
+            `done_batch`)
+        """
+        indiv_obs_batch = [
+            [] for _ in range(self.num_agents)
+        ]  # [[states of agent 1], ..., [states of agent n]]
+        indiv_action_batch = [
+            [] for _ in range(self.num_agents)
+        ]  # [[actions of agent 1], ..., [actions of agent n]]
+        indiv_reward_batch = [[] for _ in range(self.num_agents)]
+        indiv_next_obs_batch = [[] for _ in range(self.num_agents)]
+
+        global_state_batch = []
+        global_next_state_batch = []
+        global_actions_batch = []
+        done_batch = []
+
+        batch = random.sample(self.buffer, batch_size)
+
+        for experience in batch:
+            state, action, reward, next_state, done = experience
+
+            for i in range(self.num_agents):
+                indiv_obs_batch[i].append(state[i])
+                indiv_action_batch[i].append(action[i])
+                indiv_reward_batch[i].append(reward[i])
+                indiv_next_obs_batch[i].append(next_state[i])
+
+            global_state_batch.append(torch.cat(state))
+            global_actions_batch.append(torch.cat(action))
+            global_next_state_batch.append(torch.cat(next_state))
+            done_batch.append(done)
+
+        global_state_batch = torch.stack(global_state_batch)
+        global_actions_batch = torch.stack(global_actions_batch)
+        global_next_state_batch = torch.stack(global_next_state_batch)
+        done_batch = torch.stack(done_batch)
+        indiv_obs_batch = torch.stack(
+            [torch.FloatTensor(obs) for obs in indiv_obs_batch]
+        )
+        indiv_action_batch = torch.stack(
+            [torch.FloatTensor(act) for act in indiv_action_batch]
+        )
+        indiv_reward_batch = torch.stack(
+            [torch.FloatTensor(rew) for rew in indiv_reward_batch]
+        )
+        indiv_next_obs_batch = torch.stack(
+            [torch.FloatTensor(next_obs) for next_obs in indiv_next_obs_batch]
+        )
+
+        return (
+            indiv_obs_batch,
+            indiv_action_batch,
+            indiv_reward_batch,
+            indiv_next_obs_batch,
+            global_state_batch,
+            global_actions_batch,
+            global_next_state_batch,
+            done_batch,
+        )
+
+    def __len__(self):
+        """
+        Gives number of experiences in buffer currently
+
+        :returns: Length of replay memory
+        """
+        return len(self.buffer)
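A quick usage sketch for the new buffer (an assumption, not part of the PR). Per-agent entries are stored as tensors so that sample() can torch.cat them into global batches; one-element tensors are used here because sample()'s torch.FloatTensor conversion only handles entries that convert cleanly to nested numbers, so larger per-agent tensors may need that conversion reworked:

import torch

from genrl.core.buffers import MultiAgentReplayBuffer

num_agents = 2
buffer = MultiAgentReplayBuffer(num_agents, capacity=100)

for _ in range(8):
    # One entry per agent; sample() concatenates these into the global batch.
    state = [torch.randn(1) for _ in range(num_agents)]
    action = [torch.randn(1) for _ in range(num_agents)]
    reward = [1.0 for _ in range(num_agents)]  # plain floats; used per-agent only
    next_state = [torch.randn(1) for _ in range(num_agents)]
    done = torch.tensor([0.0])  # a tensor, so torch.stack(done_batch) works
    buffer.push((state, action, reward, next_state, done))

(
    indiv_obs, indiv_actions, indiv_rewards, indiv_next_obs,
    global_state, global_actions, global_next_state, dones,
) = buffer.sample(batch_size=4)

print(global_state.shape)  # torch.Size([4, 2]): per-sample concatenation of agents
print(indiv_obs.shape)     # torch.Size([2, 4, 1]): per-agent stacks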