Fix for the infinite loop in Off Policy Agent tests #384

Open · wants to merge 12 commits into base: master
genrl/agents/__init__.py · 3 changes: 1 addition & 2 deletions
@@ -15,6 +15,7 @@
     NeuralNoiseSamplingAgent,
 )
 from genrl.agents.bandits.contextual.variational import VariationalAgent  # noqa
+from genrl.agents.bandits.multiarmed.base import MABAgent  # noqa
 from genrl.agents.bandits.multiarmed.bayesian import BayesianUCBMABAgent  # noqa
 from genrl.agents.bandits.multiarmed.bernoulli_mab import BernoulliMAB  # noqa
 from genrl.agents.bandits.multiarmed.epsgreedy import EpsGreedyMABAgent  # noqa
@@ -41,5 +42,3 @@
 from genrl.agents.deep.sac.sac import SAC  # noqa
 from genrl.agents.deep.td3.td3 import TD3  # noqa
 from genrl.agents.deep.vpg.vpg import VPG  # noqa
-
-from genrl.agents.bandits.multiarmed.base import MABAgent  # noqa; noqa; noqa
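The net effect of this file's change is only to move the MABAgent re-export into its alphabetical position instead of a stray trailing import. As a hedged sanity check (assuming a genrl install that includes this branch; the snippet below is illustrative and not part of the diff), the package-level import still resolves:

# Illustrative check, not part of the PR: MABAgent remains re-exported
# from the package root after the import re-ordering.
from genrl.agents import MABAgent  # noqa: F401

assert MABAgent.__module__ == "genrl.agents.bandits.multiarmed.base"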
genrl/trainers/offpolicy.py · 25 changes: 14 additions & 11 deletions
@@ -142,11 +142,12 @@ def train(self) -> None:
 
         self.training_rewards = []
         self.episodes = 0
+        self.timesteps = 0
 
-        for timestep in range(0, self.max_timesteps, self.env.n_envs):
-            self.agent.update_params_before_select_action(timestep)
+        while self.timesteps <= self.max_timesteps and self.episodes <= self.epochs:
+            self.agent.update_params_before_select_action(self.timesteps)
 
-            action = self.get_action(state, timestep)
+            action = self.get_action(state, self.timesteps)
             next_state, reward, done, info = self.env.step(action)
 
             if self.render:
@@ -164,20 +165,22 @@ def train(self) -> None:
                 self.noise_reset()
 
             if self.episodes % self.log_interval == 0:
-                self.log(timestep)
+                self.log(self.timesteps)
 
-            if self.episodes == self.epochs:
-                break
-
-            if timestep >= self.start_update and timestep % self.update_interval == 0:
+            if (
+                self.timesteps >= self.start_update
+                and self.timesteps % self.update_interval == 0
+            ):
                 self.agent.update_params(self.update_interval)
 
             if (
-                timestep >= self.start_update
+                self.timesteps >= self.start_update
                 and self.save_interval != 0
-                and timestep % self.save_interval == 0
+                and self.timesteps % self.save_interval == 0
             ):
-                self.save(timestep)
+                self.save(self.timesteps)
+
+            self.timesteps += self.env.n_envs
 
         self.env.close()
         self.logger.close()
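For readers skimming the diff, here is a minimal, self-contained sketch of the loop shape this hunk introduces. It is not genrl's actual OffPolicyTrainer API; the names (max_timesteps, epochs, n_envs, timesteps, episodes) simply mirror the attributes used above, and the episode bookkeeping is reduced to a placeholder. The point is that the while condition bounds training by both the timestep budget and the episode budget, so train() returns as soon as either is exhausted, rather than relying on the exact-equality `episodes == epochs` break the old loop used.

# Minimal sketch (illustrative only, not genrl's trainer) of the loop
# structure after this change. Attribute names mirror the diff above.
class LoopSketch:
    def __init__(self, max_timesteps: int, epochs: int, n_envs: int):
        self.max_timesteps = max_timesteps
        self.epochs = epochs
        self.n_envs = n_envs
        self.timesteps = 0
        self.episodes = 0

    def train(self) -> None:
        # Bounded by both budgets: stops when either is exhausted.
        while self.timesteps <= self.max_timesteps and self.episodes <= self.epochs:
            # ... select action, step the vectorized env, log, update ...
            self.episodes += 2               # placeholder: several envs may finish at once
            self.timesteps += self.n_envs    # mirrors `self.timesteps += self.env.n_envs`


# Even though `episodes` never equals `epochs` exactly here, train() still terminates.
LoopSketch(max_timesteps=10_000, epochs=5, n_envs=2).train()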
tests/test_agents/test_bandit/__init__.py · 4 changes: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 from tests.test_agents.test_bandit.test_cb_agents import TestCBAgent  # noqa
 from tests.test_agents.test_bandit.test_data_bandits import TestDataBandit  # noqa
 from tests.test_agents.test_bandit.test_mab_agents import TestMABAgent  # noqa
-from tests.test_agents.test_bandit.test_multi_armed_bandits import (
-    TestMultiArmedBandit,  # noqa
+from tests.test_agents.test_bandit.test_multi_armed_bandits import (  # noqa
+    TestMultiArmedBandit,
 )