Compatibility update, release 0.5.3
yannbouteiller committed Oct 10, 2023
1 parent e325488 commit e400412
Showing 6 changed files with 22 additions and 21 deletions.
readme/tuto_library.md (3 additions, 3 deletions)

@@ -1161,11 +1161,11 @@ Note that `train()` returns a python dictionary in which you can store the metrics
                 p_targ.data.mul_(self.polyak)
                 p_targ.data.add_((1 - self.polyak) * p.data)
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
         ret_dict["entropy_coef"] = alpha_t.item()
         return ret_dict  # dictionary of metrics to be logged
 ```
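
The recurring change in this release: metrics returned by `train()` are converted from detached 0-dim tensors to plain Python floats via `.item()`. A minimal standalone sketch of the difference (not tmrl code):

```python
import torch

loss = (torch.randn(8, requires_grad=True) ** 2).mean()  # stand-in for a loss

as_tensor = loss.detach()        # 0-dim torch.Tensor, still tied to torch (and its device)
as_float = loss.detach().item()  # plain Python float, cheap to pickle and log

print(type(as_tensor))  # <class 'torch.Tensor'>
print(type(as_float))   # <class 'float'>
```

Returning floats means the metrics dictionary can be serialized and shipped to a logger such as wandb without dragging tensors (possibly CUDA-resident) along.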
setup.py (5 additions, 4 deletions)

@@ -100,7 +100,7 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 install_req = [
     'numpy',
-    'torch',
+    'torch>=2.0.0',
     'pandas',
     'gymnasium',
     'rtgym>=0.9',
@@ -111,7 +111,8 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
     'keyboard',
     'pyautogui',
     'pyinstrument',
-    'tlspyo>=0.2.5'
+    'tlspyo>=0.2.5',
+    'chardet'  # requests dependency
 ]
 
 if platform.system() == "Windows":
@@ -127,13 +128,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.5.2',
+    version='0.5.3',
     description='Network-based framework for real-time robot learning',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, robot learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.5.2.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.5.3.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='[email protected], [email protected]',
     license='MIT',
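
The new `torch>=2.0.0` pin matches the `untyped_storage()` call introduced in `tmrl/custom/utils/nn.py` below, which only exists in PyTorch 2.x. A quick way to check that an installed environment satisfies the pin (a sketch, assuming the standard `packaging` helper is available):

```python
import torch
from packaging.version import Version

# Strip any local suffix such as "+cu118" before comparing.
installed = Version(torch.__version__.split("+")[0])
assert installed >= Version("2.0.0"), f"torch {installed} is too old for tmrl 0.5.3"
```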
tmrl/custom/custom_algorithms.py (8 additions, 8 deletions)

@@ -157,8 +157,8 @@ def train(self, batch):
 
         if not cfg.DEBUG_MODE:
             ret_dict = dict(
-                loss_actor=loss_pi.detach(),
-                loss_critic=loss_q.detach(),
+                loss_actor=loss_pi.detach().item(),
+                loss_critic=loss_q.detach().item(),
             )
         else:
             q1_o2_a2 = self.model.q1(o2, a2)
@@ -182,8 +182,8 @@ def train(self, batch):
             diff_q2_backup_r = (q2 - backup + r).detach()
 
             ret_dict = dict(
-                loss_actor=loss_pi.detach(),
-                loss_critic=loss_q.detach(),
+                loss_actor=loss_pi.detach().item(),
+                loss_critic=loss_q.detach().item(),
                 # debug:
                 debug_log_pi=logp_pi.detach().mean(),
                 debug_log_pi_std=logp_pi.detach().std(),
@@ -248,7 +248,7 @@ def train(self, batch):
             )
 
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
 
         return ret_dict
@@ -375,12 +375,12 @@ def train(self, batch):
         if update_policy:
             self.loss_pi = loss_pi.detach()
         ret_dict = dict(
-            loss_actor=self.loss_pi,
-            loss_critic=loss_q.detach(),
+            loss_actor=self.loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
 
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
             ret_dict["entropy_coef"] = alpha_t.item()
 
         return ret_dict
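
The last hunk also touches a cached metric: when `update_policy` is false, the previously stored `self.loss_pi` is reported again, so the conversion to a float has to happen at logging time rather than when the loss is computed. A minimal sketch of that pattern, with hypothetical stand-in names:

```python
import torch

class DelayedPolicyTrainer:
    """Stand-in for an algorithm that updates its actor less often than its critic."""

    def __init__(self):
        self.loss_pi = torch.tensor(0.0)  # cached actor loss, kept as a tensor

    def train(self, update_policy: bool) -> dict:
        if update_policy:
            loss_pi = (torch.randn(4) ** 2).mean()  # stand-in for the real actor loss
            self.loss_pi = loss_pi.detach()
        # .detach() on an already-detached tensor is a no-op, so this is safe
        # whether or not the cache was refreshed this step:
        return dict(loss_actor=self.loss_pi.detach().item())
```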
tmrl/custom/utils/nn.py (1 addition, 1 deletion)

@@ -41,7 +41,7 @@ def copy_shared(model_a):
     for key in sda:
         a, b = sda[key], sdb[key]
         b.data = a.data  # strangely this will not make a.data and b.data the same object but their underlying data_ptr will be the same
-        assert b.storage().data_ptr() == a.storage().data_ptr()
+        assert b.untyped_storage().data_ptr() == a.untyped_storage().data_ptr()
     return model_b
 
 
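
PyTorch 2.0 deprecated `Tensor.storage()` in favor of `Tensor.untyped_storage()`; both return an object exposing `data_ptr()`, so the aliasing assertion keeps the same meaning. A minimal sketch, assuming PyTorch >= 2.0:

```python
import torch

a = torch.zeros(3)
b = torch.empty(3)
b.data = a.data  # b now aliases a's underlying buffer

# Equal data pointers prove the two tensors share memory:
assert b.untyped_storage().data_ptr() == a.untyped_storage().data_ptr()

b[0] = 1.0
assert a[0].item() == 1.0  # writes through b are visible through a
```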
tmrl/tuto/competition/custom_actor_module.py (2 additions, 2 deletions)

@@ -781,8 +781,8 @@ def train(self, batch):
 
         # TMRL enables us to log training metrics to wandb:
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
         return ret_dict
 
tmrl/tuto/tuto.py (3 additions, 3 deletions)

@@ -551,11 +551,11 @@ def train(self, batch):
                 p_targ.data.mul_(self.polyak)
                 p_targ.data.add_((1 - self.polyak) * p.data)
         ret_dict = dict(
-            loss_actor=loss_pi.detach(),
-            loss_critic=loss_q.detach(),
+            loss_actor=loss_pi.detach().item(),
+            loss_critic=loss_q.detach().item(),
         )
         if self.learn_entropy_coef:
-            ret_dict["loss_entropy_coef"] = loss_alpha.detach()
+            ret_dict["loss_entropy_coef"] = loss_alpha.detach().item()
         ret_dict["entropy_coef"] = alpha_t.item()
         return ret_dict
 
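
For context, the unchanged lines around the tutorial's metrics dictionary implement Polyak averaging of the target network. Extracted as a standalone helper (a sketch, not tmrl's API):

```python
import torch

def polyak_update(net: torch.nn.Module, net_targ: torch.nn.Module, polyak: float = 0.995):
    """Soft update: theta_targ <- polyak * theta_targ + (1 - polyak) * theta."""
    with torch.no_grad():
        for p, p_targ in zip(net.parameters(), net_targ.parameters()):
            p_targ.data.mul_(polyak)
            p_targ.data.add_((1 - polyak) * p.data)
```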
