diff --git a/README.md b/README.md
index 3b1653c..66c13af 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,32 @@
 # PG Travel
 PyTorch implementation of Vanilla Policy Gradient, Truncated Natural Policy Gradient, Trust Region Policy Optimization, Proximal Policy Optimization
 
+# Environment
+We have trained PG agents in the following environments:
+* mujoco-py: [https://github.com/openai/mujoco-py](https://github.com/openai/mujoco-py)
+* Unity ML-Agents walker: [https://github.com/Unity-Technologies/ml-agents](https://github.com/Unity-Technologies/ml-agents)
+
+# Requirements
+* python == 3.6
+* numpy
+* pytorch == 0.4
+* mujoco-py
+* ml-agents
+
 # Train
-* **algorithm**: PG, NPG, TRPO, PPO
+## 1. mujoco-py
+* **algorithm**: PG, TNPG, TRPO, PPO
 * **env**: Ant-v2, HalfCheetah-v2, Hopper-v2, Humanoid-v2, HumanoidStandup-v2, InvertedPendulum-v2, Reacher-v2, Swimmer-v2, Walker2d-v2
 ~~~
-python train.py --algorithm "algorithm name" --env "environment name"
+python mujoco/train.py --algorithm "algorithm name" --env "environment name"
 ~~~
 
 # Reference
 This code is modified version of codes
 * [OpenAI Baseline](https://github.com/openai/baselines/tree/master/baselines/trpo_mpi)
 * [Pytorch implemetation of TRPO](https://github.com/ikostrikov/pytorch-trpo)
+
+
+# Trained Agent
+* hopper
+![image](/img/hopper.gif)
\ No newline at end of file
diff --git a/img/hopper.gif b/img/hopper.gif
new file mode 100644
index 0000000..e33a319
Binary files /dev/null and b/img/hopper.gif differ
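
For reference, a concrete invocation of the mujoco-py training command added above might look like the sketch below, using one of the listed algorithms and environments. The exact spelling the `--algorithm` flag expects (e.g. `PPO` vs. `ppo`) depends on the script's argument parsing, so treat this as illustrative.

~~~
python mujoco/train.py --algorithm PPO --env Hopper-v2
~~~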
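To illustrate what the PPO variant named in the README optimizes, here is a minimal, self-contained sketch of the clipped surrogate objective in PyTorch. This is not code from this repository; the function and variable names (`ppo_clip_loss`, `clip_eps`, and so on) are chosen here for illustration only.

~~~python
import torch

def ppo_clip_loss(new_log_probs, old_log_probs, advantages, clip_eps=0.2):
    """Clipped surrogate objective from the PPO paper, returned as a loss to minimize."""
    # Probability ratio r_t(theta) = pi_theta(a_t|s_t) / pi_theta_old(a_t|s_t)
    ratio = torch.exp(new_log_probs - old_log_probs)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    # PPO maximizes the elementwise minimum of the two terms; negate it to get a loss
    return -torch.min(unclipped, clipped).mean()

# Toy usage with random tensors standing in for a sampled batch
new_lp = torch.randn(64, requires_grad=True)
old_lp = new_lp.detach() + 0.1 * torch.randn(64)
advantages = torch.randn(64)
loss = ppo_clip_loss(new_lp, old_lp, advantages)
loss.backward()
~~~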