From 615e7511b2ff8426caa54b8c5c801ffd2725b9b5 Mon Sep 17 00:00:00 2001 From: Christopher Hesse Date: Wed, 3 Jun 2020 08:37:23 -0700 Subject: [PATCH] new version (#43) --- CHANGES.md | 9 + README.md | 102 ++- environment.yml | 7 +- procgen-build/procgen_build/build_package.py | 21 +- procgen-build/procgen_build/common.py | 7 +- procgen-build/procgen_build/dev_test.py | 38 +- procgen-build/setup.py | 2 + procgen/CMakeLists.txt | 6 +- procgen/__init__.py | 4 +- procgen/build.py | 100 ++- procgen/env.py | 109 ++- procgen/env_test.py | 50 +- .../{random_agent.py => random_agent_gym.py} | 2 +- procgen/examples/random_agent_gym3.py | 15 + procgen/gym_registration.py | 29 +- procgen/interactive.py | 69 -- procgen/interactive_base.py | 348 -------- procgen/libenv.h | 199 ----- procgen/libenv.py | 560 ------------ procgen/scalarize.py | 88 -- procgen/src/basic-abstract-game.cpp | 219 ++++- procgen/src/basic-abstract-game.h | 21 +- procgen/src/buffer.h | 132 +++ procgen/src/cpp-utils.h | 10 + procgen/src/entity.cpp | 94 +- procgen/src/entity.h | 64 +- procgen/src/game.cpp | 136 ++- procgen/src/game.h | 24 +- procgen/src/games/bigfish.cpp | 24 +- procgen/src/games/bossfight.cpp | 145 +++- procgen/src/games/caveflyer.cpp | 9 +- procgen/src/games/chaser.cpp | 47 +- procgen/src/games/climber.cpp | 42 +- procgen/src/games/coinrun.cpp | 63 +- procgen/src/games/dodgeball.cpp | 71 +- procgen/src/games/fruitbot.cpp | 50 +- procgen/src/games/heist.cpp | 32 +- procgen/src/games/jumper.cpp | 45 +- procgen/src/games/leaper.cpp | 38 +- procgen/src/games/maze.cpp | 23 +- procgen/src/games/miner.cpp | 20 +- procgen/src/games/ninja.cpp | 45 +- procgen/src/games/plunder.cpp | 79 +- procgen/src/games/starpilot.cpp | 263 +++--- procgen/src/grid.h | 15 +- procgen/src/object-ids.h | 21 +- procgen/src/randgen.cpp | 19 +- procgen/src/randgen.h | 8 +- procgen/src/resources.cpp | 802 +++++++++++++++++- procgen/src/resources.h | 4 +- procgen/src/vecgame.cpp | 342 ++++---- procgen/src/vecgame.h | 20 +- 
procgen/state_test.py | 129 +++ procgen/version.txt | 2 +- setup.py | 13 +- 55 files changed, 2848 insertions(+), 1988 deletions(-) rename procgen/examples/{random_agent.py => random_agent_gym.py} (76%) create mode 100644 procgen/examples/random_agent_gym3.py delete mode 100644 procgen/interactive.py delete mode 100644 procgen/interactive_base.py delete mode 100644 procgen/libenv.h delete mode 100644 procgen/libenv.py delete mode 100644 procgen/scalarize.py create mode 100644 procgen/src/buffer.h create mode 100644 procgen/state_test.py diff --git a/CHANGES.md b/CHANGES.md index 008920db..0f70bfec 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,14 @@ # Changelog +## 0.10.0 + +* add `set_state`, `get_state` methods to save/restore environment state +* new flags: `use_backgrounds`, `restrict_themes`, `use_monochrome_assets` +* switch to use `gym3` instead of `libenv` + `Scalarize`, `gym` and `baselines.VecEnv` interfaces are still available with the same names, the `gym3` environment is called `ProcgenGym3Env` +* zero initialize more member variables +* changed `info` dict to have more clear keys, `prev_level_complete` tells you if the level was complete on the previous timestep, since the `info` dict corresponds to the current timestep, and the current timestep is never on a complete level due to automatic resetting. Similarly, `prev_level_seed` is the level seed from the previous timestep. +* environment creation should be slightly faster + ## 0.9.5 * zero initialize member variables from base classes diff --git a/README.md b/README.md index bdae1ab1..44553c25 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ These environments are associated with the paper [Leveraging Procedural Generati Compared to [Gym Retro](https://github.com/openai/retro), these environments are: * Faster: Gym Retro environments are already fast, but Procgen environments can run >4x faster. 
-* Non-deterministic: Gym Retro environments are always the same, so you can memorize a sequence of actions that will get the highest reward. Procgen environments are randomized so this is not possible. +* Randomized: Gym Retro environments are always the same, so you can memorize a sequence of actions that will get the highest reward. Procgen environments are randomized so this is not possible. * Customizable: If you install from source, you can perform experiments where you change the environments, or build your own environments. The environment-specific code for each environment is often less than 300 lines. This is almost impossible with Gym Retro. Supported platforms: @@ -56,7 +56,7 @@ To try an environment out interactively: python -m procgen.interactive --env-name coinrun ``` -The keys are: left/right/up/down + q, w, e, a, s, d for the different (environment-dependent) actions. Your score is displayed as "episode_return" on the right. At the end of an episode, you can see your final "episode_return" as well as "level_completed" which will be `1` if you successfully completed the level. +The keys are: left/right/up/down + q, w, e, a, s, d for the different (environment-dependent) actions. Your score is displayed as "episode_return" in the lower left. At the end of an episode, you can see your final "episode_return" as well as "prev_level_complete" which will be `1` if you successfully completed the level. 
To create an instance of the [gym](https://github.com/openai/gym) environment: @@ -65,22 +65,20 @@ import gym env = gym.make("procgen:procgen-coinrun-v0") ``` -To create an instance of the vectorized environment: +To create an instance of the [gym3](https://github.com/openai/gym3) (vectorized) environment: ``` -from procgen import ProcgenEnv -venv = ProcgenEnv(num_envs=1, env_name="coinrun") +from procgen import ProcgenGym3Env +env = ProcgenGym3Env(num=1, env_name="coinrun") ``` -The environment uses the [`VecEnv`](https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_env.py#L29) interface from [`baselines`](https://github.com/openai/baselines), `baselines` is not a dependency of this library. - ### Docker A [`Dockerfile`](docker/Dockerfile) is included to demonstrate a minimal Docker-based setup that works for running random agent. ``` docker build docker --tag procgen -docker run --rm -it procgen python3 -m procgen.examples.random_agent +docker run --rm -it procgen python3 -m procgen.examples.random_agent_gym ``` ## Environments @@ -115,14 +113,18 @@ Here are the 16 environments: ## Environment Options * `env_name` - Name of environment, or comma-separate list of environment names to instantiate as each env in the VecEnv. -* `num_levels` - The number of unique levels that can be generated. Set to 0 to use unlimited levels. -* `start_level` - The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully specify the set of possible levels. -* `paint_vel_info` - Paint player velocity info in the top left corner. Only supported by certain games. -* `use_generated_assets` - Use randomly generated assets in place of human designed assets. -* `debug_mode` - A useful flag that's passed through to procgen envs. Use however you want during debugging. -* `center_agent` - Determines whether observations are centered on the agent or display the full level. Override at your own risk. 
-* `use_sequential_levels` - When you reach the end of a level, the episode is ended and a new level is selected. If `use_sequential_levels` is set to `True`, reaching the end of a level does not end the episode, and the seed for the new level is derived from the current level seed. If you combine this with `start_level=` and `num_levels=1`, you can have a single linear series of levels similar to a gym-retro or ALE game. -* `distribution_mode` - What variant of the levels to use, the options are `"easy", "hard", "extreme", "memory", "exploration"`. All games support `"easy"` and `"hard"`, while other options are game-specific. The default is `"hard"`. Switching to `"easy"` will reduce the number of timesteps required to solve each game and is useful for testing or when working with limited compute resources. +* `num_levels=0` - The number of unique levels that can be generated. Set to 0 to use unlimited levels. +* `start_level=0` - The lowest seed that will be used to generate levels. 'start_level' and 'num_levels' fully specify the set of possible levels. +* `paint_vel_info=False` - Paint player velocity info in the top left corner. Only supported by certain games. +* `use_generated_assets=False` - Use randomly generated assets in place of human designed assets. +* `debug=False` - Set to `True` to use the debug build if building from source. +* `debug_mode=0` - A useful flag that's passed through to procgen envs. Use however you want during debugging. +* `center_agent=True` - Determines whether observations are centered on the agent or display the full level. Override at your own risk. +* `use_sequential_levels=False` - When you reach the end of a level, the episode is ended and a new level is selected. If `use_sequential_levels` is set to `True`, reaching the end of a level does not end the episode, and the seed for the new level is derived from the current level seed. 
If you combine this with `start_level=` and `num_levels=1`, you can have a single linear series of levels similar to a gym-retro or ALE game. +* `distribution_mode="hard"` - What variant of the levels to use, the options are `"easy", "hard", "extreme", "memory", "exploration"`. All games support `"easy"` and `"hard"`, while other options are game-specific. The default is `"hard"`. Switching to `"easy"` will reduce the number of timesteps required to solve each game and is useful for testing or when working with limited compute resources. +* `use_backgrounds=True` - Normally games use human designed backgrounds, if this flag is set to `False`, games will use pure black backgrounds. +* `restrict_themes=False` - Some games select assets from multiple themes, if this flag is set to `True`, those games will only use a single theme. +* `use_monochrome_assets=False` - If set to `True`, games will use monochromatic rectangles instead of human designed assets. best used with `restrict_themes=True`. Here's how to set the options: @@ -131,19 +133,33 @@ import gym env = gym.make("procgen:procgen-coinrun-v0", start_level=0, num_levels=1) ``` -For the vectorized environment: +Since the gym environment is adapted from a gym3 environment, early calls to `reset()` are disallowed and the `render()` method does not do anything. To render the environment, pass `render=True`, which will set `render_human=True` to the environment and wrap it in a `gym3.ViewerWrapper`. 
+ +For the gym3 vectorized environment: + +``` +from procgen import ProcgenGym3Env +env = ProcgenGym3Env(num=1, env_name="coinrun", start_level=0, num_levels=1) +``` + +## Saving and loading the environment state + +If you are using the gym3 interface, you can save and load the environment state: ``` -from procgen import ProcgenEnv -venv = ProcgenEnv(num_envs=1, env_name="coinrun", start_level=0, num_levels=1) +from procgen import ProcgenGym3Env +env = ProcgenGym3Env(num=1, env_name="coinrun", start_level=0, num_levels=1) +states = env.callmethod("get_state") +env.callmethod("set_state", states) ``` +This returns a list of byte strings representing the state of each game in the vectorized environment. + ## Notes * You should depend on a specific version of this library (using `==`) for your experiments to ensure they are reproducible. You can get the current installed version with `pip show procgen`. * This library does not require or make use of GPUs. * While the library should be thread safe, each individual environment instance should only be used from a single thread. The library is not fork safe unless you set `num_threads=0`. Even if you do that, `Qt` is not guaranteed to be fork safe, so you should probably create the environment after forking or not use fork at all. -* Calling `reset()` early will not do anything, please re-create the environment if you want to reset it early. # Install from Source @@ -156,12 +172,12 @@ conda env update --name procgen --file environment.yml conda activate procgen pip install -e . 
# this should say "building procgen...done" -python -c "from procgen import ProcgenEnv; ProcgenEnv(num_envs=1, env_name='coinrun')" +python -c "from procgen import ProcgenGym3Env; ProcgenGym3Env(num=1, env_name='coinrun')" # this should create a window where you can play the coinrun environment python -m procgen.interactive ``` -The environment code is in C++ and is compiled into a shared library loaded by python using a C interface based on [`libenv`](https://github.com/cshesse/libenv). The C++ code uses [Qt](https://www.qt.io/) for drawing. +The environment code is in C++ and is compiled into a shared library loaded by python using the C interface provided by [`gym3.libenv`](https://github.com/openai/gym3). The C++ code uses [Qt](https://www.qt.io/) for drawing. # Create a new environment @@ -174,6 +190,48 @@ Once you have installed from source, you can customize an existing environment o This repo includes a travis configuration that will compile your environment and build python wheels for easy installation. In order to have this build more quickly by caching the Qt compilation, you will want to configure a GCS bucket in [common.py](https://github.com/openai/procgen/blob/master/procgen-build/procgen_build/common.py#L5) and [setup service account credentials](https://github.com/openai/procgen/blob/master/procgen-build/procgen_build/build_package.py#L41). +# Add information to the info dictionary + +To export game information from the C++ game code to Python, you can define a new `info_type`. `info_type`s appear in the `info` dict returned by the gym environment, or in `get_info()` from the gym3 environment. 
+ +To define a new one, add the following code to the `VecGame` constructor here: [vecgame.cpp](https://github.com/openai/procgen/blob/master/procgen/src/vecgame.cpp#L290) + +``` +{ + struct libenv_tensortype s; + strcpy(s.name, "heist_key_count"); + s.scalar_type = LIBENV_SCALAR_TYPE_DISCRETE; + s.dtype = LIBENV_DTYPE_INT32; + s.ndim = 0; + s.low.int32 = 0; + s.high.int32 = INT32_MAX; + info_types.push_back(s); +} +``` + +This lets the Python code know to expect a single integer and expose it in the `info` dict. + +After adding that, you can add the following code to [heist.cpp](https://github.com/openai/procgen/blob/master/procgen/src/games/heist.cpp#L93): + +``` +void observe() override { + Game::observe(); + int32_t key_count = 0; + for (const auto& has_key : has_keys) { + if (has_key) { + key_count++; + } + } + *(int32_t *)(info_bufs[info_name_to_offset.at("heist_key_count")]) = key_count; +} +``` + +This populates the `heist_key_count` info value each time the environment is observed. + +If you run the interactive script (making sure that you installed from source), the new keys should appear in the bottom left hand corner: + +`python -m procgen.interactive --env-name heist` + # Changelog See [CHANGES](CHANGES.md) for changes present in each release. 
diff --git a/environment.yml b/environment.yml index f0490965..1528adfe 100644 --- a/environment.yml +++ b/environment.yml @@ -9,10 +9,7 @@ dependencies: - qt=5.12.5 # conda-forge does not have 5.13.2 available - pip - pip: + - gym3==0.3.0 - numpy==1.17.2 - gym==0.15.3 - - filelock==3.0.10 - - cffi==1.13.2 - - pyglet==1.3.2 - - imageio==2.6.1 - - imageio-ffmpeg==0.3.0 \ No newline at end of file + - filelock==3.0.10 \ No newline at end of file diff --git a/procgen-build/procgen_build/build_package.py b/procgen-build/procgen_build/build_package.py index 7ad9574c..c367b938 100644 --- a/procgen-build/procgen_build/build_package.py +++ b/procgen-build/procgen_build/build_package.py @@ -2,6 +2,7 @@ from urllib.request import urlretrieve import os import subprocess as sp +import fnmatch import blobfile as bf @@ -37,6 +38,13 @@ def init_vsvars(): os.environ[k] = v +def get_var(pattern): + for key, value in os.environ.items(): + if fnmatch.fnmatch(key, pattern): + return os.environ[key] + return None + + def setup_google_credentials(): # brew install travis # travis login --org @@ -46,10 +54,15 @@ def setup_google_credentials(): # travis encrypt-file --org /tmp/key.json input_path = os.path.join(SCRIPT_DIR, "key.json.enc") output_path = os.path.join(os.getcwd(), "key.json") - if "encrypted_d853b3b05b79_key" not in os.environ: + for h in ["d853b3b05b79", "41b34d34b52c"]: + key = os.environ.get(f"encrypted_{h}_key") + iv = os.environ.get(f"encrypted_{h}_iv") + if key is not None: + break + if key is None: # being compiled on a fork return False - sp.run(["openssl", "aes-256-cbc", "-K", os.environ["encrypted_d853b3b05b79_key"], "-iv", os.environ["encrypted_d853b3b05b79_iv"], "-in", input_path, "-out", output_path, "-d"], check=True) + sp.run(["openssl", "aes-256-cbc", "-K", key, "-iv", iv, "-in", input_path, "-out", output_path, "-d"], check=True) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = output_path return True @@ -59,7 +72,7 @@ def main(): os.environ.update( { - "CIBW_BUILD": 
"cp36-macosx_10_6_intel cp37-macosx_10_6_intel cp38-macosx_10_9_x86_64 cp36-manylinux_x86_64 cp37-manylinux_x86_64 cp38-manylinux_x86_64 cp36-win_amd64 cp37-win_amd64 cp38-win_amd64", + "CIBW_BUILD": "cp36-macosx_x86_64 cp37-macosx_x86_64 cp38-macosx_x86_64 cp36-manylinux_x86_64 cp37-manylinux_x86_64 cp38-manylinux_x86_64 cp36-win_amd64 cp37-win_amd64 cp38-win_amd64", "CIBW_BEFORE_BUILD": "pip install -e procgen-build && python -u -m procgen_build.build_qt --output-dir /tmp/qt5", "CIBW_TEST_EXTRAS": "test", # the --pyargs option causes pytest to use the installed procgen wheel @@ -95,7 +108,7 @@ def main(): elif platform.system() == "Windows": init_vsvars() - run("pip install cibuildwheel==1.0.0") + run("pip install cibuildwheel==1.4.1") run("cibuildwheel --output-dir wheelhouse") if have_credentials: diff --git a/procgen-build/procgen_build/common.py b/procgen-build/procgen_build/common.py index 71ec5f58..aa35645a 100644 --- a/procgen-build/procgen_build/common.py +++ b/procgen-build/procgen_build/common.py @@ -1,14 +1,17 @@ import subprocess as sp import time +import shlex GCS_BUCKET = "openai-procgen" -def run(cmd, **kwargs): +def run(cmd, shell=True, **kwargs): print(f"RUN: {cmd}") start = time.time() - p = sp.run(cmd, shell=True, encoding="utf8", **kwargs) + if not shell: + cmd = shlex.split(cmd) + p = sp.run(cmd, shell=shell, encoding="utf8", **kwargs) print(f"ELAPSED: {time.time() - start}") if p.returncode != 0: print(f"cmd {cmd} failed") diff --git a/procgen-build/procgen_build/dev_test.py b/procgen-build/procgen_build/dev_test.py index cfe405c3..d8e784ea 100644 --- a/procgen-build/procgen_build/dev_test.py +++ b/procgen-build/procgen_build/dev_test.py @@ -19,28 +19,32 @@ def main(): if platform.system() == "Linux": apt_install(["mesa-common-dev"]) + installer_urls = { + "Linux": "https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh", + "Darwin": "https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-MacOSX-x86_64.sh", + "Windows": 
"https://repo.anaconda.com/miniconda/Miniconda3-4.7.12.1-Windows-x86_64.exe", + } + installer_url = installer_urls[platform.system()] + urlretrieve( + installer_url, + "miniconda-installer.exe" if platform.system() == "Windows" else "miniconda-installer.sh", + ) if platform.system() == "Windows": - # using the installer seems to hang so use chocolatey instead - run("choco install miniconda3 --version 4.7.12.1 --no-progress --yes") - os.environ["PATH"] = "C:\\tools\\miniconda3;C:\\tools\\miniconda3\\Library\\bin;C:\\tools\\miniconda3\\Scripts;" + os.environ["PATH"] + run("miniconda-installer.exe /S /D=c:\\miniconda3") + os.environ["PATH"] = "C:\\miniconda3;C:\\miniconda3\\Library\\bin;C:\\miniconda3\\Scripts;" + os.environ["PATH"] else: - installer_urls = { - "Linux": "https://repo.anaconda.com/miniconda/Miniconda2-4.7.12.1-Linux-x86_64.sh", - "Darwin": "https://repo.anaconda.com/miniconda/Miniconda2-4.7.12.1-MacOSX-x86_64.sh", - } - installer_url = installer_urls[platform.system()] - urlretrieve( - installer_url, - "miniconda-installer.sh", - ) conda_path = os.path.join(os.getcwd(), "miniconda") run(f"bash miniconda-installer.sh -b -p {conda_path}") os.environ["PATH"] = f"/{conda_path}/bin/:" + os.environ["PATH"] - run("conda env update --name base --file environment.yml") - run("conda init") - run("pip install -e .[test]") - run("""python -c "from procgen import ProcgenEnv; ProcgenEnv(num_envs=1, env_name='coinrun')" """) - run("pytest --verbose --benchmark-disable --durations=16 .") + + def run_in_conda_env(cmd): + run(f"conda run --name dev {cmd}", shell=False) + + run("conda env update --name dev --file environment.yml") + run_in_conda_env("pip show gym3") + run_in_conda_env("pip install -e .[test]") + run_in_conda_env("""python -c "from procgen import ProcgenGym3Env; ProcgenGym3Env(num=1, env_name='coinrun')" """) + run_in_conda_env("pytest --verbose --benchmark-disable --durations=16 .") if __name__ == "__main__": diff --git a/procgen-build/setup.py 
b/procgen-build/setup.py index 56eb356c..a58aa13a 100644 --- a/procgen-build/setup.py +++ b/procgen-build/setup.py @@ -8,5 +8,7 @@ "blobfile==0.8.0", # rather than rely on system cmake, install it here "cmake==3.15.3", + # this is required by procgen/build.py + "gym3==0.3.0", ], ) diff --git a/procgen/CMakeLists.txt b/procgen/CMakeLists.txt index e2ab7261..31084aa5 100644 --- a/procgen/CMakeLists.txt +++ b/procgen/CMakeLists.txt @@ -36,9 +36,6 @@ if (APPLE OR UNIX) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer") endif() -# find libenv.h header -set(CMAKE_INCLUDE_CURRENT_DIR ON) - # include qt5 find_package(Qt5 COMPONENTS Gui REQUIRED) @@ -75,4 +72,7 @@ add_library(env src/vecoptions.cpp ) +# find libenv.h header +target_include_directories(env PUBLIC ${LIBENV_DIR}) + target_link_libraries(env Qt5::Gui) \ No newline at end of file diff --git a/procgen/__init__.py b/procgen/__init__.py index 2e816ffd..bd1dc3fb 100644 --- a/procgen/__init__.py +++ b/procgen/__init__.py @@ -4,9 +4,9 @@ version_path = os.path.join(SCRIPT_DIR, "version.txt") __version__ = open(version_path).read() -from .env import ProcgenEnv +from .env import ProcgenEnv, ProcgenGym3Env from .gym_registration import register_environments register_environments() -__all__ = ["ProcgenEnv"] +__all__ = ["ProcgenEnv", "ProcgenGym3Env"] diff --git a/procgen/build.py b/procgen/build.py index 55b097d9..ebe1dddf 100644 --- a/procgen/build.py +++ b/procgen/build.py @@ -8,6 +8,8 @@ import platform import multiprocessing as mp +import gym3 + SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -15,6 +17,10 @@ global_builds = set() +class RunFailure(Exception): + pass + + @contextlib.contextmanager def nullcontext(): # this is here for python 3.6 support @@ -38,25 +44,12 @@ def run(cmd): def check(proc, verbose): if proc.returncode != 0: print(f"RUN FAILED {proc.args}:\n{proc.stdout}") - raise Exception("failed to build procgen from source") + raise 
RunFailure("failed to build procgen from source") if verbose: print(f"RUN {proc.args}:\n{proc.stdout}") -def build(package=False, debug=False): - """ - Build the requested environment in a process-safe manner and only once per process. - """ - build_dir = os.path.join(SCRIPT_DIR, ".build") - os.makedirs(build_dir, exist_ok=True) - - build_type = "relwithdebinfo" - if debug: - build_type = "debug" - - if "MAKEFLAGS" not in os.environ: - os.environ["MAKEFLAGS"] = f"-j{mp.cpu_count()}" - +def _attempt_configure(build_type, package): if "PROCGEN_CMAKE_PREFIX_PATH" in os.environ: cmake_prefix_paths = [os.environ["PROCGEN_CMAKE_PREFIX_PATH"]] else: @@ -76,6 +69,37 @@ def build(package=False, debug=False): # prepend this qt since it's likely to be loaded already by the python process cmake_prefix_paths.insert(0, conda_cmake_path) + generator = "Unix Makefiles" + if platform.system() == "Windows": + generator = "Visual Studio 15 2017 Win64" + configure_cmd = [ + "cmake", + "-G", + generator, + "-DCMAKE_PREFIX_PATH=" + ";".join(cmake_prefix_paths), + f"-DLIBENV_DIR={gym3.libenv.get_header_dir()}", + "../..", + ] + if package: + configure_cmd.append("-DPROCGEN_PACKAGE=ON") + if platform.system() != "Windows": + # this is not used on windows, the option needs to be passed to cmake --build instead + configure_cmd.append(f"-DCMAKE_BUILD_TYPE={build_type}") + + check(run(configure_cmd), verbose=package) + + +def build(package=False, debug=False): + """ + Build the requested environment in a process-safe manner and only once per process. 
+ """ + build_dir = os.path.join(SCRIPT_DIR, ".build") + os.makedirs(build_dir, exist_ok=True) + + build_type = "relwithdebinfo" + if debug: + build_type = "debug" + with chdir(build_dir), global_build_lock: # check if we have built yet in this process if build_type not in global_builds: @@ -85,39 +109,31 @@ def build(package=False, debug=False): else: # prevent multiple processes from trying to build at the same time import filelock + lock_ctx = filelock.FileLock(".build-lock") with lock_ctx: - os.makedirs(build_type, exist_ok=True) - with chdir(build_type): - sys.stdout.write("building procgen...") + sys.stdout.write("building procgen...") + sys.stdout.flush() + try: + os.makedirs(build_type, exist_ok=True) + with chdir(build_type): + _attempt_configure(build_type, package) + except RunFailure: + # cmake can get into a weird state, so nuke the build directory and retry once + sys.stdout.write("retrying configure due to failure...") sys.stdout.flush() - generator = "Unix Makefiles" - if platform.system() == "Windows": - generator = "Visual Studio 15 2017 Win64" - configure_cmd = [ - "cmake", - "-G", generator, - "-DCMAKE_PREFIX_PATH=" + ";".join(cmake_prefix_paths), - "../..", - ] - if package: - configure_cmd.append("-DPROCGEN_PACKAGE=ON") - if platform.system() != "Windows": - # this is not used on windows, the option needs to be passed to cmake --build instead - configure_cmd.append(f"-DCMAKE_BUILD_TYPE={build_type}") - - p = run(configure_cmd) - if "CMakeCache.txt is different" in p.stdout: - # if the folder is moved we can end up with an invalid CMakeCache.txt - # in which case we should re-run configure - os.remove("CMakeCache.txt") - check(run(configure_cmd), verbose=package) - else: - check(p, verbose=package) + shutil.rmtree(build_type) + os.makedirs(build_type, exist_ok=True) + with chdir(build_type): + _attempt_configure(build_type, package) + if "MAKEFLAGS" not in os.environ: + os.environ["MAKEFLAGS"] = f"-j{mp.cpu_count()}" + + with chdir(build_type): 
build_cmd = ["cmake", "--build", ".", "--config", build_type] check(run(build_cmd), verbose=package) - print("done") + print("done") global_builds.add(build_type) diff --git a/procgen/env.py b/procgen/env.py index a16387cc..72f46b2e 100644 --- a/procgen/env.py +++ b/procgen/env.py @@ -1,12 +1,16 @@ import os import random +from typing import Sequence, Optional, List -from .libenv import CVecEnv +import gym3 +from gym3.libenv import CEnv import numpy as np from .build import build SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +MAX_STATE_SIZE = 2 ** 20 + ENV_NAMES = [ "bigfish", "bossfight", @@ -59,14 +63,14 @@ def create_random_seed(): return rand_seed -class BaseProcgenEnv(CVecEnv): +class BaseProcgenEnv(CEnv): """ Base procedurally generated environment """ def __init__( self, - num_envs, + num, env_name, options, debug=False, @@ -77,6 +81,7 @@ def __init__( debug_mode=0, resource_root=None, num_threads=4, + render_mode=None, ): if resource_root is None: resource_root = os.path.join(SCRIPT_DIR, "data", "assets") + os.sep @@ -89,9 +94,16 @@ def __init__( else: # only compile if we don't find a pre-built binary lib_dir = build(debug=debug) - + self.combos = self.get_combos() + if render_mode is None: + render_human = False + elif render_mode == "rgb_array": + render_human = True + else: + raise Exception(f"invalid render mode {render_mode}") + if rand_seed is None: rand_seed = create_random_seed() @@ -105,6 +117,7 @@ def __init__( "debug_mode": debug_mode, "rand_seed": rand_seed, "num_threads": num_threads, + "render_human": render_human, # these will only be used the first time an environment is created in a process "resource_root": resource_root, } @@ -113,8 +126,31 @@ def __init__( self.options = options super().__init__( - lib_dir=lib_dir, num_envs=num_envs, debug=debug, options=options + lib_dir=lib_dir, + num=num, + options=options, + c_func_defs=[ + "int get_state(libenv_env *, int, char *, int);", + "void set_state(libenv_env *, int, char *, 
int);", + ], ) + # don't use the dict space for actions + self.ac_space = self.ac_space["action"] + + def get_state(self): + length = MAX_STATE_SIZE + buf = self._ffi.new(f"char[{length}]") + result = [] + for env_idx in range(self.num): + n = self.call_c_func("get_state", env_idx, buf, length) + result.append(bytes(self._ffi.buffer(buf, n))) + return result + + def set_state(self, states): + assert len(states) == self.num + for env_idx in range(self.num): + state = states[env_idx] + self.call_c_func("set_state", env_idx, state, len(state)) def get_combos(self): return [ @@ -135,35 +171,60 @@ def get_combos(self): ("E",), ] - def step_async(self, actions): + def keys_to_act(self, keys_list: Sequence[Sequence[str]]) -> List[Optional[np.ndarray]]: + """ + Convert list of keys being pressed to actions, used in interactive mode + """ + result = [] + for keys in keys_list: + action = None + max_len = -1 + for i, combo in enumerate(self.get_combos()): + pressed = True + for key in combo: + if key not in keys: + pressed = False + + if pressed and (max_len < len(combo)): + action = i + max_len = len(combo) + + if action is not None: + action = np.array([action]) + result.append(action) + return result + + def act(self, ac): # tensorflow may return int64 actions (https://github.com/openai/gym/blob/master/gym/spaces/discrete.py#L13) # so always cast actions to int32 - return super().step_async(actions.astype(np.int32)) + return super().act({"action": ac.astype(np.int32)}) -class ProcgenEnv(BaseProcgenEnv): +class ProcgenGym3Env(BaseProcgenEnv): + """ + gym3 interface for Procgen + """ def __init__( self, - num_envs, + num, env_name, center_agent=True, - options=None, + use_backgrounds=True, + use_monochrome_assets=False, + restrict_themes=False, use_generated_assets=False, paint_vel_info=False, distribution_mode="hard", **kwargs, ): - if options is None: - options = {} - else: - options = dict(options) - assert ( distribution_mode in DISTRIBUTION_MODE_DICT ), 
f'"{distribution_mode}" is not a valid distribution mode.' if distribution_mode == "exploration": - assert env_name in EXPLORATION_LEVEL_SEEDS, f"{env_name} does not support exploration mode" + assert ( + env_name in EXPLORATION_LEVEL_SEEDS + ), f"{env_name} does not support exploration mode" distribution_mode = DISTRIBUTION_MODE_DICT["hard"] assert "num_levels" not in kwargs, "exploration mode overrides num_levels" @@ -173,12 +234,20 @@ def __init__( else: distribution_mode = DISTRIBUTION_MODE_DICT[distribution_mode] - options.update( - { + options = { "center_agent": bool(center_agent), "use_generated_assets": bool(use_generated_assets), + "use_monochrome_assets": bool(use_monochrome_assets), + "restrict_themes": bool(restrict_themes), + "use_backgrounds": bool(use_backgrounds), "paint_vel_info": bool(paint_vel_info), "distribution_mode": distribution_mode, } - ) - super().__init__(num_envs, env_name, options, **kwargs) + super().__init__(num, env_name, options, **kwargs) + + +def ProcgenEnv(num_envs, env_name, **kwargs): + """ + Baselines VecEnv interface for Procgen + """ + return gym3.ToBaselinesVecEnv(ProcgenGym3Env(num=num_envs, env_name=env_name, **kwargs)) \ No newline at end of file diff --git a/procgen/env_test.py b/procgen/env_test.py index 9ae4ed5e..a4099585 100644 --- a/procgen/env_test.py +++ b/procgen/env_test.py @@ -1,30 +1,30 @@ import numpy as np import pytest from .env import ENV_NAMES -from procgen import ProcgenEnv +from procgen import ProcgenGym3Env @pytest.mark.parametrize("env_name", ["coinrun", "starpilot"]) def test_seeding(env_name): num_envs = 1 - def make_venv(level_num): - venv = ProcgenEnv( - num_envs=num_envs, env_name=env_name, num_levels=1, start_level=level_num + def make_env(level_num): + venv = ProcgenGym3Env( + num=num_envs, env_name=env_name, num_levels=1, start_level=level_num ) return venv - venv1 = make_venv(0) - venv2 = make_venv(0) - venv3 = make_venv(1) + env1 = make_env(0) + env2 = make_env(0) + env3 = make_env(1) - 
venv1.reset() - venv2.reset() - venv3.reset() + env1.act(np.zeros(num_envs)) + env2.act(np.zeros(num_envs)) + env3.act(np.zeros(num_envs)) - obs1, _, _, _ = venv1.step(np.zeros(num_envs)) - obs2, _, _, _ = venv2.step(np.zeros(num_envs)) - obs3, _, _, _ = venv3.step(np.zeros(num_envs)) + _, obs1, _ = env1.observe() + _, obs2, _ = env2.observe() + _, obs3, _ = env3.observe() assert np.array_equal(obs1["rgb"], obs2["rgb"]) assert not np.array_equal(obs1["rgb"], obs3["rgb"]) @@ -34,18 +34,16 @@ def make_venv(level_num): def test_determinism(env_name): def collect_observations(): rng = np.random.RandomState(0) - venv = ProcgenEnv(num_envs=2, env_name=env_name, rand_seed=23) - obs = venv.reset() + env = ProcgenGym3Env(num=2, env_name=env_name, rand_seed=23) + _, obs, _ = env.observe() obses = [obs["rgb"]] for _ in range(128): - obs, _rew, _done, _info = venv.step( + env.act( rng.randint( - low=0, - high=venv.action_space.n, - size=(venv.num_envs,), - dtype=np.int32, + low=0, high=env.ac_space.eltype.n, size=(env.num,), dtype=np.int32 ) ) + _, obs, _ = env.observe() obses.append(obs["rgb"]) return np.array(obses) @@ -57,17 +55,15 @@ def collect_observations(): @pytest.mark.parametrize("env_name", ENV_NAMES) @pytest.mark.parametrize("num_envs", [1, 2, 16]) def test_multi_speed(env_name, num_envs, benchmark): - venv = ProcgenEnv(num_envs=num_envs, env_name=env_name) + env = ProcgenGym3Env(num=num_envs, env_name=env_name) - venv.reset() - actions = np.zeros([venv.num_envs]) + actions = np.zeros([env.num]) def rollout(max_steps): step_count = 0 while step_count < max_steps: - _obs, _rews, _dones, _infos = venv.step(actions) + env.act(actions) + env.observe() step_count += 1 - benchmark(lambda: rollout(1000)) - - venv.close() \ No newline at end of file + benchmark(lambda: rollout(1000)) \ No newline at end of file diff --git a/procgen/examples/random_agent.py b/procgen/examples/random_agent_gym.py similarity index 76% rename from procgen/examples/random_agent.py rename to 
procgen/examples/random_agent_gym.py index 2e41f9a1..dc4bd039 100644 --- a/procgen/examples/random_agent.py +++ b/procgen/examples/random_agent_gym.py @@ -1,5 +1,5 @@ """ -Example random agent script to demonstrate that procgen works +Example random agent script using the gym API to demonstrate that procgen works """ import gym diff --git a/procgen/examples/random_agent_gym3.py b/procgen/examples/random_agent_gym3.py new file mode 100644 index 00000000..3f428f16 --- /dev/null +++ b/procgen/examples/random_agent_gym3.py @@ -0,0 +1,15 @@ +""" +Example random agent script using the gym3 API to demonstrate that procgen works +""" + +from gym3 import types_np +from procgen import ProcgenGym3Env +env = ProcgenGym3Env(num=1, env_name="coinrun") +step = 0 +while True: + env.act(types_np.sample(env.ac_space, bshape=(env.num,))) + rew, obs, first = env.observe() + print(f"step {step} reward {rew} first {first}") + if step > 0 and first: + break + step += 1 \ No newline at end of file diff --git a/procgen/gym_registration.py b/procgen/gym_registration.py index 8b85f476..78ba2f72 100644 --- a/procgen/gym_registration.py +++ b/procgen/gym_registration.py @@ -1,24 +1,17 @@ from gym.envs.registration import register -from gym import ObservationWrapper -from .env import ENV_NAMES, ProcgenEnv -from .scalarize import Scalarize +from gym3 import ToGymEnv, ViewerWrapper, ExtractDictObWrapper +from .env import ENV_NAMES, ProcgenGym3Env -class RemoveDictObs(ObservationWrapper): - def __init__(self, env, key): - self.key = key - super().__init__(env=env) - self.observation_space = env.observation_space.spaces[self.key] - - def observation(self, obs): - return obs[self.key] - - -def make_env(**kwargs): - venv = ProcgenEnv(num_envs=1, num_threads=0, **kwargs) - env = Scalarize(venv) - env = RemoveDictObs(env, key="rgb") - return env +def make_env(render=False, **kwargs): + if render: + kwargs["render_human"] = True + env = ProcgenGym3Env(num=1, num_threads=0, **kwargs) + env = 
ExtractDictObWrapper(env, key="rgb") + if render: + env = ViewerWrapper(env, tps=15, info_key="rgb") + gym_env = ToGymEnv(env) + return gym_env def register_environments(): diff --git a/procgen/interactive.py b/procgen/interactive.py deleted file mode 100644 index 9c7c2edf..00000000 --- a/procgen/interactive.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -import argparse - -from .interactive_base import Interactive -from procgen import ProcgenEnv -from .env import ENV_NAMES -from .scalarize import Scalarize - - -class ProcgenInteractive(Interactive): - """ - Interactive version of Procgen environments for humans to use - """ - - def __init__(self, vision, **kwargs): - self._vision = vision - venv = ProcgenEnv(num_envs=1, **kwargs) - self.combos = list(venv.unwrapped.combos) - self.last_keys = [] - env = Scalarize(venv) - super().__init__(env=env, sync=False, tps=15, display_info=True) - - def get_image(self, obs, env): - if self._vision == "human": - return env.render(mode="rgb_array") - else: - return obs["rgb"] - - def keys_to_act(self, keys): - action = None - max_len = -1 - - if "RETURN" in keys and "RETURN" not in self.last_keys: - action = -1 - else: - for i, combo in enumerate(self.combos): - pressed = True - for key in combo: - if key not in keys: - pressed = False - - if pressed and (max_len < len(combo)): - action = i - max_len = len(combo) - - self.last_keys = list(keys) - - return action - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--vision", choices=["agent", "human"], default="human") - parser.add_argument("--record-dir", help="directory to record movies to") - parser.add_argument("--distribution-mode", default="hard", help="which distribution mode to use for the level generation") - parser.add_argument("--env-name", default="coinrun", help="name of game to create", choices=ENV_NAMES) - parser.add_argument("--level-seed", type=int, help="select an individual level to use") - args = parser.parse_args() - - 
kwargs = {"distribution_mode": args.distribution_mode} - if args.level_seed is not None: - kwargs["start_level"] = args.level_seed - kwargs["num_levels"] = 1 - ia = ProcgenInteractive(args.vision, env_name=args.env_name, **kwargs) - ia.run(record_dir=args.record_dir) - - -if __name__ == "__main__": - main() diff --git a/procgen/interactive_base.py b/procgen/interactive_base.py deleted file mode 100644 index d13b4e33..00000000 --- a/procgen/interactive_base.py +++ /dev/null @@ -1,348 +0,0 @@ -""" -Interact with Gym environments using the keyboard - -An adapter object is defined for each environment to map keyboard commands to actions and extract observations as pixels. -""" - -import pyglet - -# don't require an x server to import this file -pyglet.options["shadow_window"] = False -import sys -import ctypes -import os -import abc -import time - -import numpy as np -from pyglet import gl -from pyglet.window import key as keycodes -import imageio - - -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -CHAR_SIZE = 32 -FONT = None -SECONDS_TO_DISPLAY_DONE_INFO = 3 - - -def convert_ascii_to_rgb(ascii): - """ - Convert ascii observations to an image using the loaded font - """ - global FONT - if FONT is None: - FONT = np.load(os.path.join(SCRIPT_DIR, "font.bin"))["font"] - - height, width = ascii.shape - images = np.zeros((height * CHAR_SIZE, width * CHAR_SIZE, 3), dtype=np.uint8) - for y in range(height): - for x in range(width): - ch = ascii[y, x] - images[ - y * CHAR_SIZE : (y + 1) * CHAR_SIZE, - x * CHAR_SIZE : (x + 1) * CHAR_SIZE, - :, - ] = FONT[ch] - return images - - -class Interactive(abc.ABC): - """ - Base class for making gym environments interactive for human use - """ - - def __init__(self, env, sync=True, tps=60, aspect_ratio=None, display_info=False): - self._record_dir = None - self._movie_writer = None - self._episode = 0 - self._display_info = display_info - self._seconds_to_display_done_info = 0 - - obs = env.reset() - self._image = 
self.get_image(obs, env) - assert ( - len(self._image.shape) == 3 and self._image.shape[2] == 3 - ), "must be an RGB image" - image_height, image_width = self._image.shape[:2] - - if aspect_ratio is None: - aspect_ratio = image_width / image_height - - # guess a screen size that doesn't distort the image too much but also is not tiny or huge - display = pyglet.canvas.get_display() - screen = display.get_default_screen() - max_win_width = screen.width * 0.9 - max_win_height = screen.height * 0.9 - win_width = image_width - win_height = int(win_width / aspect_ratio) - - while win_width > max_win_width or win_height > max_win_height: - win_width //= 2 - win_height //= 2 - while win_width < max_win_width / 2 and win_height < max_win_height / 2: - win_width *= 2 - win_height *= 2 - - self._info_width = win_width // 2 - if display_info: - win_width += self._info_width - win = pyglet.window.Window(width=win_width, height=win_height) - - self._key_handler = pyglet.window.key.KeyStateHandler() - win.push_handlers(self._key_handler) - win.on_close = self._on_close - - gl.glEnable(gl.GL_TEXTURE_2D) - self._texture_id = gl.GLuint(0) - gl.glGenTextures(1, ctypes.byref(self._texture_id)) - gl.glBindTexture(gl.GL_TEXTURE_2D, self._texture_id) - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP) - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP) - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_NEAREST) - gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_NEAREST) - gl.glTexImage2D( - gl.GL_TEXTURE_2D, - 0, - gl.GL_RGBA8, - image_width, - image_height, - 0, - gl.GL_RGB, - gl.GL_UNSIGNED_BYTE, - None, - ) - - self._env = env - self._win = win - - self._key_previous_states = {} - - self._steps = 0 - self._episode_steps = 0 - self._episode_return = 0 - self._prev_episode_return = 0 - self._last_info = {} - - self._tps = tps - self._sync = sync - self._current_time = 0 - self._sim_time = 0 - 
self._max_sim_frames_per_update = 4 - - self._info_label = pyglet.text.Label( - "