Merge remote-tracking branch 'upstream/mujoco-update' into cleanup_spdr

famura · Oct 27, 2023 · 1e19a6a · 1e19a6a
2 parents 7af231c + 08ef8c8
commit 1e19a6a
Show file tree

Hide file tree

Showing 13 changed files with 75 additions and 148 deletions.
diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/pycqa/isort
-    rev: 5.8.0
+    rev: 5.12.0
     hooks:
       - id: isort
         name: isort (python)
@@ -11,7 +11,6 @@ repos:
         name: isort (pyi)
         types: [pyi]
   - repo: https://github.com/psf/black
-    rev: 20.8b1
+    rev: 23.10.0
     hooks:
       - id: black
-        language_version: python3.7
diff --git a/.gitignore b/.gitignore
@@ -93,3 +93,6 @@ MUJOCO_LOG.*
 ### sbi-logs
 Pyrado/sbi-logs/
 Pyrado/tests/sbi-logs/
+
+### pre-commit local/symlink
+.pre-commit-config.yaml
diff --git a/.gitmodules b/.gitmodules
@@ -10,9 +10,6 @@
 [submodule "thirdParty/catch2"]
 	path = thirdParty/catch2
 	url = https://github.com/catchorg/Catch2.git
-[submodule "thirdParty/mujoco-py"]
-	path = thirdParty/mujoco-py
-	url = https://github.com/openai/mujoco-py.git
 [submodule "thirdParty/RenderPipeline"]
 	path = thirdParty/RenderPipeline
 	url = https://github.com/famura/RenderPipeline.git

diff --git a/Pyrado/pyrado/environments/mujoco/__init__.py b/Pyrado/pyrado/environments/mujoco/__init__.py
@@ -26,35 +26,8 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-import os
 
 import pyrado
-from pyrado.utils.input_output import print_cbt
 
 
-try:
-    import mujoco_py
-except (ImportError, Exception):
-    # The ImportError is raised if mujoco-py is simply not installed
-    # The Exception catches the case that you have everything installed properly but your IDE does not set the
-    # LD_LIBRARY_PATH correctly (happens for PyCharm & CLion). To check this, try to run your script from the terminal.
-    ld_library_path = os.environ.get("LD_LIBRARY_PATH")
-    ld_preload = os.environ.get("LD_PRELOAD")
-    print_cbt(
-        "You are trying to use are MuJoCo-based environment, but the required mujoco_py module can not be imported.\n"
-        "Try adding\n"
-        "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/$USER/.mujoco/mujoco200/bin\n"
-        "export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so\n"
-        "to your shell's rc-file.\n"
-        "The current values of the environment variables are:\n"
-        f"LD_LIBRARY_PATH={ld_library_path}\n"
-        f"LD_PRELOAD={ld_preload}"
-        "If you are using PyCharm or CLion, also add the environment variables above to your run configurations. "
-        "Note that the IDE will not resolve $USER for some reason, so enter the user name directly, "
-        "or run it from your terminal.\n\n"
-        "Here comes the mujoco-py error message:\n\n",
-        "r",
-    )
-    pyrado.mujoco_loaded = False
-else:
-    pyrado.mujoco_loaded = True
+pyrado.mujoco_loaded = True
diff --git a/Pyrado/pyrado/environments/mujoco/base.py b/Pyrado/pyrado/environments/mujoco/base.py
@@ -31,10 +31,10 @@
 from math import floor
 from typing import Optional
 
-import mujoco_py
+import mujoco
+import mujoco.viewer
 import numpy as np
 from init_args_serializer import Serializable
-from mujoco_py.generated.const import RND_FOG
 
 import pyrado
 from pyrado.environments.sim_base import SimEnv
@@ -87,7 +87,7 @@ def __init__(
                 xml_model_temp = file_raw.read()
             xml_model_temp = self._adapt_model_file(xml_model_temp, self.domain_param)
             # Create a dummy model to extract the solver's time step size
-            model_tmp = mujoco_py.load_model_from_xml(xml_model_temp)
+            model_tmp = mujoco.MjModel.from_xml_path(xml_model_temp)
             frame_skip = dt / model_tmp.opt.timestep
             if frame_skip.is_integer():
                 self.frame_skip = int(frame_skip)
@@ -112,8 +112,8 @@ def __init__(
         super().__init__(dt=self.model.opt.timestep * self.frame_skip, max_steps=max_steps)
 
         # Memorize the initial states of the model from the xml (for fixed init space or later reset)
-        self.init_qpos = self.sim.data.qpos.copy()
-        self.init_qvel = self.sim.data.qvel.copy()
+        self.init_qpos = self.data.qpos.copy()
+        self.init_qvel = self.data.qvel.copy()
 
         # Initialize space (to be overwritten in constructor of subclasses)
         self._init_space = None
@@ -177,13 +177,7 @@ def domain_param(self, domain_param: dict):
         # Update MuJoCo model
         self._create_mujoco_model()
 
-        if self.viewer is not None:
-            # If the viewer already exists and we reset the domain parameters, we must also recreate the viewer since
-            # it references to the simulation object which get's reconstructed during _create_mujoco_model()
-            import glfw
-
-            glfw.destroy_window(self.viewer.window)
-            self.viewer = None
+        self.viewer = None
 
         # Update task
         self._task = self._create_task(self.task_args)
@@ -202,7 +196,7 @@ def _adapt_model_file(self, xml_model: str, domain_param: dict) -> str:
         :return: adapted model file where the placeholders are filled with numerical values
         """
         # The mesh dir is not resolved when later passed as a string, thus we do it manually
-        xml_model = xml_model.replace(f"[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR)
+        xml_model = xml_model.replace("[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR)
 
         # Replace all occurrences of the domain parameter placeholder with its value
         for key, value in domain_param.items():
@@ -230,14 +224,14 @@ def _create_mujoco_model(self):
         xml_model = self._adapt_model_file(xml_model, self.domain_param)
 
         # Create MuJoCo model from parsed XML file
-        self.model = mujoco_py.load_model_from_xml(xml_model)
-        self.sim = mujoco_py.MjSim(self.model, nsubsteps=self.frame_skip)
+        self.model = mujoco.MjModel.from_xml_string(xml_model)
+        self.data = mujoco.MjData(self.model)
 
     def configure_viewer(self):
         """Configure the camera when the viewer is initialized. You need to set `self.camera_config` before."""
         # Render a fog around the scene by default
         if self.camera_config.pop("render_fog", True):
-            self.viewer.scn.flags[RND_FOG] = 1
+            self.viewer.scn.flags[mujoco.mjtRndFlag.mjRND_FOG] = 1
 
         # Parse all other options
         for key, value in self.camera_config.items():
@@ -274,25 +268,18 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.
         self._task.reset(env_spec=self.spec, init_state=init_state.copy())
 
         # Reset MuJoCo simulation model (only reset the joint configuration)
-        self.sim.reset()
-        old_state = self.sim.get_state()
+        mujoco.mj_resetData(self.model, self.data)
+        old_state = self.data
         nq = self.model.nq
         nv = self.model.nv
         if not init_state[:nq].shape == old_state.qpos.shape:  # check joint positions dimension
             raise pyrado.ShapeErr(given=init_state[:nq], expected_match=old_state.qpos)
         # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim
         if not init_state[nq : nq + nv].shape == old_state.qvel.shape:  # check joint velocities dimension
             raise pyrado.ShapeErr(given=init_state[nq : nq + nv], expected_match=old_state.qvel)
-        new_state = mujoco_py.MjSimState(
-            # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim
-            old_state.time,
-            init_state[:nq],
-            init_state[nq : nq + nv],
-            old_state.act,
-            old_state.udd_state,
-        )
-        self.sim.set_state(new_state)
-        self.sim.forward()
+        self.data.qpos[:] = np.copy(init_state[:nq])
+        self.data.qvel[:] = np.copy(init_state[nq : nq + nv])
+        mujoco.mj_forward(self.model, self.data)
 
         # Return an observation
         return self.observe(self.state)
@@ -342,14 +329,7 @@ def render(self, mode: RenderMode = RenderMode(), render_step: int = 1):
             if mode.video:
                 if self.viewer is None:
                     # Create viewer if not existent (see 'human' mode of OpenAI Gym's MujocoEnv)
-                    self.viewer = mujoco_py.MjViewer(self.sim)
-
-                    # Adjust window size and position to custom values
-                    import glfw
-
-                    glfw.make_context_current(self.viewer.window)
-                    glfw.set_window_size(self.viewer.window, 1280, 720)
-                    glfw.set_window_pos(self.viewer.window, 50, 50)
+                    self.viewer = mujoco.viewer.launch_passive(self.model, self.data)
 
                     self.configure_viewer()
-                self.viewer.render()
+                self.viewer.sync()
diff --git a/Pyrado/pyrado/environments/mujoco/openai_ant.py b/Pyrado/pyrado/environments/mujoco/openai_ant.py
@@ -29,6 +29,7 @@
 import os.path as osp
 from typing import Optional
 
+import mujoco
 import numpy as np
 from init_args_serializer import Serializable
 
@@ -98,9 +99,7 @@ def __init__(
 
     @property
     def state_space(self) -> Space:
-        state_shape = np.concatenate(
-            [self.sim.data.qpos.flat, self.sim.data.qvel.flat, self.sim.data.cfrc_ext.flat]
-        ).shape
+        state_shape = np.concatenate([self.data.qpos.flat, self.data.qvel.flat, self.data.cfrc_ext.flat]).shape
         return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape)
 
     @property
@@ -148,11 +147,11 @@ def contact_forces(self):
         return contact_forces
 
     def _mujoco_step(self, act: np.ndarray) -> dict:
-        self.sim.data.ctrl[:] = act
-        self.sim.step()
+        self.data.ctrl[:] = act
+        mujoco.mj_step(self.model, self.data)
 
-        pos = self.sim.data.qpos.flat.copy()
-        vel = self.sim.data.qvel.flat.copy()
+        pos = self.data.qpos.flat.copy()
+        vel = self.data.qvel.flat.copy()
         cfrc_ext = self.contact_forces.flat.copy()
 
         self.state = np.concatenate([pos, vel, cfrc_ext])

diff --git a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py
@@ -29,6 +29,7 @@
 import os.path as osp
 from typing import Optional
 
+import mujoco
 import numpy as np
 from init_args_serializer import Serializable
 
@@ -91,7 +92,7 @@ def __init__(
 
     @property
     def state_space(self) -> Space:
-        state_shape = np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).shape
+        state_shape = np.concatenate([self.data.qpos, self.data.qvel]).shape
         return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape)
 
     @property
@@ -123,11 +124,11 @@ def _create_task(self, task_args: dict) -> Task:
         return GoallessTask(self.spec, ForwardVelocityRewFcn(self._dt, idx_fwd=0, **task_args))
 
     def _mujoco_step(self, act: np.ndarray) -> dict:
-        self.sim.data.ctrl[:] = act
-        self.sim.step()
+        self.data.ctrl[:] = act
+        mujoco.mj_step(self.model, self.data)
 
-        pos = self.sim.data.qpos.copy()
-        vel = self.sim.data.qvel.copy()
+        pos = self.data.qpos.copy()
+        vel = self.data.qvel.copy()
         self.state = np.concatenate([pos, vel])
 
         return dict()

diff --git a/Pyrado/pyrado/environments/mujoco/openai_hopper.py b/Pyrado/pyrado/environments/mujoco/openai_hopper.py
@@ -29,6 +29,7 @@
 import os.path as osp
 from typing import Optional
 
+import mujoco
 import numpy as np
 from init_args_serializer import Serializable
 
@@ -141,11 +142,11 @@ def _create_task(self, task_args: dict) -> Task:
         return GoallessTask(self.spec, rew_fcn)
 
     def _mujoco_step(self, act: np.ndarray) -> dict:
-        self.sim.data.ctrl[:] = act
-        self.sim.step()
+        self.data.ctrl[:] = act
+        mujoco.mj_step(self.model, self.data)
 
-        pos = self.sim.data.qpos.copy()
-        vel = self.sim.data.qvel.copy()
+        pos = self.data.qpos.copy()
+        vel = self.data.qvel.copy()
         self.state = np.concatenate([pos, vel])
 
         return dict()

diff --git a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py
@@ -29,6 +29,7 @@
 import os.path as osp
 from typing import Optional
 
+import mujoco
 import numpy as np
 from init_args_serializer import Serializable
 
@@ -84,10 +85,10 @@ def __init__(
 
         rest_shape = np.concatenate(
             [
-                self.sim.data.cinert.flat,
-                self.sim.data.cvel.flat,
-                self.sim.data.qfrc_actuator.flat,
-                self.sim.data.cfrc_ext.flat,
+                self.data.cinert.flat,
+                self.data.cvel.flat,
+                self.data.qfrc_actuator.flat,
+                self.data.cfrc_ext.flat,
             ]
         ).shape
         min_init_rest = -np.ones(rest_shape)
@@ -102,12 +103,12 @@ def __init__(
     def state_space(self) -> Space:
         state_shape = np.concatenate(
             [
-                self.sim.data.qpos.flat,
-                self.sim.data.qvel.flat,
-                self.sim.data.cinert.flat,
-                self.sim.data.cvel.flat,
-                self.sim.data.qfrc_actuator.flat,
-                self.sim.data.cfrc_ext.flat,
+                self.data.qpos.flat,
+                self.data.qvel.flat,
+                self.data.cinert.flat,
+                self.data.cvel.flat,
+                self.data.qfrc_actuator.flat,
+                self.data.cfrc_ext.flat,
             ]
         ).shape
         return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape)
@@ -162,15 +163,15 @@ def _create_task(self, task_args: dict) -> Task:
         return GoallessTask(self.spec, ForwardVelocityRewFcnHumanoid(self._dt, **task_args))
 
     def _mujoco_step(self, act: np.ndarray) -> dict:
-        self.sim.data.ctrl[:] = act
-        self.sim.step()
-
-        position = self.sim.data.qpos.flat.copy()
-        velocity = self.sim.data.qvel.flat.copy()
-        com_inertia = self.sim.data.cinert.flat.copy()
-        com_velocity = self.sim.data.cvel.flat.copy()
-        actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
-        external_contact_forces = self.sim.data.cfrc_ext.flat.copy()
+        self.data.ctrl[:] = act
+        mujoco.mj_step(self.model, self.data)
+
+        position = self.data.qpos.flat.copy()
+        velocity = self.data.qvel.flat.copy()
+        com_inertia = self.data.cinert.flat.copy()
+        com_velocity = self.data.cvel.flat.copy()
+        actuator_forces = self.data.qfrc_actuator.flat.copy()
+        external_contact_forces = self.data.cfrc_ext.flat.copy()
 
         self.state = np.concatenate(
             [

diff --git a/Pyrado/pyrado/environments/mujoco/quanser_qube.py b/Pyrado/pyrado/environments/mujoco/quanser_qube.py
@@ -30,9 +30,8 @@
 from abc import abstractmethod
 from typing import Optional
 
-import mujoco_py
+import mujoco
 import numpy as np
-import torch as to
 from init_args_serializer import Serializable
 
 import pyrado
@@ -122,19 +121,13 @@ def _mujoco_step(self, act: np.ndarray) -> dict:
         )  # act is a scalar array, causing warning on later np.array construction
 
         # Apply the torques to the robot
-        self.sim.data.ctrl[:] = torque
+        self.data.ctrl[:] = torque
 
         # Call MuJoCo
-        try:
-            self.sim.step()
-            mjsim_crashed = False
-        except mujoco_py.builder.MujocoException:
-            # When MuJoCo recognized instabilities in the simulation, it simply kills it.
-            # Instead, we want the episode to end with a failure.
-            mjsim_crashed = True
-
-        qpos = self.sim.data.qpos.copy()
-        qvel = self.sim.data.qvel.copy()
+        mujoco.mj_step(self.model, self.data)
+
+        qpos = self.data.qpos.copy()
+        qvel = self.data.qvel.copy()
         self.state = np.concatenate([qpos, qvel])
 
         # If state is out of bounds (this is normally checked by the task, but does not work because of the mask)
@@ -143,7 +136,7 @@ def _mujoco_step(self, act: np.ndarray) -> dict:
         return dict(
             qpos=qpos,
             qvel=qvel,
-            failed=mjsim_crashed or state_oob,
+            failed=state_oob,
         )
 
     def observe(self, state: np.ndarray) -> np.ndarray: