From 2477928d71d01022a3717593806d66d8cadc9477 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 29 Oct 2023 04:00:25 -0400 Subject: [PATCH] Refactoring (#98) --------- Co-authored-by: Heiko Carrasco --- .github/.pre-commit-config.yaml | 5 +- .gitignore | 3 + .gitmodules | 3 - .python-version | 1 + Pyrado/pyrado/algorithms/meta/spdr.py | 329 +++++++++--------- .../domain_randomization/domain_parameter.py | 48 +++ Pyrado/pyrado/environments/mujoco/__init__.py | 29 +- Pyrado/pyrado/environments/mujoco/base.py | 54 +-- .../pyrado/environments/mujoco/openai_ant.py | 13 +- .../mujoco/openai_half_cheetah.py | 11 +- .../environments/mujoco/openai_hopper.py | 9 +- .../environments/mujoco/openai_humanoid.py | 39 ++- .../environments/mujoco/quanser_qube.py | 21 +- Pyrado/pyrado/plotting/heatmap.py | 9 +- Pyrado/pyrado/sampling/parallel_evaluation.py | 8 +- Pyrado/tests/test_domain_randomization.py | 51 +++ requirements.txt | 1 + setup_deps.py | 34 +- thirdParty/mujoco-py | 1 - 19 files changed, 355 insertions(+), 314 deletions(-) create mode 100644 .python-version delete mode 160000 thirdParty/mujoco-py diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml index 87d2bc0b8cd..c8fea89bcd7 100644 --- a/.github/.pre-commit-config.yaml +++ b/.github/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.8.0 + rev: 5.12.0 hooks: - id: isort name: isort (python) @@ -11,7 +11,6 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 23.10.0 hooks: - id: black - language_version: python3.7 diff --git a/.gitignore b/.gitignore index adf9265e48f..ee1aa7a5c17 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,6 @@ MUJOCO_LOG.* ### sbi-logs Pyrado/sbi-logs/ Pyrado/tests/sbi-logs/ + +### pre-commit local/symlink +.pre-commit-config.yaml diff --git a/.gitmodules b/.gitmodules index 5b7a8e3399a..fb5900aea92 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "thirdParty/catch2"] path = thirdParty/catch2 url = https://github.com/catchorg/Catch2.git -[submodule "thirdParty/mujoco-py"] - path = thirdParty/mujoco-py - url = https://github.com/openai/mujoco-py.git [submodule "thirdParty/RenderPipeline"] path = thirdParty/RenderPipeline url = https://github.com/famura/RenderPipeline.git diff --git a/.python-version b/.python-version new file mode 100644 index 00000000000..475ba515c04 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.7 diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index ad8424f6bc6..7cd3d99573b 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -28,7 +28,7 @@ import os.path from csv import DictWriter -from typing import Iterator, Optional, Tuple +from typing import Callable, Iterator, List, Optional, Tuple import numpy as np import torch as to @@ -283,6 +283,16 @@ def sample_count(self) -> int: # Forward to subroutine return self._subrtn.sample_count + @property + def dim(self) -> int: + return self._spl_parameter.target_mean.shape[0] + + @property + def subrtn_sampler(self) -> RolloutSavingWrapper: + # It is checked in the constructor that the sampler is a RolloutSavingWrapper. + # noinspection PyTypeChecker + return self._subrtn.sampler + def step(self, snapshot_mode: str, meta_info: dict = None): """ Perform a step of SPDR. This includes training the subroutine and updating the context distribution accordingly. 
@@ -290,30 +300,12 @@ def step(self, snapshot_mode: str, meta_info: dict = None): """ self.save_snapshot() - context_mean = self._spl_parameter.context_mean.double() - context_cov = self._spl_parameter.context_cov.double() - context_cov_chol = self._spl_parameter.context_cov_chol.double() - target_mean = self._spl_parameter.target_mean.double() - target_cov_chol = self._spl_parameter.target_cov_chol.double() - # Add these keys to the logger as dummy values. + self.logger.add_value("sprl number of particles", 0) self.logger.add_value("spdr constraint kl", 0.0) self.logger.add_value("spdr constraint performance", 0.0) self.logger.add_value("spdr objective", 0.0) - for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): - for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): - self.logger.add_value( - f"context cov for {param_a_name}--{param_b_name}", context_cov[param_a_idx, param_b_idx].item() - ) - self.logger.add_value( - f"context cov_chol for {param_a_name}--{param_b_name}", - context_cov_chol[param_a_idx, param_b_idx].item(), - ) - if param_a_name == param_b_name: - self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) - break - - dim = context_mean.shape[0] + self._log_context_distribution() # If we are in the first iteration and have a bad performance, # we want to completely reset the policy if training is unsuccessful @@ -324,173 +316,199 @@ def step(self, snapshot_mode: str, meta_info: dict = None): self._train_subroutine_and_evaluate_perf )(snapshot_mode, meta_info, reset_policy) - # Update distribution - previous_distribution = MultivariateNormalWrapper(context_mean, context_cov_chol) - target_distribution = MultivariateNormalWrapper(target_mean, target_cov_chol) + previous_distribution = MultivariateNormalWrapper( + self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double() + ) + target_distribution = MultivariateNormalWrapper( + self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double() + ) - def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: - domain_param_dict = ro.rollout_info["domain_param"] - untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX - if untransformed_param_name in domain_param_dict: - return domain_param_dict[untransformed_param_name] - return domain_param_dict[param_name] + proposal_rollouts = self._sample_proposal_rollouts() + contexts, contexts_old_log_prob, values = self._extract_particles(proposal_rollouts, previous_distribution) - rollouts_all = self._get_sampler().rollouts - contexts = to.tensor( - [ - [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] - for name in self._spl_parameter.name - ], - requires_grad=True, - ).T + # Define the SPRL optimization problem + kl_constraint = self._make_kl_constraint(previous_distribution, self._kl_constraints_ub) + performance_constraint = self._make_performance_constraint( + contexts, contexts_old_log_prob, values, self._performance_lower_bound + ) + constraints = [kl_constraint, performance_constraint] + objective_fn = self._make_objective_fn(target_distribution) + x0 = previous_distribution.get_stacked() + minimize_args = dict( + fun=objective_fn, + x0=x0, + method="trust-constr", + jac=True, + constraints=constraints, + options={"gtol": 1e-4, "xtol": 1e-6}, + # bounds=bounds, + ) - contexts_old_log_prob = previous_distribution.distribution.log_prob(contexts.double()) - 
# kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, target_distribution.distribution) + print("Performing SPDR update.") + try: + result = minimize(**minimize_args) + new_x = result.x + + # Reset parameters if optimization was not successful + if not result.success: + # If optimization process was not a success + old_f = objective_fn(previous_distribution.get_stacked())[0] + constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) + + # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) + std_ok = True + + update_successful = constraints_satisfied and std_ok and result.fun < old_f + if not update_successful: + print(f"Update unsuccessful, keeping old SPDR parameters.") + new_x = x0 + except ValueError as e: + print(f"Update failed with error, keeping old SPDR parameters.", e) + new_x = x0 + + self._adapt_parameters(new_x) + + # we can't use the stored values here as new_x might not be result.x + self.logger.add_value("spdr constraint kl", kl_constraint.fun(new_x)) + self.logger.add_value("spdr constraint performance", performance_constraint.fun(new_x)) + self.logger.add_value("spdr objective", objective_fn(new_x)[0]) - values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) + def reset(self, seed: int = None): + # Forward to subroutine + self._subrtn.reset(seed) + self.subrtn_sampler.reset_rollouts() + + def save_snapshot(self, meta_info: dict = None): + self.subrtn_sampler.reset_rollouts() + super().save_snapshot(meta_info) + + if meta_info is None: + # This algorithm instance is not a subroutine of another algorithm + self._subrtn.save_snapshot(meta_info) - constraints = [] + def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: + env, policy, extra = super().load_snapshot(parsed_args) + + # Algorithm specific + if isinstance(self._subrtn, ActorCritic): + ex_dir = self._save_dir or getattr(parsed_args, "dir", None) + extra["vfcn"] = pyrado.load( + f"{parsed_args.vfcn_name}.pt", ex_dir, obj=self._subrtn.critic.vfcn, verbose=True + ) + + return env, policy, extra + + def _make_objective_fn( + self, target_distribution: MultivariateNormalWrapper + ) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: + def objective_fn(x): + """Tries to find the minimum kl divergence between the current and the update distribution, which + still satisfies the minimum update constraint and the performance constraint.""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) + grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) + + return ( + kl_divergence.detach().numpy().item(), + np.concatenate([g.detach().numpy() for g in grads]), + ) + return objective_fn + + def _make_kl_constraint( + self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float + ) -> NonlinearConstraint: def kl_constraint_fn(x): """Compute the constraint for the KL-divergence between current and proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) kl_divergence = to.distributions.kl_divergence( previous_distribution.distribution, distribution.distribution ) - return kl_divergence.detach().numpy() + return kl_divergence.detach().numpy().item() def kl_constraint_fn_prime(x): """Compute the derivative for 
the KL-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) kl_divergence = to.distributions.kl_divergence( previous_distribution.distribution, distribution.distribution ) grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) - constraints.append( - NonlinearConstraint( - fun=kl_constraint_fn, - lb=-np.inf, - ub=self._kl_constraints_ub, - jac=kl_constraint_fn_prime, - # keep_feasible=True, - ) + return NonlinearConstraint( + fun=kl_constraint_fn, + lb=-np.inf, + ub=kl_constraint_ub, + jac=kl_constraint_fn_prime, ) + def _make_performance_constraint( + self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float + ) -> NonlinearConstraint: def performance_constraint_fn(x): """Compute the constraint for the expected performance under the proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) - return performance.detach().numpy() + return performance.detach().numpy().item() def performance_constraint_fn_prime(x): """Compute the derivative for the performance-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) grads = to.autograd.grad(performance, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) - constraints.append( - NonlinearConstraint( - fun=performance_constraint_fn, - lb=self._performance_lower_bound, - ub=np.inf, - jac=performance_constraint_fn_prime, - # keep_feasible=True, - ) + return NonlinearConstraint( + fun=performance_constraint_fn, + lb=performance_lower_bound, + ub=np.inf, + jac=performance_constraint_fn_prime, ) - # # Clip the bounds of the new variance either if the applied covariance transformation does not ensure - # # non-negativity or when the KL threshold has been crossed. - # bounds = None - # x0, _, x0_cov_indices = previous_distribution.get_stacked() - # if self._cov_transformation.ensures_non_negativity(): - # lower_bound = -np.inf * np.ones_like(x0) - # lower_bound_is_inf = True - # else: - # lower_bound = np.zeros_like(x0) - # lower_bound_is_inf = False - # if self._kl_threshold != -np.inf and (self._kl_threshold < kl_divergence): - # if x0_cov_indices is not None and self._var_lower_bound is not None: - # # Further clip the x values if a standard deviation lower bound was set. - # lower_bound[dim:] = self._var_lower_bound - # lower_bound_is_inf = False - # if not lower_bound_is_inf: - # # Only set the bounds if the lower bound is not negative infinity. Makes it easier for the optimizer. 
- # upper_bound = np.ones_like(x0) * np.inf - # bounds = Bounds(lb=lower_bound, ub=upper_bound, keep_feasible=True) - # x0 = np.clip(x0, bounds.lb, bounds.ub) - - # We now optimize based on the kl-divergence between target and context distribution by minimizing it - def objective_fn(x): - """Tries to find the minimum kl divergence between the current and the update distribution, which - still satisfies the minimum update constraint and the performance constraint.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) - grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) - - return ( - kl_divergence.detach().numpy(), - np.concatenate([g.detach().numpy() for g in grads]), - ) - - x0 = previous_distribution.get_stacked() - - print("Performing SPDR update.") - # noinspection PyTypeChecker - result = minimize( - objective_fn, - x0, - method="trust-constr", - jac=True, - constraints=constraints, - options={"gtol": 1e-4, "xtol": 1e-6}, - # bounds=bounds, - ) - new_x = result.x - if not result.success: - # If optimization process was not a success - old_f = objective_fn(previous_distribution.get_stacked())[0] - constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) - - # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) - std_ok = True - - if not (constraints_satisfied and std_ok and result.fun < old_f): - print(f"Update unsuccessful, keeping old values spl parameters.") - new_x = x0 - - self._adapt_parameters(dim, new_x) - self.logger.add_value("spdr constraint kl", kl_constraint_fn(new_x).item()) - self.logger.add_value("spdr constraint performance", performance_constraint_fn(new_x).item()) - self.logger.add_value("spdr objective", objective_fn(new_x)[0].item()) - - def reset(self, seed: int = None): - # Forward to subroutine - self._subrtn.reset(seed) - self._get_sampler().reset_rollouts() - - def save_snapshot(self, meta_info: dict = None): - self._get_sampler().reset_rollouts() - super().save_snapshot(meta_info) - - if meta_info is None: - # This algorithm instance is not a subroutine of another algorithm - self._subrtn.save_snapshot(meta_info) + def _log_context_distribution(self): + context_mean = self._spl_parameter.context_mean.double() + context_cov = self._spl_parameter.context_cov.double() + context_cov_chol = self._spl_parameter.context_cov_chol.double() + for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): + for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): + self.logger.add_value( + f"context cov for {param_a_name}--{param_b_name}", + context_cov[param_a_idx, param_b_idx].item(), + ) + self.logger.add_value( + f"context cov_chol for {param_a_name}--{param_b_name}", + context_cov_chol[param_a_idx, param_b_idx].item(), + ) + if param_a_name == param_b_name: + self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) + break - def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: - env, policy, extra = super().load_snapshot(parsed_args) + def _sample_proposal_rollouts(self) -> List[List[StepSequence]]: + return self.subrtn_sampler.rollouts - # Algorithm specific - if isinstance(self._subrtn, ActorCritic): - ex_dir = self._save_dir or getattr(parsed_args, "dir", None) - extra["vfcn"] = pyrado.load( - f"{parsed_args.vfcn_name}.pt", ex_dir, obj=self._subrtn.critic.vfcn, 
verbose=True - ) + def _extract_particles( + self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper + ) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: + def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: + domain_param_dict = ro.rollout_info["domain_param"] + untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX + if untransformed_param_name in domain_param_dict: + return domain_param_dict[untransformed_param_name] + return domain_param_dict[param_name] - return env, policy, extra + contexts = to.tensor( + [ + [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] + for name in self._spl_parameter.name + ], + requires_grad=True, + ).T + self.logger.add_value("sprl number of particles", contexts.shape[0]) + contexts_log_prob = distribution.distribution.log_prob(contexts.double()) + values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) + return contexts, contexts_log_prob, values + # noinspection PyMethodMayBeStatic def _compute_expected_performance( self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor ) -> to.Tensor: @@ -498,10 +516,10 @@ def _compute_expected_performance( context_ratio = to.exp(distribution.distribution.log_prob(context) - old_log_prop) return to.mean(context_ratio * values) - def _adapt_parameters(self, dim: int, result: np.ndarray) -> None: + def _adapt_parameters(self, result: np.ndarray) -> None: """Update the parameters of the distribution based on the result of the optimization step and the general algorithm settings.""" - context_distr = MultivariateNormalWrapper.from_stacked(dim, result) + context_distr = MultivariateNormalWrapper.from_stacked(self.dim, result) self._spl_parameter.adapt("context_mean", context_distr.mean) self._spl_parameter.adapt("context_cov_chol", context_distr.cov_chol) @@ -520,10 +538,5 @@ def _train_subroutine_and_evaluate_perf( self._subrtn.reset() self._subrtn.train(snapshot_mode, None, meta_info) - rollouts_all = self._get_sampler().rollouts + rollouts_all = self.subrtn_sampler.rollouts return np.median([[ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]]).item() - - def _get_sampler(self) -> RolloutSavingWrapper: - # It is checked in the constructor that the sampler is a RolloutSavingWrapper. - # noinspection PyTypeChecker - return self._subrtn.sampler diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 0486215bf5c..02eeb067501 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -393,6 +393,43 @@ def make_broadening( clip_up=clip_up, ) + @staticmethod + def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1.0, init_cov_factor=1 / 100): + """ + Creates a self-paced domain parameter having the same initial and target mean and target variance given by the domain randomizer's variance (scaled by `target_cov_factor`). The initial variance is also given by the domain randomizer's variance (scaled by `init_cov_factor`). 
+ + :param domain_randomizer: randomizer to grab the data from + :param target_cov_factor: scaling of the randomizer's variance to get the target variance; defaults to `1` + :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` + :return: the self-paced domain parameter + """ + name = [] + target_mean = [] + target_cov_flat = [] + init_mean = [] + init_cov_flat = [] + for domain_param in domain_randomizer.domain_params: + if not isinstance(domain_param, NormalDomainParam): + raise pyrado.TypeErr( + given=domain_param, + expected_type=NormalDomainParam, + msg="each domain_param must be a NormalDomainParam", + ) + name.append(domain_param.name) + target_mean.append(domain_param.mean) + target_cov_flat.append(target_cov_factor * domain_param.std**2) + init_mean.append(domain_param.mean) + init_cov_flat.append(init_cov_factor * domain_param.std**2) + return SelfPacedDomainParam( + name=name, + target_mean=to.tensor(target_mean), + target_cov_flat=to.tensor(target_cov_flat), + init_mean=to.tensor(init_mean), + init_cov_flat=to.tensor(init_cov_flat), + clip_lo=-pyrado.inf, + clip_up=+pyrado.inf, + ) + @property def target_distr(self) -> MultivariateNormal: """Get the target distribution.""" @@ -413,6 +450,17 @@ def context_cov(self) -> to.Tensor: """Get the current covariance matrix.""" return self.context_cov_chol @ self.context_cov_chol.T + def info(self) -> dict: + return { + "name": self.name, + "target_mean": self.target_mean, + "target_cov_chol": self.target_cov_chol, + "init_mean": self.init_mean, + "init_cov_chol": self.init_cov_chol, + "clip_lo": self.clip_lo, + "clip_up": self.clip_up, + } + def adapt(self, domain_distr_param: str, domain_distr_param_value: to.Tensor): """ Update this domain parameter. diff --git a/Pyrado/pyrado/environments/mujoco/__init__.py b/Pyrado/pyrado/environments/mujoco/__init__.py index 1a9b6925814..94590e58b4d 100644 --- a/Pyrado/pyrado/environments/mujoco/__init__.py +++ b/Pyrado/pyrado/environments/mujoco/__init__.py @@ -26,35 +26,8 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -import os import pyrado -from pyrado.utils.input_output import print_cbt -try: - import mujoco_py -except (ImportError, Exception): - # The ImportError is raised if mujoco-py is simply not installed - # The Exception catches the case that you have everything installed properly but your IDE does not set the - # LD_LIBRARY_PATH correctly (happens for PyCharm & CLion). To check this, try to run your script from the terminal. - ld_library_path = os.environ.get("LD_LIBRARY_PATH") - ld_preload = os.environ.get("LD_PRELOAD") - print_cbt( - "You are trying to use are MuJoCo-based environment, but the required mujoco_py module can not be imported.\n" - "Try adding\n" - "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/$USER/.mujoco/mujoco200/bin\n" - "export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so\n" - "to your shell's rc-file.\n" - "The current values of the environment variables are:\n" - f"LD_LIBRARY_PATH={ld_library_path}\n" - f"LD_PRELOAD={ld_preload}" - "If you are using PyCharm or CLion, also add the environment variables above to your run configurations. 
" - "Note that the IDE will not resolve $USER for some reason, so enter the user name directly, " - "or run it from your terminal.\n\n" - "Here comes the mujoco-py error message:\n\n", - "r", - ) - pyrado.mujoco_loaded = False -else: - pyrado.mujoco_loaded = True +pyrado.mujoco_loaded = True diff --git a/Pyrado/pyrado/environments/mujoco/base.py b/Pyrado/pyrado/environments/mujoco/base.py index 6f19c2b9a3b..19e3b27df6e 100644 --- a/Pyrado/pyrado/environments/mujoco/base.py +++ b/Pyrado/pyrado/environments/mujoco/base.py @@ -31,10 +31,10 @@ from math import floor from typing import Optional -import mujoco_py +import mujoco +import mujoco.viewer import numpy as np from init_args_serializer import Serializable -from mujoco_py.generated.const import RND_FOG import pyrado from pyrado.environments.sim_base import SimEnv @@ -87,7 +87,7 @@ def __init__( xml_model_temp = file_raw.read() xml_model_temp = self._adapt_model_file(xml_model_temp, self.domain_param) # Create a dummy model to extract the solver's time step size - model_tmp = mujoco_py.load_model_from_xml(xml_model_temp) + model_tmp = mujoco.MjModel.from_xml_path(xml_model_temp) frame_skip = dt / model_tmp.opt.timestep if frame_skip.is_integer(): self.frame_skip = int(frame_skip) @@ -112,8 +112,8 @@ def __init__( super().__init__(dt=self.model.opt.timestep * self.frame_skip, max_steps=max_steps) # Memorize the initial states of the model from the xml (for fixed init space or later reset) - self.init_qpos = self.sim.data.qpos.copy() - self.init_qvel = self.sim.data.qvel.copy() + self.init_qpos = self.data.qpos.copy() + self.init_qvel = self.data.qvel.copy() # Initialize space (to be overwritten in constructor of subclasses) self._init_space = None @@ -177,13 +177,7 @@ def domain_param(self, domain_param: dict): # Update MuJoCo model self._create_mujoco_model() - if self.viewer is not None: - # If the viewer already exists and we reset the domain parameters, we must also recreate the viewer since - # it references to the simulation object which get's reconstructed during _create_mujoco_model() - import glfw - - glfw.destroy_window(self.viewer.window) - self.viewer = None + self.viewer = None # Update task self._task = self._create_task(self.task_args) @@ -202,7 +196,7 @@ def _adapt_model_file(self, xml_model: str, domain_param: dict) -> str: :return: adapted model file where the placeholders are filled with numerical values """ # The mesh dir is not resolved when later passed as a string, thus we do it manually - xml_model = xml_model.replace(f"[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) + xml_model = xml_model.replace("[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) # Replace all occurrences of the domain parameter placeholder with its value for key, value in domain_param.items(): @@ -230,14 +224,14 @@ def _create_mujoco_model(self): xml_model = self._adapt_model_file(xml_model, self.domain_param) # Create MuJoCo model from parsed XML file - self.model = mujoco_py.load_model_from_xml(xml_model) - self.sim = mujoco_py.MjSim(self.model, nsubsteps=self.frame_skip) + self.model = mujoco.MjModel.from_xml_string(xml_model) + self.data = mujoco.MjData(self.model) def configure_viewer(self): """Configure the camera when the viewer is initialized. 
You need to set `self.camera_config` before.""" # Render a fog around the scene by default if self.camera_config.pop("render_fog", True): - self.viewer.scn.flags[RND_FOG] = 1 + self.viewer.scn.flags[mujoco.mjtRndFlag.mjRND_FOG] = 1 # Parse all other options for key, value in self.camera_config.items(): @@ -274,8 +268,8 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. self._task.reset(env_spec=self.spec, init_state=init_state.copy()) # Reset MuJoCo simulation model (only reset the joint configuration) - self.sim.reset() - old_state = self.sim.get_state() + mujoco.mj_resetData(self.model, self.data) + old_state = self.data nq = self.model.nq nv = self.model.nv if not init_state[:nq].shape == old_state.qpos.shape: # check joint positions dimension @@ -283,16 +277,9 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim if not init_state[nq : nq + nv].shape == old_state.qvel.shape: # check joint velocities dimension raise pyrado.ShapeErr(given=init_state[nq : nq + nv], expected_match=old_state.qvel) - new_state = mujoco_py.MjSimState( - # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim - old_state.time, - init_state[:nq], - init_state[nq : nq + nv], - old_state.act, - old_state.udd_state, - ) - self.sim.set_state(new_state) - self.sim.forward() + self.data.qpos[:] = np.copy(init_state[:nq]) + self.data.qvel[:] = np.copy(init_state[nq : nq + nv]) + mujoco.mj_forward(self.model, self.data) # Return an observation return self.observe(self.state) @@ -342,14 +329,7 @@ def render(self, mode: RenderMode = RenderMode(), render_step: int = 1): if mode.video: if self.viewer is None: # Create viewer if not existent (see 'human' mode of OpenAI Gym's MujocoEnv) - self.viewer = mujoco_py.MjViewer(self.sim) - - # Adjust window size and position to custom values - import glfw - - glfw.make_context_current(self.viewer.window) - glfw.set_window_size(self.viewer.window, 1280, 720) - glfw.set_window_pos(self.viewer.window, 50, 50) + self.viewer = mujoco.viewer.launch_passive(self.model, self.data) self.configure_viewer() - self.viewer.render() + self.viewer.sync() diff --git a/Pyrado/pyrado/environments/mujoco/openai_ant.py b/Pyrado/pyrado/environments/mujoco/openai_ant.py index d78dbc2d765..862948ecc42 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_ant.py +++ b/Pyrado/pyrado/environments/mujoco/openai_ant.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -98,9 +99,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate( - [self.sim.data.qpos.flat, self.sim.data.qvel.flat, self.sim.data.cfrc_ext.flat] - ).shape + state_shape = np.concatenate([self.data.qpos.flat, self.data.qvel.flat, self.data.cfrc_ext.flat]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -148,11 +147,11 @@ def contact_forces(self): return contact_forces def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.flat.copy() - vel = self.sim.data.qvel.flat.copy() + pos = self.data.qpos.flat.copy() + vel = self.data.qvel.flat.copy() cfrc_ext = self.contact_forces.flat.copy() self.state = np.concatenate([pos, vel, 
cfrc_ext]) diff --git a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py index f72288a1068..088698c8fc1 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py +++ b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -91,7 +92,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).shape + state_shape = np.concatenate([self.data.qpos, self.data.qvel]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -123,11 +124,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcn(self._dt, idx_fwd=0, **task_args)) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_hopper.py b/Pyrado/pyrado/environments/mujoco/openai_hopper.py index 57be4b61624..c1dd9e0b859 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_hopper.py +++ b/Pyrado/pyrado/environments/mujoco/openai_hopper.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -141,11 +142,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, rew_fcn) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py index 4de37cdc86f..7b56f64c1b5 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py +++ b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -84,10 +85,10 @@ def __init__( rest_shape = np.concatenate( [ - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape min_init_rest = -np.ones(rest_shape) @@ -102,12 +103,12 @@ def __init__( def state_space(self) -> Space: state_shape = np.concatenate( [ - self.sim.data.qpos.flat, - self.sim.data.qvel.flat, - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.qpos.flat, + self.data.qvel.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @@ -162,15 +163,15 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcnHumanoid(self._dt, **task_args)) def 
_mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() - - position = self.sim.data.qpos.flat.copy() - velocity = self.sim.data.qvel.flat.copy() - com_inertia = self.sim.data.cinert.flat.copy() - com_velocity = self.sim.data.cvel.flat.copy() - actuator_forces = self.sim.data.qfrc_actuator.flat.copy() - external_contact_forces = self.sim.data.cfrc_ext.flat.copy() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) + + position = self.data.qpos.flat.copy() + velocity = self.data.qvel.flat.copy() + com_inertia = self.data.cinert.flat.copy() + com_velocity = self.data.cvel.flat.copy() + actuator_forces = self.data.qfrc_actuator.flat.copy() + external_contact_forces = self.data.cfrc_ext.flat.copy() self.state = np.concatenate( [ diff --git a/Pyrado/pyrado/environments/mujoco/quanser_qube.py index a609f763555..20e136fb33e 100644 --- a/Pyrado/pyrado/environments/mujoco/quanser_qube.py +++ b/Pyrado/pyrado/environments/mujoco/quanser_qube.py @@ -30,9 +30,8 @@ from abc import abstractmethod from typing import Optional -import mujoco_py +import mujoco import numpy as np -import torch as to from init_args_serializer import Serializable import pyrado @@ -122,19 +121,13 @@ def _mujoco_step(self, act: np.ndarray) -> dict: ) # act is a scalar array, causing warning on later np.array construction # Apply the torques to the robot - self.sim.data.ctrl[:] = torque + self.data.ctrl[:] = torque # Call MuJoCo - try: - self.sim.step() - mjsim_crashed = False - except mujoco_py.builder.MujocoException: - # When MuJoCo recognized instabilities in the simulation, it simply kills it. - # Instead, we want the episode to end with a failure. - mjsim_crashed = True - - qpos = self.sim.data.qpos.copy() - qvel = self.sim.data.qvel.copy() + mujoco.mj_step(self.model, self.data) + + qpos = self.data.qpos.copy() + qvel = self.data.qvel.copy() self.state = np.concatenate([qpos, qvel]) # If state is out of bounds (this is normally checked by the task, but does not work because of the mask) @@ -143,7 +136,7 @@ def _mujoco_step(self, act: np.ndarray) -> dict: return dict( qpos=qpos, qvel=qvel, - failed=mjsim_crashed or state_oob, + failed=state_oob, ) def observe(self, state: np.ndarray) -> np.ndarray: diff --git a/Pyrado/pyrado/plotting/heatmap.py index 8f7e32ec054..fbf1049b8a3 100644 --- a/Pyrado/pyrado/plotting/heatmap.py +++ b/Pyrado/pyrado/plotting/heatmap.py @@ -34,7 +34,6 @@ from matplotlib import colors from matplotlib import pyplot as plt from matplotlib import ticker -from pandas.core.indexes.numeric import NumericIndex import pyrado from pyrado.plotting.utils import draw_sep_cbar @@ -191,10 +190,10 @@ def draw_heatmap( :return: handles to the heat map and the color bar figures (`None` if not existent) """ if isinstance(data, pd.DataFrame): - if not isinstance(data.index, NumericIndex): - raise pyrado.TypeErr(given=data.index, expected_type=NumericIndex) - if not isinstance(data.columns, NumericIndex): - raise pyrado.TypeErr(given=data.columns, expected_type=NumericIndex) + if not data.index.is_numeric(): + raise pyrado.TypeErr(given=data.index, msg="expected numeric index") + if not data.columns.is_numeric(): + raise pyrado.TypeErr(given=data.columns, msg="expected numeric index") # Extract the data x = data.columns y = data.index diff --git a/Pyrado/pyrado/sampling/parallel_evaluation.py index 728518d92bc..cfd31298243 100644 --- 
a/Pyrado/pyrado/sampling/parallel_evaluation.py +++ b/Pyrado/pyrado/sampling/parallel_evaluation.py @@ -106,7 +106,9 @@ def eval_domain_params( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: # we set the sub seed to zero since every run will have its personal sub sub seed - return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), params, pb) + return pool.run_map( + functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb + ) def eval_nominal_domain( @@ -128,7 +130,7 @@ def eval_nominal_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_randomized_domain( @@ -152,7 +154,7 @@ def eval_randomized_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_domain_params_with_segmentwise_reset( diff --git a/Pyrado/tests/test_domain_randomization.py b/Pyrado/tests/test_domain_randomization.py index 8809f6e4ac1..1b794090a01 100644 --- a/Pyrado/tests/test_domain_randomization.py +++ b/Pyrado/tests/test_domain_randomization.py @@ -33,16 +33,31 @@ import torch as to from tests.conftest import m_needs_bullet, m_needs_mujoco +import pyrado from pyrado.domain_randomization.domain_parameter import ( BernoulliDomainParam, MultivariateNormalDomainParam, NormalDomainParam, + SelfPacedDomainParam, UniformDomainParam, ) +from pyrado.domain_randomization.domain_randomizer import DomainRandomizer from pyrado.domain_randomization.utils import param_grid from pyrado.environments.sim_base import SimEnv +def assert_is_close(param, info_expected): + info_actual = param.info() + assert list(sorted(info_actual.keys())) == list(sorted(info_expected.keys())) + for key, expected in info_expected.items(): + if key in ["name", "clip_lo", "clip_up"]: + assert info_actual[key] == expected + else: + assert to.allclose( + info_actual[key], expected + ), f"key: {key}, actual: {info_actual[key]}, expected: {expected}" + + @pytest.mark.parametrize( "dp", [ @@ -66,6 +81,42 @@ def test_domain_param(dp, num_samples): assert len(s) == num_samples +def test_self_paced_domain_param_make_broadening(): + param = SelfPacedDomainParam.make_broadening(["a"], [1.0], 0.0004, 0.04) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + "clip_up": pyrado.inf, + }, + ) + + +def test_self_paced_domain_param_from_domain_randomizer(): + param = SelfPacedDomainParam.from_domain_randomizer( + DomainRandomizer(NormalDomainParam(name="a", mean=1.0, std=1.0)), + init_cov_factor=0.0004, + target_cov_factor=0.04, + ) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + 
"clip_up": pyrado.inf, + }, + ) + + def test_randomizer(default_randomizer): print(default_randomizer) # Generate 7 samples diff --git a/requirements.txt b/requirements.txt index 6b0c4970b34..48a4668e21d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ sphinx_rtd_theme tabulate tensorboard tqdm +mujoco \ No newline at end of file diff --git a/setup_deps.py b/setup_deps.py index 6fe7b8333ba..56076d4fc18 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -40,8 +40,6 @@ import zipfile from urllib.request import urlretrieve -import yaml - # Get the project's root directory project_dir = osp.dirname(osp.abspath(__file__)) @@ -138,11 +136,6 @@ ] # using --headless: conda install -c conda-forge bullet freetype libglu freeglut mesalib lapack -required_packages_mujocopy = [ - "libglew-dev", - "libosmesa6-dev", -] - env_vars = { # Global cmake prefix path "CMAKE_PREFIX_PATH": ":".join(cmake_prefix_path) @@ -292,7 +285,6 @@ def downloadAndExtract(url, destdir, archiveContentPath=None): # Taken from https://stackoverflow.com/a/43094365 def members(ml): subfolder = osp.normpath(archiveContentPath) - l = len(subfolder) for member in ml: # Skip directories in zip isdir = getattr(member, "is_dir", None) @@ -386,7 +378,7 @@ def setup_dep_libraries(): quiet = [] if not CI else ["-qq"] sp.check_call(["sudo", "apt-get"] + quiet + ["update", "-y"]) # Install dependencies - sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages + required_packages_mujocopy) + sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages) def setup_wm5(): @@ -553,20 +545,15 @@ def setup_render_pipeline(): sp.check_call([sys.executable, "setup.py"], cwd=osp.join(project_dir, "thirdParty", "RenderPipeline")) -def setup_mujoco_py(): - # Set up mujoco-py (doing it via pip caused problems on some machines) - sp.check_call([sys.executable, "setup.py", "install"], cwd=osp.join(project_dir, "thirdParty", "mujoco-py")) - - def setup_pytorch_based(): # Locally build PyTorch>=1.7.0 requires dataclasses (does not harm when using pytorch from pip) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "dataclasses"]) # Set up GPyTorch without touching the PyTorch installation (requires scikit-learn which requires threadpoolctl) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "threadpoolctl"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "scikit-learn"]) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "gpytorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "gpytorch"]) # Set up BoTorch without touching the PyTorch installation (requires gpytorch) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "botorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "botorch"]) # Set up Pyro without touching the PyTorch installation (requires opt-einsum) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "opt-einsum"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "pyro-api"]) @@ -675,13 +662,12 @@ def setup_wo_rcs_wo_pytorch(): print("\nStarting Option Red Velvet Setup\n") # Rcs will still be downloaded since it is a submodule setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() - print("\nWAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the 
--no-deps flag) are set up!\n") + print("\nWAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are set up!\n") def setup_wo_rcs_w_pytorch(): @@ -689,16 +675,12 @@ def setup_wo_rcs_w_pytorch(): # Rcs will still be downloaded since it is a submodule setup_pytorch() setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() - print( - "\nPyTorch, WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " - "set up!\n" - ) + print("\nPyTorch, WAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " "set up!\n") def setup_w_rcs_wo_pytorch(): @@ -712,14 +694,13 @@ def setup_w_rcs_wo_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, and Pyrado (with GPyTorch, BoTorch, and Pyro " + "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes and Pyrado (with GPyTorch, BoTorch, and Pyro " "using the --no-deps flag) are set up!\n" ) @@ -734,14 +715,13 @@ def setup_w_rcs_w_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and " + "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, Pyrado (with GPyTorch, BoTorch, and " "Pyro using the --no-deps flag) are set up!\n" ) diff --git a/thirdParty/mujoco-py b/thirdParty/mujoco-py deleted file mode 160000 index d73ce6e91d0..00000000000 --- a/thirdParty/mujoco-py +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d73ce6e91d096b74da2a2fcb0a4164e10db5f641
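Note: below is a minimal standalone sketch (not part of the patch) of the mujoco-py to mujoco API mapping that this refactoring applies in Pyrado/pyrado/environments/mujoco/base.py and the Gym-style environments. The toy MJCF model is invented purely for illustration; the mujoco calls (MjModel.from_xml_string, MjData, mj_resetData, mj_forward, mj_step) are the ones introduced by the diff above, with the replaced mujoco-py calls noted in comments.

import mujoco
import numpy as np

# Hypothetical toy MJCF model, only here to make the sketch self-contained.
TOY_MJCF = """
<mujoco>
  <option timestep="0.01"/>
  <worldbody>
    <body name="slider">
      <joint name="slide" type="slide" axis="1 0 0"/>
      <geom type="sphere" size="0.1" mass="1.0"/>
    </body>
  </worldbody>
  <actuator>
    <motor joint="slide"/>
  </actuator>
</mujoco>
"""

# mujoco-py: model = mujoco_py.load_model_from_xml(xml); sim = mujoco_py.MjSim(model)
model = mujoco.MjModel.from_xml_string(TOY_MJCF)
data = mujoco.MjData(model)

# mujoco-py: sim.reset(); sim.set_state(MjSimState(...)); sim.forward()
mujoco.mj_resetData(model, data)
data.qpos[:] = 0.0
data.qvel[:] = 0.0
mujoco.mj_forward(model, data)

# mujoco-py: sim.data.ctrl[:] = act; sim.step()
data.ctrl[:] = 0.5
for _ in range(10):
    mujoco.mj_step(model, data)

# mujoco-py: sim.data.qpos / sim.data.qvel
state = np.concatenate([data.qpos.copy(), data.qvel.copy()])
print(state)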