From 2477928d71d01022a3717593806d66d8cadc9477 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 29 Oct 2023 04:00:25 -0400 Subject: [PATCH] Refactoring (#98) --------- Co-authored-by: Heiko Carrasco --- .github/.pre-commit-config.yaml | 5 +- .gitignore | 3 + .gitmodules | 3 - .python-version | 1 + Pyrado/pyrado/algorithms/meta/spdr.py | 329 +++++++++--------- .../domain_randomization/domain_parameter.py | 48 +++ Pyrado/pyrado/environments/mujoco/__init__.py | 29 +- Pyrado/pyrado/environments/mujoco/base.py | 54 +-- .../pyrado/environments/mujoco/openai_ant.py | 13 +- .../mujoco/openai_half_cheetah.py | 11 +- .../environments/mujoco/openai_hopper.py | 9 +- .../environments/mujoco/openai_humanoid.py | 39 ++- .../environments/mujoco/quanser_qube.py | 21 +- Pyrado/pyrado/plotting/heatmap.py | 9 +- Pyrado/pyrado/sampling/parallel_evaluation.py | 8 +- Pyrado/tests/test_domain_randomization.py | 51 +++ requirements.txt | 1 + setup_deps.py | 34 +- thirdParty/mujoco-py | 1 - 19 files changed, 355 insertions(+), 314 deletions(-) create mode 100644 .python-version delete mode 160000 thirdParty/mujoco-py diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml index 87d2bc0b8cd..c8fea89bcd7 100644 --- a/.github/.pre-commit-config.yaml +++ b/.github/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.8.0 + rev: 5.12.0 hooks: - id: isort name: isort (python) @@ -11,7 +11,6 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 23.10.0 hooks: - id: black - language_version: python3.7 diff --git a/.gitignore b/.gitignore index adf9265e48f..ee1aa7a5c17 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,6 @@ MUJOCO_LOG.* ### sbi-logs Pyrado/sbi-logs/ Pyrado/tests/sbi-logs/ + +### pre-commit local/symlink +.pre-commit-config.yaml diff --git a/.gitmodules b/.gitmodules index 5b7a8e3399a..fb5900aea92 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "thirdParty/catch2"] path = thirdParty/catch2 url = https://github.com/catchorg/Catch2.git -[submodule "thirdParty/mujoco-py"] - path = thirdParty/mujoco-py - url = https://github.com/openai/mujoco-py.git [submodule "thirdParty/RenderPipeline"] path = thirdParty/RenderPipeline url = https://github.com/famura/RenderPipeline.git diff --git a/.python-version b/.python-version new file mode 100644 index 00000000000..475ba515c04 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.7 diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index ad8424f6bc6..7cd3d99573b 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -28,7 +28,7 @@ import os.path from csv import DictWriter -from typing import Iterator, Optional, Tuple +from typing import Callable, Iterator, List, Optional, Tuple import numpy as np import torch as to @@ -283,6 +283,16 @@ def sample_count(self) -> int: # Forward to subroutine return self._subrtn.sample_count + @property + def dim(self) -> int: + return self._spl_parameter.target_mean.shape[0] + + @property + def subrtn_sampler(self) -> RolloutSavingWrapper: + # It is checked in the constructor that the sampler is a RolloutSavingWrapper. + # noinspection PyTypeChecker + return self._subrtn.sampler + def step(self, snapshot_mode: str, meta_info: dict = None): """ Perform a step of SPDR. This includes training the subroutine and updating the context distribution accordingly. 
@@ -290,30 +300,12 @@ def step(self, snapshot_mode: str, meta_info: dict = None): """ self.save_snapshot() - context_mean = self._spl_parameter.context_mean.double() - context_cov = self._spl_parameter.context_cov.double() - context_cov_chol = self._spl_parameter.context_cov_chol.double() - target_mean = self._spl_parameter.target_mean.double() - target_cov_chol = self._spl_parameter.target_cov_chol.double() - # Add these keys to the logger as dummy values. + self.logger.add_value("sprl number of particles", 0) self.logger.add_value("spdr constraint kl", 0.0) self.logger.add_value("spdr constraint performance", 0.0) self.logger.add_value("spdr objective", 0.0) - for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): - for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): - self.logger.add_value( - f"context cov for {param_a_name}--{param_b_name}", context_cov[param_a_idx, param_b_idx].item() - ) - self.logger.add_value( - f"context cov_chol for {param_a_name}--{param_b_name}", - context_cov_chol[param_a_idx, param_b_idx].item(), - ) - if param_a_name == param_b_name: - self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) - break - - dim = context_mean.shape[0] + self._log_context_distribution() # If we are in the first iteration and have a bad performance, # we want to completely reset the policy if training is unsuccessful @@ -324,173 +316,199 @@ def step(self, snapshot_mode: str, meta_info: dict = None): self._train_subroutine_and_evaluate_perf )(snapshot_mode, meta_info, reset_policy) - # Update distribution - previous_distribution = MultivariateNormalWrapper(context_mean, context_cov_chol) - target_distribution = MultivariateNormalWrapper(target_mean, target_cov_chol) + previous_distribution = MultivariateNormalWrapper( + self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double() + ) + target_distribution = MultivariateNormalWrapper( + self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double() + ) - def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: - domain_param_dict = ro.rollout_info["domain_param"] - untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX - if untransformed_param_name in domain_param_dict: - return domain_param_dict[untransformed_param_name] - return domain_param_dict[param_name] + proposal_rollouts = self._sample_proposal_rollouts() + contexts, contexts_old_log_prob, values = self._extract_particles(proposal_rollouts, previous_distribution) - rollouts_all = self._get_sampler().rollouts - contexts = to.tensor( - [ - [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] - for name in self._spl_parameter.name - ], - requires_grad=True, - ).T + # Define the SPRL optimization problem + kl_constraint = self._make_kl_constraint(previous_distribution, self._kl_constraints_ub) + performance_constraint = self._make_performance_constraint( + contexts, contexts_old_log_prob, values, self._performance_lower_bound + ) + constraints = [kl_constraint, performance_constraint] + objective_fn = self._make_objective_fn(target_distribution) + x0 = previous_distribution.get_stacked() + minimize_args = dict( + fun=objective_fn, + x0=x0, + method="trust-constr", + jac=True, + constraints=constraints, + options={"gtol": 1e-4, "xtol": 1e-6}, + # bounds=bounds, + ) - contexts_old_log_prob = previous_distribution.distribution.log_prob(contexts.double()) - 
# kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, target_distribution.distribution) + print("Performing SPDR update.") + try: + result = minimize(**minimize_args) + new_x = result.x + + # Reset parameters if optimization was not successful + if not result.success: + # If optimization process was not a success + old_f = objective_fn(previous_distribution.get_stacked())[0] + constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) + + # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) + std_ok = True + + update_successful = constraints_satisfied and std_ok and result.fun < old_f + if not update_successful: + print(f"Update unsuccessful, keeping old SPDR parameters.") + new_x = x0 + except ValueError as e: + print(f"Update failed with error, keeping old SPDR parameters.", e) + new_x = x0 + + self._adapt_parameters(new_x) + + # we can't use the stored values here as new_x might not be result.x + self.logger.add_value("spdr constraint kl", kl_constraint.fun(new_x)) + self.logger.add_value("spdr constraint performance", performance_constraint.fun(new_x)) + self.logger.add_value("spdr objective", objective_fn(new_x)[0]) - values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) + def reset(self, seed: int = None): + # Forward to subroutine + self._subrtn.reset(seed) + self.subrtn_sampler.reset_rollouts() + + def save_snapshot(self, meta_info: dict = None): + self.subrtn_sampler.reset_rollouts() + super().save_snapshot(meta_info) + + if meta_info is None: + # This algorithm instance is not a subroutine of another algorithm + self._subrtn.save_snapshot(meta_info) - constraints = [] + def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: + env, policy, extra = super().load_snapshot(parsed_args) + + # Algorithm specific + if isinstance(self._subrtn, ActorCritic): + ex_dir = self._save_dir or getattr(parsed_args, "dir", None) + extra["vfcn"] = pyrado.load( + f"{parsed_args.vfcn_name}.pt", ex_dir, obj=self._subrtn.critic.vfcn, verbose=True + ) + + return env, policy, extra + + def _make_objective_fn( + self, target_distribution: MultivariateNormalWrapper + ) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: + def objective_fn(x): + """Tries to find the minimum kl divergence between the current and the update distribution, which + still satisfies the minimum update constraint and the performance constraint.""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) + grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) + + return ( + kl_divergence.detach().numpy().item(), + np.concatenate([g.detach().numpy() for g in grads]), + ) + return objective_fn + + def _make_kl_constraint( + self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float + ) -> NonlinearConstraint: def kl_constraint_fn(x): """Compute the constraint for the KL-divergence between current and proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) kl_divergence = to.distributions.kl_divergence( previous_distribution.distribution, distribution.distribution ) - return kl_divergence.detach().numpy() + return kl_divergence.detach().numpy().item() def kl_constraint_fn_prime(x): """Compute the derivative for 
the KL-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) kl_divergence = to.distributions.kl_divergence( previous_distribution.distribution, distribution.distribution ) grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) - constraints.append( - NonlinearConstraint( - fun=kl_constraint_fn, - lb=-np.inf, - ub=self._kl_constraints_ub, - jac=kl_constraint_fn_prime, - # keep_feasible=True, - ) + return NonlinearConstraint( + fun=kl_constraint_fn, + lb=-np.inf, + ub=kl_constraint_ub, + jac=kl_constraint_fn_prime, ) + def _make_performance_constraint( + self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float + ) -> NonlinearConstraint: def performance_constraint_fn(x): """Compute the constraint for the expected performance under the proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) - return performance.detach().numpy() + return performance.detach().numpy().item() def performance_constraint_fn_prime(x): """Compute the derivative for the performance-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) grads = to.autograd.grad(performance, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) - constraints.append( - NonlinearConstraint( - fun=performance_constraint_fn, - lb=self._performance_lower_bound, - ub=np.inf, - jac=performance_constraint_fn_prime, - # keep_feasible=True, - ) + return NonlinearConstraint( + fun=performance_constraint_fn, + lb=performance_lower_bound, + ub=np.inf, + jac=performance_constraint_fn_prime, ) - # # Clip the bounds of the new variance either if the applied covariance transformation does not ensure - # # non-negativity or when the KL threshold has been crossed. - # bounds = None - # x0, _, x0_cov_indices = previous_distribution.get_stacked() - # if self._cov_transformation.ensures_non_negativity(): - # lower_bound = -np.inf * np.ones_like(x0) - # lower_bound_is_inf = True - # else: - # lower_bound = np.zeros_like(x0) - # lower_bound_is_inf = False - # if self._kl_threshold != -np.inf and (self._kl_threshold < kl_divergence): - # if x0_cov_indices is not None and self._var_lower_bound is not None: - # # Further clip the x values if a standard deviation lower bound was set. - # lower_bound[dim:] = self._var_lower_bound - # lower_bound_is_inf = False - # if not lower_bound_is_inf: - # # Only set the bounds if the lower bound is not negative infinity. Makes it easier for the optimizer. 
- # upper_bound = np.ones_like(x0) * np.inf - # bounds = Bounds(lb=lower_bound, ub=upper_bound, keep_feasible=True) - # x0 = np.clip(x0, bounds.lb, bounds.ub) - - # We now optimize based on the kl-divergence between target and context distribution by minimizing it - def objective_fn(x): - """Tries to find the minimum kl divergence between the current and the update distribution, which - still satisfies the minimum update constraint and the performance constraint.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) - grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) - - return ( - kl_divergence.detach().numpy(), - np.concatenate([g.detach().numpy() for g in grads]), - ) - - x0 = previous_distribution.get_stacked() - - print("Performing SPDR update.") - # noinspection PyTypeChecker - result = minimize( - objective_fn, - x0, - method="trust-constr", - jac=True, - constraints=constraints, - options={"gtol": 1e-4, "xtol": 1e-6}, - # bounds=bounds, - ) - new_x = result.x - if not result.success: - # If optimization process was not a success - old_f = objective_fn(previous_distribution.get_stacked())[0] - constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) - - # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) - std_ok = True - - if not (constraints_satisfied and std_ok and result.fun < old_f): - print(f"Update unsuccessful, keeping old values spl parameters.") - new_x = x0 - - self._adapt_parameters(dim, new_x) - self.logger.add_value("spdr constraint kl", kl_constraint_fn(new_x).item()) - self.logger.add_value("spdr constraint performance", performance_constraint_fn(new_x).item()) - self.logger.add_value("spdr objective", objective_fn(new_x)[0].item()) - - def reset(self, seed: int = None): - # Forward to subroutine - self._subrtn.reset(seed) - self._get_sampler().reset_rollouts() - - def save_snapshot(self, meta_info: dict = None): - self._get_sampler().reset_rollouts() - super().save_snapshot(meta_info) - - if meta_info is None: - # This algorithm instance is not a subroutine of another algorithm - self._subrtn.save_snapshot(meta_info) + def _log_context_distribution(self): + context_mean = self._spl_parameter.context_mean.double() + context_cov = self._spl_parameter.context_cov.double() + context_cov_chol = self._spl_parameter.context_cov_chol.double() + for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): + for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): + self.logger.add_value( + f"context cov for {param_a_name}--{param_b_name}", + context_cov[param_a_idx, param_b_idx].item(), + ) + self.logger.add_value( + f"context cov_chol for {param_a_name}--{param_b_name}", + context_cov_chol[param_a_idx, param_b_idx].item(), + ) + if param_a_name == param_b_name: + self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) + break - def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: - env, policy, extra = super().load_snapshot(parsed_args) + def _sample_proposal_rollouts(self) -> List[List[StepSequence]]: + return self.subrtn_sampler.rollouts - # Algorithm specific - if isinstance(self._subrtn, ActorCritic): - ex_dir = self._save_dir or getattr(parsed_args, "dir", None) - extra["vfcn"] = pyrado.load( - f"{parsed_args.vfcn_name}.pt", ex_dir, obj=self._subrtn.critic.vfcn, 
verbose=True - ) + def _extract_particles( + self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper + ) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: + def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: + domain_param_dict = ro.rollout_info["domain_param"] + untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX + if untransformed_param_name in domain_param_dict: + return domain_param_dict[untransformed_param_name] + return domain_param_dict[param_name] - return env, policy, extra + contexts = to.tensor( + [ + [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] + for name in self._spl_parameter.name + ], + requires_grad=True, + ).T + self.logger.add_value("sprl number of particles", contexts.shape[0]) + contexts_log_prob = distribution.distribution.log_prob(contexts.double()) + values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) + return contexts, contexts_log_prob, values + # noinspection PyMethodMayBeStatic def _compute_expected_performance( self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor ) -> to.Tensor: @@ -498,10 +516,10 @@ def _compute_expected_performance( context_ratio = to.exp(distribution.distribution.log_prob(context) - old_log_prop) return to.mean(context_ratio * values) - def _adapt_parameters(self, dim: int, result: np.ndarray) -> None: + def _adapt_parameters(self, result: np.ndarray) -> None: """Update the parameters of the distribution based on the result of the optimization step and the general algorithm settings.""" - context_distr = MultivariateNormalWrapper.from_stacked(dim, result) + context_distr = MultivariateNormalWrapper.from_stacked(self.dim, result) self._spl_parameter.adapt("context_mean", context_distr.mean) self._spl_parameter.adapt("context_cov_chol", context_distr.cov_chol) @@ -520,10 +538,5 @@ def _train_subroutine_and_evaluate_perf( self._subrtn.reset() self._subrtn.train(snapshot_mode, None, meta_info) - rollouts_all = self._get_sampler().rollouts + rollouts_all = self.subrtn_sampler.rollouts return np.median([[ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]]).item() - - def _get_sampler(self) -> RolloutSavingWrapper: - # It is checked in the constructor that the sampler is a RolloutSavingWrapper. - # noinspection PyTypeChecker - return self._subrtn.sampler diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 0486215bf5c..02eeb067501 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -393,6 +393,43 @@ def make_broadening( clip_up=clip_up, ) + @staticmethod + def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1.0, init_cov_factor=1 / 100): + """ + Creates a self-paced domain parameter having the same initial and target mean and target variance given by the domain randomizer's variance (scaled by `target_cov_factor`). The initial variance is also given by the domain randomizer's variance (scaled by `init_cov_factor`). 
+ + :param domain_randomizer: randomizer to grab the data from + :param target_cov_factor: scaling of the randomizer's variance to get the target variance; defaults to `1` + :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` + :return: the self-paced domain parameter + """ + name = [] + target_mean = [] + target_cov_flat = [] + init_mean = [] + init_cov_flat = [] + for domain_param in domain_randomizer.domain_params: + if not isinstance(domain_param, NormalDomainParam): + raise pyrado.TypeErr( + given=domain_param, + expected_type=NormalDomainParam, + msg="each domain_param must be a NormalDomainParam", + ) + name.append(domain_param.name) + target_mean.append(domain_param.mean) + target_cov_flat.append(target_cov_factor * domain_param.std**2) + init_mean.append(domain_param.mean) + init_cov_flat.append(init_cov_factor * domain_param.std**2) + return SelfPacedDomainParam( + name=name, + target_mean=to.tensor(target_mean), + target_cov_flat=to.tensor(target_cov_flat), + init_mean=to.tensor(init_mean), + init_cov_flat=to.tensor(init_cov_flat), + clip_lo=-pyrado.inf, + clip_up=+pyrado.inf, + ) + @property def target_distr(self) -> MultivariateNormal: """Get the target distribution.""" @@ -413,6 +450,17 @@ def context_cov(self) -> to.Tensor: """Get the current covariance matrix.""" return self.context_cov_chol @ self.context_cov_chol.T + def info(self) -> dict: + return { + "name": self.name, + "target_mean": self.target_mean, + "target_cov_chol": self.target_cov_chol, + "init_mean": self.init_mean, + "init_cov_chol": self.init_cov_chol, + "clip_lo": self.clip_lo, + "clip_up": self.clip_up, + } + def adapt(self, domain_distr_param: str, domain_distr_param_value: to.Tensor): """ Update this domain parameter. diff --git a/Pyrado/pyrado/environments/mujoco/__init__.py b/Pyrado/pyrado/environments/mujoco/__init__.py index 1a9b6925814..94590e58b4d 100644 --- a/Pyrado/pyrado/environments/mujoco/__init__.py +++ b/Pyrado/pyrado/environments/mujoco/__init__.py @@ -26,35 +26,8 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -import os import pyrado -from pyrado.utils.input_output import print_cbt -try: - import mujoco_py -except (ImportError, Exception): - # The ImportError is raised if mujoco-py is simply not installed - # The Exception catches the case that you have everything installed properly but your IDE does not set the - # LD_LIBRARY_PATH correctly (happens for PyCharm & CLion). To check this, try to run your script from the terminal. - ld_library_path = os.environ.get("LD_LIBRARY_PATH") - ld_preload = os.environ.get("LD_PRELOAD") - print_cbt( - "You are trying to use are MuJoCo-based environment, but the required mujoco_py module can not be imported.\n" - "Try adding\n" - "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/$USER/.mujoco/mujoco200/bin\n" - "export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so\n" - "to your shell's rc-file.\n" - "The current values of the environment variables are:\n" - f"LD_LIBRARY_PATH={ld_library_path}\n" - f"LD_PRELOAD={ld_preload}" - "If you are using PyCharm or CLion, also add the environment variables above to your run configurations. 
" - "Note that the IDE will not resolve $USER for some reason, so enter the user name directly, " - "or run it from your terminal.\n\n" - "Here comes the mujoco-py error message:\n\n", - "r", - ) - pyrado.mujoco_loaded = False -else: - pyrado.mujoco_loaded = True +pyrado.mujoco_loaded = True diff --git a/Pyrado/pyrado/environments/mujoco/base.py b/Pyrado/pyrado/environments/mujoco/base.py index 6f19c2b9a3b..19e3b27df6e 100644 --- a/Pyrado/pyrado/environments/mujoco/base.py +++ b/Pyrado/pyrado/environments/mujoco/base.py @@ -31,10 +31,10 @@ from math import floor from typing import Optional -import mujoco_py +import mujoco +import mujoco.viewer import numpy as np from init_args_serializer import Serializable -from mujoco_py.generated.const import RND_FOG import pyrado from pyrado.environments.sim_base import SimEnv @@ -87,7 +87,7 @@ def __init__( xml_model_temp = file_raw.read() xml_model_temp = self._adapt_model_file(xml_model_temp, self.domain_param) # Create a dummy model to extract the solver's time step size - model_tmp = mujoco_py.load_model_from_xml(xml_model_temp) + model_tmp = mujoco.MjModel.from_xml_path(xml_model_temp) frame_skip = dt / model_tmp.opt.timestep if frame_skip.is_integer(): self.frame_skip = int(frame_skip) @@ -112,8 +112,8 @@ def __init__( super().__init__(dt=self.model.opt.timestep * self.frame_skip, max_steps=max_steps) # Memorize the initial states of the model from the xml (for fixed init space or later reset) - self.init_qpos = self.sim.data.qpos.copy() - self.init_qvel = self.sim.data.qvel.copy() + self.init_qpos = self.data.qpos.copy() + self.init_qvel = self.data.qvel.copy() # Initialize space (to be overwritten in constructor of subclasses) self._init_space = None @@ -177,13 +177,7 @@ def domain_param(self, domain_param: dict): # Update MuJoCo model self._create_mujoco_model() - if self.viewer is not None: - # If the viewer already exists and we reset the domain parameters, we must also recreate the viewer since - # it references to the simulation object which get's reconstructed during _create_mujoco_model() - import glfw - - glfw.destroy_window(self.viewer.window) - self.viewer = None + self.viewer = None # Update task self._task = self._create_task(self.task_args) @@ -202,7 +196,7 @@ def _adapt_model_file(self, xml_model: str, domain_param: dict) -> str: :return: adapted model file where the placeholders are filled with numerical values """ # The mesh dir is not resolved when later passed as a string, thus we do it manually - xml_model = xml_model.replace(f"[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) + xml_model = xml_model.replace("[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) # Replace all occurrences of the domain parameter placeholder with its value for key, value in domain_param.items(): @@ -230,14 +224,14 @@ def _create_mujoco_model(self): xml_model = self._adapt_model_file(xml_model, self.domain_param) # Create MuJoCo model from parsed XML file - self.model = mujoco_py.load_model_from_xml(xml_model) - self.sim = mujoco_py.MjSim(self.model, nsubsteps=self.frame_skip) + self.model = mujoco.MjModel.from_xml_string(xml_model) + self.data = mujoco.MjData(self.model) def configure_viewer(self): """Configure the camera when the viewer is initialized. 
You need to set `self.camera_config` before.""" # Render a fog around the scene by default if self.camera_config.pop("render_fog", True): - self.viewer.scn.flags[RND_FOG] = 1 + self.viewer.scn.flags[mujoco.mjtRndFlag.mjRND_FOG] = 1 # Parse all other options for key, value in self.camera_config.items(): @@ -274,8 +268,8 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. self._task.reset(env_spec=self.spec, init_state=init_state.copy()) # Reset MuJoCo simulation model (only reset the joint configuration) - self.sim.reset() - old_state = self.sim.get_state() + mujoco.mj_resetData(self.model, self.data) + old_state = self.data nq = self.model.nq nv = self.model.nv if not init_state[:nq].shape == old_state.qpos.shape: # check joint positions dimension @@ -283,16 +277,9 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim if not init_state[nq : nq + nv].shape == old_state.qvel.shape: # check joint velocities dimension raise pyrado.ShapeErr(given=init_state[nq : nq + nv], expected_match=old_state.qvel) - new_state = mujoco_py.MjSimState( - # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim - old_state.time, - init_state[:nq], - init_state[nq : nq + nv], - old_state.act, - old_state.udd_state, - ) - self.sim.set_state(new_state) - self.sim.forward() + self.data.qpos[:] = np.copy(init_state[:nq]) + self.data.qvel[:] = np.copy(init_state[nq : nq + nv]) + mujoco.mj_forward(self.model, self.data) # Return an observation return self.observe(self.state) @@ -342,14 +329,7 @@ def render(self, mode: RenderMode = RenderMode(), render_step: int = 1): if mode.video: if self.viewer is None: # Create viewer if not existent (see 'human' mode of OpenAI Gym's MujocoEnv) - self.viewer = mujoco_py.MjViewer(self.sim) - - # Adjust window size and position to custom values - import glfw - - glfw.make_context_current(self.viewer.window) - glfw.set_window_size(self.viewer.window, 1280, 720) - glfw.set_window_pos(self.viewer.window, 50, 50) + self.viewer = mujoco.viewer.launch_passive(self.model, self.data) self.configure_viewer() - self.viewer.render() + self.viewer.sync() diff --git a/Pyrado/pyrado/environments/mujoco/openai_ant.py b/Pyrado/pyrado/environments/mujoco/openai_ant.py index d78dbc2d765..862948ecc42 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_ant.py +++ b/Pyrado/pyrado/environments/mujoco/openai_ant.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -98,9 +99,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate( - [self.sim.data.qpos.flat, self.sim.data.qvel.flat, self.sim.data.cfrc_ext.flat] - ).shape + state_shape = np.concatenate([self.data.qpos.flat, self.data.qvel.flat, self.data.cfrc_ext.flat]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -148,11 +147,11 @@ def contact_forces(self): return contact_forces def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.flat.copy() - vel = self.sim.data.qvel.flat.copy() + pos = self.data.qpos.flat.copy() + vel = self.data.qvel.flat.copy() cfrc_ext = self.contact_forces.flat.copy() self.state = np.concatenate([pos, vel, 
cfrc_ext]) diff --git a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py index f72288a1068..088698c8fc1 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py +++ b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -91,7 +92,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).shape + state_shape = np.concatenate([self.data.qpos, self.data.qvel]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -123,11 +124,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcn(self._dt, idx_fwd=0, **task_args)) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_hopper.py b/Pyrado/pyrado/environments/mujoco/openai_hopper.py index 57be4b61624..c1dd9e0b859 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_hopper.py +++ b/Pyrado/pyrado/environments/mujoco/openai_hopper.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -141,11 +142,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, rew_fcn) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py index 4de37cdc86f..7b56f64c1b5 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py +++ b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -84,10 +85,10 @@ def __init__( rest_shape = np.concatenate( [ - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape min_init_rest = -np.ones(rest_shape) @@ -102,12 +103,12 @@ def __init__( def state_space(self) -> Space: state_shape = np.concatenate( [ - self.sim.data.qpos.flat, - self.sim.data.qvel.flat, - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.qpos.flat, + self.data.qvel.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @@ -162,15 +163,15 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcnHumanoid(self._dt, **task_args)) def 
_mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() - - position = self.sim.data.qpos.flat.copy() - velocity = self.sim.data.qvel.flat.copy() - com_inertia = self.sim.data.cinert.flat.copy() - com_velocity = self.sim.data.cvel.flat.copy() - actuator_forces = self.sim.data.qfrc_actuator.flat.copy() - external_contact_forces = self.sim.data.cfrc_ext.flat.copy() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) + + position = self.data.qpos.flat.copy() + velocity = self.data.qvel.flat.copy() + com_inertia = self.data.cinert.flat.copy() + com_velocity = self.data.cvel.flat.copy() + actuator_forces = self.data.qfrc_actuator.flat.copy() + external_contact_forces = self.data.cfrc_ext.flat.copy() self.state = np.concatenate( [ diff --git a/Pyrado/pyrado/environments/mujoco/quanser_qube.py index a609f763555..20e136fb33e 100644 --- a/Pyrado/pyrado/environments/mujoco/quanser_qube.py +++ b/Pyrado/pyrado/environments/mujoco/quanser_qube.py @@ -30,9 +30,8 @@ from abc import abstractmethod from typing import Optional -import mujoco_py +import mujoco import numpy as np -import torch as to from init_args_serializer import Serializable import pyrado @@ -122,19 +121,13 @@ def _mujoco_step(self, act: np.ndarray) -> dict: ) # act is a scalar array, causing warning on later np.array construction # Apply the torques to the robot - self.sim.data.ctrl[:] = torque + self.data.ctrl[:] = torque # Call MuJoCo - try: - self.sim.step() - mjsim_crashed = False - except mujoco_py.builder.MujocoException: - # When MuJoCo recognized instabilities in the simulation, it simply kills it. - # Instead, we want the episode to end with a failure. - mjsim_crashed = True - - qpos = self.sim.data.qpos.copy() - qvel = self.sim.data.qvel.copy() + mujoco.mj_step(self.model, self.data) + + qpos = self.data.qpos.copy() + qvel = self.data.qvel.copy() self.state = np.concatenate([qpos, qvel]) # If state is out of bounds (this is normally checked by the task, but does not work because of the mask) @@ -143,7 +136,7 @@ def _mujoco_step(self, act: np.ndarray) -> dict: return dict( qpos=qpos, qvel=qvel, - failed=mjsim_crashed or state_oob, + failed=state_oob, ) def observe(self, state: np.ndarray) -> np.ndarray: diff --git a/Pyrado/pyrado/plotting/heatmap.py index 8f7e32ec054..fbf1049b8a3 100644 --- a/Pyrado/pyrado/plotting/heatmap.py +++ b/Pyrado/pyrado/plotting/heatmap.py @@ -34,7 +34,6 @@ from matplotlib import colors from matplotlib import pyplot as plt from matplotlib import ticker -from pandas.core.indexes.numeric import NumericIndex import pyrado from pyrado.plotting.utils import draw_sep_cbar @@ -191,10 +190,10 @@ def draw_heatmap( :return: handles to the heat map and the color bar figures (`None` if not existent) """ if isinstance(data, pd.DataFrame): - if not isinstance(data.index, NumericIndex): - raise pyrado.TypeErr(given=data.index, expected_type=NumericIndex) - if not isinstance(data.columns, NumericIndex): - raise pyrado.TypeErr(given=data.columns, expected_type=NumericIndex) + if not data.index.is_numeric(): + raise pyrado.TypeErr(given=data.index, msg="expected numeric index") + if not data.columns.is_numeric(): + raise pyrado.TypeErr(given=data.columns, msg="expected numeric index") # Extract the data x = data.columns y = data.index diff --git a/Pyrado/pyrado/sampling/parallel_evaluation.py index 728518d92bc..cfd31298243 100644 --- 
a/Pyrado/pyrado/sampling/parallel_evaluation.py +++ b/Pyrado/pyrado/sampling/parallel_evaluation.py @@ -106,7 +106,9 @@ def eval_domain_params( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: # we set the sub seed to zero since every run will have its personal sub sub seed - return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), params, pb) + return pool.run_map( + functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb + ) def eval_nominal_domain( @@ -128,7 +130,7 @@ def eval_nominal_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_randomized_domain( @@ -152,7 +154,7 @@ def eval_randomized_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_domain_params_with_segmentwise_reset( diff --git a/Pyrado/tests/test_domain_randomization.py b/Pyrado/tests/test_domain_randomization.py index 8809f6e4ac1..1b794090a01 100644 --- a/Pyrado/tests/test_domain_randomization.py +++ b/Pyrado/tests/test_domain_randomization.py @@ -33,16 +33,31 @@ import torch as to from tests.conftest import m_needs_bullet, m_needs_mujoco +import pyrado from pyrado.domain_randomization.domain_parameter import ( BernoulliDomainParam, MultivariateNormalDomainParam, NormalDomainParam, + SelfPacedDomainParam, UniformDomainParam, ) +from pyrado.domain_randomization.domain_randomizer import DomainRandomizer from pyrado.domain_randomization.utils import param_grid from pyrado.environments.sim_base import SimEnv +def assert_is_close(param, info_expected): + info_actual = param.info() + assert list(sorted(info_actual.keys())) == list(sorted(info_expected.keys())) + for key, expected in info_expected.items(): + if key in ["name", "clip_lo", "clip_up"]: + assert info_actual[key] == expected + else: + assert to.allclose( + info_actual[key], expected + ), f"key: {key}, actual: {info_actual[key]}, expected: {expected}" + + @pytest.mark.parametrize( "dp", [ @@ -66,6 +81,42 @@ def test_domain_param(dp, num_samples): assert len(s) == num_samples +def test_self_paced_domain_param_make_broadening(): + param = SelfPacedDomainParam.make_broadening(["a"], [1.0], 0.0004, 0.04) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + "clip_up": pyrado.inf, + }, + ) + + +def test_self_paced_domain_param_from_domain_randomizer(): + param = SelfPacedDomainParam.from_domain_randomizer( + DomainRandomizer(NormalDomainParam(name="a", mean=1.0, std=1.0)), + init_cov_factor=0.0004, + target_cov_factor=0.04, + ) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + 
"clip_up": pyrado.inf, + }, + ) + + def test_randomizer(default_randomizer): print(default_randomizer) # Generate 7 samples diff --git a/requirements.txt b/requirements.txt index 6b0c4970b34..48a4668e21d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ sphinx_rtd_theme tabulate tensorboard tqdm +mujoco \ No newline at end of file diff --git a/setup_deps.py b/setup_deps.py index 6fe7b8333ba..56076d4fc18 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -40,8 +40,6 @@ import zipfile from urllib.request import urlretrieve -import yaml - # Get the project's root directory project_dir = osp.dirname(osp.abspath(__file__)) @@ -138,11 +136,6 @@ ] # using --headless: conda install -c conda-forge bullet freetype libglu freeglut mesalib lapack -required_packages_mujocopy = [ - "libglew-dev", - "libosmesa6-dev", -] - env_vars = { # Global cmake prefix path "CMAKE_PREFIX_PATH": ":".join(cmake_prefix_path) @@ -292,7 +285,6 @@ def downloadAndExtract(url, destdir, archiveContentPath=None): # Taken from https://stackoverflow.com/a/43094365 def members(ml): subfolder = osp.normpath(archiveContentPath) - l = len(subfolder) for member in ml: # Skip directories in zip isdir = getattr(member, "is_dir", None) @@ -386,7 +378,7 @@ def setup_dep_libraries(): quiet = [] if not CI else ["-qq"] sp.check_call(["sudo", "apt-get"] + quiet + ["update", "-y"]) # Install dependencies - sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages + required_packages_mujocopy) + sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages) def setup_wm5(): @@ -553,20 +545,15 @@ def setup_render_pipeline(): sp.check_call([sys.executable, "setup.py"], cwd=osp.join(project_dir, "thirdParty", "RenderPipeline")) -def setup_mujoco_py(): - # Set up mujoco-py (doing it via pip caused problems on some machines) - sp.check_call([sys.executable, "setup.py", "install"], cwd=osp.join(project_dir, "thirdParty", "mujoco-py")) - - def setup_pytorch_based(): # Locally build PyTorch>=1.7.0 requires dataclasses (does not harm when using pytorch from pip) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "dataclasses"]) # Set up GPyTorch without touching the PyTorch installation (requires scikit-learn which requires threadpoolctl) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "threadpoolctl"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "scikit-learn"]) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "gpytorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "gpytorch"]) # Set up BoTorch without touching the PyTorch installation (requires gpytorch) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "botorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "botorch"]) # Set up Pyro without touching the PyTorch installation (requires opt-einsum) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "opt-einsum"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "pyro-api"]) @@ -675,13 +662,12 @@ def setup_wo_rcs_wo_pytorch(): print("\nStarting Option Red Velvet Setup\n") # Rcs will still be downloaded since it is a submodule setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() - print("\nWAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the 
--no-deps flag) are set up!\n") + print("\nWAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are set up!\n") def setup_wo_rcs_w_pytorch(): @@ -689,16 +675,12 @@ def setup_wo_rcs_w_pytorch(): # Rcs will still be downloaded since it is a submodule setup_pytorch() setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() - print( - "\nPyTorch, WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " - "set up!\n" - ) + print("\nPyTorch, WAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " "set up!\n") def setup_w_rcs_wo_pytorch(): @@ -712,14 +694,13 @@ def setup_w_rcs_wo_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, and Pyrado (with GPyTorch, BoTorch, and Pyro " + "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes and Pyrado (with GPyTorch, BoTorch, and Pyro " "using the --no-deps flag) are set up!\n" ) @@ -734,14 +715,13 @@ def setup_w_rcs_w_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and " + "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, Pyrado (with GPyTorch, BoTorch, and " "Pyro using the --no-deps flag) are set up!\n" ) diff --git a/thirdParty/mujoco-py b/thirdParty/mujoco-py deleted file mode 160000 index d73ce6e91d0..00000000000 --- a/thirdParty/mujoco-py +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d73ce6e91d096b74da2a2fcb0a4164e10db5f641
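Note: below is a minimal standalone sketch (not part of the patch) of the mujoco-py to mujoco API mapping that this refactoring applies in Pyrado/pyrado/environments/mujoco/base.py and the Gym-style environments. The toy MJCF model is invented purely for illustration; the mujoco calls (MjModel.from_xml_string, MjData, mj_resetData, mj_forward, mj_step) are the ones introduced by the diff above, with the replaced mujoco-py calls noted in comments.

import mujoco
import numpy as np

# Hypothetical toy MJCF model, only here to make the sketch self-contained.
TOY_MJCF = """
<mujoco>
  <option timestep="0.01"/>
  <worldbody>
    <body name="slider">
      <joint name="slide" type="slide" axis="1 0 0"/>
      <geom type="sphere" size="0.1" mass="1.0"/>
    </body>
  </worldbody>
  <actuator>
    <motor joint="slide"/>
  </actuator>
</mujoco>
"""

# mujoco-py: model = mujoco_py.load_model_from_xml(xml); sim = mujoco_py.MjSim(model)
model = mujoco.MjModel.from_xml_string(TOY_MJCF)
data = mujoco.MjData(model)

# mujoco-py: sim.reset(); sim.set_state(MjSimState(...)); sim.forward()
mujoco.mj_resetData(model, data)
data.qpos[:] = 0.0
data.qvel[:] = 0.0
mujoco.mj_forward(model, data)

# mujoco-py: sim.data.ctrl[:] = act; sim.step()
data.ctrl[:] = 0.5
for _ in range(10):
    mujoco.mj_step(model, data)

# mujoco-py: sim.data.qpos / sim.data.qvel
state = np.concatenate([data.qpos.copy(), data.qvel.copy()])
print(state)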