From 25345fce9f081d81eb959b7c4b70451ed81b39e8 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:43:36 +0200 Subject: [PATCH 01/22] Set Python version to 3.7 using PyEnv --- .python-version | 1 + 1 file changed, 1 insertion(+) create mode 100644 .python-version diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000..475ba515c0 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.7 From 5013bcf2b95b23cfb565aeec14bbcbcd39b6549c Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:52:21 +0200 Subject: [PATCH 02/22] Fix parallel evaluation --- Pyrado/pyrado/sampling/parallel_evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Pyrado/pyrado/sampling/parallel_evaluation.py b/Pyrado/pyrado/sampling/parallel_evaluation.py index 728518d92b..0c2fd71c71 100644 --- a/Pyrado/pyrado/sampling/parallel_evaluation.py +++ b/Pyrado/pyrado/sampling/parallel_evaluation.py @@ -106,7 +106,7 @@ def eval_domain_params( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: # we set the sub seed to zero since every run will have its personal sub sub seed - return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), params, pb) + return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb) def eval_nominal_domain( @@ -128,7 +128,7 @@ def eval_nominal_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_randomized_domain( @@ -152,7 +152,7 @@ def eval_randomized_domain( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: - return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), init_states, pb) + return pool.run_map(functools.partial(_ps_run_one_init_state, eval=True), list(enumerate(init_states)), pb) def eval_domain_params_with_segmentwise_reset( From 4b5603007e96e3978bbc386e26dfe4c521fb30b2 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:52:49 +0200 Subject: [PATCH 03/22] Remove non-existing Pandas import --- Pyrado/pyrado/plotting/heatmap.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Pyrado/pyrado/plotting/heatmap.py b/Pyrado/pyrado/plotting/heatmap.py index 8f7e32ec05..de2e3bd970 100644 --- a/Pyrado/pyrado/plotting/heatmap.py +++ b/Pyrado/pyrado/plotting/heatmap.py @@ -34,7 +34,6 @@ from matplotlib import colors from matplotlib import pyplot as plt from matplotlib import ticker -from pandas.core.indexes.numeric import NumericIndex import pyrado from pyrado.plotting.utils import draw_sep_cbar @@ -191,10 +190,6 @@ def draw_heatmap( :return: handles to the heat map and the color bar figures (`None` if not existent) """ if isinstance(data, pd.DataFrame): - if not isinstance(data.index, NumericIndex): - raise pyrado.TypeErr(given=data.index, expected_type=NumericIndex) - if not isinstance(data.columns, NumericIndex): - raise pyrado.TypeErr(given=data.columns, expected_type=NumericIndex) # Extract the data x = data.columns y = data.index From 3c5d16cdc398a8f13d206a69dfa45c5fd226f5ba Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:53:32 +0200 Subject: [PATCH 04/22] Add SPDR parameter factory --- .../domain_randomization/domain_parameter.py | 81 ++++++++++++++----- 1 file changed, 61 insertions(+), 20 deletions(-) diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 0486215bf5..5f150a052b 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -44,11 +44,11 @@ class DomainParam: """Class to store and manage (probably multiple) domain parameter a.k.a. physics parameter a.k.a. simulator parameter""" def __init__( - self, - name: Union[str, List[str]], - clip_lo: Optional[Union[int, float]] = -pyrado.inf, - clip_up: Optional[Union[int, float]] = pyrado.inf, - roundint: bool = False, + self, + name: Union[str, List[str]], + clip_lo: Optional[Union[int, float]] = -pyrado.inf, + clip_up: Optional[Union[int, float]] = pyrado.inf, + roundint: bool = False, ): """ Constructor, also see the constructor of DomainRandomizer. @@ -97,7 +97,7 @@ def adapt(self, domain_distr_param: str, domain_distr_param_value: Union[float, if domain_distr_param not in self.get_field_names(): raise pyrado.KeyErr( msg=f"The domain parameter {self.name} does not have a domain distribution parameter " - f"called {domain_distr_param}!" + f"called {domain_distr_param}!" ) setattr(self, domain_distr_param, domain_distr_param_value) @@ -314,14 +314,14 @@ def sample(self, num_samples: int = 1) -> List[to.Tensor]: class SelfPacedDomainParam(DomainParam): def __init__( - self, - name: List[str], - target_mean: to.Tensor, - target_cov_flat: to.Tensor, - init_mean: to.Tensor, - init_cov_flat: to.Tensor, - clip_lo: float, - clip_up: float, + self, + name: List[str], + target_mean: to.Tensor, + target_cov_flat: to.Tensor, + init_mean: to.Tensor, + init_cov_flat: to.Tensor, + clip_lo: float, + clip_up: float, ): """ Constructor @@ -362,12 +362,12 @@ def get_field_names(self) -> Sequence[str]: @staticmethod def make_broadening( - name: List[str], - mean: List[float], - init_cov_portion: float = 0.001, - target_cov_portion: float = 0.1, - clip_lo: float = -pyrado.inf, - clip_up: float = pyrado.inf, + name: List[str], + mean: List[float], + init_cov_portion: float = 0.001, + target_cov_portion: float = 0.1, + clip_lo: float = -pyrado.inf, + clip_up: float = pyrado.inf, ) -> "SelfPacedDomainParam": """ Creates a self-paced domain parameter having the same initial and target mean, but a larger variance on the @@ -393,6 +393,35 @@ def make_broadening( clip_up=clip_up, ) + @staticmethod + def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1., init_cov_factor=1 / 100): + """ + Creates a self-paced domain parameter having the same initial and target mean and target variance given by the domain randomizer's variance (scaled by `target_cov_factor`). The initial variance is also given by the domain randomizer's variance (scaled by `init_cov_factor`). + + :param domain_randomizer: randomizer to grab the data from + :param target_cov_factor: scaling of the randomizer's variance to get the target variance; defaults to `1` + :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` + :return: the self-paced domain parameter + """ + name, target_mean, target_cov_flat, init_mean, init_cov_flat, = [], [], [], [], [] + for domain_param in domain_randomizer.domain_params: + if not isinstance(domain_param, NormalDomainParam): + raise pyrado.TypeErr(given=domain_param, expected_type=NormalDomainParam, msg="each domain_param must be a NormalDomainParam") + name.append(domain_param.name) + target_mean.append(domain_param.mean) + target_cov_flat.append(target_cov_factor * domain_param.std ** 2) + init_mean.append(domain_param.mean) + init_cov_flat.append(init_cov_factor * domain_param.std ** 2) + return SelfPacedDomainParam( + name=name, + target_mean=to.tensor(target_mean), + target_cov_flat=to.tensor(target_cov_flat), + init_mean=to.tensor(init_mean), + init_cov_flat=to.tensor(init_cov_flat), + clip_lo=-pyrado.inf, + clip_up=+pyrado.inf, + ) + @property def target_distr(self) -> MultivariateNormal: """Get the target distribution.""" @@ -413,6 +442,18 @@ def context_cov(self) -> to.Tensor: """Get the current covariance matrix.""" return self.context_cov_chol @ self.context_cov_chol.T + def info(self) -> dict: + "" + return { + "name": self.name, + "target_mean": self.target_mean, + "target_cov_chol": self.target_cov_chol, + "init_mean": self.init_mean, + "init_cov_chol": self.init_cov_chol, + "clip_lo": self.clip_lo, + "clip_up": self.clip_up, + } + def adapt(self, domain_distr_param: str, domain_distr_param_value: to.Tensor): """ Update this domain parameter. From dfd13f1b047fc8fcc32a6a001d44ea59c04d4d64 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:53:39 +0200 Subject: [PATCH 05/22] Add logging of number of particles --- Pyrado/pyrado/algorithms/meta/spdr.py | 52 +++++++++++++++------------ 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index ad8424f6bc..ae1cc8342e 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -297,6 +297,7 @@ def step(self, snapshot_mode: str, meta_info: dict = None): target_cov_chol = self._spl_parameter.target_cov_chol.double() # Add these keys to the logger as dummy values. + self.logger.add_value("sprl number of particles", 0) self.logger.add_value("spdr constraint kl", 0.0) self.logger.add_value("spdr constraint performance", 0.0) self.logger.add_value("spdr objective", 0.0) @@ -344,6 +345,8 @@ def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: requires_grad=True, ).T + self.logger.add_value("sprl number of particles", contexts.shape[0]) + contexts_old_log_prob = previous_distribution.distribution.log_prob(contexts.double()) # kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, target_distribution.distribution) @@ -438,28 +441,33 @@ def objective_fn(x): x0 = previous_distribution.get_stacked() print("Performing SPDR update.") - # noinspection PyTypeChecker - result = minimize( - objective_fn, - x0, - method="trust-constr", - jac=True, - constraints=constraints, - options={"gtol": 1e-4, "xtol": 1e-6}, - # bounds=bounds, - ) - new_x = result.x - if not result.success: - # If optimization process was not a success - old_f = objective_fn(previous_distribution.get_stacked())[0] - constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) - - # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) - std_ok = True - - if not (constraints_satisfied and std_ok and result.fun < old_f): - print(f"Update unsuccessful, keeping old values spl parameters.") - new_x = x0 + try: + # noinspection PyTypeChecker + result = minimize( + objective_fn, + x0, + method="trust-constr", + jac=True, + constraints=constraints, + options={"gtol": 1e-4, "xtol": 1e-6}, + # bounds=bounds, + ) + new_x = result.x + if not result.success: + # If optimization process was not a success + old_f = objective_fn(previous_distribution.get_stacked())[0] + constraints_satisfied = all((const.lb <= const.fun(result.x) <= const.ub for const in constraints)) + + # std_ok = bounds is None or (np.all(bounds.lb <= result.x)) and np.all(result.x <= bounds.ub) + std_ok = True + + update_successful = constraints_satisfied and std_ok and result.fun < old_f + if not update_successful: + print(f"Update unsuccessful, keeping old SPDR parameters.") + new_x = x0 + except ValueError as e: + print(f"Update failed with error, keeping old SPDR parameters.", e) + new_x = x0 self._adapt_parameters(dim, new_x) self.logger.add_value("spdr constraint kl", kl_constraint_fn(new_x).item()) From 2a3451c19507c6d9f8de5de85ed769ee186bfe3a Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 14:54:26 +0200 Subject: [PATCH 06/22] Remove commented code --- Pyrado/pyrado/algorithms/meta/spdr.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index ae1cc8342e..7357c6c6cb 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -348,7 +348,6 @@ def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: self.logger.add_value("sprl number of particles", contexts.shape[0]) contexts_old_log_prob = previous_distribution.distribution.log_prob(contexts.double()) - # kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, target_distribution.distribution) values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) @@ -377,7 +376,6 @@ def kl_constraint_fn_prime(x): lb=-np.inf, ub=self._kl_constraints_ub, jac=kl_constraint_fn_prime, - # keep_feasible=True, ) ) @@ -400,31 +398,9 @@ def performance_constraint_fn_prime(x): lb=self._performance_lower_bound, ub=np.inf, jac=performance_constraint_fn_prime, - # keep_feasible=True, ) ) - # # Clip the bounds of the new variance either if the applied covariance transformation does not ensure - # # non-negativity or when the KL threshold has been crossed. - # bounds = None - # x0, _, x0_cov_indices = previous_distribution.get_stacked() - # if self._cov_transformation.ensures_non_negativity(): - # lower_bound = -np.inf * np.ones_like(x0) - # lower_bound_is_inf = True - # else: - # lower_bound = np.zeros_like(x0) - # lower_bound_is_inf = False - # if self._kl_threshold != -np.inf and (self._kl_threshold < kl_divergence): - # if x0_cov_indices is not None and self._var_lower_bound is not None: - # # Further clip the x values if a standard deviation lower bound was set. - # lower_bound[dim:] = self._var_lower_bound - # lower_bound_is_inf = False - # if not lower_bound_is_inf: - # # Only set the bounds if the lower bound is not negative infinity. Makes it easier for the optimizer. - # upper_bound = np.ones_like(x0) * np.inf - # bounds = Bounds(lb=lower_bound, ub=upper_bound, keep_feasible=True) - # x0 = np.clip(x0, bounds.lb, bounds.ub) - # We now optimize based on the kl-divergence between target and context distribution by minimizing it def objective_fn(x): """Tries to find the minimum kl divergence between the current and the update distribution, which From a4dc9d0d58dbf3086a3a4bfd40bf47bc2645cb8e Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 15 Aug 2023 15:22:28 +0200 Subject: [PATCH 07/22] Refactor SPDR for readability --- Pyrado/pyrado/algorithms/meta/spdr.py | 279 +++++++++++++------------- 1 file changed, 144 insertions(+), 135 deletions(-) diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index 7357c6c6cb..492439f6e7 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -28,7 +28,7 @@ import os.path from csv import DictWriter -from typing import Iterator, Optional, Tuple +from typing import Iterator, Optional, Tuple, List, Callable import numpy as np import torch as to @@ -283,6 +283,16 @@ def sample_count(self) -> int: # Forward to subroutine return self._subrtn.sample_count + @property + def dim(self) -> int: + return self._spl_parameter.target_mean.shape[0] + + @property + def subrtn_sampler(self) -> RolloutSavingWrapper: + # It is checked in the constructor that the sampler is a RolloutSavingWrapper. + # noinspection PyTypeChecker + return self._subrtn.sampler + def step(self, snapshot_mode: str, meta_info: dict = None): """ Perform a step of SPDR. This includes training the subroutine and updating the context distribution accordingly. @@ -290,31 +300,12 @@ def step(self, snapshot_mode: str, meta_info: dict = None): """ self.save_snapshot() - context_mean = self._spl_parameter.context_mean.double() - context_cov = self._spl_parameter.context_cov.double() - context_cov_chol = self._spl_parameter.context_cov_chol.double() - target_mean = self._spl_parameter.target_mean.double() - target_cov_chol = self._spl_parameter.target_cov_chol.double() - # Add these keys to the logger as dummy values. self.logger.add_value("sprl number of particles", 0) self.logger.add_value("spdr constraint kl", 0.0) self.logger.add_value("spdr constraint performance", 0.0) self.logger.add_value("spdr objective", 0.0) - for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): - for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): - self.logger.add_value( - f"context cov for {param_a_name}--{param_b_name}", context_cov[param_a_idx, param_b_idx].item() - ) - self.logger.add_value( - f"context cov_chol for {param_a_name}--{param_b_name}", - context_cov_chol[param_a_idx, param_b_idx].item(), - ) - if param_a_name == param_b_name: - self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) - break - - dim = context_mean.shape[0] + self._log_context_distribution() # If we are in the first iteration and have a bad performance, # we want to completely reset the policy if training is unsuccessful @@ -325,110 +316,34 @@ def step(self, snapshot_mode: str, meta_info: dict = None): self._train_subroutine_and_evaluate_perf )(snapshot_mode, meta_info, reset_policy) - # Update distribution - previous_distribution = MultivariateNormalWrapper(context_mean, context_cov_chol) - target_distribution = MultivariateNormalWrapper(target_mean, target_cov_chol) - - def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: - domain_param_dict = ro.rollout_info["domain_param"] - untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX - if untransformed_param_name in domain_param_dict: - return domain_param_dict[untransformed_param_name] - return domain_param_dict[param_name] + previous_distribution = MultivariateNormalWrapper(self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double()) + target_distribution = MultivariateNormalWrapper(self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double()) - rollouts_all = self._get_sampler().rollouts - contexts = to.tensor( - [ - [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] - for name in self._spl_parameter.name - ], - requires_grad=True, - ).T - - self.logger.add_value("sprl number of particles", contexts.shape[0]) - - contexts_old_log_prob = previous_distribution.distribution.log_prob(contexts.double()) - - values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) - - constraints = [] - - def kl_constraint_fn(x): - """Compute the constraint for the KL-divergence between current and proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - kl_divergence = to.distributions.kl_divergence( - previous_distribution.distribution, distribution.distribution - ) - return kl_divergence.detach().numpy() - - def kl_constraint_fn_prime(x): - """Compute the derivative for the KL-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - kl_divergence = to.distributions.kl_divergence( - previous_distribution.distribution, distribution.distribution - ) - grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) - return np.concatenate([g.detach().numpy() for g in grads]) - - constraints.append( - NonlinearConstraint( - fun=kl_constraint_fn, - lb=-np.inf, - ub=self._kl_constraints_ub, - jac=kl_constraint_fn_prime, - ) - ) - - def performance_constraint_fn(x): - """Compute the constraint for the expected performance under the proposed distribution.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) - return performance.detach().numpy() - - def performance_constraint_fn_prime(x): - """Compute the derivative for the performance-constraint (used for scipy optimizer).""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) - grads = to.autograd.grad(performance, list(distribution.parameters())) - return np.concatenate([g.detach().numpy() for g in grads]) - - constraints.append( - NonlinearConstraint( - fun=performance_constraint_fn, - lb=self._performance_lower_bound, - ub=np.inf, - jac=performance_constraint_fn_prime, - ) - ) - - # We now optimize based on the kl-divergence between target and context distribution by minimizing it - def objective_fn(x): - """Tries to find the minimum kl divergence between the current and the update distribution, which - still satisfies the minimum update constraint and the performance constraint.""" - distribution = MultivariateNormalWrapper.from_stacked(dim, x) - kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) - grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) - - return ( - kl_divergence.detach().numpy(), - np.concatenate([g.detach().numpy() for g in grads]), - ) + proposal_rollouts = self._sample_proposal_rollouts() + contexts, contexts_old_log_prob, values = self._extract_particles(proposal_rollouts, previous_distribution) + # Define the SPRL optimization problem + kl_constraint = self._make_kl_constraint(previous_distribution, self._kl_constraints_ub) + performance_constraint = self._make_performance_constraint(contexts, contexts_old_log_prob, values, self._performance_lower_bound) + constraints = [kl_constraint, performance_constraint] + objective_fn = self._make_objective_fn(target_distribution) x0 = previous_distribution.get_stacked() + minimize_args = dict( + fun=objective_fn, + x0=x0, + method="trust-constr", + jac=True, + constraints=constraints, + options={"gtol": 1e-4, "xtol": 1e-6}, + # bounds=bounds, + ) print("Performing SPDR update.") try: - # noinspection PyTypeChecker - result = minimize( - objective_fn, - x0, - method="trust-constr", - jac=True, - constraints=constraints, - options={"gtol": 1e-4, "xtol": 1e-6}, - # bounds=bounds, - ) + result = minimize(**minimize_args) new_x = result.x + + # Reset parameters if optimization was not successful if not result.success: # If optimization process was not a success old_f = objective_fn(previous_distribution.get_stacked())[0] @@ -445,18 +360,20 @@ def objective_fn(x): print(f"Update failed with error, keeping old SPDR parameters.", e) new_x = x0 - self._adapt_parameters(dim, new_x) - self.logger.add_value("spdr constraint kl", kl_constraint_fn(new_x).item()) - self.logger.add_value("spdr constraint performance", performance_constraint_fn(new_x).item()) - self.logger.add_value("spdr objective", objective_fn(new_x)[0].item()) + self._adapt_parameters(new_x) + + # we can't use the stored values here as new_x might not be result.x + self.logger.add_value("spdr constraint kl", kl_constraint.fun(new_x)) + self.logger.add_value("spdr constraint performance", performance_constraint.fun(new_x)) + self.logger.add_value("spdr objective", objective_fn(new_x)[0]) def reset(self, seed: int = None): # Forward to subroutine self._subrtn.reset(seed) - self._get_sampler().reset_rollouts() + self.subrtn_sampler.reset_rollouts() def save_snapshot(self, meta_info: dict = None): - self._get_sampler().reset_rollouts() + self.subrtn_sampler.reset_rollouts() super().save_snapshot(meta_info) if meta_info is None: @@ -475,17 +392,114 @@ def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: return env, policy, extra - def _compute_expected_performance( - self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor - ) -> to.Tensor: + def _make_objective_fn(self, target_distribution: MultivariateNormalWrapper) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: + def objective_fn(x): + """Tries to find the minimum kl divergence between the current and the update distribution, which + still satisfies the minimum update constraint and the performance constraint.""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + kl_divergence = to.distributions.kl_divergence(distribution.distribution, target_distribution.distribution) + grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) + + return ( + kl_divergence.detach().numpy().item(), + np.concatenate([g.detach().numpy() for g in grads]), + ) + + return objective_fn + + def _make_kl_constraint(self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float) -> NonlinearConstraint: + def kl_constraint_fn(x): + """Compute the constraint for the KL-divergence between current and proposed distribution.""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + return kl_divergence.detach().numpy().item() + + def kl_constraint_fn_prime(x): + """Compute the derivative for the KL-constraint (used for scipy optimizer).""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) + return np.concatenate([g.detach().numpy() for g in grads]) + + return NonlinearConstraint( + fun=kl_constraint_fn, + lb=-np.inf, + ub=kl_constraint_ub, + jac=kl_constraint_fn_prime, + ) + + def _make_performance_constraint(self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float) -> NonlinearConstraint: + def performance_constraint_fn(x): + """Compute the constraint for the expected performance under the proposed distribution.""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) + return performance.detach().numpy().item() + + def performance_constraint_fn_prime(x): + """Compute the derivative for the performance-constraint (used for scipy optimizer).""" + distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) + performance = self._compute_expected_performance(distribution, contexts, contexts_old_log_prob, values) + grads = to.autograd.grad(performance, list(distribution.parameters())) + return np.concatenate([g.detach().numpy() for g in grads]) + + return NonlinearConstraint( + fun=performance_constraint_fn, + lb=performance_lower_bound, + ub=np.inf, + jac=performance_constraint_fn_prime, + ) + + def _log_context_distribution(self): + context_mean = self._spl_parameter.context_mean.double() + context_cov = self._spl_parameter.context_cov.double() + context_cov_chol = self._spl_parameter.context_cov_chol.double() + for param_a_idx, param_a_name in enumerate(self._spl_parameter.name): + for param_b_idx, param_b_name in enumerate(self._spl_parameter.name): + self.logger.add_value( + f"context cov for {param_a_name}--{param_b_name}", + context_cov[param_a_idx, param_b_idx].item(), + ) + self.logger.add_value( + f"context cov_chol for {param_a_name}--{param_b_name}", + context_cov_chol[param_a_idx, param_b_idx].item(), + ) + if param_a_name == param_b_name: + self.logger.add_value(f"context mean for {param_a_name}", context_mean[param_a_idx].item()) + break + + def _sample_proposal_rollouts(self) -> List[List[StepSequence]]: + return self.subrtn_sampler.rollouts + + def _extract_particles(self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: + def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: + domain_param_dict = ro.rollout_info["domain_param"] + untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX + if untransformed_param_name in domain_param_dict: + return domain_param_dict[untransformed_param_name] + return domain_param_dict[param_name] + + contexts = to.tensor( + [ + [to.from_numpy(get_domain_param_value(ro, name)) for rollouts in rollouts_all for ro in rollouts] + for name in self._spl_parameter.name + ], + requires_grad=True, + ).T + self.logger.add_value("sprl number of particles", contexts.shape[0]) + contexts_log_prob = distribution.distribution.log_prob(contexts.double()) + values = to.tensor([ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]) + return contexts, contexts_log_prob, values + + # noinspection PyMethodMayBeStatic + def _compute_expected_performance(self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor) -> to.Tensor: """Calculate the expected performance after an update step.""" context_ratio = to.exp(distribution.distribution.log_prob(context) - old_log_prop) return to.mean(context_ratio * values) - def _adapt_parameters(self, dim: int, result: np.ndarray) -> None: + def _adapt_parameters(self, result: np.ndarray) -> None: """Update the parameters of the distribution based on the result of the optimization step and the general algorithm settings.""" - context_distr = MultivariateNormalWrapper.from_stacked(dim, result) + context_distr = MultivariateNormalWrapper.from_stacked(self.dim, result) self._spl_parameter.adapt("context_mean", context_distr.mean) self._spl_parameter.adapt("context_cov_chol", context_distr.cov_chol) @@ -504,10 +518,5 @@ def _train_subroutine_and_evaluate_perf( self._subrtn.reset() self._subrtn.train(snapshot_mode, None, meta_info) - rollouts_all = self._get_sampler().rollouts + rollouts_all = self.subrtn_sampler.rollouts return np.median([[ro.undiscounted_return() for rollouts in rollouts_all for ro in rollouts]]).item() - - def _get_sampler(self) -> RolloutSavingWrapper: - # It is checked in the constructor that the sampler is a RolloutSavingWrapper. - # noinspection PyTypeChecker - return self._subrtn.sampler From 1645b0bef8da863072b40004c774b73de4d91ba0 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Tue, 3 Oct 2023 17:53:17 -0400 Subject: [PATCH 08/22] virtualenv is all you need --- setup_deps.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup_deps.py b/setup_deps.py index de48f72d12..6386e9bd3e 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -111,10 +111,11 @@ resources_dir = osp.join(dependency_dir, "resources") # Global cmake prefix path -cmake_prefix_path = [ +cmake_prefix_path = [] +conda_prefix = os.getenv("CONDA_PREFIX") +if conda_prefix: # Anaconda env root directory - os.environ["CONDA_PREFIX"] -] + cmake_prefix_path.append(conda_prefix) # Required packages required_packages = [ From c5268700c4eecf468dcb05f44aaef30f8d1327b0 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 8 Oct 2023 10:36:22 -0400 Subject: [PATCH 09/22] apply black and isort --- Pyrado/pyrado/algorithms/episodic/predefined_lqr.py | 1 - Pyrado/pyrado/algorithms/meta/adr.py | 2 -- Pyrado/pyrado/algorithms/regression/timeseries_prediction.py | 1 - Pyrado/pyrado/algorithms/step_based/dql.py | 1 - Pyrado/pyrado/algorithms/step_based/gae.py | 1 - Pyrado/pyrado/algorithms/step_based/ppo.py | 2 -- Pyrado/pyrado/environments/rcspysim/planar_insert.py | 1 - Pyrado/pyrado/policies/feed_forward/poly_time.py | 4 +--- Pyrado/pyrado/sampling/parallel_rollout_sampler.py | 1 - Pyrado/pyrado/sampling/sequences.py | 5 +++++ .../evaluation/paper_specific/sob_illustrative_example.py | 1 - Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py | 1 - setup_deps.py | 2 -- 13 files changed, 6 insertions(+), 17 deletions(-) diff --git a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py index 48d8b5a8cc..3c773f90cd 100644 --- a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py +++ b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py @@ -102,7 +102,6 @@ def sampler(self, sampler: ParallelRolloutSampler): self._sampler = sampler def step(self, snapshot_mode: str, meta_info: dict = None): - if isinstance(inner_env(self._env), BallOnPlate5DSim): ctrl_gains = to.tensor( [ diff --git a/Pyrado/pyrado/algorithms/meta/adr.py b/Pyrado/pyrado/algorithms/meta/adr.py index 14d34f4a22..35eb386cb8 100644 --- a/Pyrado/pyrado/algorithms/meta/adr.py +++ b/Pyrado/pyrado/algorithms/meta/adr.py @@ -431,7 +431,6 @@ def __init__( logger: StepLogger = None, device: str = "cuda" if to.cuda.is_available() else "cpu", ): - """ Constructor @@ -472,7 +471,6 @@ def get_reward(self, traj: StepSequence) -> to.Tensor: def train( self, reference_trajectory: StepSequence, randomized_trajectory: StepSequence, num_epoch: int ) -> to.Tensor: - reference_batch_generator = reference_trajectory.iterate_rollouts() random_batch_generator = randomized_trajectory.iterate_rollouts() diff --git a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py index 11326aee1f..d33ea819c9 100644 --- a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py +++ b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py @@ -104,7 +104,6 @@ def __init__( self._lr_scheduler = lr_scheduler(self.optim, **lr_scheduler_hparam) def step(self, snapshot_mode: str, meta_info: dict = None): - # Feed one epoch of the training set to the policy if self.windowed: # Predict diff --git a/Pyrado/pyrado/algorithms/step_based/dql.py b/Pyrado/pyrado/algorithms/step_based/dql.py index ff38c66ab8..36ca2e6c6b 100644 --- a/Pyrado/pyrado/algorithms/step_based/dql.py +++ b/Pyrado/pyrado/algorithms/step_based/dql.py @@ -192,7 +192,6 @@ def update(self): file=sys.stdout, leave=False, ): - # Sample steps and the associated next step from the replay memory steps, next_steps = self._memory.sample(self.batch_size) steps.torch(data_type=to.get_default_dtype()) diff --git a/Pyrado/pyrado/algorithms/step_based/gae.py b/Pyrado/pyrado/algorithms/step_based/gae.py index 0f80a42f85..ac125f51d2 100644 --- a/Pyrado/pyrado/algorithms/step_based/gae.py +++ b/Pyrado/pyrado/algorithms/step_based/gae.py @@ -239,7 +239,6 @@ def update(self, rollouts: Sequence[StepSequence], use_empirical_returns: bool = # Iterate over all gathered samples num_epoch times for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, complete_rollouts=isinstance(self.vfcn, RecurrentPolicy) diff --git a/Pyrado/pyrado/algorithms/step_based/ppo.py b/Pyrado/pyrado/algorithms/step_based/ppo.py index 2c93f0ccd5..7f8d094318 100644 --- a/Pyrado/pyrado/algorithms/step_based/ppo.py +++ b/Pyrado/pyrado/algorithms/step_based/ppo.py @@ -176,7 +176,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches(self.batch_size, complete_rollouts=self._policy.is_recurrent), total=num_iter_from_rollouts(None, concat_ros, self.batch_size), @@ -412,7 +411,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, diff --git a/Pyrado/pyrado/environments/rcspysim/planar_insert.py b/Pyrado/pyrado/environments/rcspysim/planar_insert.py index 5dcedde15c..82e68b5431 100644 --- a/Pyrado/pyrado/environments/rcspysim/planar_insert.py +++ b/Pyrado/pyrado/environments/rcspysim/planar_insert.py @@ -75,7 +75,6 @@ class PlanarInsertSim(RcsSim, Serializable): """ def __init__(self, task_args: dict, collision_config: dict = None, max_dist_force: float = None, **kwargs): - """ Constructor diff --git a/Pyrado/pyrado/policies/feed_forward/poly_time.py b/Pyrado/pyrado/policies/feed_forward/poly_time.py index cf5eb2c9db..c01702774a 100644 --- a/Pyrado/pyrado/policies/feed_forward/poly_time.py +++ b/Pyrado/pyrado/policies/feed_forward/poly_time.py @@ -251,9 +251,7 @@ class TraceablePolySplineTimePolicy(nn.Module): t_init: float t_curr: float overtime_behavior: str - act_space_shape: Tuple[ - int, - ] + act_space_shape: Tuple[int,] act_space_flat_dim: int def __init__( diff --git a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py index 59006730c4..dbb9055862 100644 --- a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py +++ b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py @@ -276,7 +276,6 @@ def sample( disable=(not self.show_progress_bar), unit="steps" if self.min_steps is not None else "rollouts", ) as pb: - if self.min_steps is None: if init_states is None and domain_params is None: # Simply run min_rollouts times diff --git a/Pyrado/pyrado/sampling/sequences.py b/Pyrado/pyrado/sampling/sequences.py index bfa5d2be8f..de132f422a 100644 --- a/Pyrado/pyrado/sampling/sequences.py +++ b/Pyrado/pyrado/sampling/sequences.py @@ -99,6 +99,7 @@ def sequence_add_init(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * (i + 1) @@ -115,6 +116,7 @@ def sequence_rec_double(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * 2.0 @@ -131,6 +133,7 @@ def sequence_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -147,6 +150,7 @@ def sequence_rec_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -163,6 +167,7 @@ def sequence_nlog2(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * i * np.log2(i + 2) # i+2 because log2(1) = 0 and log2(2) = 1 diff --git a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py index e9ecfda363..c035c4671d 100755 --- a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py +++ b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py @@ -139,7 +139,6 @@ def check_E_n_Jhat(th_n_opt, n): b_Jhat_n_hist = np.empty((num_samples, num_iter)) for s in range(num_samples): - for n in range(1, num_iter + 1): n_V = np.random.binomial(n, psi) # perform n Bernoulli trials n_M = n - n_V diff --git a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py index 6991db4a80..2caccaf77d 100755 --- a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py +++ b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py @@ -138,7 +138,6 @@ def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int): if __name__ == "__main__": - # Parse command line arguments args = get_argparser().parse_args() diff --git a/setup_deps.py b/setup_deps.py index de48f72d12..290266e773 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -314,7 +314,6 @@ def members(ml): with tarfile.open(tf.name) as tar: def is_within_directory(directory, target): - abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) @@ -323,7 +322,6 @@ def is_within_directory(directory, target): return prefix == abs_directory def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - for member in tar.getmembers(): member_path = os.path.join(path, member.name) if not is_within_directory(path, member_path): From 54e7eb0d29a1636baf91735e432996bbaed0e2f8 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 8 Oct 2023 10:37:19 -0400 Subject: [PATCH 10/22] formatting --- Pyrado/pyrado/algorithms/meta/spdr.py | 42 +++++++++--- .../domain_randomization/domain_parameter.py | 68 ++++++++++++------- Pyrado/pyrado/sampling/parallel_evaluation.py | 4 +- 3 files changed, 76 insertions(+), 38 deletions(-) diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index 492439f6e7..7cd3d99573 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -28,7 +28,7 @@ import os.path from csv import DictWriter -from typing import Iterator, Optional, Tuple, List, Callable +from typing import Callable, Iterator, List, Optional, Tuple import numpy as np import torch as to @@ -316,15 +316,21 @@ def step(self, snapshot_mode: str, meta_info: dict = None): self._train_subroutine_and_evaluate_perf )(snapshot_mode, meta_info, reset_policy) - previous_distribution = MultivariateNormalWrapper(self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double()) - target_distribution = MultivariateNormalWrapper(self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double()) + previous_distribution = MultivariateNormalWrapper( + self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double() + ) + target_distribution = MultivariateNormalWrapper( + self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double() + ) proposal_rollouts = self._sample_proposal_rollouts() contexts, contexts_old_log_prob, values = self._extract_particles(proposal_rollouts, previous_distribution) # Define the SPRL optimization problem kl_constraint = self._make_kl_constraint(previous_distribution, self._kl_constraints_ub) - performance_constraint = self._make_performance_constraint(contexts, contexts_old_log_prob, values, self._performance_lower_bound) + performance_constraint = self._make_performance_constraint( + contexts, contexts_old_log_prob, values, self._performance_lower_bound + ) constraints = [kl_constraint, performance_constraint] objective_fn = self._make_objective_fn(target_distribution) x0 = previous_distribution.get_stacked() @@ -392,7 +398,9 @@ def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: return env, policy, extra - def _make_objective_fn(self, target_distribution: MultivariateNormalWrapper) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: + def _make_objective_fn( + self, target_distribution: MultivariateNormalWrapper + ) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: def objective_fn(x): """Tries to find the minimum kl divergence between the current and the update distribution, which still satisfies the minimum update constraint and the performance constraint.""" @@ -407,17 +415,23 @@ def objective_fn(x): return objective_fn - def _make_kl_constraint(self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float) -> NonlinearConstraint: + def _make_kl_constraint( + self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float + ) -> NonlinearConstraint: def kl_constraint_fn(x): """Compute the constraint for the KL-divergence between current and proposed distribution.""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) - kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + kl_divergence = to.distributions.kl_divergence( + previous_distribution.distribution, distribution.distribution + ) return kl_divergence.detach().numpy().item() def kl_constraint_fn_prime(x): """Compute the derivative for the KL-constraint (used for scipy optimizer).""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) - kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + kl_divergence = to.distributions.kl_divergence( + previous_distribution.distribution, distribution.distribution + ) grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) @@ -428,7 +442,9 @@ def kl_constraint_fn_prime(x): jac=kl_constraint_fn_prime, ) - def _make_performance_constraint(self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float) -> NonlinearConstraint: + def _make_performance_constraint( + self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float + ) -> NonlinearConstraint: def performance_constraint_fn(x): """Compute the constraint for the expected performance under the proposed distribution.""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) @@ -470,7 +486,9 @@ def _log_context_distribution(self): def _sample_proposal_rollouts(self) -> List[List[StepSequence]]: return self.subrtn_sampler.rollouts - def _extract_particles(self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: + def _extract_particles( + self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper + ) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: domain_param_dict = ro.rollout_info["domain_param"] untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX @@ -491,7 +509,9 @@ def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: return contexts, contexts_log_prob, values # noinspection PyMethodMayBeStatic - def _compute_expected_performance(self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor) -> to.Tensor: + def _compute_expected_performance( + self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor + ) -> to.Tensor: """Calculate the expected performance after an update step.""" context_ratio = to.exp(distribution.distribution.log_prob(context) - old_log_prop) return to.mean(context_ratio * values) diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 5f150a052b..4e91db4892 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -44,11 +44,11 @@ class DomainParam: """Class to store and manage (probably multiple) domain parameter a.k.a. physics parameter a.k.a. simulator parameter""" def __init__( - self, - name: Union[str, List[str]], - clip_lo: Optional[Union[int, float]] = -pyrado.inf, - clip_up: Optional[Union[int, float]] = pyrado.inf, - roundint: bool = False, + self, + name: Union[str, List[str]], + clip_lo: Optional[Union[int, float]] = -pyrado.inf, + clip_up: Optional[Union[int, float]] = pyrado.inf, + roundint: bool = False, ): """ Constructor, also see the constructor of DomainRandomizer. @@ -97,7 +97,7 @@ def adapt(self, domain_distr_param: str, domain_distr_param_value: Union[float, if domain_distr_param not in self.get_field_names(): raise pyrado.KeyErr( msg=f"The domain parameter {self.name} does not have a domain distribution parameter " - f"called {domain_distr_param}!" + f"called {domain_distr_param}!" ) setattr(self, domain_distr_param, domain_distr_param_value) @@ -314,14 +314,14 @@ def sample(self, num_samples: int = 1) -> List[to.Tensor]: class SelfPacedDomainParam(DomainParam): def __init__( - self, - name: List[str], - target_mean: to.Tensor, - target_cov_flat: to.Tensor, - init_mean: to.Tensor, - init_cov_flat: to.Tensor, - clip_lo: float, - clip_up: float, + self, + name: List[str], + target_mean: to.Tensor, + target_cov_flat: to.Tensor, + init_mean: to.Tensor, + init_cov_flat: to.Tensor, + clip_lo: float, + clip_up: float, ): """ Constructor @@ -362,12 +362,12 @@ def get_field_names(self) -> Sequence[str]: @staticmethod def make_broadening( - name: List[str], - mean: List[float], - init_cov_portion: float = 0.001, - target_cov_portion: float = 0.1, - clip_lo: float = -pyrado.inf, - clip_up: float = pyrado.inf, + name: List[str], + mean: List[float], + init_cov_portion: float = 0.001, + target_cov_portion: float = 0.1, + clip_lo: float = -pyrado.inf, + clip_up: float = pyrado.inf, ) -> "SelfPacedDomainParam": """ Creates a self-paced domain parameter having the same initial and target mean, but a larger variance on the @@ -394,7 +394,7 @@ def make_broadening( ) @staticmethod - def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1., init_cov_factor=1 / 100): + def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1.0, init_cov_factor=1 / 100): """ Creates a self-paced domain parameter having the same initial and target mean and target variance given by the domain randomizer's variance (scaled by `target_cov_factor`). The initial variance is also given by the domain randomizer's variance (scaled by `init_cov_factor`). @@ -403,15 +403,31 @@ def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1., init_cov_ :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` :return: the self-paced domain parameter """ - name, target_mean, target_cov_flat, init_mean, init_cov_flat, = [], [], [], [], [] + ( + name, + target_mean, + target_cov_flat, + init_mean, + init_cov_flat, + ) = ( + [], + [], + [], + [], + [], + ) for domain_param in domain_randomizer.domain_params: if not isinstance(domain_param, NormalDomainParam): - raise pyrado.TypeErr(given=domain_param, expected_type=NormalDomainParam, msg="each domain_param must be a NormalDomainParam") + raise pyrado.TypeErr( + given=domain_param, + expected_type=NormalDomainParam, + msg="each domain_param must be a NormalDomainParam", + ) name.append(domain_param.name) target_mean.append(domain_param.mean) - target_cov_flat.append(target_cov_factor * domain_param.std ** 2) + target_cov_flat.append(target_cov_factor * domain_param.std**2) init_mean.append(domain_param.mean) - init_cov_flat.append(init_cov_factor * domain_param.std ** 2) + init_cov_flat.append(init_cov_factor * domain_param.std**2) return SelfPacedDomainParam( name=name, target_mean=to.tensor(target_mean), @@ -443,7 +459,7 @@ def context_cov(self) -> to.Tensor: return self.context_cov_chol @ self.context_cov_chol.T def info(self) -> dict: - "" + """""" return { "name": self.name, "target_mean": self.target_mean, diff --git a/Pyrado/pyrado/sampling/parallel_evaluation.py b/Pyrado/pyrado/sampling/parallel_evaluation.py index 0c2fd71c71..cfd3129824 100644 --- a/Pyrado/pyrado/sampling/parallel_evaluation.py +++ b/Pyrado/pyrado/sampling/parallel_evaluation.py @@ -106,7 +106,9 @@ def eval_domain_params( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: # we set the sub seed to zero since every run will have its personal sub sub seed - return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb) + return pool.run_map( + functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb + ) def eval_nominal_domain( From 98dfea1b6732ba3cf99422e57115ea9983517239 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 8 Oct 2023 10:44:36 -0400 Subject: [PATCH 11/22] fix black and isort linting --- .github/workflows/linting.yml | 2 +- Pyrado/pyrado/algorithms/episodic/predefined_lqr.py | 1 - Pyrado/pyrado/algorithms/meta/adr.py | 2 -- Pyrado/pyrado/algorithms/regression/timeseries_prediction.py | 1 - Pyrado/pyrado/algorithms/step_based/dql.py | 1 - Pyrado/pyrado/algorithms/step_based/gae.py | 1 - Pyrado/pyrado/algorithms/step_based/ppo.py | 2 -- Pyrado/pyrado/environments/rcspysim/planar_insert.py | 1 - Pyrado/pyrado/policies/feed_forward/poly_time.py | 4 +--- Pyrado/pyrado/sampling/parallel_rollout_sampler.py | 1 - Pyrado/pyrado/sampling/sequences.py | 5 +++++ .../evaluation/paper_specific/sob_illustrative_example.py | 1 - Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py | 1 - setup_deps.py | 2 -- 14 files changed, 7 insertions(+), 18 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5f7851140a..abb95a7d05 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -12,7 +12,7 @@ jobs: - name: Run black uses: psf/black@stable with: - args: "--check Pyrado setup_deps.py RcsPySim/setup.py" + src: "Pyrado setup_deps.py RcsPySim/setup.py" isort: name: Checking Import Order with isort runs-on: ubuntu-latest diff --git a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py index 48d8b5a8cc..3c773f90cd 100644 --- a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py +++ b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py @@ -102,7 +102,6 @@ def sampler(self, sampler: ParallelRolloutSampler): self._sampler = sampler def step(self, snapshot_mode: str, meta_info: dict = None): - if isinstance(inner_env(self._env), BallOnPlate5DSim): ctrl_gains = to.tensor( [ diff --git a/Pyrado/pyrado/algorithms/meta/adr.py b/Pyrado/pyrado/algorithms/meta/adr.py index 14d34f4a22..35eb386cb8 100644 --- a/Pyrado/pyrado/algorithms/meta/adr.py +++ b/Pyrado/pyrado/algorithms/meta/adr.py @@ -431,7 +431,6 @@ def __init__( logger: StepLogger = None, device: str = "cuda" if to.cuda.is_available() else "cpu", ): - """ Constructor @@ -472,7 +471,6 @@ def get_reward(self, traj: StepSequence) -> to.Tensor: def train( self, reference_trajectory: StepSequence, randomized_trajectory: StepSequence, num_epoch: int ) -> to.Tensor: - reference_batch_generator = reference_trajectory.iterate_rollouts() random_batch_generator = randomized_trajectory.iterate_rollouts() diff --git a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py index 11326aee1f..d33ea819c9 100644 --- a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py +++ b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py @@ -104,7 +104,6 @@ def __init__( self._lr_scheduler = lr_scheduler(self.optim, **lr_scheduler_hparam) def step(self, snapshot_mode: str, meta_info: dict = None): - # Feed one epoch of the training set to the policy if self.windowed: # Predict diff --git a/Pyrado/pyrado/algorithms/step_based/dql.py b/Pyrado/pyrado/algorithms/step_based/dql.py index ff38c66ab8..36ca2e6c6b 100644 --- a/Pyrado/pyrado/algorithms/step_based/dql.py +++ b/Pyrado/pyrado/algorithms/step_based/dql.py @@ -192,7 +192,6 @@ def update(self): file=sys.stdout, leave=False, ): - # Sample steps and the associated next step from the replay memory steps, next_steps = self._memory.sample(self.batch_size) steps.torch(data_type=to.get_default_dtype()) diff --git a/Pyrado/pyrado/algorithms/step_based/gae.py b/Pyrado/pyrado/algorithms/step_based/gae.py index 0f80a42f85..ac125f51d2 100644 --- a/Pyrado/pyrado/algorithms/step_based/gae.py +++ b/Pyrado/pyrado/algorithms/step_based/gae.py @@ -239,7 +239,6 @@ def update(self, rollouts: Sequence[StepSequence], use_empirical_returns: bool = # Iterate over all gathered samples num_epoch times for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, complete_rollouts=isinstance(self.vfcn, RecurrentPolicy) diff --git a/Pyrado/pyrado/algorithms/step_based/ppo.py b/Pyrado/pyrado/algorithms/step_based/ppo.py index 2c93f0ccd5..7f8d094318 100644 --- a/Pyrado/pyrado/algorithms/step_based/ppo.py +++ b/Pyrado/pyrado/algorithms/step_based/ppo.py @@ -176,7 +176,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches(self.batch_size, complete_rollouts=self._policy.is_recurrent), total=num_iter_from_rollouts(None, concat_ros, self.batch_size), @@ -412,7 +411,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, diff --git a/Pyrado/pyrado/environments/rcspysim/planar_insert.py b/Pyrado/pyrado/environments/rcspysim/planar_insert.py index 5dcedde15c..82e68b5431 100644 --- a/Pyrado/pyrado/environments/rcspysim/planar_insert.py +++ b/Pyrado/pyrado/environments/rcspysim/planar_insert.py @@ -75,7 +75,6 @@ class PlanarInsertSim(RcsSim, Serializable): """ def __init__(self, task_args: dict, collision_config: dict = None, max_dist_force: float = None, **kwargs): - """ Constructor diff --git a/Pyrado/pyrado/policies/feed_forward/poly_time.py b/Pyrado/pyrado/policies/feed_forward/poly_time.py index cf5eb2c9db..c01702774a 100644 --- a/Pyrado/pyrado/policies/feed_forward/poly_time.py +++ b/Pyrado/pyrado/policies/feed_forward/poly_time.py @@ -251,9 +251,7 @@ class TraceablePolySplineTimePolicy(nn.Module): t_init: float t_curr: float overtime_behavior: str - act_space_shape: Tuple[ - int, - ] + act_space_shape: Tuple[int,] act_space_flat_dim: int def __init__( diff --git a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py index 59006730c4..dbb9055862 100644 --- a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py +++ b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py @@ -276,7 +276,6 @@ def sample( disable=(not self.show_progress_bar), unit="steps" if self.min_steps is not None else "rollouts", ) as pb: - if self.min_steps is None: if init_states is None and domain_params is None: # Simply run min_rollouts times diff --git a/Pyrado/pyrado/sampling/sequences.py b/Pyrado/pyrado/sampling/sequences.py index bfa5d2be8f..de132f422a 100644 --- a/Pyrado/pyrado/sampling/sequences.py +++ b/Pyrado/pyrado/sampling/sequences.py @@ -99,6 +99,7 @@ def sequence_add_init(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * (i + 1) @@ -115,6 +116,7 @@ def sequence_rec_double(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * 2.0 @@ -131,6 +133,7 @@ def sequence_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -147,6 +150,7 @@ def sequence_rec_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -163,6 +167,7 @@ def sequence_nlog2(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * i * np.log2(i + 2) # i+2 because log2(1) = 0 and log2(2) = 1 diff --git a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py index e9ecfda363..c035c4671d 100755 --- a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py +++ b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py @@ -139,7 +139,6 @@ def check_E_n_Jhat(th_n_opt, n): b_Jhat_n_hist = np.empty((num_samples, num_iter)) for s in range(num_samples): - for n in range(1, num_iter + 1): n_V = np.random.binomial(n, psi) # perform n Bernoulli trials n_M = n - n_V diff --git a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py index 6991db4a80..2caccaf77d 100755 --- a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py +++ b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py @@ -138,7 +138,6 @@ def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int): if __name__ == "__main__": - # Parse command line arguments args = get_argparser().parse_args() diff --git a/setup_deps.py b/setup_deps.py index de48f72d12..290266e773 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -314,7 +314,6 @@ def members(ml): with tarfile.open(tf.name) as tar: def is_within_directory(directory, target): - abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) @@ -323,7 +322,6 @@ def is_within_directory(directory, target): return prefix == abs_directory def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - for member in tar.getmembers(): member_path = os.path.join(path, member.name) if not is_within_directory(path, member_path): From 3836256fda04630d3894075a350d8bc66c7300e7 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 15 Oct 2023 13:53:39 -0400 Subject: [PATCH 12/22] remove empty docstring --- Pyrado/pyrado/domain_randomization/domain_parameter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 4e91db4892..331ed895b5 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -459,7 +459,6 @@ def context_cov(self) -> to.Tensor: return self.context_cov_chol @ self.context_cov_chol.T def info(self) -> dict: - """""" return { "name": self.name, "target_mean": self.target_mean, From 7af231c9b2b4cc36d97dd145875e2579d54cf646 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 22 Oct 2023 16:20:33 -0400 Subject: [PATCH 13/22] address review issues --- .../domain_randomization/domain_parameter.py | 18 ++----- Pyrado/pyrado/plotting/heatmap.py | 4 ++ Pyrado/tests/test_domain_randomization.py | 51 +++++++++++++++++++ 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 331ed895b5..02eeb06750 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -403,19 +403,11 @@ def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1.0, init_cov :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` :return: the self-paced domain parameter """ - ( - name, - target_mean, - target_cov_flat, - init_mean, - init_cov_flat, - ) = ( - [], - [], - [], - [], - [], - ) + name = [] + target_mean = [] + target_cov_flat = [] + init_mean = [] + init_cov_flat = [] for domain_param in domain_randomizer.domain_params: if not isinstance(domain_param, NormalDomainParam): raise pyrado.TypeErr( diff --git a/Pyrado/pyrado/plotting/heatmap.py b/Pyrado/pyrado/plotting/heatmap.py index de2e3bd970..fbf1049b8a 100644 --- a/Pyrado/pyrado/plotting/heatmap.py +++ b/Pyrado/pyrado/plotting/heatmap.py @@ -190,6 +190,10 @@ def draw_heatmap( :return: handles to the heat map and the color bar figures (`None` if not existent) """ if isinstance(data, pd.DataFrame): + if not data.index.is_numeric(): + raise pyrado.TypeErr(given=data.index, msg="expected numeric index") + if not data.index.is_numeric(): + raise pyrado.TypeErr(given=data.columns, msg="expected numeric index") # Extract the data x = data.columns y = data.index diff --git a/Pyrado/tests/test_domain_randomization.py b/Pyrado/tests/test_domain_randomization.py index 8809f6e4ac..1b794090a0 100644 --- a/Pyrado/tests/test_domain_randomization.py +++ b/Pyrado/tests/test_domain_randomization.py @@ -33,16 +33,31 @@ import torch as to from tests.conftest import m_needs_bullet, m_needs_mujoco +import pyrado from pyrado.domain_randomization.domain_parameter import ( BernoulliDomainParam, MultivariateNormalDomainParam, NormalDomainParam, + SelfPacedDomainParam, UniformDomainParam, ) +from pyrado.domain_randomization.domain_randomizer import DomainRandomizer from pyrado.domain_randomization.utils import param_grid from pyrado.environments.sim_base import SimEnv +def assert_is_close(param, info_expected): + info_actual = param.info() + assert list(sorted(info_actual.keys())) == list(sorted(info_expected.keys())) + for key, expected in info_expected.items(): + if key in ["name", "clip_lo", "clip_up"]: + assert info_actual[key] == expected + else: + assert to.allclose( + info_actual[key], expected + ), f"key: {key}, actual: {info_actual[key]}, expected: {expected}" + + @pytest.mark.parametrize( "dp", [ @@ -66,6 +81,42 @@ def test_domain_param(dp, num_samples): assert len(s) == num_samples +def test_self_paced_domain_param_make_broadening(): + param = SelfPacedDomainParam.make_broadening(["a"], [1.0], 0.0004, 0.04) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + "clip_up": pyrado.inf, + }, + ) + + +def test_self_paced_domain_param_from_domain_randomizer(): + param = SelfPacedDomainParam.from_domain_randomizer( + DomainRandomizer(NormalDomainParam(name="a", mean=1.0, std=1.0)), + init_cov_factor=0.0004, + target_cov_factor=0.04, + ) + assert_is_close( + param, + { + "name": ["a"], + "target_mean": to.tensor([1.0]).double(), + "target_cov_chol": to.tensor([0.2]).double(), + "init_mean": to.tensor([1.0]).double(), + "init_cov_chol": to.tensor([0.02]).double(), + "clip_lo": -pyrado.inf, + "clip_up": pyrado.inf, + }, + ) + + def test_randomizer(default_randomizer): print(default_randomizer) # Generate 7 samples From 034c453de83dcf93e8f7e44ebcf103279d09ecd5 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Mon, 9 Oct 2023 19:21:25 -0400 Subject: [PATCH 14/22] Ignore local changes/symlinked pre-commit --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index adf9265e48..ee1aa7a5c1 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,6 @@ MUJOCO_LOG.* ### sbi-logs Pyrado/sbi-logs/ Pyrado/tests/sbi-logs/ + +### pre-commit local/symlink +.pre-commit-config.yaml From 386d9f7d896c48cc2222810fff36da327aafe0ed Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sat, 14 Oct 2023 23:57:11 -0400 Subject: [PATCH 15/22] Install torch-based deps with dependencies --- setup_deps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup_deps.py b/setup_deps.py index 6fe7b8333b..5a27a6f40c 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -564,9 +564,9 @@ def setup_pytorch_based(): # Set up GPyTorch without touching the PyTorch installation (requires scikit-learn which requires threadpoolctl) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "threadpoolctl"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "scikit-learn"]) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "gpytorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "gpytorch"]) # Set up BoTorch without touching the PyTorch installation (requires gpytorch) - sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "botorch"]) + sp.check_call([sys.executable, "-m", "pip", "install", "-U", "botorch"]) # Set up Pyro without touching the PyTorch installation (requires opt-einsum) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "opt-einsum"]) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "pyro-api"]) From 81b36c7bdaadd134fe1855490f12f96868fbc562 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sat, 14 Oct 2023 23:57:51 -0400 Subject: [PATCH 16/22] Add mujoco as a python dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6b0c4970b3..48a4668e21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ sphinx_rtd_theme tabulate tensorboard tqdm +mujoco \ No newline at end of file From bcedd706a28bdd7b1b835fb3dc2a8a04f6386f6a Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sat, 14 Oct 2023 23:58:27 -0400 Subject: [PATCH 17/22] Remove mujoco-py installation from install script --- setup_deps.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/setup_deps.py b/setup_deps.py index 5a27a6f40c..0f0ca2b505 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -40,7 +40,6 @@ import zipfile from urllib.request import urlretrieve -import yaml # Get the project's root directory @@ -138,11 +137,6 @@ ] # using --headless: conda install -c conda-forge bullet freetype libglu freeglut mesalib lapack -required_packages_mujocopy = [ - "libglew-dev", - "libosmesa6-dev", -] - env_vars = { # Global cmake prefix path "CMAKE_PREFIX_PATH": ":".join(cmake_prefix_path) @@ -292,7 +286,7 @@ def downloadAndExtract(url, destdir, archiveContentPath=None): # Taken from https://stackoverflow.com/a/43094365 def members(ml): subfolder = osp.normpath(archiveContentPath) - l = len(subfolder) + len(subfolder) for member in ml: # Skip directories in zip isdir = getattr(member, "is_dir", None) @@ -386,7 +380,7 @@ def setup_dep_libraries(): quiet = [] if not CI else ["-qq"] sp.check_call(["sudo", "apt-get"] + quiet + ["update", "-y"]) # Install dependencies - sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages + required_packages_mujocopy) + sp.check_call(["sudo", "apt-get"] + quiet + ["install", "-y"] + required_packages) def setup_wm5(): @@ -553,10 +547,6 @@ def setup_render_pipeline(): sp.check_call([sys.executable, "setup.py"], cwd=osp.join(project_dir, "thirdParty", "RenderPipeline")) -def setup_mujoco_py(): - # Set up mujoco-py (doing it via pip caused problems on some machines) - sp.check_call([sys.executable, "setup.py", "install"], cwd=osp.join(project_dir, "thirdParty", "mujoco-py")) - def setup_pytorch_based(): # Locally build PyTorch>=1.7.0 requires dataclasses (does not harm when using pytorch from pip) @@ -675,13 +665,12 @@ def setup_wo_rcs_wo_pytorch(): print("\nStarting Option Red Velvet Setup\n") # Rcs will still be downloaded since it is a submodule setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() - print("\nWAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are set up!\n") + print("\nWAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are set up!\n") def setup_wo_rcs_w_pytorch(): @@ -689,14 +678,13 @@ def setup_wo_rcs_w_pytorch(): # Rcs will still be downloaded since it is a submodule setup_pytorch() setup_wam() # ignoring the meshes used in RcsPySim - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nPyTorch, WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " + "\nPyTorch, WAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " "set up!\n" ) @@ -712,14 +700,13 @@ def setup_w_rcs_wo_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, and Pyrado (with GPyTorch, BoTorch, and Pyro " + "\nWM5, Rcs, RcsPySim, iiwa & Schunk & WAM meshes and Pyrado (with GPyTorch, BoTorch, and Pyro " "using the --no-deps flag) are set up!\n" ) @@ -734,14 +721,13 @@ def setup_w_rcs_w_pytorch(): if not CI: setup_rcspysim() setup_meshes() - setup_mujoco_py() if not CI: setup_pyrado() if not args.headless: setup_render_pipeline() setup_pytorch_based() print( - "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, mujoco-py, Pyrado (with GPyTorch, BoTorch, and " + "\nWM5, Rcs, PyTorch, RcsPySim, iiwa & Schunk & WAM meshes, Pyrado (with GPyTorch, BoTorch, and " "Pyro using the --no-deps flag) are set up!\n" ) From 00cd6d069151d2679a1e1e1a1994619ecee1e581 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sun, 22 Oct 2023 16:02:08 -0400 Subject: [PATCH 18/22] Switch to new mujoco package --- Pyrado/pyrado/environments/mujoco/__init__.py | 29 +--------- Pyrado/pyrado/environments/mujoco/base.py | 54 ++++++------------- .../pyrado/environments/mujoco/openai_ant.py | 13 +++-- .../mujoco/openai_half_cheetah.py | 11 ++-- .../environments/mujoco/openai_hopper.py | 9 ++-- .../environments/mujoco/openai_humanoid.py | 39 +++++++------- .../environments/mujoco/quanser_qube.py | 21 +++----- setup_deps.py | 7 +-- 8 files changed, 63 insertions(+), 120 deletions(-) diff --git a/Pyrado/pyrado/environments/mujoco/__init__.py b/Pyrado/pyrado/environments/mujoco/__init__.py index 1a9b692581..94590e58b4 100644 --- a/Pyrado/pyrado/environments/mujoco/__init__.py +++ b/Pyrado/pyrado/environments/mujoco/__init__.py @@ -26,35 +26,8 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. -import os import pyrado -from pyrado.utils.input_output import print_cbt -try: - import mujoco_py -except (ImportError, Exception): - # The ImportError is raised if mujoco-py is simply not installed - # The Exception catches the case that you have everything installed properly but your IDE does not set the - # LD_LIBRARY_PATH correctly (happens for PyCharm & CLion). To check this, try to run your script from the terminal. - ld_library_path = os.environ.get("LD_LIBRARY_PATH") - ld_preload = os.environ.get("LD_PRELOAD") - print_cbt( - "You are trying to use are MuJoCo-based environment, but the required mujoco_py module can not be imported.\n" - "Try adding\n" - "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/$USER/.mujoco/mujoco200/bin\n" - "export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so\n" - "to your shell's rc-file.\n" - "The current values of the environment variables are:\n" - f"LD_LIBRARY_PATH={ld_library_path}\n" - f"LD_PRELOAD={ld_preload}" - "If you are using PyCharm or CLion, also add the environment variables above to your run configurations. " - "Note that the IDE will not resolve $USER for some reason, so enter the user name directly, " - "or run it from your terminal.\n\n" - "Here comes the mujoco-py error message:\n\n", - "r", - ) - pyrado.mujoco_loaded = False -else: - pyrado.mujoco_loaded = True +pyrado.mujoco_loaded = True diff --git a/Pyrado/pyrado/environments/mujoco/base.py b/Pyrado/pyrado/environments/mujoco/base.py index 6f19c2b9a3..19e3b27df6 100644 --- a/Pyrado/pyrado/environments/mujoco/base.py +++ b/Pyrado/pyrado/environments/mujoco/base.py @@ -31,10 +31,10 @@ from math import floor from typing import Optional -import mujoco_py +import mujoco +import mujoco.viewer import numpy as np from init_args_serializer import Serializable -from mujoco_py.generated.const import RND_FOG import pyrado from pyrado.environments.sim_base import SimEnv @@ -87,7 +87,7 @@ def __init__( xml_model_temp = file_raw.read() xml_model_temp = self._adapt_model_file(xml_model_temp, self.domain_param) # Create a dummy model to extract the solver's time step size - model_tmp = mujoco_py.load_model_from_xml(xml_model_temp) + model_tmp = mujoco.MjModel.from_xml_path(xml_model_temp) frame_skip = dt / model_tmp.opt.timestep if frame_skip.is_integer(): self.frame_skip = int(frame_skip) @@ -112,8 +112,8 @@ def __init__( super().__init__(dt=self.model.opt.timestep * self.frame_skip, max_steps=max_steps) # Memorize the initial states of the model from the xml (for fixed init space or later reset) - self.init_qpos = self.sim.data.qpos.copy() - self.init_qvel = self.sim.data.qvel.copy() + self.init_qpos = self.data.qpos.copy() + self.init_qvel = self.data.qvel.copy() # Initialize space (to be overwritten in constructor of subclasses) self._init_space = None @@ -177,13 +177,7 @@ def domain_param(self, domain_param: dict): # Update MuJoCo model self._create_mujoco_model() - if self.viewer is not None: - # If the viewer already exists and we reset the domain parameters, we must also recreate the viewer since - # it references to the simulation object which get's reconstructed during _create_mujoco_model() - import glfw - - glfw.destroy_window(self.viewer.window) - self.viewer = None + self.viewer = None # Update task self._task = self._create_task(self.task_args) @@ -202,7 +196,7 @@ def _adapt_model_file(self, xml_model: str, domain_param: dict) -> str: :return: adapted model file where the placeholders are filled with numerical values """ # The mesh dir is not resolved when later passed as a string, thus we do it manually - xml_model = xml_model.replace(f"[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) + xml_model = xml_model.replace("[ASSETS_DIR]", pyrado.MUJOCO_ASSETS_DIR) # Replace all occurrences of the domain parameter placeholder with its value for key, value in domain_param.items(): @@ -230,14 +224,14 @@ def _create_mujoco_model(self): xml_model = self._adapt_model_file(xml_model, self.domain_param) # Create MuJoCo model from parsed XML file - self.model = mujoco_py.load_model_from_xml(xml_model) - self.sim = mujoco_py.MjSim(self.model, nsubsteps=self.frame_skip) + self.model = mujoco.MjModel.from_xml_string(xml_model) + self.data = mujoco.MjData(self.model) def configure_viewer(self): """Configure the camera when the viewer is initialized. You need to set `self.camera_config` before.""" # Render a fog around the scene by default if self.camera_config.pop("render_fog", True): - self.viewer.scn.flags[RND_FOG] = 1 + self.viewer.scn.flags[mujoco.mjtRndFlag.mjRND_FOG] = 1 # Parse all other options for key, value in self.camera_config.items(): @@ -274,8 +268,8 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. self._task.reset(env_spec=self.spec, init_state=init_state.copy()) # Reset MuJoCo simulation model (only reset the joint configuration) - self.sim.reset() - old_state = self.sim.get_state() + mujoco.mj_resetData(self.model, self.data) + old_state = self.data nq = self.model.nq nv = self.model.nv if not init_state[:nq].shape == old_state.qpos.shape: # check joint positions dimension @@ -283,16 +277,9 @@ def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np. # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim if not init_state[nq : nq + nv].shape == old_state.qvel.shape: # check joint velocities dimension raise pyrado.ShapeErr(given=init_state[nq : nq + nv], expected_match=old_state.qvel) - new_state = mujoco_py.MjSimState( - # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim - old_state.time, - init_state[:nq], - init_state[nq : nq + nv], - old_state.act, - old_state.udd_state, - ) - self.sim.set_state(new_state) - self.sim.forward() + self.data.qpos[:] = np.copy(init_state[:nq]) + self.data.qvel[:] = np.copy(init_state[nq : nq + nv]) + mujoco.mj_forward(self.model, self.data) # Return an observation return self.observe(self.state) @@ -342,14 +329,7 @@ def render(self, mode: RenderMode = RenderMode(), render_step: int = 1): if mode.video: if self.viewer is None: # Create viewer if not existent (see 'human' mode of OpenAI Gym's MujocoEnv) - self.viewer = mujoco_py.MjViewer(self.sim) - - # Adjust window size and position to custom values - import glfw - - glfw.make_context_current(self.viewer.window) - glfw.set_window_size(self.viewer.window, 1280, 720) - glfw.set_window_pos(self.viewer.window, 50, 50) + self.viewer = mujoco.viewer.launch_passive(self.model, self.data) self.configure_viewer() - self.viewer.render() + self.viewer.sync() diff --git a/Pyrado/pyrado/environments/mujoco/openai_ant.py b/Pyrado/pyrado/environments/mujoco/openai_ant.py index d78dbc2d76..862948ecc4 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_ant.py +++ b/Pyrado/pyrado/environments/mujoco/openai_ant.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -98,9 +99,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate( - [self.sim.data.qpos.flat, self.sim.data.qvel.flat, self.sim.data.cfrc_ext.flat] - ).shape + state_shape = np.concatenate([self.data.qpos.flat, self.data.qvel.flat, self.data.cfrc_ext.flat]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -148,11 +147,11 @@ def contact_forces(self): return contact_forces def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.flat.copy() - vel = self.sim.data.qvel.flat.copy() + pos = self.data.qpos.flat.copy() + vel = self.data.qvel.flat.copy() cfrc_ext = self.contact_forces.flat.copy() self.state = np.concatenate([pos, vel, cfrc_ext]) diff --git a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py index f72288a106..dafceef2a3 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py +++ b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py @@ -31,6 +31,7 @@ import numpy as np from init_args_serializer import Serializable +import mujoco import pyrado from pyrado.environments.mujoco.base import MujocoSimEnv @@ -91,7 +92,7 @@ def __init__( @property def state_space(self) -> Space: - state_shape = np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).shape + state_shape = np.concatenate([self.data.qpos, self.data.qvel]).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @property @@ -123,11 +124,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcn(self._dt, idx_fwd=0, **task_args)) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_hopper.py b/Pyrado/pyrado/environments/mujoco/openai_hopper.py index 57be4b6162..c1dd9e0b85 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_hopper.py +++ b/Pyrado/pyrado/environments/mujoco/openai_hopper.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -141,11 +142,11 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, rew_fcn) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) - pos = self.sim.data.qpos.copy() - vel = self.sim.data.qvel.copy() + pos = self.data.qpos.copy() + vel = self.data.qvel.copy() self.state = np.concatenate([pos, vel]) return dict() diff --git a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py index 4de37cdc86..7b56f64c1b 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_humanoid.py +++ b/Pyrado/pyrado/environments/mujoco/openai_humanoid.py @@ -29,6 +29,7 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable @@ -84,10 +85,10 @@ def __init__( rest_shape = np.concatenate( [ - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape min_init_rest = -np.ones(rest_shape) @@ -102,12 +103,12 @@ def __init__( def state_space(self) -> Space: state_shape = np.concatenate( [ - self.sim.data.qpos.flat, - self.sim.data.qvel.flat, - self.sim.data.cinert.flat, - self.sim.data.cvel.flat, - self.sim.data.qfrc_actuator.flat, - self.sim.data.cfrc_ext.flat, + self.data.qpos.flat, + self.data.qvel.flat, + self.data.cinert.flat, + self.data.cvel.flat, + self.data.qfrc_actuator.flat, + self.data.cfrc_ext.flat, ] ).shape return BoxSpace(-pyrado.inf, pyrado.inf, shape=state_shape) @@ -162,15 +163,15 @@ def _create_task(self, task_args: dict) -> Task: return GoallessTask(self.spec, ForwardVelocityRewFcnHumanoid(self._dt, **task_args)) def _mujoco_step(self, act: np.ndarray) -> dict: - self.sim.data.ctrl[:] = act - self.sim.step() - - position = self.sim.data.qpos.flat.copy() - velocity = self.sim.data.qvel.flat.copy() - com_inertia = self.sim.data.cinert.flat.copy() - com_velocity = self.sim.data.cvel.flat.copy() - actuator_forces = self.sim.data.qfrc_actuator.flat.copy() - external_contact_forces = self.sim.data.cfrc_ext.flat.copy() + self.data.ctrl[:] = act + mujoco.mj_step(self.model, self.data) + + position = self.data.qpos.flat.copy() + velocity = self.data.qvel.flat.copy() + com_inertia = self.data.cinert.flat.copy() + com_velocity = self.data.cvel.flat.copy() + actuator_forces = self.data.qfrc_actuator.flat.copy() + external_contact_forces = self.data.cfrc_ext.flat.copy() self.state = np.concatenate( [ diff --git a/Pyrado/pyrado/environments/mujoco/quanser_qube.py b/Pyrado/pyrado/environments/mujoco/quanser_qube.py index a609f76355..20e136fb33 100644 --- a/Pyrado/pyrado/environments/mujoco/quanser_qube.py +++ b/Pyrado/pyrado/environments/mujoco/quanser_qube.py @@ -30,9 +30,8 @@ from abc import abstractmethod from typing import Optional -import mujoco_py +import mujoco import numpy as np -import torch as to from init_args_serializer import Serializable import pyrado @@ -122,19 +121,13 @@ def _mujoco_step(self, act: np.ndarray) -> dict: ) # act is a scalar array, causing warning on later np.array construction # Apply the torques to the robot - self.sim.data.ctrl[:] = torque + self.data.ctrl[:] = torque # Call MuJoCo - try: - self.sim.step() - mjsim_crashed = False - except mujoco_py.builder.MujocoException: - # When MuJoCo recognized instabilities in the simulation, it simply kills it. - # Instead, we want the episode to end with a failure. - mjsim_crashed = True - - qpos = self.sim.data.qpos.copy() - qvel = self.sim.data.qvel.copy() + mujoco.mj_step(self.model, self.data) + + qpos = self.data.qpos.copy() + qvel = self.data.qvel.copy() self.state = np.concatenate([qpos, qvel]) # If state is out of bounds (this is normally checked by the task, but does not work because of the mask) @@ -143,7 +136,7 @@ def _mujoco_step(self, act: np.ndarray) -> dict: return dict( qpos=qpos, qvel=qvel, - failed=mjsim_crashed or state_oob, + failed=state_oob, ) def observe(self, state: np.ndarray) -> np.ndarray: diff --git a/setup_deps.py b/setup_deps.py index 0f0ca2b505..344ae2ddc0 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -41,7 +41,6 @@ from urllib.request import urlretrieve - # Get the project's root directory project_dir = osp.dirname(osp.abspath(__file__)) @@ -547,7 +546,6 @@ def setup_render_pipeline(): sp.check_call([sys.executable, "setup.py"], cwd=osp.join(project_dir, "thirdParty", "RenderPipeline")) - def setup_pytorch_based(): # Locally build PyTorch>=1.7.0 requires dataclasses (does not harm when using pytorch from pip) sp.check_call([sys.executable, "-m", "pip", "install", "-U", "--no-deps", "dataclasses"]) @@ -683,10 +681,7 @@ def setup_wo_rcs_w_pytorch(): if not args.headless: setup_render_pipeline() setup_pytorch_based() - print( - "\nPyTorch, WAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " - "set up!\n" - ) + print("\nPyTorch, WAM meshes, Pyrado (with GPyTorch, BoTorch, and Pyro using the --no-deps flag) are " "set up!\n") def setup_w_rcs_wo_pytorch(): From 9b48aa8001927d0a4b81b9565df77d3cf5b9cff2 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sun, 22 Oct 2023 16:07:43 -0400 Subject: [PATCH 19/22] Remove mujoco submodule --- .gitmodules | 3 --- thirdParty/mujoco-py | 1 - 2 files changed, 4 deletions(-) delete mode 160000 thirdParty/mujoco-py diff --git a/.gitmodules b/.gitmodules index 5b7a8e3399..fb5900aea9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "thirdParty/catch2"] path = thirdParty/catch2 url = https://github.com/catchorg/Catch2.git -[submodule "thirdParty/mujoco-py"] - path = thirdParty/mujoco-py - url = https://github.com/openai/mujoco-py.git [submodule "thirdParty/RenderPipeline"] path = thirdParty/RenderPipeline url = https://github.com/famura/RenderPipeline.git diff --git a/thirdParty/mujoco-py b/thirdParty/mujoco-py deleted file mode 160000 index d73ce6e91d..0000000000 --- a/thirdParty/mujoco-py +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d73ce6e91d096b74da2a2fcb0a4164e10db5f641 From 4fcfaa706edea263db56565129cf6ead9710d100 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sun, 22 Oct 2023 16:26:33 -0400 Subject: [PATCH 20/22] Update pre-commit hooks to latest version --- .github/.pre-commit-config.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/.pre-commit-config.yaml b/.github/.pre-commit-config.yaml index 87d2bc0b8c..c8fea89bcd 100644 --- a/.github/.pre-commit-config.yaml +++ b/.github/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pycqa/isort - rev: 5.8.0 + rev: 5.12.0 hooks: - id: isort name: isort (python) @@ -11,7 +11,6 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 23.10.0 hooks: - id: black - language_version: python3.7 From f5022d4d32188d5cf8835af3826d378212f5e993 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Sun, 22 Oct 2023 16:27:16 -0400 Subject: [PATCH 21/22] Apply isort to half_cheetah --- Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py index dafceef2a3..088698c8fc 100644 --- a/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py +++ b/Pyrado/pyrado/environments/mujoco/openai_half_cheetah.py @@ -29,9 +29,9 @@ import os.path as osp from typing import Optional +import mujoco import numpy as np from init_args_serializer import Serializable -import mujoco import pyrado from pyrado.environments.mujoco.base import MujocoSimEnv From 08ef8c8074f03a91a1c2e7b13050eaae3d7c9da9 Mon Sep 17 00:00:00 2001 From: Heiko Carrasco Date: Thu, 26 Oct 2023 23:53:57 -0400 Subject: [PATCH 22/22] Remove unnecessary subfolder length call --- setup_deps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup_deps.py b/setup_deps.py index 344ae2ddc0..56076d4fc1 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -285,7 +285,6 @@ def downloadAndExtract(url, destdir, archiveContentPath=None): # Taken from https://stackoverflow.com/a/43094365 def members(ml): subfolder = osp.normpath(archiveContentPath) - len(subfolder) for member in ml: # Skip directories in zip isdir = getattr(member, "is_dir", None)