From 4d69d10789153d60975e00b93c543bc7c789dae3 Mon Sep 17 00:00:00 2001 From: Fabian Damken Date: Sun, 8 Oct 2023 10:33:19 -0400 Subject: [PATCH] apply black and isort --- .../algorithms/episodic/predefined_lqr.py | 1 - Pyrado/pyrado/algorithms/meta/adr.py | 2 - Pyrado/pyrado/algorithms/meta/spdr.py | 42 +++++++++--- .../regression/timeseries_prediction.py | 1 - Pyrado/pyrado/algorithms/step_based/dql.py | 1 - Pyrado/pyrado/algorithms/step_based/gae.py | 1 - Pyrado/pyrado/algorithms/step_based/ppo.py | 2 - .../domain_randomization/domain_parameter.py | 68 ++++++++++++------- .../environments/rcspysim/planar_insert.py | 1 - .../pyrado/policies/feed_forward/poly_time.py | 4 +- Pyrado/pyrado/sampling/parallel_evaluation.py | 4 +- .../sampling/parallel_rollout_sampler.py | 1 - Pyrado/pyrado/sampling/sequences.py | 5 ++ .../sob_illustrative_example.py | 1 - .../hopt_qq-su_ppo2.py | 1 - setup_deps.py | 2 - 16 files changed, 82 insertions(+), 55 deletions(-) diff --git a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py index 48d8b5a8cca..3c773f90cdf 100644 --- a/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py +++ b/Pyrado/pyrado/algorithms/episodic/predefined_lqr.py @@ -102,7 +102,6 @@ def sampler(self, sampler: ParallelRolloutSampler): self._sampler = sampler def step(self, snapshot_mode: str, meta_info: dict = None): - if isinstance(inner_env(self._env), BallOnPlate5DSim): ctrl_gains = to.tensor( [ diff --git a/Pyrado/pyrado/algorithms/meta/adr.py b/Pyrado/pyrado/algorithms/meta/adr.py index 14d34f4a22c..35eb386cb88 100644 --- a/Pyrado/pyrado/algorithms/meta/adr.py +++ b/Pyrado/pyrado/algorithms/meta/adr.py @@ -431,7 +431,6 @@ def __init__( logger: StepLogger = None, device: str = "cuda" if to.cuda.is_available() else "cpu", ): - """ Constructor @@ -472,7 +471,6 @@ def get_reward(self, traj: StepSequence) -> to.Tensor: def train( self, reference_trajectory: StepSequence, randomized_trajectory: StepSequence, num_epoch: int ) -> to.Tensor: - reference_batch_generator = reference_trajectory.iterate_rollouts() random_batch_generator = randomized_trajectory.iterate_rollouts() diff --git a/Pyrado/pyrado/algorithms/meta/spdr.py b/Pyrado/pyrado/algorithms/meta/spdr.py index 492439f6e75..7cd3d99573b 100644 --- a/Pyrado/pyrado/algorithms/meta/spdr.py +++ b/Pyrado/pyrado/algorithms/meta/spdr.py @@ -28,7 +28,7 @@ import os.path from csv import DictWriter -from typing import Iterator, Optional, Tuple, List, Callable +from typing import Callable, Iterator, List, Optional, Tuple import numpy as np import torch as to @@ -316,15 +316,21 @@ def step(self, snapshot_mode: str, meta_info: dict = None): self._train_subroutine_and_evaluate_perf )(snapshot_mode, meta_info, reset_policy) - previous_distribution = MultivariateNormalWrapper(self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double()) - target_distribution = MultivariateNormalWrapper(self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double()) + previous_distribution = MultivariateNormalWrapper( + self._spl_parameter.context_mean.double(), self._spl_parameter.context_cov_chol.double() + ) + target_distribution = MultivariateNormalWrapper( + self._spl_parameter.target_mean.double(), self._spl_parameter.target_cov_chol.double() + ) proposal_rollouts = self._sample_proposal_rollouts() contexts, contexts_old_log_prob, values = self._extract_particles(proposal_rollouts, previous_distribution) # Define the SPRL optimization problem kl_constraint = 
self._make_kl_constraint(previous_distribution, self._kl_constraints_ub) - performance_constraint = self._make_performance_constraint(contexts, contexts_old_log_prob, values, self._performance_lower_bound) + performance_constraint = self._make_performance_constraint( + contexts, contexts_old_log_prob, values, self._performance_lower_bound + ) constraints = [kl_constraint, performance_constraint] objective_fn = self._make_objective_fn(target_distribution) x0 = previous_distribution.get_stacked() @@ -392,7 +398,9 @@ def load_snapshot(self, parsed_args) -> Tuple[Env, Policy, dict]: return env, policy, extra - def _make_objective_fn(self, target_distribution: MultivariateNormalWrapper) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: + def _make_objective_fn( + self, target_distribution: MultivariateNormalWrapper + ) -> Callable[[np.ndarray], Tuple[float, np.ndarray]]: def objective_fn(x): """Tries to find the minimum kl divergence between the current and the update distribution, which still satisfies the minimum update constraint and the performance constraint.""" @@ -407,17 +415,23 @@ def objective_fn(x): return objective_fn - def _make_kl_constraint(self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float) -> NonlinearConstraint: + def _make_kl_constraint( + self, previous_distribution: MultivariateNormalWrapper, kl_constraint_ub: float + ) -> NonlinearConstraint: def kl_constraint_fn(x): """Compute the constraint for the KL-divergence between current and proposed distribution.""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) - kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + kl_divergence = to.distributions.kl_divergence( + previous_distribution.distribution, distribution.distribution + ) return kl_divergence.detach().numpy().item() def kl_constraint_fn_prime(x): """Compute the derivative for the KL-constraint (used for scipy optimizer).""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) - kl_divergence = to.distributions.kl_divergence(previous_distribution.distribution, distribution.distribution) + kl_divergence = to.distributions.kl_divergence( + previous_distribution.distribution, distribution.distribution + ) grads = to.autograd.grad(kl_divergence, list(distribution.parameters())) return np.concatenate([g.detach().numpy() for g in grads]) @@ -428,7 +442,9 @@ def kl_constraint_fn_prime(x): jac=kl_constraint_fn_prime, ) - def _make_performance_constraint(self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float) -> NonlinearConstraint: + def _make_performance_constraint( + self, contexts: to.Tensor, contexts_old_log_prob: to.Tensor, values: to.Tensor, performance_lower_bound: float + ) -> NonlinearConstraint: def performance_constraint_fn(x): """Compute the constraint for the expected performance under the proposed distribution.""" distribution = MultivariateNormalWrapper.from_stacked(self.dim, x) @@ -470,7 +486,9 @@ def _log_context_distribution(self): def _sample_proposal_rollouts(self) -> List[List[StepSequence]]: return self.subrtn_sampler.rollouts - def _extract_particles(self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: + def _extract_particles( + self, rollouts_all: List[List[StepSequence]], distribution: MultivariateNormalWrapper + ) -> Tuple[to.Tensor, to.Tensor, to.Tensor]: def get_domain_param_value(ro: StepSequence, 
param_name: str) -> np.ndarray: domain_param_dict = ro.rollout_info["domain_param"] untransformed_param_name = param_name + DomainParamTransform.UNTRANSFORMED_DOMAIN_PARAMETER_SUFFIX @@ -491,7 +509,9 @@ def get_domain_param_value(ro: StepSequence, param_name: str) -> np.ndarray: return contexts, contexts_log_prob, values # noinspection PyMethodMayBeStatic - def _compute_expected_performance(self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor) -> to.Tensor: + def _compute_expected_performance( + self, distribution: MultivariateNormalWrapper, context: to.Tensor, old_log_prop: to.Tensor, values: to.Tensor + ) -> to.Tensor: """Calculate the expected performance after an update step.""" context_ratio = to.exp(distribution.distribution.log_prob(context) - old_log_prop) return to.mean(context_ratio * values) diff --git a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py index 11326aee1f5..d33ea819c9a 100644 --- a/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py +++ b/Pyrado/pyrado/algorithms/regression/timeseries_prediction.py @@ -104,7 +104,6 @@ def __init__( self._lr_scheduler = lr_scheduler(self.optim, **lr_scheduler_hparam) def step(self, snapshot_mode: str, meta_info: dict = None): - # Feed one epoch of the training set to the policy if self.windowed: # Predict diff --git a/Pyrado/pyrado/algorithms/step_based/dql.py b/Pyrado/pyrado/algorithms/step_based/dql.py index ff38c66ab8b..36ca2e6c6b5 100644 --- a/Pyrado/pyrado/algorithms/step_based/dql.py +++ b/Pyrado/pyrado/algorithms/step_based/dql.py @@ -192,7 +192,6 @@ def update(self): file=sys.stdout, leave=False, ): - # Sample steps and the associated next step from the replay memory steps, next_steps = self._memory.sample(self.batch_size) steps.torch(data_type=to.get_default_dtype()) diff --git a/Pyrado/pyrado/algorithms/step_based/gae.py b/Pyrado/pyrado/algorithms/step_based/gae.py index 0f80a42f85c..ac125f51d21 100644 --- a/Pyrado/pyrado/algorithms/step_based/gae.py +++ b/Pyrado/pyrado/algorithms/step_based/gae.py @@ -239,7 +239,6 @@ def update(self, rollouts: Sequence[StepSequence], use_empirical_returns: bool = # Iterate over all gathered samples num_epoch times for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, complete_rollouts=isinstance(self.vfcn, RecurrentPolicy) diff --git a/Pyrado/pyrado/algorithms/step_based/ppo.py b/Pyrado/pyrado/algorithms/step_based/ppo.py index 2c93f0ccd5f..7f8d094318e 100644 --- a/Pyrado/pyrado/algorithms/step_based/ppo.py +++ b/Pyrado/pyrado/algorithms/step_based/ppo.py @@ -176,7 +176,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches(self.batch_size, complete_rollouts=self._policy.is_recurrent), total=num_iter_from_rollouts(None, concat_ros, self.batch_size), @@ -412,7 +411,6 @@ def update(self, rollouts: Sequence[StepSequence]): # Iterations over the whole data set for e in range(self.num_epoch): - for batch in tqdm( concat_ros.split_shuffled_batches( self.batch_size, diff --git a/Pyrado/pyrado/domain_randomization/domain_parameter.py b/Pyrado/pyrado/domain_randomization/domain_parameter.py index 5f150a052bc..4e91db48928 100644 --- a/Pyrado/pyrado/domain_randomization/domain_parameter.py +++ b/Pyrado/pyrado/domain_randomization/domain_parameter.py @@ -44,11 +44,11 @@ class DomainParam: 
"""Class to store and manage (probably multiple) domain parameter a.k.a. physics parameter a.k.a. simulator parameter""" def __init__( - self, - name: Union[str, List[str]], - clip_lo: Optional[Union[int, float]] = -pyrado.inf, - clip_up: Optional[Union[int, float]] = pyrado.inf, - roundint: bool = False, + self, + name: Union[str, List[str]], + clip_lo: Optional[Union[int, float]] = -pyrado.inf, + clip_up: Optional[Union[int, float]] = pyrado.inf, + roundint: bool = False, ): """ Constructor, also see the constructor of DomainRandomizer. @@ -97,7 +97,7 @@ def adapt(self, domain_distr_param: str, domain_distr_param_value: Union[float, if domain_distr_param not in self.get_field_names(): raise pyrado.KeyErr( msg=f"The domain parameter {self.name} does not have a domain distribution parameter " - f"called {domain_distr_param}!" + f"called {domain_distr_param}!" ) setattr(self, domain_distr_param, domain_distr_param_value) @@ -314,14 +314,14 @@ def sample(self, num_samples: int = 1) -> List[to.Tensor]: class SelfPacedDomainParam(DomainParam): def __init__( - self, - name: List[str], - target_mean: to.Tensor, - target_cov_flat: to.Tensor, - init_mean: to.Tensor, - init_cov_flat: to.Tensor, - clip_lo: float, - clip_up: float, + self, + name: List[str], + target_mean: to.Tensor, + target_cov_flat: to.Tensor, + init_mean: to.Tensor, + init_cov_flat: to.Tensor, + clip_lo: float, + clip_up: float, ): """ Constructor @@ -362,12 +362,12 @@ def get_field_names(self) -> Sequence[str]: @staticmethod def make_broadening( - name: List[str], - mean: List[float], - init_cov_portion: float = 0.001, - target_cov_portion: float = 0.1, - clip_lo: float = -pyrado.inf, - clip_up: float = pyrado.inf, + name: List[str], + mean: List[float], + init_cov_portion: float = 0.001, + target_cov_portion: float = 0.1, + clip_lo: float = -pyrado.inf, + clip_up: float = pyrado.inf, ) -> "SelfPacedDomainParam": """ Creates a self-paced domain parameter having the same initial and target mean, but a larger variance on the @@ -394,7 +394,7 @@ def make_broadening( ) @staticmethod - def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1., init_cov_factor=1 / 100): + def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1.0, init_cov_factor=1 / 100): """ Creates a self-paced domain parameter having the same initial and target mean and target variance given by the domain randomizer's variance (scaled by `target_cov_factor`). The initial variance is also given by the domain randomizer's variance (scaled by `init_cov_factor`). 
@@ -403,15 +403,31 @@ def from_domain_randomizer(domain_randomizer, *, target_cov_factor=1., init_cov_ :param init_cov_factor: scaling of the randomizer's variance to get the init variance; defaults to `1/100` :return: the self-paced domain parameter """ - name, target_mean, target_cov_flat, init_mean, init_cov_flat, = [], [], [], [], [] + ( + name, + target_mean, + target_cov_flat, + init_mean, + init_cov_flat, + ) = ( + [], + [], + [], + [], + [], + ) for domain_param in domain_randomizer.domain_params: if not isinstance(domain_param, NormalDomainParam): - raise pyrado.TypeErr(given=domain_param, expected_type=NormalDomainParam, msg="each domain_param must be a NormalDomainParam") + raise pyrado.TypeErr( + given=domain_param, + expected_type=NormalDomainParam, + msg="each domain_param must be a NormalDomainParam", + ) name.append(domain_param.name) target_mean.append(domain_param.mean) - target_cov_flat.append(target_cov_factor * domain_param.std ** 2) + target_cov_flat.append(target_cov_factor * domain_param.std**2) init_mean.append(domain_param.mean) - init_cov_flat.append(init_cov_factor * domain_param.std ** 2) + init_cov_flat.append(init_cov_factor * domain_param.std**2) return SelfPacedDomainParam( name=name, target_mean=to.tensor(target_mean), @@ -443,7 +459,7 @@ def context_cov(self) -> to.Tensor: return self.context_cov_chol @ self.context_cov_chol.T def info(self) -> dict: - "" + """""" return { "name": self.name, "target_mean": self.target_mean, diff --git a/Pyrado/pyrado/environments/rcspysim/planar_insert.py b/Pyrado/pyrado/environments/rcspysim/planar_insert.py index 5dcedde15c9..82e68b54313 100644 --- a/Pyrado/pyrado/environments/rcspysim/planar_insert.py +++ b/Pyrado/pyrado/environments/rcspysim/planar_insert.py @@ -75,7 +75,6 @@ class PlanarInsertSim(RcsSim, Serializable): """ def __init__(self, task_args: dict, collision_config: dict = None, max_dist_force: float = None, **kwargs): - """ Constructor diff --git a/Pyrado/pyrado/policies/feed_forward/poly_time.py b/Pyrado/pyrado/policies/feed_forward/poly_time.py index cf5eb2c9dbf..c01702774a1 100644 --- a/Pyrado/pyrado/policies/feed_forward/poly_time.py +++ b/Pyrado/pyrado/policies/feed_forward/poly_time.py @@ -251,9 +251,7 @@ class TraceablePolySplineTimePolicy(nn.Module): t_init: float t_curr: float overtime_behavior: str - act_space_shape: Tuple[ - int, - ] + act_space_shape: Tuple[int,] act_space_flat_dim: int def __init__( diff --git a/Pyrado/pyrado/sampling/parallel_evaluation.py b/Pyrado/pyrado/sampling/parallel_evaluation.py index 0c2fd71c71e..cfd31298243 100644 --- a/Pyrado/pyrado/sampling/parallel_evaluation.py +++ b/Pyrado/pyrado/sampling/parallel_evaluation.py @@ -106,7 +106,9 @@ def eval_domain_params( # Run with progress bar with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb: # we set the sub seed to zero since every run will have its personal sub sub seed - return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb) + return pool.run_map( + functools.partial(_ps_run_one_domain_param, eval=True, seed=seed, sub_seed=0), list(enumerate(params)), pb + ) def eval_nominal_domain( diff --git a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py index 59006730c45..dbb90558624 100644 --- a/Pyrado/pyrado/sampling/parallel_rollout_sampler.py +++ b/Pyrado/pyrado/sampling/parallel_rollout_sampler.py @@ -276,7 +276,6 @@ def sample( disable=(not self.show_progress_bar), 
unit="steps" if self.min_steps is not None else "rollouts", ) as pb: - if self.min_steps is None: if init_states is None and domain_params is None: # Simply run min_rollouts times diff --git a/Pyrado/pyrado/sampling/sequences.py b/Pyrado/pyrado/sampling/sequences.py index bfa5d2be8f0..de132f422ad 100644 --- a/Pyrado/pyrado/sampling/sequences.py +++ b/Pyrado/pyrado/sampling/sequences.py @@ -99,6 +99,7 @@ def sequence_add_init(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * (i + 1) @@ -115,6 +116,7 @@ def sequence_rec_double(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * 2.0 @@ -131,6 +133,7 @@ def sequence_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -147,6 +150,7 @@ def sequence_rec_sqrt(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # exponential growth def iter_function(x_seq, i, x_init): return x_seq[i - 1, :] * np.sqrt(i + 1) # i+1 because sqrt(1) = 1 @@ -163,6 +167,7 @@ def sequence_nlog2(x_init, iter, dtype=int): :param dtype: data type to cast to (either int of float) :return: element at the given iteration and array of the whole sequence """ + # non-exponential growth def iter_function(x_seq, i, x_init): return x_seq[0, :] * i * np.log2(i + 2) # i+2 because log2(1) = 0 and log2(2) = 1 diff --git a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py index e9ecfda3638..c035c4671d2 100755 --- a/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py +++ b/Pyrado/scripts/evaluation/paper_specific/sob_illustrative_example.py @@ -139,7 +139,6 @@ def check_E_n_Jhat(th_n_opt, n): b_Jhat_n_hist = np.empty((num_samples, num_iter)) for s in range(num_samples): - for n in range(1, num_iter + 1): n_V = np.random.binomial(n, psi) # perform n Bernoulli trials n_M = n - n_V diff --git a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py index 6991db4a80b..2caccaf77d8 100755 --- a/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py +++ b/Pyrado/scripts/hyperparam_optimization/hopt_qq-su_ppo2.py @@ -138,7 +138,6 @@ def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int): if __name__ == "__main__": - # Parse command line arguments args = get_argparser().parse_args() diff --git a/setup_deps.py b/setup_deps.py index 6386e9bd3ec..6fe7b8333ba 100644 --- a/setup_deps.py +++ b/setup_deps.py @@ -315,7 +315,6 @@ def members(ml): with tarfile.open(tf.name) as tar: def is_within_directory(directory, target): - abs_directory = os.path.abspath(directory) abs_target = os.path.abspath(target) @@ -324,7 +323,6 @@ def is_within_directory(directory, target): return prefix == abs_directory def safe_extract(tar, path=".", members=None, *, numeric_owner=False): - for member in tar.getmembers(): 
member_path = os.path.join(path, member.name) if not is_within_directory(path, member_path):
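
The section ends inside `safe_extract` in setup_deps.py. For reference, the complete pattern this code implements — the widely used guard against tar path traversal (CVE-2007-4559) — is sketched below. Everything past the `if not is_within_directory(...)` check (the error raised and the final `extractall` call), as well as the archive name and extraction path in the usage lines, are assumptions, since they fall outside the quoted hunk:

    import os
    import tarfile

    def is_within_directory(directory: str, target: str) -> bool:
        # Compare resolved absolute paths: the member is accepted only if the
        # extraction root is a string prefix of the member's target path.
        abs_directory = os.path.abspath(directory)
        abs_target = os.path.abspath(target)
        prefix = os.path.commonprefix([abs_directory, abs_target])
        return prefix == abs_directory

    def safe_extract(tar: tarfile.TarFile, path: str = ".", members=None, *, numeric_owner: bool = False) -> None:
        # Validate every member before extracting anything, so a single malicious
        # entry (e.g. "../../etc/passwd") aborts the whole extraction.
        for member in tar.getmembers():
            member_path = os.path.join(path, member.name)
            if not is_within_directory(path, member_path):
                raise RuntimeError("attempted path traversal in tar file")
        tar.extractall(path, members, numeric_owner=numeric_owner)

    with tarfile.open("deps.tar.gz") as tar:  # hypothetical archive name
        safe_extract(tar, path="third_party")  # hypothetical extraction path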