From 2e4683bf72aa375caee45c24c20e35b3ca5bc9bf Mon Sep 17 00:00:00 2001
From: Dusan Varis
Date: Wed, 8 Aug 2018 22:06:22 +0200
Subject: [PATCH] fixed pylints in generic_trainer, fixed typos

---
 neuralmonkey/trainers/generic_trainer.py | 109 +++++++++++++----
 neuralmonkey/trainers/regularizers.py    |  26 +++---
 2 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/neuralmonkey/trainers/generic_trainer.py b/neuralmonkey/trainers/generic_trainer.py
index 2315ab8d8..56a84cee9 100644
--- a/neuralmonkey/trainers/generic_trainer.py
+++ b/neuralmonkey/trainers/generic_trainer.py
@@ -7,7 +7,8 @@
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.runners.base_runner import (
     Executable, ExecutionResult, NextExecute)
-from neuralmonkey.trainers.regularizers import (Regularizer, L2Regularizer)
+from neuralmonkey.trainers.regularizers import (
+    Regularizer, L1Regularizer, L2Regularizer)
 
 # pylint: disable=invalid-name
 Gradients = List[Tuple[tf.Tensor, tf.Variable]]
@@ -38,8 +39,7 @@ class Objective(NamedTuple(
     """
 
 
-# pylint: disable=too-few-public-methods,too-many-locals,too-many-branches
-# pylint: disable=too-many-statements
+# pylint: disable=too-few-public-methods
 class GenericTrainer:
 
     def __init__(self,
@@ -51,25 +51,15 @@ def __init__(self,
                  var_collection: str = None) -> None:
         check_argument_types()
 
+        self.objectives = objectives
+
         self.regularizers = []  # type: List[Regularizer]
         if regularizers is not None:
             self.regularizers = regularizers
 
-        if var_collection is None:
-            var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
-
-        if var_scopes is None:
-            var_lists = [tf.get_collection(var_collection)]
-        else:
-            var_lists = [tf.get_collection(var_collection, scope)
-                         for scope in var_scopes]
-
-        # Flatten the list of lists
-        self.var_list = [var for var_list in var_lists for var in var_list]
+        self.var_list = _get_var_list(var_scopes, var_collection)
 
         with tf.name_scope("trainer"):
-            step = tf.train.get_or_create_global_step()
-
             if optimizer:
                 self.optimizer = optimizer
             else:
@@ -85,43 +75,25 @@ def __init__(self,
                                   collections=["summary_train"])
             # pylint: enable=protected-access
 
-            with tf.name_scope("regularization"):
-                regularizable = [v for v in tf.trainable_variables()
-                                 if not BIAS_REGEX.findall(v.name)
-                                 and not v.name.startswith("vgg")
-                                 and not v.name.startswith("Inception")
-                                 and not v.name.startswith("resnet")]
-                reg_values = [reg.value(regularizable)
-                              for reg in self.regularizers]
-                reg_costs = [
-                    reg.weight * reg_value
-                    for reg, reg_value in zip(self.regularizers, reg_values)]
-
             # unweighted losses for fetching
-            self.losses = [o.loss for o in objectives] + reg_values
-
-            # we always want to include l2 values in the summary
-            if L2Regularizer not in [type(r) for r in self.regularizers]:
-                l2_reg = L2Regularizer(name="train_l2", weight=0.)
-                tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
-                                  collections=["summary_train"])
-            for reg, reg_value in zip(self.regularizers, reg_values):
-                tf.summary.scalar(reg.name, reg_value,
-                                  collections=["summary_train"])
+            self.losses = [o.loss for o in self.objectives]
 
             # log all objectives
-            for obj in objectives:
+            for obj in self.objectives:
                 tf.summary.scalar(
                     obj.name, obj.loss, collections=["summary_train"])
 
+            # compute regularization costs
+            reg_costs = self._compute_regularization()
+
             # if the objective does not have its own gradients,
             # just use TF to do the derivative
-            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-            with tf.control_dependencies(update_ops):
+            with tf.control_dependencies(
+                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                 with tf.name_scope("gradient_collection"):
                     differentiable_loss_sum = sum(
                         [(o.weight if o.weight is not None else 1.) * o.loss
-                         for o in objectives if o.gradients is None])
+                         for o in self.objectives if o.gradients is None])
                     differentiable_loss_sum += sum(reg_costs)
                     implicit_gradients = self._get_gradients(
                         differentiable_loss_sum)
@@ -129,7 +101,7 @@ def __init__(self,
             # objectives that have their gradients explictly computed
             other_gradients = [
                 _scale_gradients(o.gradients, o.weight)
-                for o in objectives if o.gradients is not None]
+                for o in self.objectives if o.gradients is not None]
 
             if other_gradients:
                 self.gradients = _sum_gradients(
@@ -148,10 +120,11 @@ def __init__(self,
                               if grad is not None]
 
             self.all_coders = set.union(*(obj.decoder.get_dependencies()
                                           for obj in objectives))
-                                          for obj in objectives))
+                                          for obj in self.objectives))
 
             self.train_op = self.optimizer.apply_gradients(
-                self.gradients, global_step=step)
+                self.gradients,
+                global_step=tf.train.get_or_create_global_step())
 
             for grad, var in self.gradients:
                 if grad is not None:
@@ -164,6 +137,38 @@ def __init__(self,
             self.scalar_summaries = tf.summary.merge(
                 tf.get_collection("summary_train"))
 
+    def _compute_regularization(self) -> List[tf.Tensor]:
+        with tf.name_scope("regularization"):
+            regularizable = [v for v in tf.trainable_variables()
+                             if not BIAS_REGEX.findall(v.name)
+                             and not v.name.startswith("vgg")
+                             and not v.name.startswith("Inception")
+                             and not v.name.startswith("resnet")]
+            reg_values = [reg.value(regularizable)
+                          for reg in self.regularizers]
+            reg_costs = [
+                reg.weight * reg_value
+                for reg, reg_value in zip(self.regularizers, reg_values)]
+
+            # add unweighted regularization values
+            self.losses += reg_values
+
+            # we always want to include l1 and l2 values in the summary
+            if L1Regularizer not in [type(r) for r in self.regularizers]:
+                l1_reg = L1Regularizer(name="train_l1", weight=0.)
+                tf.summary.scalar(l1_reg.name, l1_reg.value(regularizable),
+                                  collections=["summary_train"])
+            if L2Regularizer not in [type(r) for r in self.regularizers]:
+                l2_reg = L2Regularizer(name="train_l2", weight=0.)
+                tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
+                                  collections=["summary_train"])
+
+            for reg, reg_value in zip(self.regularizers, reg_values):
+                tf.summary.scalar(reg.name, reg_value,
+                                  collections=["summary_train"])
+
+            return reg_costs
+
     def _get_gradients(self, tensor: tf.Tensor) -> Gradients:
         gradient_list = self.optimizer.compute_gradients(tensor, self.var_list)
         return gradient_list
@@ -181,6 +186,20 @@ def get_executable(
             self.histogram_summaries if summaries else None)
 
 
+def _get_var_list(var_scopes, var_collection) -> List[tf.Variable]:
+    if var_collection is None:
+        var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
+
+    if var_scopes is None:
+        var_lists = [tf.get_collection(var_collection)]
+    else:
+        var_lists = [tf.get_collection(var_collection, scope)
+                     for scope in var_scopes]
+
+    # Flatten the list of lists
+    return [var for var_list in var_lists for var in var_list]
+
+
 def _sum_gradients(gradients_list: List[Gradients]) -> Gradients:
     summed_dict = {}  # type: Dict[tf.Variable, tf.Tensor]
     for gradients in gradients_list:
diff --git a/neuralmonkey/trainers/regularizers.py b/neuralmonkey/trainers/regularizers.py
index 86f5f21e2..d2c229fcb 100644
--- a/neuralmonkey/trainers/regularizers.py
+++ b/neuralmonkey/trainers/regularizers.py
@@ -15,11 +15,11 @@
 
 
 class Regularizer(metaclass=ABCMeta):
-    """Base clas s for regularizers.
+    """Base class for regularizers.
 
     Regularizer objects are used to introduce additional loss terms to
-    the trainerthus constraining the model variable during training. These
-    loss terms have an adjustable weight allowing to set the ``importance''
+    the trainer, thus constraining the model variable during training. These
+    loss terms have an adjustable weight allowing to set the "importance"
     of the term.
     """
 
@@ -31,7 +31,7 @@ def __init__(self,
         Arguments:
             name: Regularizer name.
             weight: Weight of the regularization term (usually expressed
-                as ``lambda'' in the literature).
+                as "lambda" in the literature).
         """
         self._name = name
         self._weight = weight
@@ -64,7 +64,7 @@ def __init__(self,
 
         Arguments:
             name: Regularizer name.
-            weight: Weight of the regularization term (default=1.0e-8.
+            weight: Weight of the regularization term.
         """
         Regularizer.__init__(self, name, weight)
 
@@ -95,9 +95,8 @@ class EWCRegularizer(Regularizer):
     Implements Elastic Weight Consolidation from the "Overcoming catastrophic
     forgetting in neural networks" paper.
 
-    The regularizer applies separate regularization weight to each trainable
-    variable based on how important the variable was for the previously
-    learned task.
+    The regularizer applies a separate regularization weight to each trainable
+    variable based on its importance for the previously learned task.
 
     https://arxiv.org/pdf/1612.00796.pdf
     """
@@ -120,8 +119,8 @@ def __init__(self,
         check_argument_types()
         Regularizer.__init__(self, name, weight)
 
-        log("Loading initial variables for EWC from "
-            "{}.".format(variables_file))
+        log("Loading initial variables for EWC from {}."
+            .format(variables_file))
         self.init_vars = tf.contrib.framework.load_checkpoint(variables_file)
         log("EWC initial variables loaded.")
 
@@ -132,15 +131,14 @@ def __init__(self,
     def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
         ewc_value = tf.constant(0.0)
         for var in variables:
-            var_name = var.name
-            init_var_name = var_name.split(":")[0]
-            if (var_name in self.gradients.files
+            init_var_name = var.name.split(":")[0]
+            if (var.name in self.gradients.files
                     and self.init_vars.has_tensor(init_var_name)):
                 init_var = tf.constant(
                     self.init_vars.get_tensor(init_var_name),
                     name="{}_init_value".format(init_var_name))
                 grad_squared = tf.constant(
-                    np.square(self.gradients[var_name]),
+                    np.square(self.gradients[var.name]),
                     name="{}_ewc_weight".format(init_var_name))
                 ewc_value += tf.reduce_sum(tf.multiply(
                     grad_squared, tf.square(var - init_var)))
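For reference only, not part of the patch: `EWCRegularizer.value` above builds, in the TensorFlow graph, the Elastic Weight Consolidation penalty sum(grad**2 * (var - var_init)**2), with the stored squared gradients acting as per-variable importance weights. The following is a rough NumPy sketch of the same quantity; the function `ewc_penalty` and the toy dictionaries `current`, `initial` and `grad_estimates` are hypothetical stand-ins for the loaded checkpoint and gradient file.

    import numpy as np

    def ewc_penalty(current, initial, grad_estimates):
        """Squared-gradient-weighted squared distance from the previous task's weights."""
        value = 0.0
        for name, var in current.items():
            # squared gradients approximate the diagonal of the Fisher information
            value += np.sum(np.square(grad_estimates[name])
                            * np.square(var - initial[name]))
        return value

    # hypothetical toy values
    current = {"w": np.array([0.5, -1.0])}
    initial = {"w": np.array([0.4, -0.8])}
    grad_estimates = {"w": np.array([2.0, 0.1])}
    print(ewc_penalty(current, initial, grad_estimates))  # ~0.0404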