From 9c94e7675fee15f0372db3c47efe931278b03c23 Mon Sep 17 00:00:00 2001
From: Dusan Varis
Date: Wed, 8 Aug 2018 00:35:17 +0200
Subject: [PATCH] addressing PR reviews + fixed variable fetching in
 EWCRegularizer

---
 .../trainers/cross_entropy_trainer.py    | 15 ++--
 neuralmonkey/trainers/generic_trainer.py | 23 ++---
 neuralmonkey/trainers/regularizers.py    | 87 +++++++++++--------
 3 files changed, 67 insertions(+), 58 deletions(-)

diff --git a/neuralmonkey/trainers/cross_entropy_trainer.py b/neuralmonkey/trainers/cross_entropy_trainer.py
index 42c61b849..c652a2a63 100644
--- a/neuralmonkey/trainers/cross_entropy_trainer.py
+++ b/neuralmonkey/trainers/cross_entropy_trainer.py
@@ -3,7 +3,6 @@
 import tensorflow as tf
 from typeguard import check_argument_types
 
-from neuralmonkey.logging import warn
 from neuralmonkey.trainers.generic_trainer import (
     GenericTrainer, Objective, ObjectiveWeight)
 from neuralmonkey.trainers.regularizers import (
@@ -42,17 +41,13 @@ def __init__(self,
         if regularizers is None:
             regularizers = []
 
-        if l1_weight > 0.:
-            if L1Regularizer in [type(r) for r in regularizers]:
-                warn("You specified both trainer l1_weight "
-                     "and a L1Regularizer object in your config")
-            regularizers.append(L1Regularizer(weight=l1_weight))
+        if l1_weight > 0.:
+            regularizers.append(
+                L1Regularizer(name="train_l1", weight=l1_weight))
 
         if l2_weight > 0.:
-            if L2Regularizer in [type(r) for r in regularizers]:
-                warn("You specified both trainer l2_weight "
-                     "and a L2Regularizer object in your config")
-            regularizers.append(L2Regularizer(weight=l2_weight))
+            regularizers.append(
+                L2Regularizer(name="train_l2", weight=l2_weight))
 
         if len(decoder_weights) != len(decoders):
             raise ValueError(
diff --git a/neuralmonkey/trainers/generic_trainer.py b/neuralmonkey/trainers/generic_trainer.py
index 474e84f62..2315ab8d8 100644
--- a/neuralmonkey/trainers/generic_trainer.py
+++ b/neuralmonkey/trainers/generic_trainer.py
@@ -7,8 +7,7 @@
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.runners.base_runner import (
     Executable, ExecutionResult, NextExecute)
-from neuralmonkey.trainers.regularizers import (
-    Regularizer, L2Regularizer)
+from neuralmonkey.trainers.regularizers import (Regularizer, L2Regularizer)
 
 # pylint: disable=invalid-name
 Gradients = List[Tuple[tf.Tensor, tf.Variable]]
@@ -40,6 +39,7 @@ class Objective(NamedTuple(
 
 
 # pylint: disable=too-few-public-methods,too-many-locals,too-many-branches
+# pylint: disable=too-many-statements
 class GenericTrainer:
 
     def __init__(self,
@@ -102,7 +102,9 @@ def __init__(self,
 
         # we always want to include l2 values in the summary
         if L2Regularizer not in [type(r) for r in self.regularizers]:
-            reg_values.append(L2Regularizer().value(regularizable))
+            l2_reg = L2Regularizer(name="train_l2", weight=0.)
+            tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
+                              collections=["summary_train"])
         for reg, reg_value in zip(self.regularizers, reg_values):
             tf.summary.scalar(reg.name, reg_value,
                               collections=["summary_train"])
@@ -119,8 +121,8 @@ def __init__(self,
         with tf.name_scope("gradient_collection"):
             differentiable_loss_sum = sum(
                 [(o.weight if o.weight is not None else 1.) * o.loss
-                 for o in objectives
-                 if o.gradients is None] + reg_costs)
+                 for o in objectives if o.gradients is None])
+            differentiable_loss_sum += sum(reg_costs)
 
             implicit_gradients = self._get_gradients(
                 differentiable_loss_sum)
@@ -130,10 +132,10 @@ def __init__(self,
                 for o in objectives if o.gradients is not None]
 
             if other_gradients:
-                gradients = _sum_gradients(
+                self.gradients = _sum_gradients(
                     [implicit_gradients] + other_gradients)
             else:
-                gradients = implicit_gradients
+                self.gradients = implicit_gradients
 
             tf.summary.scalar("train_opt_cost",
                               differentiable_loss_sum,
@@ -141,14 +143,13 @@ def __init__(self,
 
         if clip_norm:
             assert clip_norm > 0.0
-            gradients = [(tf.clip_by_norm(grad, clip_norm), var)
-                         for grad, var in gradients
-                         if grad is not None]
+            self.gradients = [(tf.clip_by_norm(grad, clip_norm), var)
+                              for grad, var in self.gradients
+                              if grad is not None]
 
         self.all_coders = set.union(*(obj.decoder.get_dependencies()
                                       for obj in objectives))
 
-        self.gradients = gradients
         self.train_op = self.optimizer.apply_gradients(
             self.gradients, global_step=step)
 
diff --git a/neuralmonkey/trainers/regularizers.py b/neuralmonkey/trainers/regularizers.py
index 4b6f43fab..86f5f21e2 100644
--- a/neuralmonkey/trainers/regularizers.py
+++ b/neuralmonkey/trainers/regularizers.py
@@ -3,8 +3,8 @@
 This module contains classes that can be used as a variable regularizers
 during training. All implementation should be derived from the Regularizer
 class.
-
 """
+from abc import ABCMeta, abstractmethod
 from typing import List
 
 import numpy as np
@@ -14,8 +14,14 @@
 from neuralmonkey.logging import log
 
 
-class Regularizer:
-    """Base class for the regularizers."""
+class Regularizer(metaclass=ABCMeta):
+    """Base class for regularizers.
+
+    Regularizer objects are used to introduce additional loss terms to
+    the trainer, thus constraining the model variables during training.
+    These loss terms have an adjustable weight allowing the user to set
+    the ``importance'' of the term.
+    """
 
     def __init__(self,
                  name: str,
@@ -24,10 +30,9 @@ def __init__(self,
 
         Arguments:
             name: Regularizer name.
-            weight: Weight of the regularization term.
+            weight: Weight of the regularization term (usually expressed
+                as ``lambda'' in the literature).
         """
-        check_argument_types()
-
         self._name = name
         self._weight = weight
 
@@ -39,7 +44,13 @@ def name(self) -> str:
     def weight(self) -> float:
         return self._weight
 
-    def value(self, variables) -> float:
+    @abstractmethod
+    def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
+        """Compute the unweighted value of the regularization loss term.
+
+        Arguments:
+            variables: List of the regularizable model variables.
+        """
         raise NotImplementedError("Abstract method")
 
 
@@ -47,17 +58,17 @@ class L1Regularizer(Regularizer):
     """L1 regularizer."""
 
     def __init__(self,
-                 name: str = "train_l1",
-                 weight: float = 1.0e-8) -> None:
+                 name: str,
+                 weight: float) -> None:
         """Create the regularizer.
 
         Arguments:
             name: Regularizer name.
-            weight: Weight of the regularization term.
+            weight: Weight of the regularization term (e.g. 1.0e-8).
         """
         Regularizer.__init__(self, name, weight)
 
-    def value(self, variables: List[tf.Tensor]) -> float:
+    def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
         return sum(tf.reduce_sum(abs(v)) for v in variables)
 
 
@@ -65,8 +76,8 @@ class L2Regularizer(Regularizer):
     """L2 regularizer."""
 
     def __init__(self,
-                 name: str = "train_l2",
-                 weight: float = 1.0e-8) -> None:
+                 name: str,
+                 weight: float) -> None:
         """Create the regularizer.
 
         Arguments:
@@ -75,7 +86,7 @@ def __init__(self,
         """
         Regularizer.__init__(self, name, weight)
 
-    def value(self, variables: List[tf.Tensor]) -> float:
+    def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
         return sum(tf.reduce_sum(v ** 2) for v in variables)
 
 
@@ -84,15 +95,18 @@ class EWCRegularizer(Regularizer):
 
     Implements Elastic Weight Consolidation from the "Overcoming catastrophic
     forgetting in neural networks" paper.
+    The regularizer applies a separate regularization weight to each
+    trainable variable based on how important the variable was for the
+    previously learned task.
 
     https://arxiv.org/pdf/1612.00796.pdf
     """
 
     def __init__(self,
-                 name: str = "train_ewc",
-                 weight: float = 0.,
-                 gradients_file: str = None,
-                 variables_file: str = None) -> None:
+                 name: str,
+                 weight: float,
+                 gradients_file: str,
+                 variables_file: str) -> None:
         """Create the regularizer.
 
         Arguments:
@@ -104,36 +118,35 @@ def __init__(self,
                 on the previous task.
         """
         check_argument_types()
-
         Regularizer.__init__(self, name, weight)
 
-        if gradients_file is None:
-            raise ValueError("Missing gradients_file")
-        if variables_file is None:
-            raise ValueError("Missing variables_file")
-
-        log("Loading initial variables for EWC from {}".format(variables_file))
+        log("Loading initial variables for EWC from "
+            "{}.".format(variables_file))
         self.init_vars = tf.contrib.framework.load_checkpoint(variables_file)
-        log("EWC initial variables loaded")
+        log("EWC initial variables loaded.")
 
-        log("Loading gradient estimates from {}".format(gradients_file))
+        log("Loading gradient estimates from {}.".format(gradients_file))
         self.gradients = np.load(gradients_file)
-        log("Gradient estimates loaded")
+        log("Gradient estimates loaded.")
 
-    def value(self, variables: List[tf.Tensor]) -> float:
+    def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
         ewc_value = tf.constant(0.0)
         for var in variables:
-            var_name = var.name.split(":")[0]
+            var_name = var.name
+            init_var_name = var_name.split(":")[0]
             if (var_name in self.gradients.files
-                    and self.init_vars.has_tensor(var_name)):
-                init_var = self.init_vars.get_tensor(var_name)
-                gradient = tf.constant(
-                    self.gradients[var_name], name="ewc_gradients")
+                    and self.init_vars.has_tensor(init_var_name)):
+                init_var = tf.constant(
+                    self.init_vars.get_tensor(init_var_name),
+                    name="{}_init_value".format(init_var_name))
+                grad_squared = tf.constant(
+                    np.square(self.gradients[var_name]),
+                    name="{}_ewc_weight".format(init_var_name))
                 ewc_value += tf.reduce_sum(tf.multiply(
-                    grad_squared, tf.square(var - init_var)))
+                    grad_squared, tf.square(var - init_var)))
 
         return ewc_value
 
 
-L1 = L1Regularizer()
-L2 = L2Regularizer()
+L1 = L1Regularizer(name="train_l1", weight=1.0e-8)
+L2 = L2Regularizer(name="train_l2", weight=1.0e-8)
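
Reviewer note, not part of the patch: a minimal sketch of the reworked
regularizer interface. Regularizers are now constructed with an explicit
name and weight, value() returns a tf.Tensor, and the trainer folds the
weighted terms into the loss via "differentiable_loss_sum += sum(reg_costs)".
The variable list below is illustrative, and the weight-times-value product
is an assumption about how reg_costs is built (its construction is not shown
in this diff).

    import tensorflow as tf

    from neuralmonkey.trainers.regularizers import (
        L1Regularizer, L2Regularizer)

    # Explicit name and weight are now mandatory; the module-level L1/L2
    # constants preserve the former defaults (weight=1.0e-8).
    regularizers = [L1Regularizer(name="train_l1", weight=1.0e-8),
                    L2Regularizer(name="train_l2", weight=1.0e-8)]

    # Stand-in for the trainer's list of regularizable variables.
    regularizable = [tf.Variable(tf.ones([3, 3]), name="w")]

    # Assumed shape of each cost: regularizer weight times its tensor term.
    reg_costs = [reg.weight * reg.value(regularizable)
                 for reg in regularizers]

    loss = tf.constant(0.0)  # stand-in for the objectives' loss sum
    loss += sum(reg_costs)   # mirrors the trainer's gradient_collection step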
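
Also not part of the patch: the penalty EWCRegularizer.value() builds in the
graph is the EWC term from the paper, sum_i F_i * (theta_i - theta*_i)^2,
where F is estimated by the squared gradients stored in gradients_file and
theta* are the variables checkpointed after the previous task. A NumPy
sketch with illustrative values:

    import numpy as np

    theta = np.array([0.9, -0.2, 0.4])      # current variable values
    theta_star = np.array([1.0, 0.0, 0.5])  # loaded from variables_file
    grads = np.array([2.0, 0.1, 1.0])       # loaded from gradients_file

    # Squared gradients approximate the diagonal of the Fisher matrix,
    # i.e. how important each variable was for the previous task
    # (cf. np.square(self.gradients[var_name]) above).
    fisher = np.square(grads)

    ewc_value = np.sum(fisher * np.square(theta - theta_star))
    # The trainer then adds weight * ewc_value to the loss.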