From 2e4683bf72aa375caee45c24c20e35b3ca5bc9bf Mon Sep 17 00:00:00 2001
From: Dusan Varis
Date: Wed, 8 Aug 2018 22:06:22 +0200
Subject: [PATCH] fixed pylints in generic_trainer, fixed typos

---
 neuralmonkey/trainers/generic_trainer.py | 109 +++++++++++++----
 neuralmonkey/trainers/regularizers.py    |  26 +++---
 2 files changed, 76 insertions(+), 59 deletions(-)

diff --git a/neuralmonkey/trainers/generic_trainer.py b/neuralmonkey/trainers/generic_trainer.py
index 2315ab8d8..56a84cee9 100644
--- a/neuralmonkey/trainers/generic_trainer.py
+++ b/neuralmonkey/trainers/generic_trainer.py
@@ -7,7 +7,8 @@
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.runners.base_runner import (
     Executable, ExecutionResult, NextExecute)
-from neuralmonkey.trainers.regularizers import (Regularizer, L2Regularizer)
+from neuralmonkey.trainers.regularizers import (
+    Regularizer, L1Regularizer, L2Regularizer)
 
 # pylint: disable=invalid-name
 Gradients = List[Tuple[tf.Tensor, tf.Variable]]
@@ -38,8 +39,7 @@ class Objective(NamedTuple(
     """
 
 
-# pylint: disable=too-few-public-methods,too-many-locals,too-many-branches
-# pylint: disable=too-many-statements
+# pylint: disable=too-few-public-methods
 class GenericTrainer:
 
     def __init__(self,
@@ -51,25 +51,15 @@ def __init__(self,
                  var_collection: str = None) -> None:
         check_argument_types()
 
+        self.objectives = objectives
+
         self.regularizers = []  # type: List[Regularizer]
         if regularizers is not None:
             self.regularizers = regularizers
 
-        if var_collection is None:
-            var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
-
-        if var_scopes is None:
-            var_lists = [tf.get_collection(var_collection)]
-        else:
-            var_lists = [tf.get_collection(var_collection, scope)
-                         for scope in var_scopes]
-
-        # Flatten the list of lists
-        self.var_list = [var for var_list in var_lists for var in var_list]
+        self.var_list = _get_var_list(var_scopes, var_collection)
 
         with tf.name_scope("trainer"):
-            step = tf.train.get_or_create_global_step()
-
             if optimizer:
                 self.optimizer = optimizer
             else:
@@ -85,43 +75,25 @@ def __init__(self,
                                   collections=["summary_train"])
             # pylint: enable=protected-access
 
-            with tf.name_scope("regularization"):
-                regularizable = [v for v in tf.trainable_variables()
-                                 if not BIAS_REGEX.findall(v.name)
-                                 and not v.name.startswith("vgg")
-                                 and not v.name.startswith("Inception")
-                                 and not v.name.startswith("resnet")]
-                reg_values = [reg.value(regularizable)
-                              for reg in self.regularizers]
-                reg_costs = [
-                    reg.weight * reg_value
-                    for reg, reg_value in zip(self.regularizers, reg_values)]
-
             # unweighted losses for fetching
-            self.losses = [o.loss for o in objectives] + reg_values
-
-            # we always want to include l2 values in the summary
-            if L2Regularizer not in [type(r) for r in self.regularizers]:
-                l2_reg = L2Regularizer(name="train_l2", weight=0.)
-                tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
-                                  collections=["summary_train"])
-            for reg, reg_value in zip(self.regularizers, reg_values):
-                tf.summary.scalar(reg.name, reg_value,
-                                  collections=["summary_train"])
+            self.losses = [o.loss for o in self.objectives]
 
             # log all objectives
-            for obj in objectives:
+            for obj in self.objectives:
                 tf.summary.scalar(
                     obj.name, obj.loss, collections=["summary_train"])
 
+            # compute regularization costs
+            reg_costs = self._compute_regularization()
+
             # if the objective does not have its own gradients,
             # just use TF to do the derivative
-            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-            with tf.control_dependencies(update_ops):
+            with tf.control_dependencies(
+                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                 with tf.name_scope("gradient_collection"):
                     differentiable_loss_sum = sum(
                         [(o.weight if o.weight is not None else 1.) * o.loss
-                         for o in objectives if o.gradients is None])
+                         for o in self.objectives if o.gradients is None])
                     differentiable_loss_sum += sum(reg_costs)
                     implicit_gradients = self._get_gradients(
                         differentiable_loss_sum)
@@ -129,7 +101,7 @@ def __init__(self,
             # objectives that have their gradients explictly computed
             other_gradients = [
                 _scale_gradients(o.gradients, o.weight)
-                for o in objectives if o.gradients is not None]
+                for o in self.objectives if o.gradients is not None]
 
             if other_gradients:
                 self.gradients = _sum_gradients(
@@ -148,10 +120,11 @@ def __init__(self,
                               if grad is not None]
 
             self.all_coders = set.union(*(obj.decoder.get_dependencies()
                                           for obj in objectives))
-                                          for obj in objectives))
+                                          for obj in self.objectives))
 
             self.train_op = self.optimizer.apply_gradients(
-                self.gradients, global_step=step)
+                self.gradients,
+                global_step=tf.train.get_or_create_global_step())
 
             for grad, var in self.gradients:
                 if grad is not None:
@@ -164,6 +137,38 @@ def __init__(self,
             self.scalar_summaries = tf.summary.merge(
                 tf.get_collection("summary_train"))
 
+    def _compute_regularization(self) -> List[tf.Tensor]:
+        with tf.name_scope("regularization"):
+            regularizable = [v for v in tf.trainable_variables()
+                             if not BIAS_REGEX.findall(v.name)
+                             and not v.name.startswith("vgg")
+                             and not v.name.startswith("Inception")
+                             and not v.name.startswith("resnet")]
+            reg_values = [reg.value(regularizable)
+                          for reg in self.regularizers]
+            reg_costs = [
+                reg.weight * reg_value
+                for reg, reg_value in zip(self.regularizers, reg_values)]
+
+            # add unweighted regularization values
+            self.losses += reg_values
+
+            # we always want to include l1 and l2 values in the summary
+            if L1Regularizer not in [type(r) for r in self.regularizers]:
+                l1_reg = L1Regularizer(name="train_l1", weight=0.)
+                tf.summary.scalar(l1_reg.name, l1_reg.value(regularizable),
+                                  collections=["summary_train"])
+            if L2Regularizer not in [type(r) for r in self.regularizers]:
+                l2_reg = L2Regularizer(name="train_l2", weight=0.)
+                tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
+                                  collections=["summary_train"])
+
+            for reg, reg_value in zip(self.regularizers, reg_values):
+                tf.summary.scalar(reg.name, reg_value,
+                                  collections=["summary_train"])
+
+            return reg_costs
+
     def _get_gradients(self, tensor: tf.Tensor) -> Gradients:
         gradient_list = self.optimizer.compute_gradients(tensor, self.var_list)
         return gradient_list
@@ -181,6 +186,20 @@ def get_executable(
             self.histogram_summaries if summaries else None)
 
 
+def _get_var_list(var_scopes, var_collection) -> List[tf.Variable]:
+    if var_collection is None:
+        var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
+
+    if var_scopes is None:
+        var_lists = [tf.get_collection(var_collection)]
+    else:
+        var_lists = [tf.get_collection(var_collection, scope)
+                     for scope in var_scopes]
+
+    # Flatten the list of lists
+    return [var for var_list in var_lists for var in var_list]
+
+
 def _sum_gradients(gradients_list: List[Gradients]) -> Gradients:
     summed_dict = {}  # type: Dict[tf.Variable, tf.Tensor]
     for gradients in gradients_list:
diff --git a/neuralmonkey/trainers/regularizers.py b/neuralmonkey/trainers/regularizers.py
index 86f5f21e2..d2c229fcb 100644
--- a/neuralmonkey/trainers/regularizers.py
+++ b/neuralmonkey/trainers/regularizers.py
@@ -15,11 +15,11 @@
 
 
 class Regularizer(metaclass=ABCMeta):
-    """Base clas s for regularizers.
+    """Base class for regularizers.
 
     Regularizer objects are used to introduce additional loss terms to
-    the trainerthus constraining the model variable during training. These
-    loss terms have an adjustable weight allowing to set the ``importance''
+    the trainer, thus constraining the model variable during training. These
+    loss terms have an adjustable weight allowing to set the "importance"
     of the term.
     """
 
@@ -31,7 +31,7 @@ def __init__(self,
         Arguments:
             name: Regularizer name.
             weight: Weight of the regularization term (usually expressed
-                as ``lambda'' in the literature).
+                as "lambda" in the literature).
         """
         self._name = name
         self._weight = weight
@@ -64,7 +64,7 @@ def __init__(self,
 
         Arguments:
             name: Regularizer name.
-            weight: Weight of the regularization term (default=1.0e-8.
+            weight: Weight of the regularization term.
         """
         Regularizer.__init__(self, name, weight)
 
@@ -95,9 +95,8 @@ class EWCRegularizer(Regularizer):
     Implements Elastic Weight Consolidation from the "Overcoming catastrophic
     forgetting in neural networks" paper.
 
-    The regularizer applies separate regularization weight to each trainable
-    variable based on how important the variable was for the previously
-    learned task.
+    The regularizer applies a separate regularization weight to each trainable
+    variable based on its importance for the previously learned task.
 
     https://arxiv.org/pdf/1612.00796.pdf
     """
@@ -120,8 +119,8 @@ def __init__(self,
         check_argument_types()
         Regularizer.__init__(self, name, weight)
 
-        log("Loading initial variables for EWC from "
-            "{}.".format(variables_file))
+        log("Loading initial variables for EWC from {}."
+            .format(variables_file))
         self.init_vars = tf.contrib.framework.load_checkpoint(variables_file)
         log("EWC initial variables loaded.")
 
@@ -132,15 +131,14 @@ def __init__(self,
     def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
         ewc_value = tf.constant(0.0)
         for var in variables:
-            var_name = var.name
-            init_var_name = var_name.split(":")[0]
-            if (var_name in self.gradients.files
+            init_var_name = var.name.split(":")[0]
+            if (var.name in self.gradients.files
                     and self.init_vars.has_tensor(init_var_name)):
                 init_var = tf.constant(
                     self.init_vars.get_tensor(init_var_name),
                     name="{}_init_value".format(init_var_name))
                 grad_squared = tf.constant(
-                    np.square(self.gradients[var_name]),
+                    np.square(self.gradients[var.name]),
                     name="{}_ewc_weight".format(init_var_name))
                 ewc_value += tf.reduce_sum(tf.multiply(
                     grad_squared, tf.square(var - init_var)))
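For reference only, not part of the patch: `EWCRegularizer.value` above builds, in the TensorFlow graph, the Elastic Weight Consolidation penalty sum(grad**2 * (var - var_init)**2), with the stored squared gradients acting as per-variable importance weights. The following is a rough NumPy sketch of the same quantity; the function `ewc_penalty` and the toy dictionaries `current`, `initial` and `grad_estimates` are hypothetical stand-ins for the loaded checkpoint and gradient file.

    import numpy as np

    def ewc_penalty(current, initial, grad_estimates):
        """Squared-gradient-weighted squared distance from the previous task's weights."""
        value = 0.0
        for name, var in current.items():
            # squared gradients approximate the diagonal of the Fisher information
            value += np.sum(np.square(grad_estimates[name])
                            * np.square(var - initial[name]))
        return value

    # hypothetical toy values
    current = {"w": np.array([0.5, -1.0])}
    initial = {"w": np.array([0.4, -0.8])}
    grad_estimates = {"w": np.array([2.0, 0.1])}
    print(ewc_penalty(current, initial, grad_estimates))  # ~0.0404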