fixed pylints in generic_trainer, fixed typos
varisd committed Aug 8, 2018
1 parent 9c94e76 commit 2e4683b
Showing 2 changed files with 76 additions and 59 deletions.
109 changes: 64 additions & 45 deletions neuralmonkey/trainers/generic_trainer.py
@@ -7,7 +7,8 @@
from neuralmonkey.model.model_part import ModelPart
from neuralmonkey.runners.base_runner import (
Executable, ExecutionResult, NextExecute)
from neuralmonkey.trainers.regularizers import (Regularizer, L2Regularizer)
from neuralmonkey.trainers.regularizers import (
Regularizer, L1Regularizer, L2Regularizer)

# pylint: disable=invalid-name
Gradients = List[Tuple[tf.Tensor, tf.Variable]]
@@ -38,8 +39,7 @@ class Objective(NamedTuple(
"""


# pylint: disable=too-few-public-methods,too-many-locals,too-many-branches
# pylint: disable=too-many-statements
# pylint: disable=too-few-public-methods
class GenericTrainer:

def __init__(self,
@@ -51,25 +51,15 @@ def __init__(self,
var_collection: str = None) -> None:
check_argument_types()

self.objectives = objectives

self.regularizers = [] # type: List[Regularizer]
if regularizers is not None:
self.regularizers = regularizers

if var_collection is None:
var_collection = tf.GraphKeys.TRAINABLE_VARIABLES

if var_scopes is None:
var_lists = [tf.get_collection(var_collection)]
else:
var_lists = [tf.get_collection(var_collection, scope)
for scope in var_scopes]

# Flatten the list of lists
self.var_list = [var for var_list in var_lists for var in var_list]
self.var_list = _get_var_list(var_scopes, var_collection)

with tf.name_scope("trainer"):
step = tf.train.get_or_create_global_step()

if optimizer:
self.optimizer = optimizer
else:
@@ -85,51 +75,33 @@ def __init__(self,
collections=["summary_train"])
# pylint: enable=protected-access

with tf.name_scope("regularization"):
regularizable = [v for v in tf.trainable_variables()
if not BIAS_REGEX.findall(v.name)
and not v.name.startswith("vgg")
and not v.name.startswith("Inception")
and not v.name.startswith("resnet")]
reg_values = [reg.value(regularizable)
for reg in self.regularizers]
reg_costs = [
reg.weight * reg_value
for reg, reg_value in zip(self.regularizers, reg_values)]

# unweighted losses for fetching
self.losses = [o.loss for o in objectives] + reg_values

# we always want to include l2 values in the summary
if L2Regularizer not in [type(r) for r in self.regularizers]:
l2_reg = L2Regularizer(name="train_l2", weight=0.)
tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
collections=["summary_train"])
for reg, reg_value in zip(self.regularizers, reg_values):
tf.summary.scalar(reg.name, reg_value,
collections=["summary_train"])
self.losses = [o.loss for o in self.objectives]

# log all objectives
for obj in objectives:
for obj in self.objectives:
tf.summary.scalar(
obj.name, obj.loss, collections=["summary_train"])

# compute regularization costs
reg_costs = self._compute_regularization()

# if the objective does not have its own gradients,
# just use TF to do the derivative
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
with tf.control_dependencies(
tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
with tf.name_scope("gradient_collection"):
differentiable_loss_sum = sum(
[(o.weight if o.weight is not None else 1.) * o.loss
for o in objectives if o.gradients is None])
for o in self.objectives if o.gradients is None])
differentiable_loss_sum += sum(reg_costs)
implicit_gradients = self._get_gradients(
differentiable_loss_sum)

# objectives that have their gradients explictly computed
other_gradients = [
_scale_gradients(o.gradients, o.weight)
for o in objectives if o.gradients is not None]
for o in self.objectives if o.gradients is not None]

if other_gradients:
self.gradients = _sum_gradients(
@@ -148,10 +120,11 @@ def __init__(self,
if grad is not None]

self.all_coders = set.union(*(obj.decoder.get_dependencies()
for obj in objectives))
for obj in self.objectives))

self.train_op = self.optimizer.apply_gradients(
self.gradients, global_step=step)
self.gradients,
global_step=tf.train.get_or_create_global_step())

for grad, var in self.gradients:
if grad is not None:
@@ -164,6 +137,38 @@ def __init__(self,
self.scalar_summaries = tf.summary.merge(
tf.get_collection("summary_train"))

def _compute_regularization(self) -> List[tf.Tensor]:
with tf.name_scope("regularization"):
regularizable = [v for v in tf.trainable_variables()
if not BIAS_REGEX.findall(v.name)
and not v.name.startswith("vgg")
and not v.name.startswith("Inception")
and not v.name.startswith("resnet")]
reg_values = [reg.value(regularizable)
for reg in self.regularizers]
reg_costs = [
reg.weight * reg_value
for reg, reg_value in zip(self.regularizers, reg_values)]

# add unweighted regularization values
self.losses += reg_values

# we always want to include l1 and l2 values in the summary
if L1Regularizer not in [type(r) for r in self.regularizers]:
l1_reg = L1Regularizer(name="train_l1", weight=0.)
tf.summary.scalar(l1_reg.name, l1_reg.value(regularizable),
collections=["summary_train"])
if L2Regularizer not in [type(r) for r in self.regularizers]:
l2_reg = L2Regularizer(name="train_l2", weight=0.)
tf.summary.scalar(l2_reg.name, l2_reg.value(regularizable),
collections=["summary_train"])

for reg, reg_value in zip(self.regularizers, reg_values):
tf.summary.scalar(reg.name, reg_value,
collections=["summary_train"])

return reg_costs

def _get_gradients(self, tensor: tf.Tensor) -> Gradients:
gradient_list = self.optimizer.compute_gradients(tensor, self.var_list)
return gradient_list
Expand All @@ -181,6 +186,20 @@ def get_executable(
self.histogram_summaries if summaries else None)


def _get_var_list(var_scopes, var_collection) -> List[tf.Variable]:
if var_collection is None:
var_collection = tf.GraphKeys.TRAINABLE_VARIABLES

if var_scopes is None:
var_lists = [tf.get_collection(var_collection)]
else:
var_lists = [tf.get_collection(var_collection, scope)
for scope in var_scopes]

# Flatten the list of lists
return [var for var_list in var_lists for var in var_list]


def _sum_gradients(gradients_list: List[Gradients]) -> Gradients:
summed_dict = {} # type: Dict[tf.Variable, tf.Tensor]
for gradients in gradients_list:
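
Below is a minimal sketch, not part of this commit, of how the refactored trainer folds regularization costs into the loss it minimizes, mirroring _compute_regularization and the gradient_collection block above. The toy objective loss and the variable "w" are hypothetical; the regularizer API follows the import shown at the top of the diff.

import tensorflow as tf
from neuralmonkey.trainers.regularizers import L2Regularizer

# hypothetical unweighted objective loss over a single trainable variable
w = tf.get_variable("w", shape=[10])
objective_loss = tf.reduce_mean(tf.square(w))

# same pattern as _compute_regularization: unweighted values for fetching
# and summaries, weighted costs added to the differentiable loss
regularizers = [L2Regularizer(name="train_l2", weight=1.0e-8)]
regularizable = tf.trainable_variables()

reg_values = [reg.value(regularizable) for reg in regularizers]
reg_costs = [reg.weight * value
             for reg, value in zip(regularizers, reg_values)]

# loss that the trainer hands to optimizer.compute_gradients
total_loss = objective_loss + sum(reg_costs)
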
26 changes: 12 additions & 14 deletions neuralmonkey/trainers/regularizers.py
@@ -15,11 +15,11 @@


class Regularizer(metaclass=ABCMeta):
"""Base clas s for regularizers.
"""Base class for regularizers.
Regularizer objects are used to introduce additional loss terms to
the trainerthus constraining the model variable during training. These
loss terms have an adjustable weight allowing to set the ``importance''
the trainer, thus constraining the model variable during training. These
loss terms have an adjustable weight allowing to set the "importance"
of the term.
"""

@@ -31,7 +31,7 @@ def __init__(self,
Arguments:
name: Regularizer name.
weight: Weight of the regularization term (usually expressed
as ``lambda'' in the literature).
as "lambda" in the literature).
"""
self._name = name
self._weight = weight
@@ -64,7 +64,7 @@ def __init__(self,
Arguments:
name: Regularizer name.
weight: Weight of the regularization term (default=1.0e-8.
weight: Weight of the regularization term.
"""
Regularizer.__init__(self, name, weight)

@@ -95,9 +95,8 @@ class EWCRegularizer(Regularizer):
Implements Elastic Weight Consolidation from the "Overcoming catastrophic
forgetting in neural networks" paper.
The regularizer applies separate regularization weight to each trainable
variable based on how important the variable was for the previously
learned task.
The regularizer applies a separate regularization weight to each trainable
variable based on its importance for the previously learned task.
https://arxiv.org/pdf/1612.00796.pdf
"""
@@ -120,8 +119,8 @@ def __init__(self,
check_argument_types()
Regularizer.__init__(self, name, weight)

log("Loading initial variables for EWC from "
"{}.".format(variables_file))
log("Loading initial variables for EWC from {}."
.format(variables_file))
self.init_vars = tf.contrib.framework.load_checkpoint(variables_file)
log("EWC initial variables loaded.")

@@ -132,15 +131,14 @@ def __init__(self,
def value(self, variables: List[tf.Tensor]) -> tf.Tensor:
ewc_value = tf.constant(0.0)
for var in variables:
var_name = var.name
init_var_name = var_name.split(":")[0]
if (var_name in self.gradients.files
init_var_name = var.name.split(":")[0]
if (var.name in self.gradients.files
and self.init_vars.has_tensor(init_var_name)):
init_var = tf.constant(
self.init_vars.get_tensor(init_var_name),
name="{}_init_value".format(init_var_name))
grad_squared = tf.constant(
np.square(self.gradients[var_name]),
np.square(self.gradients[var.name]),
name="{}_ewc_weight".format(init_var_name))
ewc_value += tf.reduce_sum(tf.multiply(
grad_squared, tf.square(var - init_var)))
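
For reference, the penalty that EWCRegularizer.value computes above is the Elastic Weight Consolidation term from the cited paper (https://arxiv.org/pdf/1612.00796.pdf). Reading the code, the squared gradients stored in self.gradients act as a diagonal Fisher estimate F_i and the checkpoint loaded from variables_file supplies the old-task parameters theta*, so the regularizer returns, up to the weight factor that the trainer applies:

R_{\mathrm{EWC}}(\theta) = \sum_i F_i \, (\theta_i - \theta_i^{*})^2
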
