Commit aaa8e8a: fixed tests

varisd committed Jul 26, 2018
1 parent df97173 commit aaa8e8a

Showing 26 changed files with 166 additions and 44 deletions.
14 changes: 7 additions & 7 deletions neuralmonkey/runners/gradient_runner.py
@@ -1,9 +1,9 @@
-from typing import Dict, List, Set, Union
+from typing import Any, Dict, List, Set, Union, Optional

 from typeguard import check_argument_types

 from neuralmonkey.runners.base_runner import (
-    BaseRunner, Executable, FeedDict, ExecutionResult, NextExecute)
+    BaseRunner, Executable, ExecutionResult, NextExecute)
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.decoders.autoregressive import AutoregressiveDecoder
 from neuralmonkey.decoders.classifier import Classifier
@@ -14,15 +14,15 @@
 # pylint: enable=invalid-name


-class GradientRunExecutable(Executable):
+class GradientRunnerExecutable(Executable):

     def __init__(self,
                  all_coders: Set[ModelPart],
-                 fetches: FeedDict) -> None:
+                 fetches: Dict[str, List[Any]]) -> None:
         self._all_coders = all_coders
         self._fetches = fetches

-        self.result = None
+        self.result = None  # type: Optional[ExecutionResult]

     def next_to_execute(self) -> NextExecute:
         """Get the feedables and tensors to run."""
@@ -61,10 +61,10 @@ def __init__(self,
     def get_executable(self,
                        compute_losses: bool,
                        summaries: bool,
-                       num_sessions: int) -> GradientRunExecutable:
+                       num_sessions: int) -> GradientRunnerExecutable:
         fetches = {"gradients": [g[1] for g in self._gradients]}

-        return GradientRunExecutable(self.all_coders, fetches)
+        return GradientRunnerExecutable(self.all_coders, fetches)
     # pylint: enable=unused-argument

     @property
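
Note on the `# type: Optional[ExecutionResult]` comment above: it is the comment-style variable annotation that mypy accepts on Python versions without PEP 526 syntax, needed because the attribute starts out as None and only later holds a result. A minimal, self-contained sketch of the pattern (the classes below are stand-ins for illustration, not the neuralmonkey ones):

from typing import Optional


class ExecutionResult:
    """Placeholder for illustration only."""


class SketchExecutable:
    def __init__(self) -> None:
        # Starts empty; filled in once execution finishes. The trailing
        # comment tells mypy the attribute may be None or a result.
        self.result = None  # type: Optional[ExecutionResult]

    def collect_results(self) -> None:
        self.result = ExecutionResult()
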
20 changes: 10 additions & 10 deletions neuralmonkey/trainers/generic_trainer.py
@@ -2,6 +2,7 @@
 import re

 import tensorflow as tf
+from typeguard import check_argument_types

 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.runners.base_runner import (
@@ -37,7 +38,7 @@ class Objective(NamedTuple(
     """


-# pylint: disable=too-few-public-methods,too-many-locals,too-many-arguments
+# pylint: disable=too-few-public-methods,too-many-locals,too-many-branches
 class GenericTrainer:

     def __init__(self,
@@ -47,11 +48,11 @@ def __init__(self,
                  regularizers: List[BaseRegularizer] = None,
                  var_scopes: List[str] = None,
                  var_collection: str = None) -> None:
+        check_argument_types()

+        self.regularizers = []  # type: List[BaseRegularizer]
         if regularizers is not None:
             self.regularizers = regularizers
-        else:
-            self.regularizers = []

         if var_collection is None:
             var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
@@ -91,6 +92,9 @@ def __init__(self,
                               and not v.name.startswith("resnet")]
         reg_values = [reg.value(regularizable)
                       for reg in self.regularizers]
+        reg_costs = [
+            reg.weight * reg_value
+            for reg, reg_value in zip(self.regularizers, reg_values)]

         # unweighted losses for fetching
         self.losses = [o.loss for o in objectives] + reg_values
@@ -110,13 +114,9 @@ def __init__(self,
         with tf.control_dependencies(update_ops):
             with tf.name_scope("gradient_collection"):
                 differentiable_loss_sum = sum(
-                    (o.weight if o.weight is not None else 1.) * o.loss
-                    for o in objectives
-                    if o.gradients is None)
-                differentiable_loss_sum += sum(
-                    reg.weight * reg_value
-                    for reg, reg_value in zip(self.regularizers,
-                                              reg_values))
+                    [(o.weight if o.weight is not None else 1.) * o.loss
+                     for o in objectives
+                     if o.gradients is None] + reg_costs)
                 implicit_gradients = self._get_gradients(
                     differentiable_loss_sum)

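For reference, the hunk above folds the pre-weighted regularization costs (reg_costs, built in the previous hunk) into a single sum with the weighted objective losses. A self-contained sketch of the arithmetic, with made-up numbers standing in for the tensors:

# Illustrative values only; in the trainer these are TensorFlow tensors.
reg_values = [0.5, 1.2]            # unweighted regularizer values
reg_weights = [1.0e-8, 1.0e-8]     # per-regularizer weights
reg_costs = [w * v for w, v in zip(reg_weights, reg_values)]

obj_losses = [2.0, 3.0]            # per-objective losses
obj_weights = [None, 0.5]          # None is treated as weight 1.0

differentiable_loss_sum = sum(
    [(w if w is not None else 1.) * loss
     for w, loss in zip(obj_weights, obj_losses)] + reg_costs)
print(differentiable_loss_sum)     # 3.5 plus two tiny regularization terms
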
22 changes: 17 additions & 5 deletions neuralmonkey/trainers/regularizers.py
@@ -14,16 +14,24 @@
 from neuralmonkey.logging import log


+# pylint: disable=too-few-public-methods
 class BaseRegularizer:
     """Base class for the regularizers."""

     def __init__(self,
                  name: str,
                  weight: float) -> None:
         check_argument_types()
-        self.name = name
-        self.weight = weight
+
+        self._name = name
+        self._weight = weight
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def weight(self) -> float:
+        return self._weight

     def value(self, variables) -> float:
         raise NotImplementedError("Abstract method")
@@ -33,7 +41,7 @@ class L1Regularizer(BaseRegularizer):

     def __init__(self,
                  name: str = "train_l1",
-                 weight: float = 0.) -> None:
+                 weight: float = 1.0e-8) -> None:
         BaseRegularizer.__init__(self, name, weight)

     def value(self, variables: List[tf.Tensor]) -> float:
@@ -44,7 +52,7 @@ class L2Regularizer(BaseRegularizer):

     def __init__(self,
                  name: str = "train_l2",
-                 weight: float = 0.) -> None:
+                 weight: float = 1.0e-8) -> None:
         BaseRegularizer.__init__(self, name, weight)

     def value(self, variables: List[tf.Tensor]) -> float:
@@ -89,3 +97,7 @@ def value(self, variables: List[tf.Tensor]) -> float:
                 tf.square(gradient), tf.square(var - init_var)))

         return ewc_value
+
+
+L1 = L1Regularizer()
+L2 = L2Regularizer()
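
The module now exports ready-made L1 and L2 instances with the new default weight of 1.0e-8, so configs can reference trainers.regularizers.L2 directly. A custom regularizer follows the same shape as L1Regularizer and L2Regularizer; here is a minimal sketch against the refactored base class (the ZeroRegularizer name and body are invented for illustration):

from typing import List

import tensorflow as tf

from neuralmonkey.trainers.regularizers import BaseRegularizer


class ZeroRegularizer(BaseRegularizer):
    """Toy regularizer that always contributes zero (illustration only)."""

    def __init__(self,
                 name: str = "train_zero",
                 weight: float = 1.0e-8) -> None:
        BaseRegularizer.__init__(self, name, weight)

    def value(self, variables: List[tf.Tensor]) -> float:
        # name and weight are now read-only properties on the base class
        return tf.constant(0.)
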
2 changes: 1 addition & 1 deletion tests/alignment.ini
@@ -117,7 +117,7 @@ data_id="ali"
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>,<alignment_decoder>]
 decoder_weights=[1.,1.0e-3]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
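
The remaining test configs below get the same treatment: the old l2_weight scalar becomes a regularizers list. The shared trainers.regularizers.L2 instance fixes the weight at 1.0e-8; a config needing a different weight would declare its own block, along these lines (the [my_l2] section name and its weight are hypothetical):

[my_l2]
class=trainers.regularizers.L2Regularizer
name="train_l2"
weight=1.0e-6

[trainer]
class=trainers.cross_entropy_trainer.CrossEntropyTrainer
decoders=[<decoder>]
regularizers=[<my_l2>]
clip_norm=1.0
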
2 changes: 1 addition & 1 deletion tests/audio-classifier.ini
@@ -74,7 +74,7 @@ vocabulary=<decoder_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/bahdanau.ini
@@ -101,7 +101,7 @@ maxout_size=7
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/bpe.ini
@@ -98,7 +98,7 @@ vocabulary=<bpe_vocabulary>
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 optimizer=<adadelta>

2 changes: 1 addition & 1 deletion tests/captioning.ini
@@ -81,7 +81,7 @@ vocabulary=<decoder_vocabulary>
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/classifier.ini
@@ -90,7 +90,7 @@ vocabulary=<decoder_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/ctc.ini
@@ -73,7 +73,7 @@ name="decoder"
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]

 [runner]
 class=runners.PlainRunner
2 changes: 1 addition & 1 deletion tests/factored.ini
@@ -89,7 +89,7 @@ vocabulary=<surface_target_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/flat-multiattention.ini
@@ -160,7 +160,7 @@ max_steps=3
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder_flat_noshare_nosentinel>,<decoder_flat_share_nosentinel>,<decoder_flat_share_sentinel>,<decoder_flat_noshare_sentinel>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner_flat_noshare_nosentinel]
2 changes: 1 addition & 1 deletion tests/hier-multiattention.ini
@@ -164,7 +164,7 @@ vocabulary=<decoder_vocabulary>
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder_hier_noshare_nosentinel>,<decoder_hier_share_nosentinel>,<decoder_hier_share_sentinel>,<decoder_hier_noshare_sentinel>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner_hier_noshare_nosentinel]
2 changes: 1 addition & 1 deletion tests/labeler.ini
@@ -72,7 +72,7 @@ vocabulary=<tags_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
2 changes: 1 addition & 1 deletion tests/language-model.ini
@@ -53,7 +53,7 @@ vocabulary=<decoder_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
96 changes: 96 additions & 0 deletions tests/mrt.ini
@@ -0,0 +1,96 @@
[main]
name="translation with minimum risk training"
tf_manager=<tf_manager>
output="tests/outputs/mrt"
overwrite_output_dir=True
batch_size=16
epochs=2
train_dataset=<train_data>
val_dataset=<val_data>
trainer=<trainer>
runners=[<runner>]
postprocess=None
evaluation=[("target", evaluators.BLEU)]
logging_period=20
validation_period=60
runners_batch_size=1
random_seed=1234

[tf_manager]
class=tf_manager.TensorFlowManager
num_threads=4
num_sessions=1

[train_data]
class=dataset.load_dataset_from_files
s_source="tests/data/train.tc.en"
s_target="tests/data/train.tc.de"
preprocessors=[("source", "source_chars", processors.helpers.preprocess_char_based)]
lazy=True

[val_data]
class=dataset.load_dataset_from_files
s_source="tests/data/val.tc.en"
s_target="tests/data/val.tc.de"
preprocessors=[("source", "source_chars", processors.helpers.preprocess_char_based)]

[encoder_vocabulary]
class=vocabulary.from_wordlist
path="tests/outputs/vocab/encoder_vocab.tsv"

[encoder]
class=encoders.recurrent.SentenceEncoder
name="sentence_encoder"
rnn_size=7
max_input_len=10
embedding_size=11
dropout_keep_prob=0.5
data_id="source"
vocabulary=<encoder_vocabulary>

[attention]
class=attention.Attention
name="attention_sentence_encoder"
encoder=<encoder>

[decoder_vocabulary]
class=vocabulary.from_wordlist
path="tests/outputs/vocab/decoder_vocab.tsv"

[decoder]
class=decoders.decoder.Decoder
name="decoder"
encoders=[<encoder>]
rnn_size=8
embedding_size=9
attentions=[<attention>]
dropout_keep_prob=0.5
data_id="target"
max_output_len=10
vocabulary=<decoder_vocabulary>

[reward]
class=evaluators.gleu.GLEUEvaluator
name="GLEU"

[trainer]
class=trainers.mrt_trainer.MinRiskTrainer
decoders=[<decoder>]
evaluator=<reward>
num_of_samples=5
alpha=0.1
regularizers=[trainers.regularizers.L2]
clip_norm=1.0
optimizer=<adam>

[adam]
class=tf.train.AdamOptimizer
beta1=0.9
beta2=0.98
epsilon=1.0e-9
learning_rate=0.2

[runner]
class=runners.GreedyRunner
decoder=<decoder>
output_series="target"
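
For context on this new test: minimum risk training optimizes the expected task loss over samples drawn from the model, with alpha sharpening the sample distribution and the GLEU evaluator supplying the reward. A rough, self-contained sketch of that objective (the function and values are illustrative, not the MinRiskTrainer code):

import numpy as np


def expected_risk(log_probs, rewards, alpha=0.1):
    # Renormalize the sampled candidates' probabilities, sharpened by alpha.
    q = np.exp(alpha * np.asarray(log_probs))
    q = q / q.sum()
    # Risk is the expected cost, i.e. one minus the expected reward.
    return float(np.sum(q * (1.0 - np.asarray(rewards))))


print(expected_risk([-1.0, -2.0, -0.5], [0.4, 0.1, 0.7]))
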
2 changes: 1 addition & 1 deletion tests/nematus.ini
@@ -87,7 +87,7 @@ rnn_cell="NematusGRU"
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]
4 changes: 2 additions & 2 deletions tests/post-edit.ini
@@ -87,7 +87,7 @@ vocabulary=<target_vocabulary>
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]

 [runner]
 class=runners.GreedyRunner
@@ -122,4 +122,4 @@ validation_period=2
 logging_period=1
 visualize_embeddings=[<trans_embedded_input>]
 postprocess=[("target", <postprocess>)]
-overwrite_output_dir=True
\ No newline at end of file
+overwrite_output_dir=True
2 changes: 1 addition & 1 deletion tests/regressor.ini
@@ -57,7 +57,7 @@ activation_fn=tf.nn.relu
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[<decoder>]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0

 [runner]