From aaa8e8a3c2b2a66320c95b58ae1c828e7ffea25e Mon Sep 17 00:00:00 2001 From: Dusan Varis Date: Thu, 26 Jul 2018 20:12:40 +0200 Subject: [PATCH] fixed tests --- neuralmonkey/runners/gradient_runner.py | 14 ++-- neuralmonkey/trainers/generic_trainer.py | 20 ++--- neuralmonkey/trainers/regularizers.py | 22 ++++-- tests/alignment.ini | 2 +- tests/audio-classifier.ini | 2 +- tests/bahdanau.ini | 2 +- tests/bpe.ini | 2 +- tests/captioning.ini | 2 +- tests/classifier.ini | 2 +- tests/ctc.ini | 2 +- tests/factored.ini | 2 +- tests/flat-multiattention.ini | 2 +- tests/hier-multiattention.ini | 2 +- tests/labeler.ini | 2 +- tests/language-model.ini | 2 +- tests/mrt.ini | 96 ++++++++++++++++++++++++ tests/nematus.ini | 2 +- tests/post-edit.ini | 4 +- tests/regressor.ini | 2 +- tests/rl.ini | 2 +- tests/self-critical.ini | 2 +- tests/small.ini | 2 +- tests/small_sent_cnn.ini | 2 +- tests/str.ini | 2 +- tests/test_bahdanau.ini | 14 ++++ tests/vocab.ini | 2 +- 26 files changed, 166 insertions(+), 44 deletions(-) create mode 100644 tests/mrt.ini create mode 100644 tests/test_bahdanau.ini diff --git a/neuralmonkey/runners/gradient_runner.py b/neuralmonkey/runners/gradient_runner.py index 45d182ba0..611a496a7 100644 --- a/neuralmonkey/runners/gradient_runner.py +++ b/neuralmonkey/runners/gradient_runner.py @@ -1,9 +1,9 @@ -from typing import Dict, List, Set, Union +from typing import Any, Dict, List, Set, Union, Optional from typeguard import check_argument_types from neuralmonkey.runners.base_runner import ( - BaseRunner, Executable, FeedDict, ExecutionResult, NextExecute) + BaseRunner, Executable, ExecutionResult, NextExecute) from neuralmonkey.model.model_part import ModelPart from neuralmonkey.decoders.autoregressive import AutoregressiveDecoder from neuralmonkey.decoders.classifier import Classifier @@ -14,15 +14,15 @@ # pylint: enable=invalid-name -class GradientRunExecutable(Executable): +class GradientRunnerExecutable(Executable): def __init__(self, all_coders: Set[ModelPart], - fetches: FeedDict) -> None: + fetches: Dict[str, List[Any]]) -> None: self._all_coders = all_coders self._fetches = fetches - self.result = None + self.result = None # type: Optional[ExecutionResult] def next_to_execute(self) -> NextExecute: """Get the feedables and tensors to run.""" @@ -61,10 +61,10 @@ def __init__(self, def get_executable(self, compute_losses: bool, summaries: bool, - num_sessions: int) -> GradientRunExecutable: + num_sessions: int) -> GradientRunnerExecutable: fetches = {"gradients": [g[1] for g in self._gradients]} - return GradientRunExecutable(self.all_coders, fetches) + return GradientRunnerExecutable(self.all_coders, fetches) # pylint: enable=unused-argument @property diff --git a/neuralmonkey/trainers/generic_trainer.py b/neuralmonkey/trainers/generic_trainer.py index d5b72b551..5ab1d40ad 100644 --- a/neuralmonkey/trainers/generic_trainer.py +++ b/neuralmonkey/trainers/generic_trainer.py @@ -2,6 +2,7 @@ import re import tensorflow as tf +from typeguard import check_argument_types from neuralmonkey.model.model_part import ModelPart from neuralmonkey.runners.base_runner import ( @@ -37,7 +38,7 @@ class Objective(NamedTuple( """ -# pylint: disable=too-few-public-methods,too-many-locals,too-many-arguments +# pylint: disable=too-few-public-methods,too-many-locals,too-many-branches class GenericTrainer: def __init__(self, @@ -47,11 +48,11 @@ def __init__(self, regularizers: List[BaseRegularizer] = None, var_scopes: List[str] = None, var_collection: str = None) -> None: + check_argument_types() + 
         self.regularizers = []  # type: List[BaseRegularizer]
         if regularizers is not None:
             self.regularizers = regularizers
-        else:
-            self.regularizers = []
 
         if var_collection is None:
             var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
@@ -91,6 +92,9 @@ def __init__(self,
                          and not v.name.startswith("resnet")]
 
         reg_values = [reg.value(regularizable) for reg in self.regularizers]
+        reg_costs = [
+            reg.weight * reg_value
+            for reg, reg_value in zip(self.regularizers, reg_values)]
 
         # unweighted losses for fetching
         self.losses = [o.loss for o in objectives] + reg_values
@@ -110,13 +114,9 @@ def __init__(self,
         with tf.control_dependencies(update_ops):
             with tf.name_scope("gradient_collection"):
                 differentiable_loss_sum = sum(
-                    (o.weight if o.weight is not None else 1.) * o.loss
-                    for o in objectives
-                    if o.gradients is None)
-                differentiable_loss_sum += sum(
-                    reg.weight * reg_value
-                    for reg, reg_value in zip(self.regularizers,
-                                              reg_values))
+                    [(o.weight if o.weight is not None else 1.) * o.loss
+                     for o in objectives
+                     if o.gradients is None] + reg_costs)
 
                 implicit_gradients = self._get_gradients(
                     differentiable_loss_sum)
diff --git a/neuralmonkey/trainers/regularizers.py b/neuralmonkey/trainers/regularizers.py
index d21f47ba0..139bef3bb 100644
--- a/neuralmonkey/trainers/regularizers.py
+++ b/neuralmonkey/trainers/regularizers.py
@@ -14,7 +14,6 @@
 from neuralmonkey.logging import log
 
 
-# pylint: disable=too-few-public-methods
 class BaseRegularizer:
     """Base class for the regularizers."""
 
@@ -22,8 +21,17 @@ def __init__(self,
                  name: str,
                  weight: float) -> None:
         check_argument_types()
-        self.name = name
-        self.weight = weight
+
+        self._name = name
+        self._weight = weight
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def weight(self) -> float:
+        return self._weight
 
     def value(self, variables) -> float:
         raise NotImplementedError("Abstract method")
@@ -33,7 +41,7 @@ class L1Regularizer(BaseRegularizer):
 
     def __init__(self,
                  name: str = "train_l1",
-                 weight: float = 0.) -> None:
+                 weight: float = 1.0e-8) -> None:
         BaseRegularizer.__init__(self, name, weight)
 
     def value(self, variables: List[tf.Tensor]) -> float:
@@ -44,7 +52,7 @@ class L2Regularizer(BaseRegularizer):
 
     def __init__(self,
                  name: str = "train_l2",
-                 weight: float = 0.) -> None:
+                 weight: float = 1.0e-8) -> None:
         BaseRegularizer.__init__(self, name, weight)
 
     def value(self, variables: List[tf.Tensor]) -> float:
@@ -89,3 +97,7 @@ def value(self, variables: List[tf.Tensor]) -> float:
                 tf.square(gradient), tf.square(var - init_var)))
 
         return ewc_value
+
+
+L1 = L1Regularizer()
+L2 = L2Regularizer()
diff --git a/tests/alignment.ini b/tests/alignment.ini
index 33498f799..6936c0fb9 100644
--- a/tests/alignment.ini
+++ b/tests/alignment.ini
@@ -117,7 +117,7 @@ data_id="ali"
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[,]
 decoder_weights=[1.,1.0e-3]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/audio-classifier.ini b/tests/audio-classifier.ini
index d34756de5..23cc98368 100644
--- a/tests/audio-classifier.ini
+++ b/tests/audio-classifier.ini
@@ -74,7 +74,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/bahdanau.ini b/tests/bahdanau.ini
index f3faa3ee1..4db714d0f 100644
--- a/tests/bahdanau.ini
+++ b/tests/bahdanau.ini
@@ -101,7 +101,7 @@ maxout_size=7
 ; This block just fills the arguments of the trainer __init__ method.
class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 [runner] diff --git a/tests/bpe.ini b/tests/bpe.ini index 482aede74..e06959976 100644 --- a/tests/bpe.ini +++ b/tests/bpe.ini @@ -98,7 +98,7 @@ vocabulary= ; This block just fills the arguments of the trainer __init__ method. class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 optimizer= diff --git a/tests/captioning.ini b/tests/captioning.ini index 0c2be57fd..a9fd5c748 100644 --- a/tests/captioning.ini +++ b/tests/captioning.ini @@ -81,7 +81,7 @@ vocabulary= ; This block just fills the arguments of the trainer __init__ method. class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 [runner] diff --git a/tests/classifier.ini b/tests/classifier.ini index 34ffb2eb9..3e1b0931f 100644 --- a/tests/classifier.ini +++ b/tests/classifier.ini @@ -90,7 +90,7 @@ vocabulary= [trainer] class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 [runner] diff --git a/tests/ctc.ini b/tests/ctc.ini index f2659f4f7..56b6a0630 100644 --- a/tests/ctc.ini +++ b/tests/ctc.ini @@ -73,7 +73,7 @@ name="decoder" [trainer] class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] [runner] class=runners.PlainRunner diff --git a/tests/factored.ini b/tests/factored.ini index 87d9b3e2e..35e6d99b2 100644 --- a/tests/factored.ini +++ b/tests/factored.ini @@ -89,7 +89,7 @@ vocabulary= [trainer] class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 [runner] diff --git a/tests/flat-multiattention.ini b/tests/flat-multiattention.ini index 1f8b1c5fe..baf0db716 100644 --- a/tests/flat-multiattention.ini +++ b/tests/flat-multiattention.ini @@ -160,7 +160,7 @@ max_steps=3 ; This block just fills the arguments of the trainer __init__ method. class=trainers.cross_entropy_trainer.CrossEntropyTrainer decoders=[,,,] -l2_weight=1.0e-8 +regularizers=[trainers.regularizers.L2] clip_norm=1.0 [runner_flat_noshare_nosentinel] diff --git a/tests/hier-multiattention.ini b/tests/hier-multiattention.ini index 8c0b180d3..cdf8a0c42 100644 --- a/tests/hier-multiattention.ini +++ b/tests/hier-multiattention.ini @@ -164,7 +164,7 @@ vocabulary= ; This block just fills the arguments of the trainer __init__ method. 
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[,,,]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner_hier_noshare_nosentinel]
diff --git a/tests/labeler.ini b/tests/labeler.ini
index 8b2c8db64..20c8d1fa8 100644
--- a/tests/labeler.ini
+++ b/tests/labeler.ini
@@ -72,7 +72,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/language-model.ini b/tests/language-model.ini
index d4b4ea13f..7ca6b50d1 100644
--- a/tests/language-model.ini
+++ b/tests/language-model.ini
@@ -53,7 +53,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/mrt.ini b/tests/mrt.ini
new file mode 100644
index 000000000..19b29e8c5
--- /dev/null
+++ b/tests/mrt.ini
@@ -0,0 +1,96 @@
+[main]
+name="translation with minimum risk training"
+tf_manager=<tf_manager>
+output="tests/outputs/mrt"
+overwrite_output_dir=True
+batch_size=16
+epochs=2
+train_dataset=<train_data>
+val_dataset=<val_data>
+trainer=<trainer>
+runners=[<runner>]
+postprocess=None
+evaluation=[("target", evaluators.BLEU)]
+logging_period=20
+validation_period=60
+runners_batch_size=1
+random_seed=1234
+
+[tf_manager]
+class=tf_manager.TensorFlowManager
+num_threads=4
+num_sessions=1
+
+[train_data]
+class=dataset.load_dataset_from_files
+s_source="tests/data/train.tc.en"
+s_target="tests/data/train.tc.de"
+preprocessors=[("source", "source_chars", processors.helpers.preprocess_char_based)]
+lazy=True
+
+[val_data]
+class=dataset.load_dataset_from_files
+s_source="tests/data/val.tc.en"
+s_target="tests/data/val.tc.de"
+preprocessors=[("source", "source_chars", processors.helpers.preprocess_char_based)]
+
+[encoder_vocabulary]
+class=vocabulary.from_wordlist
+path="tests/outputs/vocab/encoder_vocab.tsv"
+
+[encoder]
+class=encoders.recurrent.SentenceEncoder
+name="sentence_encoder"
+rnn_size=7
+max_input_len=10
+embedding_size=11
+dropout_keep_prob=0.5
+data_id="source"
+vocabulary=<encoder_vocabulary>
+
+[attention]
+class=attention.Attention
+name="attention_sentence_encoder"
+encoder=<encoder>
+
+[decoder_vocabulary]
+class=vocabulary.from_wordlist
+path="tests/outputs/vocab/decoder_vocab.tsv"
+
+[decoder]
+class=decoders.decoder.Decoder
+name="decoder"
+encoders=[<encoder>]
+rnn_size=8
+embedding_size=9
+attentions=[<attention>]
+dropout_keep_prob=0.5
+data_id="target"
+max_output_len=10
+vocabulary=<decoder_vocabulary>
+
+[reward]
+class=evaluators.gleu.GLEUEvaluator
+name="GLEU"
+
+[trainer]
+class=trainers.mrt_trainer.MinRiskTrainer
+decoders=[<decoder>]
+evaluator=<reward>
+num_of_samples=5
+alpha=0.1
+regularizers=[trainers.regularizers.L2]
+clip_norm=1.0
+optimizer=<adam>
+
+[adam]
+class=tf.train.AdamOptimizer
+beta1=0.9
+beta2=0.98
+epsilon=1.0e-9
+learning_rate=0.2
+
+[runner]
+class=runners.GreedyRunner
+decoder=<decoder>
+output_series="target"
diff --git a/tests/nematus.ini b/tests/nematus.ini
index 7fd952022..4c42da014 100644
--- a/tests/nematus.ini
+++ b/tests/nematus.ini
@@ -87,7 +87,7 @@ rnn_cell="NematusGRU"
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/post-edit.ini b/tests/post-edit.ini
index 9b24dd194..96d280248 100644
--- a/tests/post-edit.ini
+++ b/tests/post-edit.ini
@@ -87,7 +87,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 
 [runner]
 class=runners.GreedyRunner
@@ -122,4 +122,4 @@ validation_period=2
 logging_period=1
 visualize_embeddings=[]
 postprocess=[("target", )]
-overwrite_output_dir=True
\ No newline at end of file
+overwrite_output_dir=True
diff --git a/tests/regressor.ini b/tests/regressor.ini
index f31a9e2e6..212a8c47d 100644
--- a/tests/regressor.ini
+++ b/tests/regressor.ini
@@ -57,7 +57,7 @@ activation_fn=tf.nn.relu
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/rl.ini b/tests/rl.ini
index 9002ffda2..8ba717832 100644
--- a/tests/rl.ini
+++ b/tests/rl.ini
@@ -85,7 +85,7 @@ name="GLEU"
 [trainer]
 class=trainers.generic_trainer.GenericTrainer
 objectives=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 optimizer=
diff --git a/tests/self-critical.ini b/tests/self-critical.ini
index 73c198bc0..e4a168fec 100644
--- a/tests/self-critical.ini
+++ b/tests/self-critical.ini
@@ -83,7 +83,7 @@ weight=0.5
 [trainer]
 class=trainers.generic_trainer.GenericTrainer
 objectives=[,]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/small.ini b/tests/small.ini
index dfe8f2017..c459731c1 100644
--- a/tests/small.ini
+++ b/tests/small.ini
@@ -96,7 +96,7 @@ rnn_cell="NematusGRU"
 [trainer]
 class=trainers.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/small_sent_cnn.ini b/tests/small_sent_cnn.ini
index 6af7dccc3..7b964247a 100644
--- a/tests/small_sent_cnn.ini
+++ b/tests/small_sent_cnn.ini
@@ -93,7 +93,7 @@ vocabulary=
 ; This block just fills the arguments of the trainer __init__ method.
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 
 [runner]
diff --git a/tests/str.ini b/tests/str.ini
index 8ae51bd1e..9fdb1f0a2 100644
--- a/tests/str.ini
+++ b/tests/str.ini
@@ -105,7 +105,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 clip_norm=1.0
 optimizer=
diff --git a/tests/test_bahdanau.ini b/tests/test_bahdanau.ini
new file mode 100644
index 000000000..1a87eafef
--- /dev/null
+++ b/tests/test_bahdanau.ini
@@ -0,0 +1,14 @@
+[main]
+test_datasets=[<val_data>,<val_data_no_target>]
+
+
+[val_data]
+class=dataset.load_dataset_from_files
+s_source="tests/data/val10.part.tc.en"
+s_target="tests/data/val10.tc.de"
+s_target_out="tests/outputs/tmpout-val10.tc.de"
+
+[val_data_no_target]
+class=dataset.load_dataset_from_files
+s_source="tests/data/val10.tc.en"
+s_gradients_out="tests/outputs/tmpout-val10.tc.de"
diff --git a/tests/vocab.ini b/tests/vocab.ini
index b7a7d4b64..da7b8a7ac 100644
--- a/tests/vocab.ini
+++ b/tests/vocab.ini
@@ -69,7 +69,7 @@ vocabulary=
 [trainer]
 class=trainers.cross_entropy_trainer.CrossEntropyTrainer
 decoders=[]
-l2_weight=1.0e-8
+regularizers=[trainers.regularizers.L2]
 
 [runner]
 class=runners.GreedyRunner
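
Usage note (not part of the patch): the new "regularizers" argument replaces the old "l2_weight" trainer option removed throughout the test configs, and the module-level L1/L2 singletons in neuralmonkey/trainers/regularizers.py exist so that a config can reference a ready-made instance, as in regularizers=[trainers.regularizers.L2]. Below is a minimal Python sketch of the same API, assuming TensorFlow 1.x; the "weights" variable is illustrative only, while the classes, the singletons, and the 1.0e-8 default weight come from the regularizers module in this patch.

    import tensorflow as tf

    from neuralmonkey.trainers.regularizers import L2, L2Regularizer

    # Illustrative stand-in for the trainable variables that
    # GenericTrainer collects from tf.GraphKeys.TRAINABLE_VARIABLES
    # (the patch shows "resnet"-prefixed variables being excluded).
    weights = tf.get_variable("weights", shape=[10, 10])

    # The L2 singleton carries the new default weight of 1.0e-8.
    reg_value = L2.value([weights])   # unweighted term, exposed via trainer.losses
    reg_cost = L2.weight * reg_value  # weighted cost, summed into the training loss

    # A custom instance can override both the summary name and the weight.
    stronger_l2 = L2Regularizer(name="stronger_l2", weight=1.0e-6)

Making name and weight read-only properties matches how GenericTrainer now consumes them: it computes reg.value(...) once per regularizer and weights the result via reg.weight, so neither attribute should change after construction.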