bugfix in DelayedUpdateTrainer
varisd committed Jan 31, 2019
1 parent c676d9b commit d826ad7
Showing 8 changed files with 17 additions and 22 deletions.
20 changes: 6 additions & 14 deletions neuralmonkey/dataset.py
@@ -364,18 +364,6 @@ def itergen():
             return func(iterators)
         return itergen
 
-    def _add_preprocessed_series(iterators, s_name, prep_sl):
-        preprocessor, source = prep_sl[s_name]
-        if s_name in iterators:
-            return
-        if source in prep_sl:
-            _add_preprocessed_series(iterators, source, prep_sl)
-        if source not in iterators:
-            raise ValueError(
-                "Source series {} for series-level preprocessor nonexistent: "
-                "Preprocessed series '{}', source series '{}'".format(source, s_name, source))
-        iterators[s_name] = _make_sl_iterator(source, preprocessor)
-
     # First, prepare iterators for series using file readers
     for s_name, source_spec in zip(series, data):
         if match_type(source_spec, ReaderDef):  # type: ignore
@@ -398,8 +386,12 @@ def _add_preprocessed_series(iterators, s_name, prep_sl):
     # Second, prepare series-level preprocessors.
     # Note that series-level preprocessors cannot be stacked on the dataset
     # specification level.
-    for s_name in prep_sl:
-        _add_preprocessed_series(iterators, s_name, prep_sl)
+    for s_name, (preprocessor, source) in prep_sl.items():
+        if source not in iterators:
+            raise ValueError(
+                "Source series {} for series-level preprocessor nonexistent: "
+                "Preprocessed series '{}', source series '{}'".format(source, s_name, source))
+        iterators[s_name] = _make_sl_iterator(source, preprocessor)
 
     # Finally, dataset-level preprocessors.
     for s_name, func in prep_dl.items():
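The removed recursive helper resolved preprocessor chains regardless of declaration order; the inlined loop requires a preprocessor's source series to already exist when the loop reaches it, which is what the comment means by series-level preprocessors not being stackable. A minimal self-contained sketch of the new contract (the series names, data, error message wording, and the make_iterator stand-in are hypothetical, not from the commit):

    def make_iterator(source, preprocessor, iterators):
        # Stand-in for _make_sl_iterator: applies the preprocessor lazily.
        def itergen():
            return (preprocessor(item) for item in iterators[source]())
        return itergen

    iterators = {"src": lambda: iter(["a b", "c d"])}  # file-backed series

    # Declared out of order: "upper" depends on "tokens", defined below it.
    prep_sl = {"upper": (str.upper, "tokens"),
               "tokens": (str.split, "src")}

    try:
        for s_name, (preprocessor, source) in prep_sl.items():
            if source not in iterators:
                raise ValueError("series '{}' needs missing source '{}'"
                                 .format(s_name, source))
            iterators[s_name] = make_iterator(source, preprocessor, iterators)
    except ValueError as err:
        print(err)  # the old recursive helper would have resolved this chain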
3 changes: 1 addition & 2 deletions neuralmonkey/decoders/decoder.py
@@ -115,7 +115,6 @@ def __init__(self,
             name: Name of the decoder. Should be unique accross all Neural
                 Monkey objects.
             max_output_len: Maximum length of an output sequence.
-            reuse: Reuse the model variables.
             dropout_keep_prob: Probability of keeping a value during dropout.
             embedding_size: Size of embedding vectors for target words.
             embeddings_source: Embedded sequence to take embeddings from.
@@ -143,13 +142,13 @@ def __init__(self,
             vocabulary=vocabulary,
             data_id=data_id,
             max_output_len=max_output_len,
-            reuse=reuse,
             dropout_keep_prob=dropout_keep_prob,
             embedding_size=embedding_size,
             embeddings_source=embeddings_source,
             tie_embeddings=tie_embeddings,
             label_smoothing=label_smoothing,
             supress_unk=supress_unk,
+            reuse=reuse,
             save_checkpoint=save_checkpoint,
             load_checkpoint=load_checkpoint,
             initializers=initializers)
1 change: 0 additions & 1 deletion neuralmonkey/decoders/transformer.py
@@ -96,7 +96,6 @@ def __init__(self,
             name: Name of the decoder. Should be unique accross all Neural
                 Monkey objects.
             max_output_len: Maximum length of an output sequence.
-            reuse: Reuse the model variables.
             dropout_keep_prob: Probability of keeping a value during dropout.
             embedding_size: Size of embedding vectors for target words.
             embeddings_source: Embedded sequence to take embeddings from.
10 changes: 10 additions & 0 deletions neuralmonkey/trainers/delayed_update_trainer.py
@@ -240,3 +240,13 @@ def summaries(self) -> Dict[str, tf.Tensor]:
                     tf.get_collection("summary_train")),
                 "histogram_summaries": tf.summary.merge(
                     tf.get_collection("summary_gradients"))}
+
+    @property
+    def fetches(self) -> Dict[str, tf.Tensor]:
+        return {"train_op": self.train_op,
+                "losses": self.objective_values,
+                "batch_size": self.batch_size,
+                "_update_ops": tf.get_collection(tf.GraphKeys.UPDATE_OPS),
+                "accumulators": self.accumulate_ops,
+                "counter": self.cumulator_counter,
+                "resets": self.reset_ops}
2 changes: 0 additions & 2 deletions neuralmonkey/trainers/generic_trainer.py
@@ -76,8 +76,6 @@ def __init__(self,
         if self.var_collection is None:
             self.var_collection = tf.GraphKeys.TRAINABLE_VARIABLES
 
-        self.objectives = objectives
-
         self.regularizers = []  # type: List[Regularizer]
         if regularizers is not None:
             self.regularizers = regularizers
1 change: 0 additions & 1 deletion tests/beamsearch.ini
@@ -13,7 +13,6 @@ postprocess=None
 evaluation=[("target_beam.rank001", "target", evaluators.BLEU)]
 logging_period=20
 validation_period=60
-runners_batch_size=5
 random_seed=1234
 
 [tf_manager]
1 change: 0 additions & 1 deletion tests/mrt.ini
@@ -13,7 +13,6 @@ postprocess=None
 evaluation=[("target", evaluators.BLEU)]
 logging_period=20
 validation_period=60
-runners_batch_size=1
 random_seed=1234
 
 [tf_manager]
1 change: 0 additions & 1 deletion tests/transformer.ini
@@ -79,7 +79,6 @@ n_heads_enc=2
 [trainer]
 class=trainers.delayed_update_trainer.DelayedUpdateTrainer
 batches_per_update=5
-l2_weight=1.0e-8
 clip_norm=1.0
 objectives=[<obj>]
 optimizer=<lazyadam_g>
