[data]
# data_dir should contain the train.src, train.trg, dev.src, dev.trg files,
# where src and trg stand for the corpus extensions set below
data_dir=
# directory where output files will be generated
work_dir=
# source corpus extension
src=de
# target corpus extension
trg=en
# number of BPE merge operations to learn from the source corpus
bpe_operation_src=30000
# number of BPE merge operations to learn from the target corpus
bpe_operation_trg=30000
# whether subword (BPE) pre-processing should be applied (0 disables it)
apply_bpe=1
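# A minimal sketch of the subword step the settings above describe, using the
# subword-nmt package (an assumption; the actual pipeline may call its own
# scripts). File names follow the src extension configured above:
#
#   from subword_nmt.learn_bpe import learn_bpe
#   from subword_nmt.apply_bpe import BPE
#
#   # learn 30000 merge operations from the source-side training corpus
#   with open('train.de') as fin, open('codes.de', 'w') as fout:
#       learn_bpe(fin, fout, num_symbols=30000)
#
#   # segment the corpus with the learned codes
#   with open('codes.de') as codes:
#       bpe = BPE(codes)
#   with open('train.de') as fin, open('train.bpe.de', 'w') as fout:
#       for line in fin:
#           fout.write(bpe.process_line(line))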
[nmt-architecture]
# source vocabulary size
n_words_src=30000
# target vocabulary size
n_words=30000
# maximum sentence length
maxlen=50
# word vector dimensionality
dim_word=620
# the number of GRU hidden units
dim=620
# training minibatch size
batch_size=100
# validation minibatch size
valid_batch_size=100
# reload model parameters if a model already exists
reload_=True
# overwrite the previously saved model
overwrite=False
# optimizer algorithm to use
optimizer=adadelta
# learning rate
lrate=0.01
# display training progress after this many updates
dispFreq=1000000
# the development set will be evaluated after this many updates
validFreq=10000
# save the model parameters after every saveFreq updates
saveFreq=10000
# generate sample translations after every sampleFreq updates (do not reduce this value; doing so may cause problems)
sampleFreq=1000000
# enable dropout
use_dropout=True
# dropout for input embeddings (0: no dropout)
dropout_embedding=0.1
# dropout for hidden layers (0: no dropout)
dropout_hidden=0.1
# dropout source words (0: no dropout)
dropout_source=0.1
# dropout target words (0: no dropout)
dropout_target=0.1
# whether the parallel corpus should be reshuffled at each new epoch
shuffle_each_epoch=True
# after this many epochs the program will terminate
max_epochs=100
# finish after this many updates
finish_after=10000000
# whether to run in fine-tuning mode
finetune=False
# L2 regularization penalty
decay_c=0.
# alignment regularization
alpha_c=0.
# gradient clipping threshold (a non-positive value disables clipping)
clip_c=-1.
# early stopping patience
patience=1000
# encoder recurrent layer type
encoder=gru
# decoder recurrent layer type (conditional GRU)
decoder=gru_cond
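# A minimal sketch of reading this file from Python with the standard-library
# configparser module (key and section names as defined above):
#
#   import configparser
#
#   cfg = configparser.ConfigParser()
#   cfg.read('config.cfg')
#
#   arch = cfg['nmt-architecture']
#   dim_word = arch.getint('dim_word')            # 620
#   lrate = arch.getfloat('lrate')                # 0.01
#   use_dropout = arch.getboolean('use_dropout')  # True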