-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path4_train_trans_bpe.sh
executable file
·95 lines (75 loc) · 2.71 KB
/
4_train_trans_bpe.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/sh
# Resolve the engine directory.
# An exported, non-empty ENGINEDIR takes precedence; otherwise the first
# positional argument is used. When neither is given the script aborts with
# exit status 1.
if [ -z "$ENGINEDIR" ]; then
  if [ -n "$1" ]; then
    ENGINEDIR=$1
  else
    # Diagnostics belong on stderr so they are not mixed into captured output.
    echo 'Specify or export ENGINEDIR' >&2
    exit 1
  fi
fi
# flag_to_01 VALUE
# Prints 1 when VALUE is a non-empty string and 0 otherwise.
flag_to_01() {
  if [ -n "$1" ]; then
    echo 1
  else
    echo 0
  fi
}

# Normalise SKIPPREPROCESS to exactly 0 or 1.
# Any non-empty value counts as "set" -- this includes the literal string "0",
# so exporting SKIPPREPROCESS=0 still requests skipping.
# The value is quoted so that content with whitespace is handled as one word
# instead of making the test fail and the flag being silently ignored.
SKIPPREPROCESS=$(flag_to_01 "$SKIPPREPROCESS")
# Directory used below for checkpoints, the training log and monitor logs.
MODELDIR=$ENGINEDIR/model
# Absolute directory containing this script, with symlinks resolved.
# Every expansion is quoted so a path containing whitespace stays one word.
SCRIPTPATH=$( cd "$( dirname "$( readlink -f "$0" )" )" && pwd )
# Bash-only alternative (BASH_SOURCE is not available under /bin/sh):
#SCRIPTPATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
echo "$SCRIPTPATH"
# OpenNMT-py checkout expected next to this script.
OPENNMT=${SCRIPTPATH}/OpenNMT-py
# Expose GPUs 0, 1 and 2 to the child processes started below.
export CUDA_VISIBLE_DEVICES=0,1,2
echo "4.1. Prepare the data..."
# File-name suffixes of the source and target side of the corpus.
SRC=src
TRG=trg
# Vocabulary sizes are read from the *.dict.size files in the data directory.
# Paths are quoted so an ENGINEDIR containing whitespace still works.
# A missing file is not fatal here: the sizes are only consumed when
# preprocessing actually runs.
SRC_VOC_SIZE=$(cat "$ENGINEDIR/data/train.tc.bpe.${SRC}.dict.size")
TRG_VOC_SIZE=$(cat "$ENGINEDIR/data/train.tc.bpe.${TRG}.dict.size")
echo "$SRC_VOC_SIZE"
echo "$TRG_VOC_SIZE"
# Run OpenNMT-py preprocessing unless skipping was requested AND a previously
# generated shard (ready_to_train.train.0.pt) is already present.
if [ "$SKIPPREPROCESS" -eq "0" ] || [ ! -f "$ENGINEDIR/data/ready_to_train.train.0.pt" ]; then
  echo "$SKIPPREPROCESS"
  # Remove stale preprocessing output. -f keeps this quiet when nothing
  # exists yet; the glob is left outside the quotes so it still expands.
  rm -f -- "$ENGINEDIR"/data/ready_to_train*
  python3 "$OPENNMT/preprocess.py" \
    -train_src "$ENGINEDIR/data/train.tc.bpe.${SRC}" \
    -train_tgt "$ENGINEDIR/data/train.tc.bpe.${TRG}" \
    -valid_src "$ENGINEDIR/data/dev.tc.bpe.${SRC}" \
    -valid_tgt "$ENGINEDIR/data/dev.tc.bpe.${TRG}" \
    -src_vocab_size "$SRC_VOC_SIZE" -tgt_vocab_size "$TRG_VOC_SIZE" \
    -filter_valid \
    -save_data "$ENGINEDIR/data/ready_to_train"
else
  echo "Skipping preprocessing"
fi
echo "Launching GPU monitoring"
# Make sure the log locations exist before anything writes to them.
# -p also creates MODELDIR if needed and does not fail on a re-run.
mkdir -p "$MODELDIR/power_log"
# Start the GPU monitor as a background job of this shell and take its PID
# from $!. Launching it inside $( ... ) would only capture its (redirected,
# hence empty) stdout, leaving GPUMONPID empty and the monitor unkillable.
nvidia-smi dmon -i 0,1,2 -s mpucv -d 1 -o TD > "$MODELDIR/gpu.log" &
GPUMONPID=$!
# NOTE(review): power_monitor.py is resolved relative to the current working
# directory, not SCRIPTPATH -- confirm this is intended.
python power_monitor.py "$MODELDIR/power_log" &
POWERMONPID=$!
echo "4.2. Train..."
echo "Options derived from: http://opennmt.net/OpenNMT-py/FAQ.html "
# Train a transformer encoder/decoder (6 layers, 8 heads, 512-dim embeddings,
# 2048-dim feed-forward) on GPU ranks 0 1 2. Validation and checkpointing
# happen every 500 steps, with early stopping (-early_stopping 5) on the
# criteria ppl and accuracy. Progress is written to $MODELDIR/train.log,
# which the best-model selection further down parses.
# Path arguments are quoted so directories containing whitespace are passed
# as single arguments.
# NOTE(review): preprocessing above uses python3 while this uses python --
# confirm both resolve to the same interpreter.
python "$OPENNMT/train.py" \
  -data "$ENGINEDIR/data/ready_to_train" -save_model "$MODELDIR/model" \
  -layers 6 -rnn_size 512 -word_vec_size 512 -transformer_ff 2048 -heads 8 \
  -encoder_type transformer -decoder_type transformer -position_encoding \
  -train_steps 202000 -max_generator_batches 2 -dropout 0.1 \
  -batch_size 4096 -batch_type tokens -normalization tokens -accum_count 3 \
  -optim adam -adam_beta2 0.998 -decay_method noam -warmup_steps 2000 -learning_rate 2 \
  -max_grad_norm 0 -param_init 0 -param_init_glorot \
  -label_smoothing 0.1 -valid_steps 500 -save_checkpoint_steps 500 \
  -report_every 100 \
  -early_stopping 5 -early_stopping_criteria ppl accuracy \
  -world_size 3 -gpu_ranks 0 1 2 \
  -log_file "$MODELDIR/train.log"
# last_field
# Reads lines on stdin and prints the last whitespace-separated field of each.
last_field() {
  awk '{ print $NF }'
}

# A: last word of the most recent 'Best' line in the training log
# (presumably the step number of the best checkpoint -- verify against the
# log format of the OpenNMT-py version in use).
A=$( grep 'Best' "$MODELDIR/train.log" | tail -n 1 | last_field )
if [ -z "$A" ]; then
  # Without a step number the lookups below would match every checkpoint line
  # and the copy would fail; report it and carry on so cleanup still runs.
  echo "No 'Best' entry found in $MODELDIR/train.log; not saving a best model" >&2
else
  MODELNAME="model_step_${A}.pt"
  # B: last word of the first two of the four log lines preceding the line
  # that mentions this checkpoint, each followed by '_' (presumably the
  # validation scores -- TODO confirm). Fixed-string matching on the full
  # checkpoint name avoids also matching other steps ending in the same digits.
  B=$( grep -F -B4 -- "$MODELNAME" "$MODELDIR/train.log" | head -2 | last_field | tr '\n' '_' )
  echo 'Saving best model: ' "$MODELNAME"
  # NOTE(review): the copy gets a .pl extension although the checkpoint is
  # .pt; kept unchanged in case later steps rely on this name.
  cp -- "$MODELDIR/${MODELNAME}" "$MODELDIR/best_model_${A}_${B}.pl"
fi
# stop_monitor PID
# Sends SIGKILL to PID. Does nothing (status 0) when PID is empty, so a
# monitor that never started does not produce a kill usage error.
# NOTE(review): SIGKILL gives the monitors no chance to flush their logs;
# consider SIGTERM if both handle it.
stop_monitor() {
  if [ -n "$1" ]; then
    kill -s 9 "$1"
  fi
}

echo "$GPUMONPID"
echo "$POWERMONPID"
stop_monitor "$GPUMONPID"
stop_monitor "$POWERMONPID"
echo "Done."