-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
72 lines (51 loc) · 3.66 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
from utils.early_stopping import EarlyStopping
def _batch_loss(model, batch, word_num_layers, char_num_layers, morph_num_layers,
                word_hidden_size, char_hidden_size, morph_hidden_size,
                batch_size, device):
    """Run one batch through ``model`` and return the negated ``model.crf`` score.

    ``batch`` is unpacked into eight items: padded word/target/char/morph
    sequences and their length companions. The padded tensors are moved to
    ``device``; the length objects are passed through untouched.
    """
    (pad_input_seqs, input_seq_lengths, pad_target_seqs, _target_seq_lengths,
     pad_char_seqs, char_seq_lengths, pad_morph_seqs, morph_seq_lengths) = batch

    pad_input_seqs = pad_input_seqs.to(device)
    pad_target_seqs = pad_target_seqs.to(device)
    pad_char_seqs = pad_char_seqs.to(device)
    pad_morph_seqs = pad_morph_seqs.to(device)

    # Fresh hidden states for every batch, one per encoder.
    word_hidden = model.init_hidden(word_num_layers, word_hidden_size, batch_size, device)
    char_hidden = model.init_hidden(char_num_layers, char_hidden_size, batch_size, device)
    morph_hidden = model.init_hidden(morph_num_layers, morph_hidden_size, batch_size, device)

    emissions = model(pad_input_seqs, input_seq_lengths, pad_char_seqs, char_seq_lengths,
                      pad_morph_seqs, morph_seq_lengths, word_hidden, char_hidden,
                      morph_hidden, batch_size)

    # NOTE(review): squeeze() with no dim removes *every* size-1 dimension, so a
    # batch of one sequence (or sequences of length one) would lose an axis.
    # Confirm the intended dim with the data loader before tightening this.
    pad_target_seqs = pad_target_seqs.squeeze()

    # Positions whose target id is 0 are masked out (presumably 0 is padding).
    mask = (pad_target_seqs != 0).byte()

    return -model.crf(emissions, pad_target_seqs, mask=mask)


def train(model, word_num_layers, char_num_layers, morph_num_layers, num_epochs, pairs_batch_train, pairs_batch_dev, word_hidden_size, char_hidden_size, morph_hidden_size, batch_size, criterion, optimizer, patience, device):
    """Train ``model`` for ``num_epochs`` epochs and save its weights.

    Each epoch runs one optimisation pass over ``pairs_batch_train`` followed by
    a gradient-free pass over ``pairs_batch_dev``, then prints the mean loss of
    both. After the last epoch the summary line is printed once more and the
    model's ``state_dict`` is written to ``weights/model_upper.pt``.

    Args:
        model: Network exposing ``init_hidden``, ``crf``, ``train``/``eval``,
            ``zero_grad`` and ``state_dict``; called directly for emissions.
        word_num_layers, char_num_layers, morph_num_layers: Layer counts
            forwarded to ``model.init_hidden``.
        num_epochs: Number of passes over the training data.
        pairs_batch_train, pairs_batch_dev: Sized iterables of 8-item batches
            (``len()`` is used to average the loss).
        word_hidden_size, char_hidden_size, morph_hidden_size: Hidden sizes
            forwarded to ``model.init_hidden``.
        batch_size: Forwarded to ``model.init_hidden`` and to ``model``.
        criterion: Unused; the loss comes from ``model.crf``. Kept so existing
            callers keep working.
        optimizer: Optimiser whose ``step()`` is called once per training batch.
        patience: Passed to ``EarlyStopping`` (early stopping is currently
            disabled, see the note in the epoch loop).
        device: Target device for tensors and hidden states.

    Returns:
        None.
    """
    import os  # local import: only needed to make sure the output directory exists

    # Instantiated for parity with the original code; not consulted while early
    # stopping is disabled below.
    early_stopping = EarlyStopping(patience=patience, verbose=False, delta=0)

    # Clamp to 1 so an empty loader reports a loss of 0 instead of dividing by zero.
    num_train_batches = max(len(pairs_batch_train), 1)
    num_dev_batches = max(len(pairs_batch_dev), 1)

    summary = None
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch in pairs_batch_train:
            model.zero_grad()
            loss = _batch_loss(model, batch, word_num_layers, char_num_layers,
                               morph_num_layers, word_hidden_size, char_hidden_size,
                               morph_hidden_size, batch_size, device)
            loss.backward()
            # .item() detaches the value: summing the tensor itself would keep
            # autograd history attached to the running total for the whole epoch.
            train_loss += loss.item()
            optimizer.step()

        # calculate validation loss
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in pairs_batch_dev:
                loss = _batch_loss(model, batch, word_num_layers, char_num_layers,
                                   morph_num_layers, word_hidden_size, char_hidden_size,
                                   morph_hidden_size, batch_size, device)
                val_loss += loss.item()

        # NOTE: early stopping is intentionally disabled; to re-enable:
        # early_stopping(val_loss / num_dev_batches, model)
        # if early_stopping.early_stop:
        #     print("Early stopping")
        #     break

        summary = '[Epoch: %d] train_loss: %.4f val_loss: %.4f' % (
            epoch + 1, train_loss / num_train_batches, val_loss / num_dev_batches)
        print(summary)

    # With num_epochs == 0 there is nothing to summarise (the original raised
    # NameError here because the loop variables were never bound).
    if summary is not None:
        print('\n The final loss is:')
        print(summary)

    # Do not lose a finished training run because the output directory is missing.
    os.makedirs('weights', exist_ok=True)
    torch.save(model.state_dict(), 'weights/model_upper.pt')