forked from rafaelvalle/asrgen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
sr_train.py
86 lines (70 loc) · 2.6 KB
/
sr_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from tqdm import tqdm
import torch
from data_processing import load_data, iterate_minibatches
from models import SpeakerRecognitionModel
# training params
SPEAKER_ID = None  # target speaker id; None here — presumably "no single target" mode of iterate_minibatches, verify against data_processing
LENGTH = 64             # samples (frames?) per training example — TODO confirm unit
BATCH_SIZE = 256        # training minibatch size
VAL_BATCH_SIZE = 1024   # validation minibatch size
TEST_BATCH_SIZE = 1024  # test minibatch size
N_ITERS = 50000         # total optimizer steps (was int(50000); the cast was a no-op)
# model params
init_lr = 1e-4  # base Adam learning rate; also feeds the decay schedule below
# load data
# data is expected to be a dict with 'train'/'valid'/'test' splits
# (len(data['train']) is used as the speaker/class count below).
data = load_data('data_16khz', '*.wav')
N_CLASSES = len(data['train'])
# All class ids are eligible as "other" speakers.
# NOTE(review): the original also bound SPEAKER_ID_OTHERS = None before this
# line; that dead assignment has been removed — the final value is unchanged.
SPEAKER_ID_OTHERS = range(N_CLASSES)
N_TEST_RUNS = 100  # number of test batches averaged for the final accuracy
# Endless minibatch streams for each split. All three share the same
# iteration settings; only the split, batch size, and the training-only
# apply_transform=False flag differ.
_ITER_KWARGS = dict(
    shuffle=False, forever=True, length=LENGTH, one_hot_labels=False)
data_training = iterate_minibatches(
    data['train'], SPEAKER_ID, SPEAKER_ID_OTHERS, BATCH_SIZE,
    apply_transform=False, **_ITER_KWARGS)
data_validation = iterate_minibatches(
    data['valid'], SPEAKER_ID, SPEAKER_ID_OTHERS, VAL_BATCH_SIZE,
    **_ITER_KWARGS)
data_testing = iterate_minibatches(
    data['test'], SPEAKER_ID, SPEAKER_ID_OTHERS, TEST_BATCH_SIZE,
    **_ITER_KWARGS)
# Classifier over N_CLASSES speakers, placed on the GPU (requires CUDA).
model = SpeakerRecognitionModel(N_CLASSES).cuda()
# Adam with betas=(0.5, 0.9) — non-default momentum settings — plus L2
# weight decay. lr is overwritten every iteration by the schedule below.
optimizer = torch.optim.Adam(
    model.parameters(), lr=init_lr, betas=(0.5, 0.9), weight_decay=1e-4)
# CrossEntropyLoss expects raw logits from the model and integer class labels
# (consistent with one_hot_labels=False in the data iterators).
loss_fn = torch.nn.CrossEntropyLoss()
# Training mode (enables dropout/batch-norm updates, if the model has any).
model.train()
# Main training loop: one optimizer step per iteration, with periodic
# train/validation accuracy logging every 100 steps.
for i in tqdm(range(N_ITERS)):
    X, y = next(data_training)
    X, y = X.cuda(), y.cuda()
    # Add a channel dimension for the model's conv input:
    # NOTE(review): assumes X arrives 2-D (batch, length) — confirm against
    # iterate_minibatches in data_processing.
    X = X.unsqueeze(1)
    model.zero_grad()
    y_hat = model(X)
    loss = loss_fn(y_hat, y)
    loss.backward()
    optimizer.step()
    # learning rate schedule
    # NOTE(review): for i < 10000 the exponent is negative, so lr is ABOVE
    # init_lr (~2.7x at i=0) and falls back to init_lr at i=10000; exponential
    # decay only begins after that. Confirm this is intentional — a guard like
    # 0.9999 ** max(i - 10000, 0) would cap lr at init_lr throughout.
    lr = init_lr * 0.9999 ** (i-10000)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    if i % 100 == 0:
        # Training-batch accuracy from the argmax over class logits.
        _, predicted = torch.max(y_hat, 1)
        accuracy = (predicted.data == y.data).float().squeeze().sum() / BATCH_SIZE
        print("Iteration {}, loss {}, accuracy {}, lr {}".format(
            i, float(loss), accuracy, lr))
        # One validation batch, without gradient tracking.
        # NOTE(review): the model stays in train() mode here — if it contains
        # dropout/batch-norm, these validation numbers are computed in
        # training mode; confirm whether model.eval() should bracket this.
        with torch.no_grad():
            X, y = next(data_validation)
            X, y = X.unsqueeze(1).cuda(), y.cuda()
            y_hat = model(X)
            _, predicted = torch.max(y_hat, 1)
            accuracy = (predicted.data == y.data).float().squeeze().sum() / VAL_BATCH_SIZE
            print("Validation accuracy {}".format(accuracy))
# Final evaluation: mean accuracy over N_TEST_RUNS test batches, then save.
# NOTE(review): the model is still in train() mode here — confirm whether
# model.eval() should be set for this final evaluation.
with torch.no_grad():
    correct = 0.0
    for _ in range(N_TEST_RUNS):
        X, y = next(data_testing)
        X, y = X.unsqueeze(1).cuda(), y.cuda()
        y_hat = model(X)
        _, predicted = torch.max(y_hat, 1)
        # BUG FIX: accumulate correct predictions across runs. The original
        # used `accuracy = ...sum()`, overwriting the count each iteration,
        # so the final division by (N_TEST_RUNS * TEST_BATCH_SIZE) reported
        # only the last batch's correct count spread over all runs.
        correct += (predicted.data == y.data).float().squeeze().sum()
    accuracy = correct / (N_TEST_RUNS * TEST_BATCH_SIZE)
    print("Test accuracy {}".format(accuracy))
# Persist only the weights (state_dict), wrapped in a dict for forward compat.
torch.save({'model': model.state_dict()}, 'sr_model.pt')