# -*- coding: utf-8 -*-
import tensorflow as tf

class SiameseLSTM(object):
    def bi_lstm(self, rnn_size, layer_size, keep_prob):
        # Forward RNN: a stack of layer_size LSTM cells wrapped with output dropout.
        with tf.name_scope('fw_rnn'), tf.variable_scope('fw_rnn'):
            lstm_fw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(layer_size)]
            lstm_fw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_fw_cell_list), output_keep_prob=keep_prob)
        # Backward RNN: same structure, built from its own list of cells.
        with tf.name_scope('bw_rnn'), tf.variable_scope('bw_rnn'):
            lstm_bw_cell_list = [tf.contrib.rnn.LSTMCell(rnn_size) for _ in range(layer_size)]
            lstm_bw_cell_m = tf.contrib.rnn.DropoutWrapper(tf.contrib.rnn.MultiRNNCell(lstm_bw_cell_list), output_keep_prob=keep_prob)
        return lstm_fw_cell_m, lstm_bw_cell_m
    def weight_variables(self, shape, name):
        return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1), name=name)

    def bias_variables(self, shape, name):
        return tf.Variable(tf.constant(0.1, shape=shape), name=name)
    def transform_inputs(self, inputs, rnn_size, sequence_length):
        # [batch, time, rnn_size] -> a list of sequence_length tensors of shape
        # [batch, rnn_size], the input format expected by static_bidirectional_rnn.
        inputs = tf.transpose(inputs, [1, 0, 2])
        inputs = tf.reshape(inputs, [-1, rnn_size])
        inputs = tf.split(inputs, sequence_length, 0)
        return inputs
    def contrastive_loss(self, Ew, y):
        # Ew is the cosine similarity of a sentence pair, y is 1 for similar and 0 for
        # dissimilar pairs. Similar pairs are pulled towards Ew = 1; dissimilar pairs
        # are penalised whenever their similarity is positive (e.g. Ew = 0.9 costs
        # 0.25 * 0.1^2 = 0.0025 when y = 1, but 0.9^2 = 0.81 when y = 0).
        l_1 = 0.25 * tf.square(1 - Ew)
        l_0 = tf.square(tf.maximum(Ew, 0))
        loss = tf.reduce_sum(y * l_1 + (1 - y) * l_0)
        return loss
    def __init__(self, rnn_size, layer_size, vocab_size, sequence_length, keep_prob, grad_clip):
        # Token-id inputs for the two sentences of a pair.
        self.input_x1 = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_x')
        self.input_x2 = tf.placeholder(tf.int32, shape=[None, sequence_length], name='input_y')
        # Embedding lookup on the CPU; the embedding size equals rnn_size so the
        # embedded tokens feed straight into the LSTM cells.
        with tf.device('/cpu:0'):
            embedding = self.weight_variables([vocab_size, rnn_size], 'embedding')
            inputs_x1 = tf.nn.embedding_lookup(embedding, self.input_x1)
            inputs_x2 = tf.nn.embedding_lookup(embedding, self.input_x2)
        inputs_x1 = self.transform_inputs(inputs_x1, rnn_size, sequence_length)
        inputs_x2 = self.transform_inputs(inputs_x2, rnn_size, sequence_length)

        with tf.variable_scope('output'):
            bilstm_fw, bilstm_bw = self.bi_lstm(rnn_size, layer_size, keep_prob)
            outputs_x1, _, _ = tf.contrib.rnn.static_bidirectional_rnn(bilstm_fw, bilstm_bw, inputs_x1, dtype=tf.float32)
            # Mean-pool the per-timestep outputs into a fixed-size sentence vector.
            output_x1 = tf.reduce_mean(outputs_x1, 0)
            # Enable variable reuse so both branches share the same LSTM weights.
            tf.get_variable_scope().reuse_variables()
            outputs_x2, _, _ = tf.contrib.rnn.static_bidirectional_rnn(bilstm_fw, bilstm_bw, inputs_x2, dtype=tf.float32)
            output_x2 = tf.reduce_mean(outputs_x2, 0)

        with tf.variable_scope('dense_layer'):
            # Project each 2*rnn_size BiLSTM output down to a 128-dimensional vector.
            fc_w1 = self.weight_variables([2 * rnn_size, 128], 'fc_w1')
            fc_w2 = self.weight_variables([2 * rnn_size, 128], 'fc_w2')
            fc_b1 = self.bias_variables([128], 'fc_b1')
            fc_b2 = self.bias_variables([128], 'fc_b2')
            self.logits_1 = tf.matmul(output_x1, fc_w1) + fc_b1
            self.logits_2 = tf.matmul(output_x2, fc_w2) + fc_b2
            print('fw(x1) shape: ', self.logits_1.shape)
            print('fw(x2) shape: ', self.logits_2.shape)

        # Energy Ew: cosine similarity between the two projected sentence vectors.
        f_x1x2 = tf.reduce_sum(tf.multiply(self.logits_1, self.logits_2), 1)
        norm_fx1 = tf.sqrt(tf.reduce_sum(tf.square(self.logits_1), 1))
        norm_fx2 = tf.sqrt(tf.reduce_sum(tf.square(self.logits_2), 1))
        self.Ew = f_x1x2 / (norm_fx1 * norm_fx2)
        print('Ecos shape: ', self.Ew.shape)

        # Contrastive loss over the cosine similarity.
        self.y_data = tf.placeholder(tf.float32, shape=[None], name='y_data')
        self.cost = self.contrastive_loss(self.Ew, self.y_data)

        # Training: Adam with global-norm gradient clipping.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), grad_clip)
        optimizer = tf.train.AdamOptimizer(1e-3)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
if __name__ == '__main__':
    model = SiameseLSTM(64, 4, 1000, 100, 0.5, 5.0)
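
    # A minimal smoke-test sketch, assuming random numpy token ids and random 0/1
    # labels stand in for real batches of tokenised sentence pairs; it feeds the
    # placeholders once and runs a single optimisation step.
    import numpy as np
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        x1 = np.random.randint(0, 1000, size=(8, 100)).astype('int32')
        x2 = np.random.randint(0, 1000, size=(8, 100)).astype('int32')
        y = np.random.randint(0, 2, size=(8,)).astype('float32')
        _, cost = sess.run([model.train_op, model.cost],
                           feed_dict={model.input_x1: x1, model.input_x2: x2, model.y_data: y})
        print('loss on one random batch:', cost)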