import numpy as np
import os
import gzip
import json
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Flatten, Reshape, Conv2D, MaxPool2D
from keras.optimizers import Adam
from keras.preprocessing import sequence, text
from keras import backend as K
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
#####################################
# TENSORFLOW/KERAS CONFIG
#####################################
def tf_config(cuda_visible_devices='1'):
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' # ensures specific ordering of GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
K.set_session(sess)
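# Example usage (a sketch; which GPU ids exist is an assumption about the host):
#   tf_config(cuda_visible_devices='0')  # call once, before building any models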
#####################################
# PREPROCESSING
#####################################
def load_reviews(path, verbose=True):
    data_x, data_y = [], []
fopen = gzip.open if path.endswith(".gz") else open
    with fopen(path, 'rb') as fin:  # binary mode, so .decode() below works for plain files as well as gzip
for line in fin:
line = line.decode('ascii')
y, sep, x = line.partition("\t")
# x = x.split()
y = y.split()
if len(x) == 0: continue
y = np.asarray([ float(v) for v in y ])
data_x.append(x)
data_y.append(y)
if verbose:
print("{} examples loaded from {}".format(len(data_x), path))
print("max text length: {}".format(max(len(x) for x in data_x)))
return data_x, data_y
def create_splits(X, y, test_size=3000, random_state=42):
    # pass the arguments through rather than hard-coding them
    return train_test_split(X,
                            y,
                            test_size=test_size,
                            random_state=random_state)
def create_tokenizer(X, top_words=10000):
tokenizer = text.Tokenizer(num_words=top_words)
tokenizer.fit_on_texts(X)
return tokenizer
def pad_sequences(X, max_words=500):
return sequence.pad_sequences(X, maxlen=max_words)
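# Minimal end-to-end preprocessing sketch. The path and dataset size are
# hypothetical; it assumes the tab-separated "<labels>\t<text>" format that
# load_reviews expects and a dataset larger than the default test split.
def _demo_preprocessing(path='reviews.txt.gz'):
    data_x, data_y = load_reviews(path)
    X_train_text, X_test_text, y_train, y_test = create_splits(data_x, data_y)
    tokenizer = create_tokenizer(X_train_text)
    # map texts to integer sequences, then left-pad to a fixed length
    X_train = pad_sequences(tokenizer.texts_to_sequences(X_train_text))
    X_test = pad_sequences(tokenizer.texts_to_sequences(X_test_text))
    return X_train, X_test, np.asarray(y_train), np.asarray(y_test), tokenizer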
#####################################
# MODELS
#####################################
# Create a model that can directly accept embeddings as input (rather than raw sequence)
# No dropout layers since we don't explicitly train this model
def make_lstm_model_feed_embeddings(max_words=500, embed_dim=100, lstm_dim=200):
model = Sequential()
model.add(LSTM(lstm_dim, input_shape=(max_words, embed_dim), return_sequences=True))
model.add(LSTM(lstm_dim))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='mse',
optimizer=Adam(),
metrics=['mse', 'mae'])
return model
def copy_layer_weights(from_model, to_model):
# If using dropout, need to modify the from_model layer indices around dropout layers (to 2, 4, 6)
to_model.layers[0].set_weights(from_model.layers[1].get_weights()) # LSTM 1
to_model.layers[1].set_weights(from_model.layers[2].get_weights()) # LSTM 2
to_model.layers[2].set_weights(from_model.layers[3].get_weights()) # Dense output
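# Weight-transfer sketch: copy_layer_weights assumes a trained source model
# whose layer 0 is an Embedding and layers 1-3 are LSTM/LSTM/Dense, matching
# the indices above. The source model built here is a hypothetical stand-in
# for illustration, not the original training code.
def _demo_weight_transfer(top_words=10000, max_words=500, embed_dim=100, lstm_dim=200):
    trained = Sequential()
    trained.add(Embedding(top_words, embed_dim, input_length=max_words))
    trained.add(LSTM(lstm_dim, return_sequences=True))
    trained.add(LSTM(lstm_dim))
    trained.add(Dense(1, activation='sigmoid'))
    feed_model = make_lstm_model_feed_embeddings(max_words, embed_dim, lstm_dim)
    copy_layer_weights(trained, feed_model)  # LSTM 1, LSTM 2, Dense output
    return trained, feed_model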
def make_text_cnn_model_feed_embeddings(max_words=500, embed_dim=100, num_filters=128, filter_size=3):
    model = Sequential()
    # use the function arguments instead of hard-coded 500/100 so other
    # sequence lengths and embedding sizes work
    model.add(Reshape((max_words, embed_dim, 1), input_shape=(max_words, embed_dim)))
    model.add(Conv2D(num_filters, kernel_size=(filter_size, embed_dim), padding='valid', activation='relu'))
    model.add(MaxPool2D(pool_size=(max_words - filter_size + 1, 1), padding='valid'))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='mse',
                  optimizer='adam',
                  metrics=['mse', 'mae', coeff_determination_metric])
    return model
def copy_layer_weights_text_cnn(from_model, to_model):
to_model.layers[1].set_weights(from_model.layers[2].get_weights()) # Conv2D
to_model.layers[-1].set_weights(from_model.layers[-1].get_weights()) # Dense output
def get_embeddings(model):
return model.layers[0].get_weights()[0]
# R^2 (coefficient of determination) as a Keras metric
def coeff_determination_metric(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return (1 - SS_res / (SS_tot + K.epsilon()))
def predicted_probs(model, X_test):
y_pred = model.predict_proba(X_test, verbose=0)
y_pred = y_pred[:,0] # flatten to 1-D array
return y_pred
def eval_bin_class_model(model, X_test, y_test, verbose=True):
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy = scores[1]
y_pred = predicted_probs(model, X_test)
roc_auc = roc_auc_score(y_test, y_pred)
if verbose:
print('Accuracy: %.2f%%' % (accuracy*100.0))
print('ROC AUC: %.3f' % roc_auc)
return (accuracy, roc_auc)
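# Example usage (a sketch; assumes a trained binary classifier and padded,
# tokenized test data):
#   accuracy, roc_auc = eval_bin_class_model(model, X_test, y_test)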
#####################################
# SIS
#####################################
def predict_for_embed_sequence(batch, model, batch_size=128):
batch_reshaped = [np.array(seq) for seq in batch]
pred = model.predict(np.array(batch_reshaped), batch_size=batch_size)
pred = pred.reshape(-1) # flatten
return pred
def predict_for_images(batch, model, batch_size=128):
batch_reshaped = [np.array(x) for x in batch]
pred = model.predict(np.array(batch_reshaped), batch_size=batch_size)
return pred
def probas_to_label(probas):
return np.argmax(probas, axis=-1)
def pred_class_and_prob(probas):
pred_class = probas_to_label(probas)
prob = probas[pred_class]
return pred_class, prob
# `embeddings` is the vocabulary embeddings matrix
# `replace_with` determines what to put in place of the removed word,
# can be either 'zeros' (zero-vector) or 'mean' (mean of latent embeddings)
def replacement(embeddings, replace_with='zeros'):
if replace_with == 'zeros':
replacement_embedding = np.zeros(embeddings.shape[1])
elif replace_with == 'mean':
replacement_embedding = np.mean(embeddings, axis=0)
else:
raise NotImplementedError()
return replacement_embedding
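# Example: for a (vocab_size x embed_dim) embeddings matrix,
#   replacement(embeddings, 'zeros') -> np.zeros(embed_dim)
#   replacement(embeddings, 'mean')  -> the column-wise mean, shape (embed_dim,)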
# `embedded_sequence` is the input sequence to modify, as a padded sequence of
# embeddings, e.g. embeddings[X_train[i]]
# `model` is a predictive model that accepts embeddings directly as input
# `num_pad` is the number of leading pad positions (sequences are left-padded),
# which are skipped rather than removed
# `replacement_embedding` is the vector substituted at each removed position
# (see `replacement` above)
# Returns: list of predictions, where each value is the prediction if that element
# is removed (ignores padding characters)
def removed_word_predictions(embedded_sequence, model, num_pad,
replacement_embedding):
batch = []
for i in range(num_pad, embedded_sequence.shape[0]):
modified_sequence = embedded_sequence.copy()
modified_sequence[i] = replacement_embedding
batch.append(modified_sequence)
removed_scores = predict_for_embed_sequence(batch, model)
return removed_scores
# sets the i-th row of `seq` to vec
# assumes each row of seq is an encoded-vector
def replace_at_tf(seq, vec, i):
seq[i, :] = vec
def removed_word_predictions_tf(seq, model, replacement_embedding):
batch = []
for i in range(seq.shape[0]):
modified_seq = seq.copy()
replace_at_tf(modified_seq, replacement_embedding, i)
batch.append(modified_seq)
removed_scores = predict_for_embed_sequence(batch, model)
return removed_scores
def sis_removal(sequence, model, embeddings,
replace_with='mean',
return_history=False,
verbose=True,
index_to_token=None,
embedded_input=None,
replacement_embedding=None):
if embedded_input is not None: # assumes input already consists of embeddings
current_seq = np.copy(embedded_input)
else:
current_seq = embeddings[sequence]
starting_score = predict_for_embed_sequence([current_seq], model)[0]
if return_history:
history = [] # [starting_score]
if verbose:
print('Starting at: ', starting_score)
num_pad = np.count_nonzero(sequence == 0)
    if starting_score >= 0.5:
        condition = lambda score: score >= 0  # always true, so removal runs to completion (original threshold: 0.5)
        get_best_idx = np.nanargmax
    else:
        condition = lambda score: score < 1  # always true, so removal runs to completion (original threshold: 0.5)
        get_best_idx = np.nanargmin
current_score = starting_score
if replacement_embedding is None:
replacement_embedding = replacement(embeddings, replace_with=replace_with)
num_removed = 0
removed_elts = []
removed_elts_bool = np.zeros(np.count_nonzero(sequence), dtype=bool)
while condition(current_score) and not np.all(removed_elts_bool):
removed_scores = removed_word_predictions(current_seq, model, num_pad,
replacement_embedding)
# put nans in positions where element already removed
removed_scores[removed_elts_bool] = np.nan
best_to_remove_idx = get_best_idx(removed_scores)
current_score = removed_scores[best_to_remove_idx]
if return_history:
history.append(current_score)
if verbose:
if index_to_token is None:
print('Error: Need `index_to_token` to print verbose update.')
else:
print(best_to_remove_idx,
index_to_token[sequence[num_pad+best_to_remove_idx]],
current_score)
if condition(current_score): # actually do the removal
current_seq[num_pad+best_to_remove_idx] = replacement_embedding
removed_elts.append(best_to_remove_idx)
removed_elts_bool[best_to_remove_idx] = True
num_removed += 1
if return_history:
return removed_elts, history
return removed_elts
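# SIS usage sketch for one padded example. `trained` is a hypothetical
# embedding-input model whose weights were copied into `feed_model` (see
# copy_layer_weights); `index_to_token` inverts the tokenizer vocabulary so
# verbose updates can print words.
def _demo_sis_removal(trained, feed_model, X_test, tokenizer, i=0):
    embeddings = get_embeddings(trained)
    index_to_token = {v: k for k, v in tokenizer.word_index.items()}
    removed_elts, history = sis_removal(X_test[i], feed_model, embeddings,
                                        replace_with='mean',
                                        return_history=True,
                                        index_to_token=index_to_token)
    return removed_elts, history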
def sis_removal_tf(start_seq, model, replacement_embedding, return_history=True, verbose=True):
current_seq = np.array(start_seq.copy(), dtype='float32')
starting_score = predict_for_embed_sequence([current_seq], model)[0]
if return_history:
history = []
if verbose:
print('Starting at: ', starting_score)
get_best_idx = np.nanargmax # always trying to maximize prediction
current_score = starting_score
num_removed = 0
removed_elts = []
removed_elts_bool = np.zeros(current_seq.shape[0], dtype=bool)
while not np.all(removed_elts_bool):
removed_scores = removed_word_predictions_tf(current_seq, model, replacement_embedding)
# put nans in positions where element already removed
removed_scores[removed_elts_bool] = np.nan
best_to_remove_idx = get_best_idx(removed_scores)
current_score = removed_scores[best_to_remove_idx]
        if return_history:
            history.append(current_score)
if verbose:
print(best_to_remove_idx, current_score)
replace_at_tf(current_seq, replacement_embedding, best_to_remove_idx)
removed_elts.append(best_to_remove_idx)
removed_elts_bool[best_to_remove_idx] = True
num_removed += 1
if return_history:
return removed_elts, history
return removed_elts
def replace_at_img(x, replacement, pos):
x[pos] = replacement
def removed_word_predictions_img(x, pos_to_remove, model, replacement):
batch = []
for pos in pos_to_remove:
modified_x = x.copy()
replace_at_img(modified_x, replacement, pos)
batch.append(modified_x)
removed_preds = predict_for_images(batch, model)
return removed_preds
def sis_removal_img_classif(start_x, image, class_idx, model,
replacement, return_history=True,
verbose=True):
current_x = start_x.copy()
current_preds = predict_for_images([current_x], model)[0]
current_prob = current_preds[class_idx]
if return_history:
history = []
if verbose:
print('Predicting class %d with prob %.5f: ' % \
(class_idx, current_prob))
num_removed = 0
removed_elts = []
removed_elts_bool = np.zeros(image.get_num_pixels(), dtype=bool)
while not np.all(removed_elts_bool):
is_to_remove = np.where(np.logical_not(removed_elts_bool))[0]
pos_to_remove = [image.i_to_pos(i) for i in is_to_remove]
removed_preds = removed_word_predictions_img(current_x, pos_to_remove,
model, replacement)
removed_preds_for_class = removed_preds[:, class_idx]
best_to_remove_idx = np.argmax(removed_preds_for_class)
best_to_remove_i = is_to_remove[best_to_remove_idx]
best_to_remove_pos = pos_to_remove[best_to_remove_idx]
current_prob = removed_preds_for_class[best_to_remove_idx]
        if return_history:
            history.append(current_prob)
# actually do the removal
replace_at_img(current_x, replacement, best_to_remove_pos)
removed_elts.append(best_to_remove_i)
removed_elts_bool[best_to_remove_i] = True
num_removed += 1
if verbose:
print('Removed at pos: %s, prediction: %.5f, num removed: %d' % \
(str(best_to_remove_pos), current_prob, num_removed))
if return_history:
return removed_elts, history
return removed_elts
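# sis_removal_img_classif only needs `image` to expose get_num_pixels() and
# i_to_pos(i). A minimal sketch of that interface for square 2-D inputs
# (this class is an assumption for illustration, not part of the original code):
class _FlatImage(object):
    def __init__(self, side):
        self.side = side
    def get_num_pixels(self):
        return self.side * self.side
    def i_to_pos(self, i):
        return (i // self.side, i % self.side)  # flat index -> (row, col)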
#####################################
# EVALUATION
#####################################
def load_rationale_annotations(path, verbose=True):
data = []
fopen = gzip.open if path.endswith(".gz") else open
with fopen(path) as fin:
for line in fin:
item = json.loads(line)
data.append(item)
if verbose:
print('Loaded %d annotations.' % len(data))
return data
# Retokenize the annotation using our trained tokenizer
# `annotation` is the list of tokens from the given annotation (e.g. annotations[i]['x'])
# `tokenizer` is our trained tokenizer (Keras object)
# `annotation_indices` is the list of token indices for annotated sentences
# (accepted for API compatibility but currently unused in the body)
# Returns: (retokenized example, num_missing), where num_missing counts words
# dropped by the tokenizer when `count_missing` is True (None otherwise)
def retokenize_annotation(annotation, tokenizer, annotation_indices=None, count_missing=False):
joined_annotation = ' '.join(annotation)
retokenized = tokenizer.texts_to_sequences([joined_annotation])[0]
if count_missing:
word_sequence = text.text_to_word_sequence(joined_annotation,
tokenizer.filters,
tokenizer.lower,
tokenizer.split)
num_missing = len(word_sequence) - len(retokenized)
else:
num_missing = None
return retokenized, num_missing
# Returns all (start, end) index pairs (inclusive) at which `sl` occurs as a
# contiguous sublist of `l`
def find_sub_list(sl, l):
    res = []
    sll = len(sl)
    for ind in (i for i, e in enumerate(l) if e == sl[0]):
        if l[ind:ind + sll] == sl:
            res.append((ind, ind + sll - 1))
    return res
def get_annot_idxs(i, annotations, tokenizer, aspect):
tokenized_ex, _ = retokenize_annotation(annotations[i]['x'], tokenizer, count_missing=False)
annot_idxs_orig = annotations[i][str(aspect)]
annot_idxs = []
for a, b in annot_idxs_orig:
tokenized_annot, _ = retokenize_annotation(annotations[i]['x'][a:b], tokenizer, count_missing=False)
if len(tokenized_annot) == 0:
continue
idxs = find_sub_list(tokenized_annot, list(tokenized_ex))
        # make sure the annotation sequence doesn't appear twice
        if len(idxs) > 1:
            # this happens rarely, maybe once per dataset
            # need to look outside the retokenized annotation to resolve the ambiguity
            # in theory, if the subsequence ran all the way to the right end of the
            # sequence this code would break, but it should be fine in practice
            offset = 0
            while len(idxs) > 1:
                offset += 1  # extend the annotation by one more token each attempt
                tokenized_annot, _ = retokenize_annotation(annotations[i]['x'][a:b+offset],
                                                           tokenizer,
                                                           count_missing=False)
                idxs = find_sub_list(tokenized_annot, list(tokenized_ex))
            # trim the extension back off the matched span
            idxs = [(idxs[0][0], idxs[0][1] - offset)]
        assert(len(idxs) == 1)
annot_idxs.append(idxs[0])
return annot_idxs
# Starting from the end of `history`, going backwards, determine how many words needed
# in order to satisfy `threshold_f`.
# threshold_f(x) returns True if x satisfies the constraint, otherwise returns False
# If `ignore_last` is True, ignores the last element of history (since this usually corresponds to score after
# all sequence elements have been removed)
def find_min_words_needed(history, threshold_f, ignore_last=True):
num_needed = 0
vals = history[::-1]
if ignore_last:
vals = vals[1:]
found = False
for val in vals:
num_needed += 1
if threshold_f(val):
found = True
break
if ignore_last and not found: # need to use full input
num_needed += 1
return num_needed
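# Worked example: with history = [0.9, 0.8, 0.6, 0.4, 0.1] (the final entry
# being the score after everything is removed), threshold_f = lambda s: s >= 0.8
# and ignore_last=True, scanning backwards visits 0.4, 0.6, 0.8 and stops,
# so find_min_words_needed returns 3.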
# Computes min needed to satisfy threshold function for examples in X given by indices in example_idxs,
# assumes threshold_f is the same for all examples
# `remove_annots` is a list (indexed into by idxs) of tuples (sorted_word_order, history) - history used here
# Returns: (min_needed, min_needed_percentage), each of which has same length as example_idxs
# and percentage list is the percentage of the total number of words in that example
def min_needed_for_idxs(example_idxs, remove_annots, threshold_f):
min_needed = []
min_needed_percentage = []
for i in example_idxs:
history = remove_annots[i][1]
min_needed_i = find_min_words_needed(history, threshold_f)
min_needed.append(min_needed_i)
percentage = min_needed_i / float(len(history)) * 100.0
min_needed_percentage.append(percentage)
return (min_needed, min_needed_percentage)
def find_score_history_given_order(x, sorted_word_importances, num_pad, model,
mean_embedding, pad_embedding, embeddings):
seq = np.vstack([np.repeat(pad_embedding.reshape((1, pad_embedding.shape[0])), num_pad, axis=0),
np.repeat(mean_embedding.reshape((1, mean_embedding.shape[0])), len(sorted_word_importances),
axis=0)])
batch_seqs = [np.copy(seq)]
# reverse order to add back most important words first
for idx in sorted_word_importances[::-1]:
word = x[num_pad + idx]
word_embedding = embeddings[word]
seq[num_pad + idx] = word_embedding
batch_seqs.append(np.copy(seq))
history = predict_for_embed_sequence(batch_seqs, model)
# reverse order and remove first element, so first element of history list is score
# of sequence after first word removed
history = history[::-1][1:]
return history
# `final_seq` is an (L x 4) sequence (the final sequence in one-hot representation,
# rows indexed by position)
# `empty_input` is an (L x 4) empty input sequence (probably initially all
# 0's or 0.25's)
def find_score_history_given_order_tf(final_seq, sorted_word_importances,
model, empty_input):
current_seq = empty_input.copy()
batch_seqs = [np.copy(current_seq)]
# reverse order to add back most important words first
for idx in sorted_word_importances[::-1]:
current_seq[idx, :] = final_seq[idx, :]
batch_seqs.append(np.copy(current_seq))
history = predict_for_embed_sequence(batch_seqs, model)
# reverse order and remove first element, so first element of history list
# is score of sequence after first word removed
history = history[::-1][1:]
return history
# `indices_min_needed` comes from `zip(example_idxs, min_needed)` so it
# contains tuples of (i, k) pairs where i is the index of the example
# in the annotation set and k is the number of elems in the rationale
# For each element in the sequence, replace with `replacement_embedding`
# and predict using `model` (which accepts embeddings directly as input).
# Predictions are then made where each element has been individually removed.
# Applies `diffs_transform_f` to a (predictions array, original prediction) tuple
# (e.g. the default `lambda preds_orig: preds_orig[1] - preds_orig[0]` gives
# differences from the original prediction)
# Returns tuple of (rationale_diffs, nonrationale_diffs) where rationale_diffs
# contains the score delta in the elements that are included in rationale,
# and nonrationale_diffs contains the same but for non-rationale elements.
def perturbation_removal_rationale(indices_min_needed, X_annotation, model,
embeddings, replacement_embedding, remove_annots,
original_predictions,
diffs_transform_f=lambda preds_orig: preds_orig[1] - preds_orig[0]):
rationale_diffs = []
nonrationale_diffs = []
for i, k in indices_min_needed:
x = X_annotation[i]
num_pad = np.count_nonzero(x == 0)
x_embed = embeddings[x]
bottom_k = remove_annots[i][0][-k:]
preds = removed_word_predictions(x_embed, model, num_pad,
replacement_embedding)
original_pred = float(original_predictions[i])
diffs = diffs_transform_f((np.array(preds), original_pred))
bottom_k_diffs = np.take(diffs, bottom_k)
other_diffs = np.delete(diffs, bottom_k)
assert(diffs.shape[0] == bottom_k_diffs.shape[0] + other_diffs.shape[0])
rationale_diffs.append(bottom_k_diffs)
nonrationale_diffs.append(other_diffs)
return (rationale_diffs, nonrationale_diffs)
def perturbation_removal_rationale_tf(example_idxs, rationale_lens, X, model, replacement_embedding,
remove_results, original_predictions,
diffs_transform_f=lambda preds_orig: preds_orig[1] - preds_orig[0]):
assert(len(example_idxs) == len(rationale_lens))
rationale_diffs = []
nonrationale_diffs = []
for j, i in enumerate(example_idxs):
k = rationale_lens[j]
x = X[i]
bottom_k = remove_results[j][0][-k:]
preds = removed_word_predictions_tf(x, model, replacement_embedding)
original_pred = float(original_predictions[i])
diffs = diffs_transform_f((np.array(preds), original_pred))
bottom_k_diffs = np.take(diffs, bottom_k)
other_diffs = np.delete(diffs, bottom_k)
assert(diffs.shape[0] == bottom_k_diffs.shape[0] + other_diffs.shape[0])
rationale_diffs.append(bottom_k_diffs)
nonrationale_diffs.append(other_diffs)
return (rationale_diffs, nonrationale_diffs)