-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
122 lines (93 loc) · 4.23 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import json
import sklearn.metrics as sk_metrics
def shuffle(f, seed=42):
    """Return the items of ``f`` as a NumPy array in a seed-determined order.

    The permutation is drawn from a dedicated ``RandomState(seed)``, so the
    same ``seed`` and input length always produce the same ordering. ``f``
    itself is only read, never reordered in place.
    """
    rng = np.random.RandomState(seed)
    order = rng.permutation(len(f))
    return np.asarray(f)[order]
def load_dataset(dataset, noise_ratio=0, num_calib_pos=0, num_calib_neg=0,
                 seed=42, pos_limit=10000, neg_limit=10000):
    """Load an image-list dataset and build train / calibration / test splits.

    Reads ``datasets/<dataset>.json``, which must contain the keys
    ``pos_train``, ``pos_test``, ``neg_train`` and ``neg_test``. Every list is
    shuffled deterministically with ``seed`` via the module-level ``shuffle``
    helper. Calibration items are taken from the head of the shuffled training
    lists, and part of the negative training items can be mixed into the
    positive training set as label noise.

    Args:
        dataset: Base name of the JSON file inside ``datasets/``.
        noise_ratio: Fraction of the returned positive training set that is
            replaced by negative training items. Must lie in ``[0, 1]``.
        num_calib_pos: Number of positive calibration items to return.
        num_calib_neg: Number of negative calibration items to return.
        seed: Seed passed to every ``shuffle`` call.
        pos_limit: Upper bound on the number of positive training items.
        neg_limit: Upper bound on the number of negative training items.

    Returns:
        A dict with the keys ``pos_train_images``, ``neg_train_images``,
        ``pos_test_images``, ``neg_test_images``, ``pos_calib_images``,
        ``neg_calib_images``, ``test_images`` (positives followed by
        negatives), ``test_labels`` (a list of 1s then 0s matching
        ``test_images``) and ``noise_ratio`` (the value passed in).

    Raises:
        ValueError: If ``noise_ratio`` is outside ``[0, 1]``.
    """
    max_noise_ratio = 1.0
    max_num_calib_pos = 100
    max_num_calib_neg = 100
    min_num_neg = 200

    # Validate before touching the file system: a ratio above 1 would make
    # the positive count below negative and silently corrupt the mix.
    if not 0 <= noise_ratio <= max_noise_ratio:
        raise ValueError(
            'noise_ratio must be between 0 and %s, got %r'
            % (max_noise_ratio, noise_ratio)
        )

    with open('datasets/%s.json' % dataset) as f:
        image_data = json.load(f)

    pos_train_images = shuffle(np.array(image_data['pos_train']), seed=seed)
    pos_test_images = shuffle(np.array(image_data['pos_test']), seed=seed)
    neg_train_images = shuffle(np.array(image_data['neg_train']), seed=seed)
    neg_test_images = shuffle(np.array(image_data['neg_test']), seed=seed)

    # A fixed-size head is always held out so the training split does not
    # depend on how many calibration items are requested. When more items are
    # requested than the fixed size, hold out that many instead so the
    # calibration and training splits never overlap.
    pos_calib_images = pos_train_images[:num_calib_pos]
    pos_train_images = pos_train_images[max(max_num_calib_pos, num_calib_pos):]
    neg_calib_images = neg_train_images[:num_calib_neg]
    neg_train_images = neg_train_images[max(max_num_calib_neg, num_calib_neg):]

    # Set aside enough negatives to support the maximum noise ratio while
    # keeping at least ``min_num_neg`` negatives for training. Clamp at zero:
    # a negative count would otherwise act as a negative slice index.
    num_neg_to_transfer = max(
        0,
        min(int(len(pos_train_images) * max_noise_ratio),
            len(neg_train_images) - min_num_neg),
    )
    num_pos = int(num_neg_to_transfer / max_noise_ratio)

    noise_images = neg_train_images[:num_neg_to_transfer]
    pos_train_images = pos_train_images[:num_pos]
    neg_train_images = neg_train_images[num_neg_to_transfer:]

    if noise_ratio > 0.0:
        # Swap the tail of the positives for negatives; the total size of the
        # positive training set stays the same.
        num_noise = int(len(pos_train_images) * noise_ratio)
        num_pos = len(pos_train_images) - num_noise
        pos_train_images = np.concatenate(
            [noise_images[:num_noise], pos_train_images[:num_pos]]
        )

    # Re-shuffle so injected noise is not clustered at the front, then cap.
    pos_train_images = shuffle(pos_train_images, seed=seed)[:pos_limit]
    neg_train_images = shuffle(neg_train_images, seed=seed)[:neg_limit]

    test_images = np.concatenate([pos_test_images, neg_test_images])
    test_labels = [1] * len(pos_test_images) + [0] * len(neg_test_images)

    return {
        'pos_train_images': pos_train_images,
        'neg_train_images': neg_train_images,
        'pos_test_images': pos_test_images,
        'neg_test_images': neg_test_images,
        'pos_calib_images': pos_calib_images,
        'neg_calib_images': neg_calib_images,
        'test_images': test_images,
        'test_labels': test_labels,
        'noise_ratio': noise_ratio,
    }
def _f1_from_precision_recall(precision, recall):
    """Return the F1 score for a precision/recall pair, or 0.0 when undefined."""
    total = precision + recall
    # F1 is 0/0 when both inputs are zero; report 0 instead of dividing.
    if total == 0 or np.isnan(total):
        return 0.
    return 2 * (precision * recall) / total


def get_metrics(y_true, y_pred, verbose=False):
    """Compute binary-classification metrics for scores in ``y_pred``.

    Precision, recall and F1 are evaluated at 100 thresholds
    (0.00, 0.01, ..., 0.99); a score counts as positive when it is
    ``>=`` the threshold. The threshold with the highest F1 is reported,
    together with threshold-free metrics and the metrics at a fixed 0.5 cut.

    Args:
        y_true: Ground-truth labels; must contain exactly two distinct values.
        y_pred: Predicted scores, compared against the thresholds above.
        verbose: Currently unused; kept for interface compatibility.

    Returns:
        ``None`` when ``y_true`` does not contain exactly two distinct values.
        Otherwise a dict with:
          * ``precision``, ``recall``, ``f1_score``, ``threshold`` - values at
            the swept threshold with the highest F1 (first one on ties). These
            come from a float16 table and are rounded accordingly.
          * ``f1_auc`` - ``sk_metrics.auc`` of F1 over the swept thresholds.
          * ``roc_auc``, ``avg_precision`` - from ``roc_auc_score`` and
            ``average_precision_score`` on the raw inputs.
          * ``precision_at_50``, ``recall_at_50``, ``f1_score_at_50`` - values
            at the fixed threshold 0.5.
    """
    if len(np.unique(y_true)) != 2:
        return None

    test_labels = np.array(y_true)
    pred_raw = np.array(y_pred)

    threshold_range = np.arange(0., 1., 0.01)
    scores = []
    for threshold in threshold_range:
        p = 1. * (pred_raw >= threshold)
        precision = sk_metrics.precision_score(test_labels, p)
        recall = sk_metrics.recall_score(test_labels, p)
        f1_score = _f1_from_precision_recall(precision, recall)
        scores.append([threshold, precision, recall, f1_score])

    # Columns: threshold, precision, recall, f1 (stored in half precision).
    scores = np.array(scores, dtype='float16')

    auc = sk_metrics.auc(threshold_range, scores[:, -1])
    max_index = np.argmax(scores[:, -1])
    threshold, precision, recall, f1_score = scores[max_index]

    roc_auc = sk_metrics.roc_auc_score(y_true, y_pred)
    avg_precision = sk_metrics.average_precision_score(y_true, y_pred)

    p = 1. * (pred_raw >= 0.5)
    precision_at_50 = sk_metrics.precision_score(test_labels, p)
    recall_at_50 = sk_metrics.recall_score(test_labels, p)
    # Same zero guard as the sweep: no division when precision and recall
    # are both zero at this threshold.
    f1_score_at_50 = _f1_from_precision_recall(precision_at_50, recall_at_50)

    return {
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score,
        'f1_auc': auc,
        'threshold': threshold,
        'roc_auc': roc_auc,
        'avg_precision': avg_precision,
        'precision_at_50': precision_at_50,
        'recall_at_50': recall_at_50,
        'f1_score_at_50': f1_score_at_50,
    }