-
Notifications
You must be signed in to change notification settings - Fork 7
/
tracker.py
415 lines (358 loc) · 17.8 KB
/
tracker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
import copy
import numpy as np
import torch
from filterpy.kalman import KalmanFilter
from scipy.optimize import linear_sum_assignment
# Object classes evaluated by the nuScenes tracking benchmark.
NUSCENES_TRACKING_NAMES = [
    'bicycle',
    'bus',
    'car',
    'motorcycle',
    'pedestrian',
    'trailer',
    'truck',
    'construction_vehicle',
    'barrier',
    'traffic_cone',
]
# Per-class matching gate in meters: 99.9 percentile of the L2 velocity error
# distribution (per class / 0.5 second).  These are early statistics that were
# not tuned much; tuning them per model should give a considerable AMOTA
# improvement.  Used as the maximum centre distance for det<->track matching.
NUSCENE_CLS_VELOCITY_ERROR = {
    'car': 3,
    'truck': 4,
    'bus': 5.5,
    'trailer': 2,
    'pedestrian': 1,
    'motorcycle': 4,
    'bicycle': 2.5,
    'construction_vehicle': 1,
    'barrier': 1,
    'traffic_cone': 1,
}
def greedy_assignment(dist):
    """Greedily match rows of a cost matrix to columns.

    Scans the rows in order; each row takes its cheapest still-available
    column, provided the cost is below the 1e16 validity cutoff.  Claimed
    columns are blocked by overwriting them with 1e18 (``dist`` is mutated
    in place).  Returns an (K, 2) int32 array of [row, col] pairs.
    """
    pairs = []
    if dist.shape[1] > 0:
        for row_idx, row in enumerate(dist):
            col_idx = row.argmin()
            if row[col_idx] < 1e16:
                # claim this column so later rows cannot reuse it
                dist[:, col_idx] = 1e18
                pairs.append([row_idx, col_idx])
    return np.array(pairs, np.int32).reshape(-1, 2)
def comparing_positions(self, positions1_data, positions2_data, positions1, positions2):
    """Match two point sets (e.g. existing tracks vs. current detections).

    :param positions1_data: M dicts with at least 'label_preds' (e.g. tracks)
    :param positions2_data: N dicts with 'label_preds' and 'detection_name'
    :param positions1: (M, 2) array of 2-D centres for positions1_data
    :param positions2: (N, 2) array of 2-D centres for positions2_data
    :return: (matches, unmatched_positions1, unmatched_positions2) where
        matches is a (K, 2) array of [positions2_idx, positions1_idx] pairs
        and the unmatched values are plain index lists.

    A pair is valid only if the centre distance is below the per-class
    velocity-error gate and both sides carry the same class label.
    """
    M = len(positions1_data)
    N = len(positions2_data)
    positions1_cat = np.array([index['label_preds'] for index in positions1_data], np.int32)  # M labels
    positions2_cat = np.array([index['label_preds'] for index in positions2_data], np.int32)  # N labels
    # per-detection distance gate, selected by class name
    max_diff = np.array([self.velocity_error[box['detection_name']] for box in positions2_data], np.float32)
    if len(positions1) > 0:  # NOT FIRST FRAME
        dist = (((positions1.reshape(1, -1, 2) - positions2.reshape(-1, 1, 2)) ** 2).sum(axis=2))  # N x M
        dist = np.sqrt(dist)  # absolute distance in meters
        # gate out pairs that are too far apart or have mismatched classes
        invalid = ((dist > max_diff.reshape(N, 1)) + (
                positions2_cat.reshape(N, 1) != positions1_cat.reshape(1, M))) > 0
        dist = dist + invalid * 1e18
        if self.hungarian:
            dist[dist > 1e18] = 1e18
            # BUG FIX: linear_sum_assignment returns a (row_ind, col_ind)
            # tuple; the code below indexes matched_indices[:, 0] / [:, 1]
            # and iterates [row, col] pairs, so stack the tuple into the
            # same (K, 2) shape that greedy_assignment produces.
            matched_indices = np.stack(linear_sum_assignment(copy.deepcopy(dist)), axis=1)
        else:
            matched_indices = greedy_assignment(copy.deepcopy(dist))
    else:  # first few frames: nothing to match against
        assert M == 0
        matched_indices = np.array([], np.int32).reshape(-1, 2)
    unmatched_positions1_data = [d for d in range(positions1.shape[0]) if not (d in matched_indices[:, 1])]
    unmatched_positions2_data = [d for d in range(positions2.shape[0]) if not (d in matched_indices[:, 0])]
    if self.hungarian:
        # Hungarian always produces a full assignment, so drop pairs that
        # only exist because every alternative was gated out.
        matches = []
        for m in matched_indices:
            if dist[m[0], m[1]] > 1e16:
                # BUG FIX: previously only the detection side was returned
                # as unmatched; the track silently disappeared instead of
                # being aged like other unmatched tracks.
                unmatched_positions2_data.append(m[0])
                unmatched_positions1_data.append(m[1])
            else:
                matches.append(m)
        matches = np.array(matches).reshape(-1, 2)
    else:
        matches = matched_indices
    return matches, unmatched_positions1_data, unmatched_positions2_data
def update_function(self, track, m, loaded_model):
    """Fuse the matched tracklet's stored score into the detection's score.

    The fusion rule is chosen by ``self.score_update``; ``track`` is
    modified in place and also returned.  ``m`` is a [det_idx, trk_idx]
    match pair; only ``m[1]`` is used here.
    """
    det_score = track['detection_score']
    trk_score = self.tracks[m[1]]['detection_score']
    mode = self.score_update
    if mode == 'nn':
        # learned fusion: network input order is [track score, detection score]
        fused = loaded_model(torch.Tensor(np.array([trk_score, det_score])))
        track['detection_score'] = fused.item()
    elif mode == 'parallel_addition':
        # "parallel resistors" on the complements of the two scores
        p = 1 - det_score
        q = 1 - trk_score
        track['detection_score'] = 1 - (p * q) / (p + q)
    elif mode == 'multiplication':
        # probabilistic OR of the two scores
        track['detection_score'] = 1 - (1 - det_score) * (1 - trk_score)
    elif mode == 'addition':
        track['detection_score'] = np.clip(det_score + trk_score, a_min=0.0, a_max=1.0)
    elif mode == 'max':
        track['detection_score'] = np.maximum(det_score, trk_score)
    return track
# comparing the results of matching the det with trk and compare them with ground truth resulting train data
def tain_data(self, annotated_data, first_matching_detections, match1_matched_det_pos, match1_matched_trk):
    """Build one training sample for the learned score-update network.

    Matches the detections that survived the first (detection <-> track)
    matching against the ground-truth boxes, labels each detection/track
    score pair as true positive (1) or false positive (0), and returns
    {'input': [[trk_score, det_score], ...], 'labels': [...]}.

    NOTE(review): the name keeps the original 'tain_data' typo because the
    caller (step_centertrack) references it by this name.
    """
    # prepare the ground truth data for matching
    ground_truth = annotated_data['anns']
    ground_truth_pos = np.array([index['translation'][:2] for index in ground_truth])
    # match the already-matched detections against the ground truth;
    # positions1 = detections, positions2 = ground truth
    training_matching = comparing_positions(self, first_matching_detections, ground_truth, match1_matched_det_pos,
                                            ground_truth_pos)
    # get the sets from the matching (only two are needed)
    matched_training_matching, unmatched_training_matching, unmatched_ground_truth = training_matching[0], training_matching[1], training_matching[2]
    # PS: false positive: the reference is the system and not the ground truth
    # PS: true positives get the label 1 and false positives 0
    # get matched detections (true positive); column 1 of a match pair holds
    # the detection index (see comparing_positions)
    det_target_1 = get_data_from_index(matched_training_matching, 1, first_matching_detections)
    # get unmatched detections (false positive); 1-D index list, i is ignored
    det_target_0 = get_data_from_index(np.array(unmatched_training_matching), 1, first_matching_detections)
    # get matched tracklets (true positive); detections and tracklets are
    # parallel lists here, so the same column-1 index works for both
    trk_target_1 = get_data_from_index(matched_training_matching, 1, match1_matched_trk)
    # get unmatched tracklets (false positive)
    trk_target_0 = get_data_from_index(np.array(unmatched_training_matching), 1, match1_matched_trk)
    # concatenate detection's detection_score [det_sc : true positive,,, det_sc : false positive,,,]
    det_target_1_0 = np.array(
        [index['detection_score'] for index in np.concatenate((det_target_1, det_target_0))])
    # concatenate tracklet's detection_score [tr_sc : true positive,,, tr_sc : false positive,,,]
    trk_target_1_0 = np.array(
        [index['detection_score'] for index in np.concatenate((trk_target_1, trk_target_0))])
    # get the two score inputs lists [[tr_sc, det_sc],,,]
    inputs = list(
        [[trk_target_1_0[index], det_target_1_0[index]]
         for index in range(len(det_target_1_0))])
    # get the labels list: ones for true positives, zeros for false positives
    labels = list(np.concatenate(
        (np.ones(len(det_target_1), np.float64), np.zeros(len(det_target_0), np.float64))))
    # each iteration returns a dictionary with the inputs and labels
    train_set = {'input': inputs, 'labels': labels}
    return train_set
def get_data_from_index(index_list, i, data_source):
    """Select entries of ``data_source`` by index.

    ``index_list`` is either a 1-D array of indices (used directly; ``i``
    is ignored) or a 2-D array of match pairs, in which case column ``i``
    of each pair supplies the index.  Returns a numpy array of the picks.
    """
    if index_list.ndim == 1:
        picked = [data_source[idx] for idx in index_list]
    else:
        picked = [data_source[pair[i]] for pair in index_list]
    return np.array(picked)
# Waymo tracking classes are numeric label ids (no name strings).
# NOTE(review): presumably 1 = vehicle, 2 = pedestrian, 4 = cyclist per the
# Waymo Open Dataset label enum -- confirm against the dataset loader.
WAYMO_TRACKING_NAMES = [
    1,
    2,
    4,
]
# Per-class matching gate in meters (same role as NUSCENE_CLS_VELOCITY_ERROR).
WAYMO_CLS_VELOCITY_ERROR = {
    1: 2,
    2: 0.2,
    4: 0.5,
}
# reshape hungarian's output to match the greedy output shape
def reshape(hungarian):
    """Convert scipy's ``linear_sum_assignment`` output into match pairs.

    :param hungarian: a (row_ind, col_ind) pair of equal-length index arrays
    :return: (K, 2) int array of [row, col] pairs, matching the shape
        produced by ``greedy_assignment``

    Replaces the previous np.append-in-a-loop (O(K^2) reallocations) with a
    single column_stack; an empty input yields shape (0, 2) as before.
    """
    rows = np.asarray(hungarian[0], dtype=int)
    cols = np.asarray(hungarian[1], dtype=int)
    return np.column_stack((rows, cols))
class PubTracker(object):
    """Multi-object tracker operating on per-frame detection dicts.

    Detections are associated with existing tracklets either greedily or
    with the Hungarian algorithm (see comparing_positions), and each
    tracklet carries a detection score that is decayed over time and fused
    with matched detections (see update_function).
    """

    def __init__(self, hungarian=False, max_age=6, noise=0.05, active_th=1, min_hits=1, score_update=None,
                 deletion_th=0.0,
                 detection_th=0.0, dataset='Nuscenes', model_path='LeakyReLU.th'):
        self.tracker = 'PointTracker'  # alternative: 'KF' (Kalman-filter motion model)
        self.hungarian = hungarian
        self.max_age = max_age
        self.min_hits = min_hits
        self.noise = noise  # per-frame score decay
        self.s_th = active_th  # activate threshold
        self.score_update = score_update
        self.det_th = detection_th  # detection threshold
        self.del_th = deletion_th  # deletion threshold
        self.use_vel = False  # KF only: also observe velocity, not just position
        if score_update == 'nn':
            # learned score-fusion network used by update_function
            # NOTE(review): consider torch.load(..., map_location=...) for CPU-only hosts
            self.loaded_model = torch.load(model_path)
            self.loaded_model.eval()
        else:
            self.loaded_model = None
        print("Use hungarian: {}".format(hungarian))
        if dataset == 'Nuscenes':
            self.velocity_error = NUSCENE_CLS_VELOCITY_ERROR
            self.tracking_names = NUSCENES_TRACKING_NAMES
        elif dataset == 'Waymo':
            self.velocity_error = WAYMO_CLS_VELOCITY_ERROR
            self.tracking_names = WAYMO_TRACKING_NAMES
        self.id_count = 0
        self.tracks = []
        self.reset()

    def reset(self):
        """Drop all tracklets and restart id numbering."""
        self.id_count = 0
        self.tracks = []

    def _init_kf(self, track):
        """Attach a fresh 6-state Kalman filter to a tracklet (KF tracker only).

        State is [x, y, vx, vy, ax, ay]; the measurement is [x, y, vx, vy]
        when self.use_vel is set, otherwise just [x, y].
        """
        if self.use_vel:
            track['KF'] = KalmanFilter(6, 4)
            track['KF'].H = np.array([[1., 0., 0., 0., 0., 0.],
                                      [0., 1., 0., 0., 0., 0.],
                                      [0., 0., 1., 0., 0., 0.],
                                      [0., 0., 0., 1., 0., 0.]])
        else:
            track['KF'] = KalmanFilter(6, 2)
            track['KF'].H = np.array([[1., 0., 0., 0., 0., 0.],
                                      [0., 1., 0., 0., 0., 0.]])
        track['KF'].x = np.hstack([track['ct'], np.array(track['velocity'][:2]), np.zeros(2)])
        track['KF'].P *= 10

    def step_centertrack(self, results, annotated_data, time_lag, version, train_data):
        """Associate one frame of detections with the existing tracklets.

        :param results: detection dicts for one frame (each with
            'detection_name', 'translation', 'velocity', 'detection_score')
        :param annotated_data: ground truth for this frame (train data only)
        :param time_lag: time between two successive frames (timestamp delta)
        :param version: dataset split, e.g. 'v1.0-trainval' or test
        :param train_data: if True, also return training data for the
            learned score-update function
        :return: tracklets (detections extended with tracking id, age,
            activity) for this frame; if train_data is True, a
            (tracklets, train_set) tuple instead
        """
        # Always defined so the train_data returns below cannot raise
        # NameError (previously train_set only existed on 'v1.0-trainval').
        train_set = {}
        # if no detection in this frame, all tracklets die
        if len(results) == 0:
            self.tracks = []
            # BUG FIX: previously returned a bare list regardless of
            # train_data, breaking callers that unpack two values
            return ([], train_set) if train_data else []
        # keep only classes evaluated for tracking and extend each detection
        temp = []
        for det in results:
            if det['detection_name'] not in self.tracking_names:
                continue
            det['ct'] = np.array(det['translation'][:2])  # 2d centerpoint
            if self.tracker == 'PointTracker':
                # expected offset back towards the previous frame's position
                det['tracking'] = np.array(det['velocity'][:2]) * -1 * time_lag
            # label_preds: class id (instead of class name)
            det['label_preds'] = self.tracking_names.index(det['detection_name'])
            temp.append(det)
        results = temp
        M = len(self.tracks)  # number of existing tracklets
        ret = []  # will become the updated tracklet list
        # first frame (no tracklets yet): every detection starts a tracklet
        if M == 0:
            for result in results:
                track = result
                self.id_count += 1
                track['tracking_id'] = self.id_count
                track['age'] = 1  # frames since last matching detection
                track['active'] = self.min_hits  # currently matched?
                if self.tracker == 'KF':
                    self._init_kf(track)
                ret.append(track)
            self.tracks = ret
            return (ret, train_set) if train_data else ret
        # from the second frame on: predict comparable positions
        if self.tracker == 'PointTracker':
            # N x 2: estimated previous-frame centre of each detection
            # (centre plus expected backwards offset)
            if 'tracking' in results[0]:
                dets = np.array(
                    [det['ct'].astype(np.float32) + det['tracking'].astype(np.float32)
                     for det in results], np.float32)
            else:
                dets = np.array(
                    [det['ct'] for det in results], np.float32)
            tracks = np.array(
                [pre_det['ct'] for pre_det in self.tracks], np.float32)  # M x 2
        elif self.tracker == 'KF':
            dets = np.array(
                [det['ct'] for det in results], np.float32)
            tracks = []
            for tracklet in self.tracks:
                # constant-acceleration transition over time_lag
                tracklet['KF'].predict(F=np.array([[1, 0, time_lag, 0, time_lag * time_lag, 0],
                                                   [0, 1, 0, time_lag, 0, time_lag * time_lag],
                                                   [0, 0, 1, 0, time_lag, 0],
                                                   [0, 0, 0, 1, 0, time_lag],
                                                   [0, 0, 0, 0, 1, 0],
                                                   [0, 0, 0, 0, 0, 1]]))
                tracks.append(tracklet['KF'].x[:2])
            tracks = np.array(tracks, np.float32)  # M x 2
        # match current detections with the estimated past positions
        matching = comparing_positions(self, self.tracks, results, tracks, dets)
        matched, unmatched_trk, unmatched_det = matching[0], matching[1], matching[2]
        # training data for the learned score-update function
        if version == 'v1.0-trainval' and train_data:
            matched_det = get_data_from_index(matched, 0, results)
            matched_det_pos = get_data_from_index(matched, 0, dets)
            matched_trk = get_data_from_index(matched, 1, self.tracks)
            train_set = tain_data(self, annotated_data, matched_det, matched_det_pos, matched_trk)
        # matched detections continue the matched tracklet's identity
        for m in matched:
            track = results[m[0]]
            track['tracking_id'] = self.tracks[m[1]]['tracking_id']
            track['age'] = 1  # just seen: reset inactivity counter
            track['active'] = self.tracks[m[1]]['active'] + 1
            if self.tracker == 'KF':
                track['KF'] = self.tracks[m[1]]['KF']
                if self.use_vel:
                    track['KF'].update(z=np.hstack([track['ct'], np.array(track['velocity'][:2])]))
                else:
                    track['KF'].update(z=track['ct'])
                # write the filtered state back into the detection
                track['translation'][0] = track['KF'].x[0]
                track['translation'][1] = track['KF'].x[1]
                track['velocity'][0] = track['KF'].x[2]
                track['velocity'][1] = track['KF'].x[3]
            # decay the stored tracklet score before fusing it into the
            # detection score (update_function reads self.tracks[m[1]])
            self.tracks[m[1]]['detection_score'] = np.clip(self.tracks[m[1]]['detection_score'] - self.noise,
                                                           a_min=0.0, a_max=1.0)
            # update detection score
            track['detection_score'] = update_function(self, track, m, self.loaded_model)['detection_score']
            ret.append(track)
        # unmatched detections found new ('born') tracklets
        for i in unmatched_det:
            track = results[i]
            self.id_count += 1
            track['tracking_id'] = self.id_count
            track['age'] = 1
            track['active'] = 1
            if self.tracker == 'KF':
                self._init_kf(track)
            # newborn tracklets count as active only above the detection
            # threshold (experiment toggle kept from the original code)
            if track['detection_score'] > self.det_th:
                track['active'] = 1
            else:
                track['active'] = 0
            ret.append(track)
        # keep unmatched tracklets alive (aged, score-decayed) but they are
        # not matched in the current frame
        for i in unmatched_trk:
            track = self.tracks[i]
            # update score (only apply score decay)
            if self.score_update is not None:
                track['detection_score'] -= self.noise
            # keep tracklet while young enough and above deletion threshold
            if track['age'] < self.max_age and track['detection_score'] > self.del_th:
                track['age'] += 1
                # active only while the score stays above the activity threshold
                if track['detection_score'] > self.s_th:
                    track['active'] += 1
                else:
                    track['active'] = 0
                ct = track['ct']
                # dead-reckon the centre forward using the stored offset
                if 'tracking' in track:
                    offset = track['tracking'] * -1  # move forward
                    track['ct'] = ct + offset
                ret.append(track)
        self.tracks = ret
        return (ret, train_set) if train_data else ret