forked from harlanhong/CVPR2022-DaGAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
crop-video.py
158 lines (124 loc) · 5.74 KB
/
crop-video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import face_alignment
import skimage.io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage.transform import resize
from tqdm import tqdm
import os
import imageio
import numpy as np
import warnings
warnings.filterwarnings("ignore")
def extract_bbox(frame, fa):
    """Detect faces in one frame and return their boxes in original-frame pixels.

    Frames whose longer side exceeds 640 pixels are shrunk so that side is
    roughly 640 before detection; the resulting boxes are multiplied by the
    same factor so they refer to the full-size frame again.

    Args:
        frame: Image array with channels on the last axis; only the first
            three channels are passed to the detector.
        fa: Object exposing ``face_detector.detect_from_image``.

    Returns:
        An empty list when nothing is detected, otherwise an array with one
        row per detection, the last detector column removed (presumably the
        confidence score -- confirm against the detector), scaled back to the
        original resolution.
    """
    longest_side = max(frame.shape[0], frame.shape[1])
    scale_factor = 1
    if longest_side > 640:
        scale_factor = longest_side / 640.0
        small_shape = (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor))
        # resize() output is converted back to uint8 before detection.
        frame = img_as_ubyte(resize(frame, small_shape))

    # Keep the first three channels, then reverse their order for the detector
    # (presumably RGB -> BGR -- confirm against the detector's expectations).
    colour = frame[..., :3]
    detections = fa.face_detector.detect_from_image(colour[..., ::-1])

    if len(detections) == 0:
        return []
    return np.array(detections)[:, :-1] * scale_factor
def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Widths and heights are measured inclusively (``x2 - x1 + 1``), so a box
    whose corners coincide still has area 1.

    Args:
        boxA: First box, indexable as ``[x1, y1, x2, y2]``.
        boxB: Second box, same layout.

    Returns:
        Overlap area divided by the area of the union, as a float.
    """
    def _inclusive_area(box):
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    # Corners of the overlap rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Clamp each side at 0 so disjoint boxes give zero overlap.
    overlap = max(0, right - left + 1) * max(0, bottom - top + 1)

    union = _inclusive_area(boxA) + _inclusive_area(boxB) - overlap
    return overlap / float(union)
def join(tube_bbox, bbox):
    """Return the smallest ``(x1, y1, x2, y2)`` box enclosing both inputs.

    Args:
        tube_bbox: Box accumulated so far, indexable as ``[x1, y1, x2, y2]``.
        bbox: Box to merge into it, same layout.

    Returns:
        A 4-tuple: the element-wise minimum of the two top-left corners and
        the element-wise maximum of the two bottom-right corners.
    """
    # Positions 0-1 are the top-left corner (take the min),
    # positions 2-3 the bottom-right corner (take the max).
    pickers = (min, min, max, max)
    return tuple(pick(tube_bbox[k], bbox[k]) for k, pick in enumerate(pickers))
def _quote_shell_arg(value):
    """Quote ``value`` so the host shell reads it as a single argument."""
    import os
    import shlex
    import subprocess

    text = str(value)
    if os.name == 'nt':
        # cmd.exe does not treat POSIX single quotes as quoting characters.
        return subprocess.list2cmdline([text])
    # Leaves plain paths untouched; wraps anything with spaces/metacharacters.
    return shlex.quote(text)


def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Build the ffmpeg command that cuts and crops one face track out of ``inp``.

    The box is enlarged by ``increase_area`` on every side, and the shorter
    side is enlarged further so the box is square before integer truncation
    and clamping to the frame.

    Args:
        start: First frame index of the track.
        end: Last frame index of the track.
        fps: Frames per second, used to convert frame indices to seconds.
        tube_bbox: ``(left, top, right, bottom)`` box covering the track.
        frame_shape: Frame shape; ``[0]`` bounds the bottom edge and ``[1]``
            bounds the right edge.
        inp: Path of the input video. It is shell-quoted in the command so
            paths containing spaces or shell metacharacters stay one argument.
        image_shape: Two values inserted as ``scale=<[0]>:<[1]>``.
        increase_area: Minimum relative enlargement applied to each side.

    Returns:
        The ffmpeg command line as a string; the output file is ``crop.mp4``.
    """
    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top

    # Computing aspect preserving bbox: each side grows by at least
    # `increase_area`; the shorter side grows more so both end up equal.
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Keep the crop window inside the frame.
    top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    # Frame indices -> seconds for ffmpeg's -ss / -t options.
    start = start / fps
    end = end / fps
    duration = end - start

    scale = f'{image_shape[0]}:{image_shape[1]}'
    source = _quote_shell_arg(inp)

    return f'ffmpeg -i {source} -ss {start} -t {duration} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'
def compute_bbox_trajectories(trajectories, fps, frame_shape, args):
    """Turn finished trajectories into ffmpeg crop commands.

    Only trajectories spanning more than ``args.min_frames`` frames
    (``end - start > args.min_frames``) produce a command.

    Args:
        trajectories: Iterable of 4-item records
            ``(bbox, tube_bbox, start, end)``; the first item is not used here.
        fps: Frames per second of the source video.
        frame_shape: Shape of a video frame, forwarded to ``compute_bbox``.
        args: Options object providing ``min_frames``, ``inp``,
            ``image_shape`` and ``increase``.

    Returns:
        A list of command strings, in the order of the input trajectories.
    """
    return [
        compute_bbox(
            first,
            last,
            fps,
            tube,
            frame_shape,
            inp=args.inp,
            image_shape=args.image_shape,
            increase_area=args.increase,
        )
        for _, tube, first, last in trajectories
        if (last - first) > args.min_frames
    ]
def process_video(args):
    """Track faces through a video and return one ffmpeg crop command per track.

    Every frame is run through the face detector. A trajectory is a list
    ``[initial_bbox, tube_bbox, first_frame, last_frame]``: it stays alive
    while some detection in the current frame overlaps its *initial* box with
    IoU above ``args.iou_with_initial``; ``tube_bbox`` grows to enclose every
    box assigned to it. When a trajectory ends (or the video ends) it is
    turned into a command if it is long enough.

    Args:
        args: Options object providing ``cpu``, ``inp``, ``iou_with_initial``
            and the fields read by ``compute_bbox_trajectories``
            (``min_frames``, ``image_shape``, ``increase``).

    Returns:
        A list of ffmpeg command strings; empty if the video yields no frames.
    """
    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device=device)
    video = imageio.get_reader(args.inp)

    trajectories = []
    commands = []
    # Stays None when the reader yields no frames at all.
    frame_shape = None

    try:
        fps = video.get_meta_data()['fps']
        for i, frame in tqdm(enumerate(video)):
            frame_shape = frame.shape
            bboxes = extract_bbox(frame, fa)

            ## For each trajectory check the criterion
            not_valid_trajectories = []
            valid_trajectories = []
            for trajectory in trajectories:
                tube_bbox = trajectory[0]
                intersection = 0
                for bbox in bboxes:
                    intersection = max(intersection, bb_intersection_over_union(tube_bbox, bbox))
                if intersection > args.iou_with_initial:
                    valid_trajectories.append(trajectory)
                else:
                    not_valid_trajectories.append(trajectory)

            # Trajectories that lost their face are finished: emit them now.
            commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)
            trajectories = valid_trajectories

            ## Assign bbox to trajectories, create new trajectories
            for bbox in bboxes:
                intersection = 0
                current_trajectory = None
                for trajectory in trajectories:
                    tube_bbox = trajectory[0]
                    current_intersection = bb_intersection_over_union(tube_bbox, bbox)
                    # Best-overlapping trajectory wins, but only above the threshold.
                    if intersection < current_intersection and current_intersection > args.iou_with_initial:
                        intersection = current_intersection
                        current_trajectory = trajectory

                ## Create new trajectory
                if current_trajectory is None:
                    trajectories.append([bbox, bbox, i, i])
                else:
                    current_trajectory[3] = i
                    current_trajectory[1] = join(current_trajectory[1], bbox)
    finally:
        # Release the underlying file/decoder even if detection fails midway.
        video.close()

    if frame_shape is None:
        # No frames were read, so there is nothing to crop.
        return commands

    # Flush trajectories that were still alive on the last frame.
    commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
    return commands
if __name__ == "__main__":
    # Script entry point: parse the command line, analyse the video and print
    # one ffmpeg command per detected face track.
    parser = ArgumentParser()

    # Each entry is (flag, keyword arguments for add_argument).
    option_table = [
        ("--image_shape", dict(default=(256, 256),
                               type=lambda x: tuple(map(int, x.split(','))),
                               help="Image shape")),
        ("--increase", dict(default=0.1, type=float, help='Increase bbox by this amount')),
        ("--iou_with_initial", dict(type=float, default=0.25,
                                    help="The minimal allowed iou with inital bbox")),
        ("--inp", dict(required=True, help='Input image or video')),
        ("--min_frames", dict(type=int, default=150, help='Minimum number of frames')),
        ("--cpu", dict(dest="cpu", action="store_true", help="cpu mode.")),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    commands = process_video(args)
    for ffmpeg_command in commands:
        print(ffmpeg_command)