# benchmark_network_latency.py (138 lines, 110 loc, 4.83 KB) -- forked from idstcv/ZenNAS.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
'''
Copyright (C) 2010-2021 Alibaba Group Holding Limited.
'''
import os,sys, argparse
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import ModelLoader, global_utils
import train_image_classification as tic
import torch, time
import numpy as np
def __get_latency__(model, batch_size, resolution, channel, gpu, benchmark_repeat_times, fp16):
    """Measure the average per-image forward latency of ``model`` on one CUDA device.

    The model is moved to ``cuda:<gpu>``, optionally cast to half precision,
    switched to eval mode, and fed a random input of shape
    ``(batch_size, channel, resolution, resolution)``. Three warm-up forward
    passes run first, then ``benchmark_repeat_times`` timed passes, all under
    ``torch.no_grad()``.

    Side effects: enables ``torch.backends.cudnn.benchmark`` and makes ``gpu``
    the current CUDA device.

    Args:
        model: module to benchmark; called as ``model(input_tensor)``.
        batch_size: number of images per forward pass.
        resolution: height and width of the square random input.
        channel: number of input channels.
        gpu: CUDA device index.
        benchmark_repeat_times: number of timed forward passes.
        fp16: when true, run the model and input in ``torch.float16``.

    Returns:
        Elapsed seconds divided by ``benchmark_repeat_times`` and by
        ``batch_size``, i.e. seconds per image.

    Raises:
        ZeroDivisionError: if ``benchmark_repeat_times`` or ``batch_size`` is 0.
    """
    device = torch.device('cuda:{}'.format(gpu))
    torch.backends.cudnn.benchmark = True

    torch.cuda.set_device(gpu)
    model = model.cuda(gpu)
    if fp16:
        model = model.half()
        dtype = torch.float16
    else:
        dtype = torch.float32

    the_image = torch.randn(batch_size, channel, resolution, resolution, dtype=dtype,
                            device=device)

    model.eval()
    warmup_T = 3
    with torch.no_grad():
        for _ in range(warmup_T):
            model(the_image)
        # CUDA kernels are launched asynchronously: drain the warm-up work
        # before starting the clock so it is not billed to the timed passes.
        torch.cuda.synchronize(device)
        start_timer = time.perf_counter()
        for _ in range(benchmark_repeat_times):
            model(the_image)
        # Wait for every queued kernel to finish; otherwise only the launch
        # overhead is measured, not the actual compute time.
        torch.cuda.synchronize(device)
        end_timer = time.perf_counter()

    the_latency = (end_timer - start_timer) / float(benchmark_repeat_times) / batch_size
    return the_latency
def get_robust_latency_mean_std(model, batch_size, resolution, channel, gpu, benchmark_repeat_times=30, fp16=False):
    """Return a trimmed mean and standard deviation of the per-image latency.

    Runs ``__get_latency__`` ten times. A round that raises has its exception
    printed and is recorded as ``np.inf``. The ten results are sorted and only
    the entries at positions 2..7 are kept (the two smallest and two largest
    are discarded) before the statistics are computed.

    Args:
        model: module to benchmark; moved to ``gpu`` with ``model.cuda(gpu)``.
        batch_size: images per forward pass.
        resolution: height and width of the square input.
        channel: number of input channels.
        gpu: CUDA device index.
        benchmark_repeat_times: timed forward passes per round.
        fp16: forwarded to ``__get_latency__``.

    Returns:
        ``(mean, std)`` of the six retained measurements, computed with
        ``np.mean`` and ``np.std``.
    """
    num_rounds = 10
    model = model.cuda(gpu)

    measurements = []
    for _ in range(num_rounds):
        try:
            measured = __get_latency__(model, batch_size, resolution, channel, gpu, benchmark_repeat_times, fp16)
        except Exception as err:
            # A failed round is reported and counted as infinitely slow, so it
            # sorts to the end and is usually trimmed away below.
            print(err)
            measured = np.inf
        measurements.append(measured)

    # Keep the middle six of the ten sorted measurements.
    trimmed = sorted(measurements)[2:8]
    return np.mean(trimmed), np.std(trimmed)
def main(opt, argv):
    """Print the robust per-image latency for every requested batch size.

    ``opt.batch_size_list`` is a comma-separated string of integers. The model
    is built once with ``ModelLoader.get_model`` and benchmarked for each batch
    size; results are printed as ``batch_size,latency`` lines after a header.

    Args:
        opt: parsed options; reads ``batch_size_list``, ``input_image_size``,
            ``gpu``, ``repeat_times``, ``fp16`` and ``dist_mode``, and
            overwrites ``batch_size`` with 1.
        argv: raw command-line arguments forwarded to ``ModelLoader.get_model``.
    """
    global_utils.create_logging()

    sizes_to_benchmark = list(map(int, opt.batch_size_list.split(',')))
    opt.batch_size = 1
    opt = tic.config_dist_env_and_opt(opt)

    # create model
    network = ModelLoader.get_model(opt, argv)

    print('batch_size, latency_per_image')
    for per_gpu_batch in sizes_to_benchmark:
        mean_latency, _std = get_robust_latency_mean_std(
            model=network,
            batch_size=per_gpu_batch,
            resolution=opt.input_image_size,
            channel=3,
            gpu=opt.gpu,
            benchmark_repeat_times=opt.repeat_times,
            fp16=opt.fp16,
        )
        print('{},{:4g}'.format(per_gpu_batch, mean_latency))

    if opt.dist_mode == 'auto':
        global_utils.release_gpu(opt.gpu)
def get_model_latency(model, batch_size, resolution, in_channels, gpu, repeat_times, fp16):
    """Measure the average per-image forward latency of ``model``.

    A random input of shape ``(batch_size, in_channels, resolution, resolution)``
    is created on ``cuda:<gpu>`` (or on the CPU when ``gpu`` is ``None``). The
    model is put in eval mode, run three times as warm-up, then run
    ``repeat_times`` times under ``torch.no_grad()`` while being timed.

    The model itself is not moved to the device here; the caller is expected
    to have placed it on the same device as ``gpu`` indicates.

    Args:
        model: module to benchmark; called as ``model(input_tensor)``.
        batch_size: number of images per forward pass.
        resolution: height and width of the square random input.
        in_channels: number of input channels.
        gpu: CUDA device index, or ``None`` to benchmark on the CPU.
        repeat_times: number of timed forward passes.
        fp16: when true, run the model and input in ``torch.float16``.

    Returns:
        Elapsed seconds divided by ``repeat_times`` and by ``batch_size``,
        i.e. seconds per image.

    Raises:
        ZeroDivisionError: if ``repeat_times`` or ``batch_size`` is 0.
    """
    use_cuda = gpu is not None
    if use_cuda:
        device = torch.device('cuda:{}'.format(gpu))
    else:
        device = torch.device('cpu')

    if fp16:
        model = model.half()
        dtype = torch.float16
    else:
        dtype = torch.float32

    the_image = torch.randn(batch_size, in_channels, resolution, resolution, dtype=dtype,
                            device=device)

    model.eval()
    warmup_T = 3
    with torch.no_grad():
        for _ in range(warmup_T):
            model(the_image)
        if use_cuda:
            # CUDA kernels run asynchronously; finish the warm-up work before
            # starting the clock.
            torch.cuda.synchronize(device)
        start_timer = time.perf_counter()
        for _ in range(repeat_times):
            model(the_image)
        if use_cuda:
            # Without this the clock stops once the kernels are queued, not
            # once they have actually completed.
            torch.cuda.synchronize(device)
        end_timer = time.perf_counter()

    the_latency = (end_timer - start_timer) / float(repeat_times) / batch_size
    return the_latency
def parse_cmd_options(argv):
    """Parse the benchmark-specific command-line options from ``argv``.

    Unrecognised arguments are ignored (``parse_known_args``), so the same
    argument list can be shared with other parsers.

    Args:
        argv: list of command-line tokens.

    Returns:
        Namespace with ``batch_size``, ``input_image_size``, ``save_dir``,
        ``repeat_times``, ``gpu`` and ``fp16``.
    """
    option_specs = (
        ('--batch_size', dict(type=int, default=None,
                              help='number of instances in one mini-batch.')),
        ('--input_image_size', dict(type=int, default=None,
                                    help='resolution of input image, usually 32 for CIFAR and 224 for ImageNet.')),
        ('--save_dir', dict(type=str, default=None,
                            help='output directory')),
        ('--repeat_times', dict(type=int, default=1)),
        ('--gpu', dict(type=int, default=None)),
        ('--fp16', dict(action='store_true')),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)

    known_options, _unparsed = parser.parse_known_args(argv)
    return known_options
if __name__ == "__main__":
    # The same command line is read twice: once by the project-wide parser
    # (options consumed by ModelLoader) and once by this module's parser
    # (benchmark settings).
    opt = global_utils.parse_cmd_options(sys.argv)
    args = parse_cmd_options(sys.argv)

    the_model = ModelLoader.get_model(opt, sys.argv)
    if args.gpu is not None:
        # get_model_latency does not move the model, so place it on the GPU here.
        the_model = the_model.cuda(args.gpu)

    latency_kwargs = dict(
        model=the_model,
        batch_size=args.batch_size,
        resolution=args.input_image_size,
        in_channels=3,
        gpu=args.gpu,
        repeat_times=args.repeat_times,
        fp16=args.fp16,
    )
    the_latency = get_model_latency(**latency_kwargs)

    # Report both latency and its reciprocal (throughput).
    print(f'{the_latency:.4g} second(s) per image, or {1.0/the_latency:.4g} image(s) per second.')