diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py index 485de94996..4822b228ca 100644 --- a/tools/llm_bench/task/text_generation.py +++ b/tools/llm_bench/task/text_generation.py @@ -301,7 +301,7 @@ def token_printer(): - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) ).tolist() - tm_list = np.array([first_token_time] + second_tokens_durations) / 1000 + tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist() inference_durations = (np.array(perf_metrics.raw_metrics.token_infer_durations) / 1000 / 1000).tolist() log.debug('latency of all tokens:') [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)] @@ -323,8 +323,8 @@ def token_printer(): metrics_print.print_metrics( num, iter_data, - tm_list.tolist(), - inference_durations.tolist(), + tm_list, + inference_durations, warm_up=(num == 0), max_rss_mem=max_rss_mem_consumption, max_shared_mem=max_shared_mem_consumption,