From 98fa527b91e3e00623e1fa73be6873fa8634d3fa Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 20 Dec 2024 15:28:15 +0100 Subject: [PATCH 1/2] Update text_generation.py remove redundant `.tolist()` since it was already done above --- tools/llm_bench/task/text_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py index 485de94996..1f0985a445 100644 --- a/tools/llm_bench/task/text_generation.py +++ b/tools/llm_bench/task/text_generation.py @@ -324,7 +324,7 @@ def token_printer(): num, iter_data, tm_list.tolist(), - inference_durations.tolist(), + inference_durations, warm_up=(num == 0), max_rss_mem=max_rss_mem_consumption, max_shared_mem=max_shared_mem_consumption, From 568c853e588331a1591b3cbab51738fc0f082922 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 20 Dec 2024 15:37:10 +0100 Subject: [PATCH 2/2] Update text_generation.py --- tools/llm_bench/task/text_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py index 1f0985a445..4822b228ca 100644 --- a/tools/llm_bench/task/text_generation.py +++ b/tools/llm_bench/task/text_generation.py @@ -301,7 +301,7 @@ def token_printer(): - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1]) ).tolist() - tm_list = np.array([first_token_time] + second_tokens_durations) / 1000 + tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist() inference_durations = (np.array(perf_metrics.raw_metrics.token_infer_durations) / 1000 / 1000).tolist() log.debug('latency of all tokens:') [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)] @@ -323,7 +323,7 @@ def token_printer(): metrics_print.print_metrics( num, iter_data, - tm_list.tolist(), + tm_list, inference_durations, warm_up=(num == 0), max_rss_mem=max_rss_mem_consumption,