Do not report infer latency for genai to avoid confusion (#1295)
CVS-158466
eaidova authored Dec 4, 2024
1 parent e2fa0d0 commit d77898c
Showing 2 changed files with 6 additions and 5 deletions.
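
For context (not part of the commit itself): the change relies on the difference between an empty list and None. An empty list reads as "infer latency was measured and there happened to be zero infer calls", while None signals "infer latency was not collected at all", which is the case for the GenAI pipelines. A minimal sketch of how a reporting helper could use that distinction; the function and parameter names below are illustrative only, not the actual llm_bench API:

    # Hypothetical sketch: treat None as "not measured" and skip the metric,
    # instead of averaging an empty list or printing a misleading 0.0 ms.
    from typing import List, Optional

    def report_latency(tms: List[float], tms_infer: Optional[List[float]]) -> None:
        # Per-token latency is always collected, so it is always reported.
        avg_token_ms = sum(tms) / len(tms) * 1000 if tms else 0.0
        print(f'avg token latency: {avg_token_ms:.2f} ms')

        if tms_infer is None:
            # GenAI path: per-infer timings are not available; report nothing.
            return
        avg_infer_ms = sum(tms_infer) / len(tms_infer) * 1000 if tms_infer else 0.0
        print(f'avg infer latency: {avg_infer_ms:.2f} ms')

Passing None from the call sites below lets a reporter along these lines stay silent about infer latency for GenAI runs rather than show a value that was never measured.
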
7 changes: 4 additions & 3 deletions tools/llm_bench/task/speech_to_text_generation.py
@@ -57,7 +57,7 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
                 - np.array(perf_metrics.raw_metrics.m_new_token_times[:-1])
             ).tolist()
             tm_list = (np.array([first_token_time] + second_tokens_durations) / 1000).tolist()
-            tm_infer_list = []
+            tm_infer_list = None
             result_text = result_text.texts[0]
         else:
             start = time.perf_counter()
@@ -71,8 +71,9 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
             tm_infer_list = whisper_hook.get_time_infer_list()
         log.debug('latency of all tokens:')
         [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_list)]
-        log.debug('latency of all infers:')
-        [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_infer_list)]
+        if tm_infer_list is not None:
+            log.debug('latency of all infers:')
+            [log.debug('[{}]{:.4f}'.format(idx, tm)) for idx, tm in enumerate(tm_infer_list)]
         generation_time = end - start
         out_data = processor.tokenizer(result_text, return_tensors='pt')
         out_tokens = out_data['input_ids'] if 'input_ids' in out_data else out_data
4 changes: 2 additions & 2 deletions tools/llm_bench/task/text_generation.py
@@ -272,7 +272,7 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
             num,
             iter_data,
             tm_list.tolist(),
-            [],
+            None,
             warm_up=(num == 0),
             max_rss_mem=max_rss_mem_consumption,
             max_shared_mem=max_shared_mem_consumption,
@@ -394,7 +394,7 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
             iter_num=num,
             iter_data=iter_data,
             tms=tm_list,
-            tms_infer=[],
+            tms_infer=None,
             warm_up=(num == 0),
             max_rss_mem=max_rss_mem_consumption,
             max_shared_mem=max_shared_mem_consumption,
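
With both call sites now passing None for the infer-time list, downstream reporting can tell that infer-level latency was never collected on the GenAI path and simply omit it, rather than treating an empty list as a measured result of zero, which is presumably the confusion the commit title refers to.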