Skip to content

Commit

Permalink
Increase long prompt to be really long
Browse files Browse the repository at this point in the history
  • Loading branch information
ilya-lavrenov committed Dec 26, 2024
1 parent 6b89a12 commit c3924bc
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions tests/python_tests/test_llm_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,11 +605,12 @@ def run_perf_metrics_collection(model_descr, generation_config: Dict, prompt: st
# Do not apply 'repetition_penalty' if sampling is not used.
config['do_sample'] = False
config['repetition_penalty'] = 1.0 # 1.0 means no penalty

return ov_pipe.generate([prompt], **config).perf_metrics


# Parametrization cases for test_perf_metrics: a single deliberately long
# prompt (repeated 20x) so that prefill/TTFT is clearly distinguishable from
# per-token generation durations in the assertions below.
test_cases = [
    (dict(max_new_tokens=20), 'table is made of' * 20),
]
@pytest.mark.parametrize("generation_config,prompt", test_cases)
@pytest.mark.parametrize("model_descr", get_models_list())
Expand Down Expand Up @@ -639,7 +640,7 @@ def test_perf_metrics(model_descr, generation_config, prompt):
raw_metrics = perf_metrics.raw_metrics
durations = np.array(raw_metrics.m_durations) / 1000
# Check that prefill is not included in durations for TPOT calculation.
# For the very long prompt prefill is slow and TTFT is much larger than any other token generation duration.
assert np.all(mean_ttft > durations * 2)

mean_tpot, std_tpot = perf_metrics.get_tpot()
Expand Down

0 comments on commit c3924bc

Please sign in to comment.