From a247803bbf4a91424bfe78a89ff747ecca284ccf Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 20 Dec 2024 10:57:19 +0100 Subject: [PATCH] Relax the prefill time comparison a bit --- tests/python_tests/conftest.py | 3 ++- tests/python_tests/test_generate_api.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/python_tests/conftest.py b/tests/python_tests/conftest.py index f98f47ecf3..e159045601 100644 --- a/tests/python_tests/conftest.py +++ b/tests/python_tests/conftest.py @@ -3,7 +3,8 @@ def pytest_make_parametrize_id(config, val, argname): if argname in ['prompt', 'prompts', 'batched_prompts']: - return f'{val}' + # Print only first 1000 characters of long prompts. + return f'{val[:1000]}' elif argname == 'model_descr': return f"{val[0]}" elif argname == 'chat_config': diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py index a1dc335136..d2c74e47ec 100644 --- a/tests/python_tests/test_generate_api.py +++ b/tests/python_tests/test_generate_api.py @@ -802,7 +802,7 @@ def test_perf_metrics(model_descr, generation_config, prompt): durations = np.array(raw_metrics.m_durations) / 1000 # Check that prefill is not included in durations for TPOT calculation. # For the very long prompt prefill is slow and TTFT is much larger than any other token genration duration. - assert np.all(mean_ttft > durations * 10) + assert np.all(mean_ttft > durations * 5) mean_tpot, std_tpot = perf_metrics.get_tpot() assert (mean_tpot, std_tpot) == (perf_metrics.get_tpot().mean, perf_metrics.get_tpot().std)