diff --git a/tests/python_tests/conftest.py b/tests/python_tests/conftest.py
index f98f47ecf3..e159045601 100644
--- a/tests/python_tests/conftest.py
+++ b/tests/python_tests/conftest.py
@@ -3,7 +3,8 @@
 
 def pytest_make_parametrize_id(config, val, argname):
     if argname in ['prompt', 'prompts', 'batched_prompts']:
-        return f'{val}'
+        # Print only the first 1000 characters of long prompts.
+        return f'{val[:1000]}'
     elif argname == 'model_descr':
         return f"{val[0]}"
     elif argname == 'chat_config':
diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py
index a1dc335136..d2c74e47ec 100644
--- a/tests/python_tests/test_generate_api.py
+++ b/tests/python_tests/test_generate_api.py
@@ -802,7 +802,7 @@ def test_perf_metrics(model_descr, generation_config, prompt):
     durations = np.array(raw_metrics.m_durations) / 1000
     # Check that prefill is not included in durations for TPOT calculation.
     # For the very long prompt prefill is slow and TTFT is much larger than any other token generation duration.
-    assert np.all(mean_ttft > durations * 10)
+    assert np.all(mean_ttft > durations * 5)
     mean_tpot, std_tpot = perf_metrics.get_tpot()
     assert (mean_tpot, std_tpot) == (perf_metrics.get_tpot().mean, perf_metrics.get_tpot().std)
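
Note (not part of the patch): pytest_make_parametrize_id is a standard pytest hook; returning a string overrides the auto-generated ID for a parametrized value, and returning None falls back to pytest's default. A minimal sketch of the truncation's effect, using a hypothetical test (the repeated-string prompt is illustrative, not taken from the suite):

    # test_ids_sketch.py -- hypothetical; illustrates the conftest.py hook above.
    import pytest

    # With the patched hook, the generated test ID contains at most the first
    # 1000 characters of the prompt; the value passed to the test is untouched.
    @pytest.mark.parametrize("prompt", ["one two three " * 500])
    def test_prompt_value_is_not_truncated(prompt):
        assert len(prompt) > 1000  # only the ID shown in `pytest -v` is shortened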
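Note on the relaxed assertion (also not part of the patch): TTFT (time to first token) includes prefill over the whole prompt, while the per-token durations used for TPOT should not, so TTFT is expected to dominate each token's generation time; the margin drops from 10x to 5x, presumably to make the check less flaky on slower hardware. A sketch of the check as a standalone helper, assuming raw_metrics is reachable as perf_metrics.raw_metrics and that m_durations holds microseconds, as the / 1000 in the test suggests:

    # check_sketch.py -- hypothetical helper mirroring the assertion above.
    import numpy as np

    def assert_prefill_excluded_from_tpot(perf_metrics, factor=5):
        mean_ttft = perf_metrics.get_ttft().mean              # ms, includes prefill
        durations = np.array(perf_metrics.raw_metrics.m_durations) / 1000  # us -> ms
        # Every per-token duration must sit well below TTFT; otherwise prefill
        # time leaked into the token-level durations that feed TPOT.
        assert np.all(mean_ttft > durations * factor)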