diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py
index 19a8ccccf5..e4cec73a86 100644
--- a/tests/python_tests/test_llm_pipeline.py
+++ b/tests/python_tests/test_llm_pipeline.py
@@ -605,11 +605,12 @@ def run_perf_metrics_collection(model_descr, generation_config: Dict, prompt: st
         # Do not apply 'repetition_penalty' if sampling is not used.
         config['do_sample'] = False
         config['repetition_penalty'] = 1.0 # 1.0 means no penalty
+
     return ov_pipe.generate([prompt], **config).perf_metrics


 test_cases = [
-    (dict(max_new_tokens=20), 'table is made of'),
+    (dict(max_new_tokens=20), 'table is made of' * 20),
 ]
 @pytest.mark.parametrize("generation_config,prompt", test_cases)
 @pytest.mark.parametrize("model_descr", get_models_list())
@@ -639,7 +640,7 @@ def test_perf_metrics(model_descr, generation_config, prompt):
     raw_metrics = perf_metrics.raw_metrics
     durations = np.array(raw_metrics.m_durations) / 1000
     # Check that prefill is not included in durations for TPOT calculation.
-    # For the very long prompt prefill is slow and TTFT is much larger than any other token genration duration.
+    # For the very long prompt prefill is slow and TTFT is much larger than any other token generation duration.
     assert np.all(mean_ttft > durations * 2)

     mean_tpot, std_tpot = perf_metrics.get_tpot()
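
Note on the prompt change above: repeating 'table is made of' 20 times makes the prefill phase long enough that TTFT reliably dominates every per-token decode duration, which is what the assert np.all(mean_ttft > durations * 2) check relies on. A minimal sketch of that relationship, using hypothetical timing numbers rather than anything measured by the test:

    import numpy as np

    # Hypothetical values for illustration only (not measured by the test).
    mean_ttft = 250.0                                # ms: time to first token, dominated by prefill of the long prompt
    durations = np.array([12.0, 11.5, 13.1, 12.4])  # ms: per-token decode durations (cf. raw_metrics.m_durations / 1000)

    # Same check as in test_perf_metrics: TTFT must exceed twice every decode
    # duration, which only holds consistently when the prompt (and thus prefill) is long.
    assert np.all(mean_ttft > durations * 2)

With the original short prompt, prefill can be comparable to a single decode step, so the factor-of-2 margin is not guaranteed.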