diff --git a/tests/python_tests/test_cache_optimizations.py b/tests/python_tests/test_cache_optimizations.py
index 4d0192fc9b..7fa002ba68 100644
--- a/tests/python_tests/test_cache_optimizations.py
+++ b/tests/python_tests/test_cache_optimizations.py
@@ -151,7 +151,7 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t
 @pytest.fixture(scope='module')
 def phi3_converted_model(tmp_path_factory):
     model_id = "microsoft/Phi-3-mini-4k-instruct"
-    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False)
+    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id
     model.save_pretrained(models_path)
diff --git a/tests/python_tests/utils_longbench.py b/tests/python_tests/utils_longbench.py
index 32f063bfaa..77cf2efc4f 100644
--- a/tests/python_tests/utils_longbench.py
+++ b/tests/python_tests/utils_longbench.py
@@ -149,7 +149,7 @@ def qa_f1_score(prediction, ground_truth, **kwargs):
     "meta-llama/Llama-2-7b-chat-hf": 3500,
     "meta-llama/Meta-Llama-3-8B-Instruct": 5000,
     "meta-llama/Llama-3.1-8B-Instruct": 5000,
-    "microsoft/Phi-3-mini-4k-instruct": 5000,
+    "microsoft/Phi-3-mini-4k-instruct": 4096,
 }

 dataset2maxlen = {