diff --git a/tests/python_tests/test_cache_optimizations.py b/tests/python_tests/test_cache_optimizations.py
index 4d0192fc9b..7fa002ba68 100644
--- a/tests/python_tests/test_cache_optimizations.py
+++ b/tests/python_tests/test_cache_optimizations.py
@@ -151,7 +151,7 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t
 @pytest.fixture(scope='module')
 def phi3_converted_model(tmp_path_factory):
     model_id = "microsoft/Phi-3-mini-4k-instruct"
-    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, load_in_8bit=False)
+    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     models_path = tmp_path_factory.mktemp("cacheopt_test_models") / model_id
     model.save_pretrained(models_path)
diff --git a/tests/python_tests/utils_longbench.py b/tests/python_tests/utils_longbench.py
index 32f063bfaa..77cf2efc4f 100644
--- a/tests/python_tests/utils_longbench.py
+++ b/tests/python_tests/utils_longbench.py
@@ -149,7 +149,7 @@ def qa_f1_score(prediction, ground_truth, **kwargs):
     "meta-llama/Llama-2-7b-chat-hf": 3500,
     "meta-llama/Meta-Llama-3-8B-Instruct": 5000,
     "meta-llama/Llama-3.1-8B-Instruct": 5000,
-    "microsoft/Phi-3-mini-4k-instruct": 5000,
+    "microsoft/Phi-3-mini-4k-instruct": 4096,
 }

 dataset2maxlen = {