From e4700f43f162c5a3848ffc3116fffd4f3a64b53b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 6 May 2024 18:10:39 +0400
Subject: [PATCH] Tried mistral as well

---
 .../continuous_batching/python/tests/requirements.txt  |  8 ++++++--
 .../continuous_batching/python/tests/test_sampling.py  | 11 ++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt b/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
index 4810754472..c5abfa6403 100644
--- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
+++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
@@ -1,5 +1,9 @@
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly/
-openvino
+# we need at least openvino 2024.2
+openvino >= 2024.2
 openvino-tokenizers
+# use the latest released version once it's available
 git+https://github.com/huggingface/optimum-intel.git@main
-pytest
\ No newline at end of file
+pytest
+# set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefit from hf_transfer
+hf_transfer
\ No newline at end of file
diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
index 6c4c977f83..73ecc6d191 100644
--- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
+++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
@@ -129,17 +129,14 @@ def run_continuous_batching(
     pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), scheduler_config)
     return pipe.generate(prompts, generation_configs)
 
-# export models via
-# optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf llama2
-# optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --fp16 llama2-fp16
-
 # tested models:
-# - facebook/opt-125m (opt125)
-# - meta-llama/Llama-2-7b-chat-hf (llama2 or llama2-fp16)
+# - facebook/opt-125m
+# - meta-llama/Llama-2-7b-chat-hf
+# - mistralai/Mistral-7B-Instruct-v0.2
 def test_check_greedy_search(tmp_path):
     prompts, generation_configs = get_test_dataset()
-    model_id : str = "meta-llama/Llama-2-7b-chat-hf"
+    model_id : str = "facebook/opt-125m"
     (hf_results, model_path) = run_hugging_face(model_id=model_id, prompts=prompts, generation_configs=generation_configs, tmp_path=tmp_path, use_optimum=True)
     my_results : List[GenerationResult] = run_continuous_batching(model_path, get_scheduler_config(), prompts, generation_configs)
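As a quick sanity check of the patched test, the greedy-search comparison can also be driven by hand. The sketch below is illustrative only: it assumes the helpers defined in test_sampling.py (get_test_dataset, get_scheduler_config, run_hugging_face, run_continuous_batching) are importable from the tests directory, and that the hf_transfer package from the updated requirements.txt is installed so that HF_HUB_ENABLE_HF_TRANSFER takes effect.

import os
# enable accelerated downloads via hf_transfer; must be set before
# huggingface_hub is first imported (assumption based on the new
# requirements.txt comment)
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import tempfile
from pathlib import Path
from test_sampling import (get_test_dataset, get_scheduler_config,
                           run_hugging_face, run_continuous_batching)

prompts, generation_configs = get_test_dataset()
with tempfile.TemporaryDirectory() as tmp:
    # export the model via optimum-intel and collect the Hugging Face
    # reference results, as the test does with its pytest tmp_path fixture
    hf_results, model_path = run_hugging_face(
        model_id="facebook/opt-125m", prompts=prompts,
        generation_configs=generation_configs,
        tmp_path=Path(tmp), use_optimum=True)
    # run the same prompts through the continuous batching pipeline
    cb_results = run_continuous_batching(
        model_path, get_scheduler_config(), prompts, generation_configs)
    # hf_results and cb_results can now be compared, as the test does

The same flow should work for the other models listed in the test's comment (meta-llama/Llama-2-7b-chat-hf, mistralai/Mistral-7B-Instruct-v0.2), at the cost of a much larger download and export step.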