From e4700f43f162c5a3848ffc3116fffd4f3a64b53b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Mon, 6 May 2024 18:10:39 +0400
Subject: [PATCH] Tried mistral as well

---
 .../continuous_batching/python/tests/requirements.txt  |  8 ++++++--
 .../continuous_batching/python/tests/test_sampling.py  | 11 ++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt b/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
index 4810754472..c5abfa6403 100644
--- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
+++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
@@ -1,5 +1,9 @@
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly/
-openvino
+# we need at least openvino 2024.2
+openvino >= 2024.2
 openvino-tokenizers
+# use the latest released version once it's available
 git+https://github.com/huggingface/optimum-intel.git@main
-pytest
\ No newline at end of file
+pytest
+# set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefit from hf_transfer
+hf_transfer
\ No newline at end of file
diff --git a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
index 6c4c977f83..73ecc6d191 100644
--- a/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
+++ b/text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py
@@ -129,17 +129,14 @@ def run_continuous_batching(
     pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), scheduler_config)
     return pipe.generate(prompts, generation_configs)
 
-# export models via
-# optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf llama2
-# optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --fp16 llama2-fp16
-
 # tested models:
-# - facebook/opt-125m (opt125)
-# - meta-llama/Llama-2-7b-chat-hf (llama2 or llama2-fp16)
+# - facebook/opt-125m
+# - meta-llama/Llama-2-7b-chat-hf
+# - mistralai/Mistral-7B-Instruct-v0.2
 def test_check_greedy_search(tmp_path):
     prompts, generation_configs = get_test_dataset()
-    model_id : str = "meta-llama/Llama-2-7b-chat-hf"
+    model_id : str = "facebook/opt-125m"
     (hf_results, model_path) = run_hugging_face(model_id=model_id, prompts=prompts, generation_configs=generation_configs, tmp_path=tmp_path, use_optimum=True)
     my_results : List[GenerationResult] = run_continuous_batching(model_path, get_scheduler_config(), prompts, generation_configs)
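As a quick sanity check of the patched test, the greedy-search comparison can also be driven by hand. The sketch below is illustrative only: it assumes the helpers defined in test_sampling.py (get_test_dataset, get_scheduler_config, run_hugging_face, run_continuous_batching) are importable from the tests directory, and that the hf_transfer package from the updated requirements.txt is installed so that HF_HUB_ENABLE_HF_TRANSFER takes effect.

import os
# enable accelerated downloads via hf_transfer; must be set before
# huggingface_hub is first imported (assumption based on the new
# requirements.txt comment)
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import tempfile
from pathlib import Path
from test_sampling import (get_test_dataset, get_scheduler_config,
                           run_hugging_face, run_continuous_batching)

prompts, generation_configs = get_test_dataset()
with tempfile.TemporaryDirectory() as tmp:
    # export the model via optimum-intel and collect the Hugging Face
    # reference results, as the test does with its pytest tmp_path fixture
    hf_results, model_path = run_hugging_face(
        model_id="facebook/opt-125m", prompts=prompts,
        generation_configs=generation_configs,
        tmp_path=Path(tmp), use_optimum=True)
    # run the same prompts through the continuous batching pipeline
    cb_results = run_continuous_batching(
        model_path, get_scheduler_config(), prompts, generation_configs)
    # hf_results and cb_results can now be compared, as the test does

The same flow should work for the other models listed in the test's comment (meta-llama/Llama-2-7b-chat-hf, mistralai/Mistral-7B-Instruct-v0.2), at the cost of a much larger download and export step.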