diff --git a/.github/workflows/llm_bench-python.yml b/.github/workflows/llm_bench-python.yml
index 1999bafcfe..8356805e19 100644
--- a/.github/workflows/llm_bench-python.yml
+++ b/.github/workflows/llm_bench-python.yml
@@ -151,7 +151,7 @@ jobs:
           rm -rf ./ov_models/internvl2-1B
       - name: WWB Tests
         run: |
-          pip install git+https://github.com/huggingface/optimum-intel.git
+          pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
           GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
           python -m pytest -v ${{ env.WWB_PATH }}/tests
   stateful:
@@ -190,7 +190,7 @@ jobs:
       - name: WWB Tests
         run: |
           pip install pytest
-          pip install git+https://github.com/huggingface/optimum-intel.git
+          pip install git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
           GIT_CLONE_PROTECTION_ACTIVE=false PIP_PRE=1 PIP_EXTRA_INDEX_URL=https://storage.openvinotoolkit.org/simple/wheels/nightly pip install ${{ env.WWB_PATH }}
           python -m pytest -v ${{ env.WWB_PATH }}/tests
 
diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt
index 797b680b9a..d75fdbacee 100644
--- a/samples/export-requirements.txt
+++ b/samples/export-requirements.txt
@@ -2,7 +2,7 @@
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
 openvino-tokenizers~=2025.0.0.0.dev
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
 numpy<2.0.0; sys_platform == 'darwin'
 einops==0.8.0 # For Qwen
 transformers_stream_generator==0.0.5 # For Qwen
diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp
index 090aed9650..6f4f124894 100644
--- a/src/cpp/src/llm_pipeline_static.cpp
+++ b/src/cpp/src/llm_pipeline_static.cpp
@@ -407,7 +407,8 @@ ov::genai::ModelConfigDesc get_modeldesc_from_json(const std::filesystem::path&
     if (config_data.contains("_name_or_path")) {
         desc.name_or_path = config_data["_name_or_path"].get<std::string>();
     }
-    desc.num_key_value_heads = config_data["num_key_value_heads"].get<int>();
+    desc.num_key_value_heads = config_data.contains("num_key_value_heads")
+        ? config_data["num_key_value_heads"].get<int>() : -1;
     return desc;
 }
 
@@ -1102,6 +1103,11 @@ EncodedResults StaticLLMPipeline::generate(
             m_kvcache_request.get_tensor(output_name).copy_to(kvcache_in_slice);
         }
     }
+
+    if (streamer_ptr) {
+        streamer_ptr->end();
+    }
+
     auto stop_time = std::chrono::steady_clock::now();
     // If is called without tokenization then that stat will not be reported.
     auto& metrics = results.perf_metrics;
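Reviewer note on the `get_modeldesc_from_json` hunk: some exported models omit `num_key_value_heads` from their `config.json` (the field is commonly left out when it would equal the regular attention head count), so the previous unconditional read threw on such models. Below is a minimal, self-contained sketch of the pattern, under the assumption that `desc.num_key_value_heads` is a plain `int`; the sample config and the `value()` alternative are illustrative, not taken from the patch.

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
    // A config.json that lacks "num_key_value_heads", as some models emit.
    auto config_data = nlohmann::json::parse(R"({"model_type": "gpt2"})");

    // Pre-patch behavior: operator[] on the missing key yields a null JSON
    // value, and get<int>() then throws nlohmann::json::type_error.
    // Post-patch behavior: probe with contains() and fall back to -1.
    int num_key_value_heads = config_data.contains("num_key_value_heads")
        ? config_data["num_key_value_heads"].get<int>()
        : -1;

    // nlohmann::json expresses the same fallback as a one-liner.
    int same = config_data.value("num_key_value_heads", -1);

    std::cout << num_key_value_heads << " " << same << "\n";  // prints: -1 -1
}
```

The second hunk makes `StaticLLMPipeline::generate` call `streamer_ptr->end()` once the generation loop finishes; `end()` signals that no more tokens are coming, so a streamer that buffers partially decoded text gets a chance to flush it before the pipeline returns.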
diff --git a/tests/python_tests/requirements.txt b/tests/python_tests/requirements.txt
index 3dac3f8b00..bc5324b211 100644
--- a/tests/python_tests/requirements.txt
+++ b/tests/python_tests/requirements.txt
@@ -1,5 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
-optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
+optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
 numpy<2.0.0; sys_platform == 'darwin'
 onnx==1.17.0
 pytest
diff --git a/tools/llm_bench/requirements.txt b/tools/llm_bench/requirements.txt
index f5f4a3fdeb..acbc668c52 100644
--- a/tools/llm_bench/requirements.txt
+++ b/tools/llm_bench/requirements.txt
@@ -10,7 +10,7 @@ torch
 transformers>=4.40.0
 diffusers>=0.22.0
 #optimum is in dependency list of optimum-intel
-git+https://github.com/huggingface/optimum-intel.git@main#egg=optimum-intel
+git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a#egg=optimum-intel
 git+https://github.com/openvinotoolkit/nncf.git@develop#egg=nncf
 packaging
 psutil
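Note on the dependency changes: all five references to optimum-intel (two CI jobs in `llm_bench-python.yml` and three requirements files) now pin the same commit, `420fa87d039425a906b7f755e4562b65947f016a`, instead of floating on the default branch, so installs resolve to a known revision until the pin is advanced. The two spellings used in the diff are equivalent pip pins, one as a PEP 508 direct reference and one as a plain VCS URL with an `#egg=` name:

```
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a
git+https://github.com/huggingface/optimum-intel.git@420fa87d039425a906b7f755e4562b65947f016a#egg=optimum-intel
```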