diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 5e448fe88c..8e26e200f4 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -75,7 +75,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
         ov::Core core;
         ov::CompiledModel compiled_model;
         auto [core_plugin_config, plugin_config] = ov::genai::utils::split_core_compile_config(config);
-        utils::slice_matmul_statefull_model(model);
+        utils::slice_matmul_stateful_model(model);
         m_kv_cache_seq_length_axis = ov::genai::utils::get_seq_len_axis(model);
 
         if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) {
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
index be9fc972dc..83dbf15376 100644
--- a/src/cpp/src/utils.cpp
+++ b/src/cpp/src/utils.cpp
@@ -259,7 +259,7 @@ ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::Token
     return {new_input_ids, new_attention_mask};
 }
 
-void slice_matmul_statefull_model(std::shared_ptr<ov::Model> model) {
+void slice_matmul_stateful_model(std::shared_ptr<ov::Model> model) {
     auto last_node = model->output(0).get_node()->input_value(0).get_node();
     ov::Node* matmul = dynamic_cast<ov::op::v0::MatMul*>(last_node);
     if (matmul) {
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
index 57225e60ff..6207c889a2 100644
--- a/src/cpp/src/utils.hpp
+++ b/src/cpp/src/utils.hpp
@@ -106,7 +106,7 @@ std::shared_ptr<ov::Model> read_model_with_config(const std::filesystem::path& m
 ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::TokenizedInputs& minuend,
                                                           const ov::genai::TokenizedInputs& subtrahend);
 
-void slice_matmul_statefull_model(std::shared_ptr<ov::Model> model);
+void slice_matmul_stateful_model(std::shared_ptr<ov::Model> model);
 
 ov::Core singleton_core();