diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp
index 73bf4ec083..901c5c64be 100644
--- a/src/cpp/src/continuous_batching_impl.cpp
+++ b/src/cpp/src/continuous_batching_impl.cpp
@@ -21,7 +21,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
 
     ov::Core core;
 
-    auto [core_properties, compile_properties] = utils::split_core_complile_config(properties);
+    auto [core_properties, compile_properties] = utils::split_core_compile_config(properties);
     core.set_property(core_properties);
 
     // The model can be compiled for GPU as well
@@ -57,7 +57,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init(
     }
 
     SchedulerConfig updated_config = scheduler_config;
-    // update KV number in scheduler config
+    // update KV blocks number in scheduler config
     if (scheduler_config.num_kv_blocks != device_config.get_num_kv_blocks()) {
         updated_config.num_kv_blocks = device_config.get_num_kv_blocks();
     }
@@ -166,24 +166,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
         timer.start();
         logits = m_model_runner->forward(m_requests, scheduler_output);
         timer.end();
-
-        ov::InferRequest infer_request = m_model_runner->get_infer_request();
-        ov::CompiledModel compiled_model = infer_request.get_compiled_model();
-        const bool is_profiling_enabled = compiled_model.get_property(ov::enable_profiling);
-
-        // collect detailed statistic
-        if (is_profiling_enabled) {
-            std::vector<ov::ProfilingInfo> profiling_info = m_model_runner->get_infer_request().get_profiling_info();
-            for (const ov::ProfilingInfo& info : profiling_info) {
-                double current_time = info.real_time.count();
-                if (info.node_type == "PagedAttentionExtension") {
-                    m_perf.m_paged_attention_time_ms += current_time;
-                } else if (info.node_type == "FullyConnected") {
-                    m_perf.m_matmul_time_ms += current_time;
-                }
-                m_perf.m_infer_total_ms += current_time;
-            }
-        }
     }
 
 #ifdef DEBUG_CACHE_STATE_DUMP
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 62a72b1cbd..14d558e431 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -63,7 +63,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
     {
         ov::Core core;
         if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) {
-            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(*filtered_plugin_config);
+            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(*filtered_plugin_config);
             core.set_property(core_plugin_config);
             auto model = core.read_model(models_path / "openvino_model.xml");
             m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
@@ -71,7 +71,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
             utils::slice_matmul_statefull_model(model);
             m_model_runner = core.compile_model(model, device, compile_plugin_config).create_infer_request();
         } else {
-            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
+            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(plugin_config);
             core.set_property(core_plugin_config);
             auto model = core.read_model(models_path / "openvino_model.xml");
             utils::slice_matmul_statefull_model(model);
diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
index 0f43555a5f..4e43fdadc9 100644
--- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
+++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp
@@ -31,7 +31,7 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(
     const ov::genai::ModelDesc draft_model_desc,
     const ov::AnyMap& tokenizer_properties) {
     ov::Core core;
-    auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(main_properties);
+    auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(main_properties);
     core.set_property(core_properties);
 
     std::filesystem::path openvino_model_name = "openvino_model.xml",
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
index 50c2e0c49e..f1718a8a5d 100644
--- a/src/cpp/src/utils.cpp
+++ b/src/cpp/src/utils.cpp
@@ -203,7 +203,7 @@ ProcessorConfig from_any_map(
 * There are not supported by `core.compile` function plugin options like `ENABLE_MMAP`
 * Move this options to `core.set_property` config
 */
-std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties) {
+std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties) {
     const std::vector<std::string> unsupported_by_compile_properties{"ENABLE_MMAP"};
     ov::AnyMap core_properties;
     ov::AnyMap compile_properties{properties};
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
index 3487fccb81..fb58022d5f 100644
--- a/src/cpp/src/utils.hpp
+++ b/src/cpp/src/utils.hpp
@@ -78,7 +78,7 @@ ProcessorConfig from_any_map(
     const ProcessorConfig& initial
 );
 
-std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties);
+std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties);
 
 ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::TokenizedInputs& minuend, const ov::genai::TokenizedInputs& subtrahend);
diff --git a/src/cpp/src/whisper_pipeline.cpp b/src/cpp/src/whisper_pipeline.cpp
index a90b281c6e..5c31d85fec 100644
--- a/src/cpp/src/whisper_pipeline.cpp
+++ b/src/cpp/src/whisper_pipeline.cpp
@@ -53,7 +53,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
                                 const ov::AnyMap& properties)
         : WhisperPipelineImplBase{models_path} {
         ov::Core core = utils::singleton_core();
-        auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(properties);
+        auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(properties);
         core.set_property(core_properties);
 
         m_models.encoder =
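// ----------------------------------------------------------------------------
// A minimal sketch of the behavior behind the renamed helper, reconstructed
// from the utils.cpp hunk above. The "_sketch" suffix marks the name as
// illustrative, not the repo's exact implementation; only the ENABLE_MMAP
// handling is confirmed by this diff.
#include <string>
#include <utility>
#include <vector>

#include "openvino/core/any.hpp"

std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config_sketch(const ov::AnyMap& properties) {
    // Plugin options such as ENABLE_MMAP are accepted by core.set_property()
    // but not by core.compile_model(), so they are moved out of the
    // compile-time map into a separate core-level map.
    const std::vector<std::string> unsupported_by_compile_properties{"ENABLE_MMAP"};
    ov::AnyMap core_properties;
    ov::AnyMap compile_properties{properties};
    for (const auto& key : unsupported_by_compile_properties) {
        auto it = compile_properties.find(key);
        if (it != compile_properties.end()) {
            core_properties[key] = it->second;  // routed to core.set_property()
            compile_properties.erase(it);       // keep the compile config valid
        }
    }
    return {core_properties, compile_properties};
}
// Every call site touched by this diff then follows the same pattern:
//     auto [core_properties, compile_properties] = split_core_compile_config_sketch(properties);
//     core.set_property(core_properties);
//     auto compiled = core.compile_model(model, device, compile_properties);
// ----------------------------------------------------------------------------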