Commit
CB: drop profiling as it drops performance (#1280)
ilya-lavrenov authored Nov 30, 2024
1 parent f59a638 · commit 6f160e0
Showing 6 changed files with 8 additions and 26 deletions.
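
In short: the per-step collection of ov::ProfilingInfo is dropped from ContinuousBatchingImpl::step(), since querying profiling data on every scheduler iteration drops performance, and the misspelled helper split_core_complile_config is renamed to split_core_compile_config at its definition and every call site.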
src/cpp/src/continuous_batching_impl.cpp (2 additions & 20 deletions)
```diff
@@ -21,7 +21,7 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
 
     ov::Core core;
 
-    auto [core_properties, compile_properties] = utils::split_core_complile_config(properties);
+    auto [core_properties, compile_properties] = utils::split_core_compile_config(properties);
     core.set_property(core_properties);
 
     // The model can be compiled for GPU as well
@@ -57,7 +57,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init(
     }
 
     SchedulerConfig updated_config = scheduler_config;
-    // update KV number in scheduler config
+    // update KV blocks number in scheduler config
     if (scheduler_config.num_kv_blocks != device_config.get_num_kv_blocks()) {
         updated_config.num_kv_blocks = device_config.get_num_kv_blocks();
     }
@@ -166,24 +166,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() {
         timer.start();
         logits = m_model_runner->forward(m_requests, scheduler_output);
         timer.end();
-
-        ov::InferRequest infer_request = m_model_runner->get_infer_request();
-        ov::CompiledModel compiled_model = infer_request.get_compiled_model();
-        const bool is_profiling_enabled = compiled_model.get_property(ov::enable_profiling);
-
-        // collect detailed statistic
-        if (is_profiling_enabled) {
-            std::vector<ov::ProfilingInfo> profiling_info = m_model_runner->get_infer_request().get_profiling_info();
-            for (const ov::ProfilingInfo& info : profiling_info) {
-                double current_time = info.real_time.count();
-                if (info.node_type == "PagedAttentionExtension") {
-                    m_perf.m_paged_attention_time_ms += current_time;
-                } else if (info.node_type == "FullyConnected") {
-                    m_perf.m_matmul_time_ms += current_time;
-                }
-                m_perf.m_infer_total_ms += current_time;
-            }
-        }
     }
 
 #ifdef DEBUG_CACHE_STATE_DUMP
```
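
For context, the deleted block read OpenVINO's per-node profiling counters inside the generation loop, and it only ran when the model had been compiled with ov::enable_profiling(true). The same numbers remain reachable outside the hot path. Below is a minimal standalone sketch of that accounting, run once after an inference rather than on every step(); the model path, the CPU device, and pre-set input tensors are assumptions for illustration, not part of this commit:

```cpp
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // ov::enable_profiling is the same property the deleted code checked;
    // it must be set at compile time for get_profiling_info() to return data.
    ov::CompiledModel compiled =
        core.compile_model("openvino_model.xml", "CPU", ov::enable_profiling(true));
    ov::InferRequest request = compiled.create_infer_request();
    request.infer();  // assumes input tensors were filled beforehand

    // Same bucketing as the removed block: PagedAttention vs. MatMul vs. total.
    double paged_attention_us = 0.0, matmul_us = 0.0, total_us = 0.0;
    for (const ov::ProfilingInfo& info : request.get_profiling_info()) {
        const double us = info.real_time.count();  // real_time is std::chrono::microseconds
        if (info.node_type == "PagedAttentionExtension")
            paged_attention_us += us;
        else if (info.node_type == "FullyConnected")
            matmul_us += us;
        total_us += us;
    }
    std::cout << "PagedAttention: " << paged_attention_us / 1000.0 << " ms\n"
              << "FullyConnected: " << matmul_us / 1000.0 << " ms\n"
              << "Total:          " << total_us / 1000.0 << " ms\n";
}
```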
src/cpp/src/llm_pipeline.cpp (2 additions & 2 deletions)
```diff
@@ -63,15 +63,15 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
     {
         ov::Core core;
         if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) {
-            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(*filtered_plugin_config);
+            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(*filtered_plugin_config);
             core.set_property(core_plugin_config);
             auto model = core.read_model(models_path / "openvino_model.xml");
             m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
             m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device);  // TODO: Make the prefix name configurable
             utils::slice_matmul_statefull_model(model);
             m_model_runner = core.compile_model(model, device, compile_plugin_config).create_infer_request();
         } else {
-            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
+            auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_compile_config(plugin_config);
             core.set_property(core_plugin_config);
             auto model = core.read_model(models_path / "openvino_model.xml");
             utils::slice_matmul_statefull_model(model);
```
```diff
@@ -31,7 +31,7 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::SpeculativeDecodingImpl(
     const ov::genai::ModelDesc draft_model_desc,
     const ov::AnyMap& tokenizer_properties) {
     ov::Core core;
-    auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(main_properties);
+    auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(main_properties);
     core.set_property(core_properties);
 
     std::filesystem::path openvino_model_name = "openvino_model.xml",
```
src/cpp/src/utils.cpp (1 addition & 1 deletion)
```diff
@@ -203,7 +203,7 @@ ProcessorConfig from_any_map(
  * There are not supported by `core.compile` function plugin options like `ENABLE_MMAP`
  * Move this options to `core.set_property` config
  */
-std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties) {
+std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties) {
     const std::vector<std::string> unsupported_by_compile_properties{"ENABLE_MMAP"};
     ov::AnyMap core_properties;
     ov::AnyMap compile_properties{properties};
```
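
The hunk cuts off before the function body ends. As a reading aid, here is one plausible completion, a sketch consistent with the three lines shown above; the loop body and return statement are assumptions, not the verbatim source:

```cpp
#include <string>
#include <utility>
#include <vector>

#include <openvino/core/any.hpp>  // ov::AnyMap

std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties) {
    const std::vector<std::string> unsupported_by_compile_properties{"ENABLE_MMAP"};
    ov::AnyMap core_properties;
    ov::AnyMap compile_properties{properties};
    // Relocate options that core.compile_model() rejects (e.g. ENABLE_MMAP)
    // into the map destined for core.set_property().
    for (const auto& key : unsupported_by_compile_properties) {
        auto it = compile_properties.find(key);
        if (it != compile_properties.end()) {
            core_properties[key] = it->second;
            compile_properties.erase(it);
        }
    }
    return {core_properties, compile_properties};
}
```

Callers then route the two maps as the diffs above show: core_properties into core.set_property() and compile_properties into core.compile_model().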
src/cpp/src/utils.hpp (1 addition & 1 deletion)
```diff
@@ -78,7 +78,7 @@ ProcessorConfig from_any_map(
     const ProcessorConfig& initial
 );
 
-std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& properties);
+std::pair<ov::AnyMap, ov::AnyMap> split_core_compile_config(const ov::AnyMap& properties);
 
 ov::genai::TokenizedInputs subtract_chat_tokenized_inputs(const ov::genai::TokenizedInputs& minuend, const ov::genai::TokenizedInputs& subtrahend);
 
```
src/cpp/src/whisper_pipeline.cpp (1 addition & 1 deletion)
```diff
@@ -53,7 +53,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
                                 const ov::AnyMap& properties)
         : WhisperPipelineImplBase{models_path} {
         ov::Core core = utils::singleton_core();
-        auto [core_properties, compile_properties] = ov::genai::utils::split_core_complile_config(properties);
+        auto [core_properties, compile_properties] = ov::genai::utils::split_core_compile_config(properties);
         core.set_property(core_properties);
 
         m_models.encoder =
```
