diff --git a/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.cpp b/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.cpp index e692110027..451a11b6f7 100644 --- a/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.cpp +++ b/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.cpp @@ -28,8 +28,7 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe( model_path, device, - ov::genai::prompt_lookup(true), - ov::genai::scheduler_config(scheduler_config)); + ov::genai::prompt_lookup(true)); auto streamer = [](std::string subword) { std::cout << subword << std::flush; diff --git a/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py b/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py index 557897b6b1..726391ba9b 100755 --- a/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py +++ b/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py @@ -18,11 +18,8 @@ def main(): args = parser.parse_args() device = 'CPU' - scheduler_config = openvino_genai.SchedulerConfig() - # cache params - scheduler_config.cache_size = 2 - pipe = openvino_genai.LLMPipeline(args.model_dir, device, scheduler_config=scheduler_config, prompt_lookup=True) + pipe = openvino_genai.LLMPipeline(args.model_dir, device, prompt_lookup=True) config = openvino_genai.GenerationConfig() config.max_new_tokens = 100 diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp index cd3ecf8ae3..06b58bee49 100644 --- a/src/cpp/src/llm_pipeline.cpp +++ b/src/cpp/src/llm_pipeline.cpp @@ -701,7 +701,8 @@ ov::genai::LLMPipeline::LLMPipeline( ){ auto start_time = std::chrono::steady_clock::now(); if (properties.find(ov::genai::scheduler_config.name()) != properties.end() || - properties.find(utils::DRAFT_MODEL_ARG_NAME) != properties.end()) { + properties.find(utils::DRAFT_MODEL_ARG_NAME) != properties.end() || + properties.find(ov::genai::prompt_lookup.name()) != properties.end()) { auto [plugin_config, scheduler_config] = utils::split_scheduler_config(properties); m_pimpl = std::make_unique(models_path, tokenizer, scheduler_config, device, plugin_config); } else if (device == "NPU") { @@ -721,7 +722,8 @@ ov::genai::LLMPipeline::LLMPipeline( auto start_time = std::chrono::steady_clock::now(); if (config.find(ov::genai::scheduler_config.name()) != config.end() || - config.find(utils::DRAFT_MODEL_ARG_NAME) != config.end()) { + config.find(utils::DRAFT_MODEL_ARG_NAME) != config.end() || + config.find(ov::genai::prompt_lookup.name()) != config.end()) { auto [plugin_config, scheduler_config] = utils::split_scheduler_config(config); m_pimpl = std::make_unique(models_path, scheduler_config, device, plugin_config); } else if (device == "NPU") { @@ -745,7 +747,8 @@ ov::genai::LLMPipeline::LLMPipeline( auto start_time = std::chrono::steady_clock::now(); if (plugin_config.find(ov::genai::scheduler_config.name()) != plugin_config.end() || - plugin_config.find(utils::DRAFT_MODEL_ARG_NAME) != plugin_config.end()) { + plugin_config.find(utils::DRAFT_MODEL_ARG_NAME) != plugin_config.end() || + plugin_config.find(ov::genai::prompt_lookup.name()) != plugin_config.end()){ auto [plugin_config_, scheduler_config] = utils::split_scheduler_config(plugin_config); m_pimpl = std::make_unique(model_str, weights_tensor,