diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index b066bb7a53..f21dc34d7f 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -68,7 +68,7 @@ enum class StopCriteria { EARLY, HEURISTIC, NEVER };
  * @param presence_penalty reduces absolute log prob if the token was generated at least once. Ignored for non continuous batching.
  * @param frequency_penalty reduces absolute log prob as many times as the token was generated. Ignored for non continuous batching.
  * @param rng_seed initializes random generator. Ignored for non continuous batching.
- * 
+ *
  * Speculative decoding parameters:
  * @param assistant_confidence_threshold the lower token probability of candidate to be validated by main model in case of static strategy candidates number update.
  * @param num_assistant_tokens the defined candidates number to be generated by draft model in case of dynamic strategy candidates number update.
@@ -118,8 +118,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     // EOS special token
     int64_t eos_token_id = -1;
 
-    // Optional adapters
-    AdapterConfig adapters;
+    std::optional<AdapterConfig> adapters;
 
     /** @brief sets eos_token_id to tokenizer_eos_token_id if eos_token_id is less than 0.
      * Otherwise verifies eos_token_id == tokenizer_eos_token_id.
diff --git a/src/cpp/include/openvino/genai/text2image/clip_text_model.hpp b/src/cpp/include/openvino/genai/text2image/clip_text_model.hpp
index 63d2b4bafd..84037eefee 100644
--- a/src/cpp/include/openvino/genai/text2image/clip_text_model.hpp
+++ b/src/cpp/include/openvino/genai/text2image/clip_text_model.hpp
@@ -56,7 +56,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel {
         return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
     }
 
-    void set_adapters(const AdapterConfig& adapters);
+    void set_adapters(const std::optional<AdapterConfig>& adapters);
 
     ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
diff --git a/src/cpp/include/openvino/genai/text2image/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/text2image/clip_text_model_with_projection.hpp
index f176f461ab..280f12b9ed 100644
--- a/src/cpp/include/openvino/genai/text2image/clip_text_model_with_projection.hpp
+++ b/src/cpp/include/openvino/genai/text2image/clip_text_model_with_projection.hpp
@@ -56,7 +56,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection {
         return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
     }
 
-    void set_adapters(const AdapterConfig& adapters);
+    void set_adapters(const std::optional<AdapterConfig>& adapters);
 
     ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance);
diff --git a/src/cpp/include/openvino/genai/text2image/pipeline.hpp b/src/cpp/include/openvino/genai/text2image/pipeline.hpp
index 1101a8d084..a8201cf6c9 100644
--- a/src/cpp/include/openvino/genai/text2image/pipeline.hpp
+++ b/src/cpp/include/openvino/genai/text2image/pipeline.hpp
@@ -85,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
         int64_t width = -1;
         size_t num_inference_steps = 50;
 
-        AdapterConfig adapters;
+        std::optional<AdapterConfig> adapters;
 
         void update_generation_config(const ov::AnyMap& config_map);
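
The header changes above make LoRA support opt-in: every `adapters` field becomes a `std::optional<AdapterConfig>`, so a pipeline constructed without an `adapters` property carries no adapter state. A minimal usage sketch of the resulting behavior, assuming the public `Adapter`/`ov::genai::adapters` property API; the paths and the 0.75 alpha are placeholders, and the exact `generate` overloads may differ:

    // Illustrative sketch only, not part of the patch.
    #include <iostream>
    #include "openvino/genai/llm_pipeline.hpp"

    int main() {
        ov::genai::Adapter adapter("adapter_model.safetensors");      // hypothetical LoRA file
        ov::genai::LLMPipeline pipe("./model_dir", "CPU",             // hypothetical model dir
                                    ov::genai::adapters(adapter));

        // Per-call override: re-blend the adapter with a different alpha.
        std::cout << pipe.generate("What is OpenVINO?",
                                   ov::genai::max_new_tokens(100),
                                   ov::genai::adapters(adapter, 0.75f)) << "\n";

        // Constructed without the property, GenerationConfig::adapters stays
        // std::nullopt and the LoRA code paths are skipped entirely.
        ov::genai::LLMPipeline plain("./model_dir", "CPU");
        std::cout << plain.generate("What is OpenVINO?", ov::genai::max_new_tokens(100)) << "\n";
    }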
diff --git a/src/cpp/include/openvino/genai/text2image/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/text2image/unet2d_condition_model.hpp
index c9a3f16f2d..a7e0309901 100644
--- a/src/cpp/include/openvino/genai/text2image/unet2d_condition_model.hpp
+++ b/src/cpp/include/openvino/genai/text2image/unet2d_condition_model.hpp
@@ -63,7 +63,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {
     void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);
 
-    void set_adapters(const AdapterConfig& adapters);
+    void set_adapters(const std::optional<AdapterConfig>& adapters);
 
     ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep);
 
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index af9c1ed404..1513ef716a 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -82,8 +82,8 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
             auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(*filtered_plugin_config);
             core.set_property(core_plugin_config);
             auto model = core.read_model(models_path / "openvino_model.xml");
-            m_generation_config.adapters.set_tensor_name_prefix("base_model.model.model.");
-            m_adapter_controller = AdapterController(model, m_generation_config.adapters, device);  // TODO: Make the prefix name configurable
+            m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
+            m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device);  // TODO: Make the prefix name configurable
             utils::slice_matmul_statefull_model(model);
             m_model_runner = core.compile_model(model, device, compile_plugin_config).create_infer_request();
         } else {
diff --git a/src/cpp/src/lora_helper.cpp b/src/cpp/src/lora_helper.cpp
index 6a659c28ac..5d836832dd 100644
--- a/src/cpp/src/lora_helper.cpp
+++ b/src/cpp/src/lora_helper.cpp
@@ -4,7 +4,8 @@
 namespace ov {
 namespace genai {
 
-std::optional<AnyMap> extract_adapters_from_properties (const AnyMap& properties, AdapterConfig* adapter_config) {
+
+std::optional<AnyMap> extract_adapters_from_properties (const AnyMap& properties, std::optional<AdapterConfig>* adapter_config) {
     auto adapters_iter = properties.find(AdaptersProperty::name());
     if (adapters_iter != properties.end()) {
         if(adapter_config) {
@@ -17,7 +18,7 @@ std::optional<AnyMap> extract_adapters_from_properties (const AnyMap& properties
     return std::nullopt;
 }
 
-bool update_adapters_from_properties (const AnyMap& properties, AdapterConfig& adapter_config) {
+bool update_adapters_from_properties (const AnyMap& properties, std::optional<AdapterConfig>& adapter_config) {
     auto adapters_iter = properties.find(AdaptersProperty::name());
     if (adapters_iter != properties.end()) {
         adapter_config = adapters_iter->second.as<AdapterConfig>();
diff --git a/src/cpp/src/lora_helper.hpp b/src/cpp/src/lora_helper.hpp
index db16c8322f..fc0872fb5a 100644
--- a/src/cpp/src/lora_helper.hpp
+++ b/src/cpp/src/lora_helper.hpp
@@ -11,11 +11,11 @@ namespace genai {
 // Search for `adapters` property in `properties` map. If it is found and `adapter_config` is not nullptr,
 // set `adapter_config` with found value, and return a copy of `properties` with the `adapters` property removed.
 // If there is no `adapters` property, `adapter_config` is left unchanged and std::nullopt is returned.
-std::optional<AnyMap> extract_adapters_from_properties (const AnyMap& properties, AdapterConfig* adapter_config = nullptr);
+std::optional<AnyMap> extract_adapters_from_properties (const AnyMap& properties, std::optional<AdapterConfig>* adapter_config = nullptr);
 
 // Search for `adapters` property in `properties` map. If it is found, set `adapter_config` with found value and return true.
 // If `adapters` property is not found, do nothing and return false.
-bool update_adapters_from_properties (const AnyMap& properties, AdapterConfig& adapter_config);
+bool update_adapters_from_properties (const AnyMap& properties, std::optional<AdapterConfig>& adapter_config);
 
 }
 }
\ No newline at end of file
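
Both helpers now write through a `std::optional<AdapterConfig>`, which preserves the distinction between "no `adapters` property was passed" (`std::nullopt`) and "an adapter config was passed, possibly empty". A sketch of the call pattern the `compile()` implementations below rely on; the wrapper function is hypothetical, and `lora_helper.hpp` is an internal header under src/cpp/src:

    // Sketch of the internal call pattern; not part of the patch itself.
    #include "lora_helper.hpp"

    ov::AnyMap compile_properties(const ov::AnyMap& properties, ov::genai::AdapterController& controller,
                                  std::shared_ptr<ov::Model> model, const std::string& device) {
        std::optional<ov::genai::AdapterConfig> adapters;
        if (auto filtered = ov::genai::extract_adapters_from_properties(properties, &adapters)) {
            // `adapters` is engaged; `*filtered` is `properties` minus the `adapters`
            // key, safe to forward to core.compile_model().
            controller = ov::genai::AdapterController(model, *adapters, device);
            return *filtered;
        }
        return properties;  // no `adapters` key: `adapters` stayed std::nullopt
    }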
diff --git a/src/cpp/src/text2image/models/autoencoder_kl.cpp b/src/cpp/src/text2image/models/autoencoder_kl.cpp
index f78d9df182..fca9c21050 100644
--- a/src/cpp/src/text2image/models/autoencoder_kl.cpp
+++ b/src/cpp/src/text2image/models/autoencoder_kl.cpp
@@ -47,11 +47,7 @@ AutoencoderKL::AutoencoderKL(const std::filesystem::path& root_dir,
                              const std::string& device,
                              const ov::AnyMap& properties)
     : AutoencoderKL(root_dir) {
-    if (auto filtered_properties = extract_adapters_from_properties(properties)) {
-        compile(device, *filtered_properties);
-    } else {
-        compile(device, properties);
-    }
+    compile(device, properties);
 }
 
 AutoencoderKL::AutoencoderKL(const AutoencoderKL&) = default;
@@ -74,7 +70,12 @@ AutoencoderKL& AutoencoderKL::reshape(int batch_size, int height, int width) {
 AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMap& properties) {
     OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
     ov::Core core = utils::singleton_core();
-    ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties);
+    ov::CompiledModel compiled_model;
+    if (auto filtered_properties = extract_adapters_from_properties(properties)) {
+        compiled_model = core.compile_model(m_model, device, *filtered_properties);
+    } else {
+        compiled_model = core.compile_model(m_model, device, properties);
+    }
     m_request = compiled_model.create_infer_request();
     // release the original model
     m_model.reset();
diff --git a/src/cpp/src/text2image/models/clip_text_model.cpp b/src/cpp/src/text2image/models/clip_text_model.cpp
index f82ed7a4aa..a4495ae847 100644
--- a/src/cpp/src/text2image/models/clip_text_model.cpp
+++ b/src/cpp/src/text2image/models/clip_text_model.cpp
@@ -35,14 +35,7 @@ CLIPTextModel::CLIPTextModel(const std::filesystem::path& root_dir,
                              const std::string& device,
                              const ov::AnyMap& properties)
     : CLIPTextModel(root_dir) {
-    AdapterConfig adapters;
-    if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
-        adapters.set_tensor_name_prefix(adapters.get_tensor_name_prefix().value_or("lora_te"));
-        m_adapter_controller = AdapterController(m_model, adapters, device);
-        compile(device, *filtered_properties);
-    } else {
-        compile(device, properties);
-    }
+    compile(device, properties);
 }
 
 CLIPTextModel::CLIPTextModel(const CLIPTextModel&) = default;
@@ -66,7 +59,15 @@ CLIPTextModel& CLIPTextModel::reshape(int batch_size) {
 CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMap& properties) {
     OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
     ov::Core core = utils::singleton_core();
-    ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties);
+    ov::CompiledModel compiled_model;
+    std::optional<AdapterConfig> adapters;
+    if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
+        adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te"));
+        m_adapter_controller = AdapterController(m_model, *adapters, device);
+        compiled_model = core.compile_model(m_model, device, *filtered_properties);
+    } else {
+        compiled_model = core.compile_model(m_model, device, properties);
+    }
     m_request = compiled_model.create_infer_request();
     // release the original model
     m_model.reset();
@@ -74,8 +75,10 @@ CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMap& properties) {
     return *this;
 }
 
-void CLIPTextModel::set_adapters(const AdapterConfig& adapters) {
-    m_adapter_controller.apply(m_request, adapters);
+void CLIPTextModel::set_adapters(const std::optional<AdapterConfig>& adapters) {
+    if(adapters) {
+        m_adapter_controller.apply(m_request, *adapters);
+    }
 }
 
 ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance) {
Cannot re-compile already compiled model"); ov::Core core = utils::singleton_core(); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model; + std::optional adapters; + if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) { + adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te")); + m_adapter_controller = AdapterController(m_model, *adapters, device); + compiled_model = core.compile_model(m_model, device, *filtered_properties); + } else { + compiled_model = core.compile_model(m_model, device, properties); + } m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); @@ -74,8 +75,10 @@ CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMa return *this; } -void CLIPTextModel::set_adapters(const AdapterConfig& adapters) { - m_adapter_controller.apply(m_request, adapters); +void CLIPTextModel::set_adapters(const std::optional& adapters) { + if(adapters) { + m_adapter_controller.apply(m_request, *adapters); + } } ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance) { diff --git a/src/cpp/src/text2image/models/clip_text_model_with_projection.cpp b/src/cpp/src/text2image/models/clip_text_model_with_projection.cpp index 878d660b62..ed714ee01d 100644 --- a/src/cpp/src/text2image/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/text2image/models/clip_text_model_with_projection.cpp @@ -35,14 +35,7 @@ CLIPTextModelWithProjection::CLIPTextModelWithProjection(const std::filesystem:: const std::string& device, const ov::AnyMap& properties) : CLIPTextModelWithProjection(root_dir) { - AdapterConfig adapters; - if(auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) { - adapters.set_tensor_name_prefix(adapters.get_tensor_name_prefix().value_or("lora_te")); - m_adapter_controller = AdapterController(m_model, adapters, device); - compile(device, *filtered_properties); - } else { - compile(device, properties); - } + compile(device, properties); } CLIPTextModelWithProjection::CLIPTextModelWithProjection(const CLIPTextModelWithProjection&) = default; @@ -66,7 +59,15 @@ CLIPTextModelWithProjection& CLIPTextModelWithProjection::reshape(int batch_size CLIPTextModelWithProjection& CLIPTextModelWithProjection::compile(const std::string& device, const ov::AnyMap& properties) { OPENVINO_ASSERT(m_model, "Model has been already compiled. 
Cannot re-compile already compiled model"); ov::Core core = utils::singleton_core(); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model; + std::optional adapters; + if(auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) { + adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te")); + m_adapter_controller = AdapterController(m_model, *adapters, device); + compiled_model = core.compile_model(m_model, device, *filtered_properties); + } else { + compiled_model = core.compile_model(m_model, device, properties); + } m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); @@ -74,8 +75,10 @@ CLIPTextModelWithProjection& CLIPTextModelWithProjection::compile(const std::str return *this; } -void CLIPTextModelWithProjection::set_adapters(const AdapterConfig& adapters) { - m_adapter_controller.apply(m_request, adapters); +void CLIPTextModelWithProjection::set_adapters(const std::optional& adapters) { + if(adapters) { + m_adapter_controller.apply(m_request, *adapters); + } } ov::Tensor CLIPTextModelWithProjection::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance) { diff --git a/src/cpp/src/text2image/models/unet2d_condition_model.cpp b/src/cpp/src/text2image/models/unet2d_condition_model.cpp index a71e0275d5..2e8e06932e 100644 --- a/src/cpp/src/text2image/models/unet2d_condition_model.cpp +++ b/src/cpp/src/text2image/models/unet2d_condition_model.cpp @@ -45,14 +45,7 @@ UNet2DConditionModel::UNet2DConditionModel(const std::filesystem::path& root_dir const std::string& device, const ov::AnyMap& properties) : UNet2DConditionModel(root_dir) { - AdapterConfig adapters; - if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) { - adapters.set_tensor_name_prefix(adapters.get_tensor_name_prefix().value_or("lora_unet")); - m_adapter_controller = AdapterController(m_model, adapters, device); - compile(device, *filtered_properties); - } else { - compile(device, properties); - } + compile(device, properties); } UNet2DConditionModel::UNet2DConditionModel(const UNet2DConditionModel&) = default; @@ -96,7 +89,15 @@ UNet2DConditionModel& UNet2DConditionModel::reshape(int batch_size, int height, UNet2DConditionModel& UNet2DConditionModel::compile(const std::string& device, const ov::AnyMap& properties) { OPENVINO_ASSERT(m_model, "Model has been already compiled. 
Cannot re-compile already compiled model"); ov::Core core = utils::singleton_core(); - ov::CompiledModel compiled_model = core.compile_model(m_model, device, properties); + ov::CompiledModel compiled_model; + std::optional adapters; + if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) { + adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_unet")); + m_adapter_controller = AdapterController(m_model, *adapters, device); + compiled_model = core.compile_model(m_model, device, *filtered_properties); + } else { + compiled_model = core.compile_model(m_model, device, properties); + } m_request = compiled_model.create_infer_request(); // release the original model m_model.reset(); @@ -109,8 +110,10 @@ void UNet2DConditionModel::set_hidden_states(const std::string& tensor_name, ov: m_request.set_tensor(tensor_name, encoder_hidden_states); } -void UNet2DConditionModel::set_adapters(const AdapterConfig& adapters) { - m_adapter_controller.apply(m_request, adapters); +void UNet2DConditionModel::set_adapters(const std::optional& adapters) { + if(adapters) { + m_adapter_controller.apply(m_request, *adapters); + } } ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep) { diff --git a/src/cpp/src/text2image/stable_diffusion_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_pipeline.hpp index 014f1763bf..ed1508534f 100644 --- a/src/cpp/src/text2image/stable_diffusion_pipeline.hpp +++ b/src/cpp/src/text2image/stable_diffusion_pipeline.hpp @@ -133,6 +133,7 @@ class Text2ImagePipeline::StableDiffusionPipeline : public Text2ImagePipeline::D m_clip_text_encoder->compile(device, properties); m_unet->compile(device, properties); m_vae_decoder->compile(device, properties); + update_adapters_from_properties(properties, m_generation_config.adapters); } ov::Tensor generate(const std::string& positive_prompt, diff --git a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp index e42a34d287..8a239f418f 100644 --- a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp @@ -67,32 +67,22 @@ class Text2ImagePipeline::StableDiffusionXLPipeline : public Text2ImagePipeline: const std::string text_encoder = data["text_encoder"][1].get(); if (text_encoder == "CLIPTextModel") { - AdapterConfig adapters; - std::filesystem::path path = root_dir / "text_encoder"; - if(update_adapters_from_properties(properties, adapters) && !adapters.get_tensor_name_prefix()) { - auto clip_properties = properties; - adapters.set_tensor_name_prefix("lora_te1"); - clip_properties[ov::genai::adapters.name()] = adapters; - m_clip_text_encoder = std::make_shared(path, device, clip_properties); - } else { - m_clip_text_encoder = std::make_shared(path, device, properties); - } + m_clip_text_encoder = std::make_shared( + root_dir / "text_encoder", + device, + properties_for_text_encoder(properties, "lora_te1") + ); } else { OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type"); } const std::string text_encoder_2 = data["text_encoder_2"][1].get(); if (text_encoder_2 == "CLIPTextModelWithProjection") { - AdapterConfig adapters; - std::filesystem::path path = root_dir / "text_encoder_2"; - if(update_adapters_from_properties(properties, adapters) && !adapters.get_tensor_name_prefix()) { - auto clip_properties = properties; - adapters.set_tensor_name_prefix("lora_te2"); - clip_properties[ov::genai::adapters.name()] = adapters; 
diff --git a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp
index e42a34d287..8a239f418f 100644
--- a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp
+++ b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp
@@ -67,32 +67,22 @@ class Text2ImagePipeline::StableDiffusionXLPipeline : public Text2ImagePipeline::DiffusionPipeline {
         const std::string text_encoder = data["text_encoder"][1].get<std::string>();
         if (text_encoder == "CLIPTextModel") {
-            AdapterConfig adapters;
-            std::filesystem::path path = root_dir / "text_encoder";
-            if(update_adapters_from_properties(properties, adapters) && !adapters.get_tensor_name_prefix()) {
-                auto clip_properties = properties;
-                adapters.set_tensor_name_prefix("lora_te1");
-                clip_properties[ov::genai::adapters.name()] = adapters;
-                m_clip_text_encoder = std::make_shared<CLIPTextModel>(path, device, clip_properties);
-            } else {
-                m_clip_text_encoder = std::make_shared<CLIPTextModel>(path, device, properties);
-            }
+            m_clip_text_encoder = std::make_shared<CLIPTextModel>(
+                root_dir / "text_encoder",
+                device,
+                properties_for_text_encoder(properties, "lora_te1")
+            );
         } else {
             OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
         }
 
         const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
         if (text_encoder_2 == "CLIPTextModelWithProjection") {
-            AdapterConfig adapters;
-            std::filesystem::path path = root_dir / "text_encoder_2";
-            if(update_adapters_from_properties(properties, adapters) && !adapters.get_tensor_name_prefix()) {
-                auto clip_properties = properties;
-                adapters.set_tensor_name_prefix("lora_te2");
-                clip_properties[ov::genai::adapters.name()] = adapters;
-                m_clip_text_encoder_with_projection = std::make_shared<CLIPTextModelWithProjection>(path, device, clip_properties);
-            } else {
-                m_clip_text_encoder_with_projection = std::make_shared<CLIPTextModelWithProjection>(path, device, properties);
-            }
+            m_clip_text_encoder_with_projection = std::make_shared<CLIPTextModelWithProjection>(
+                root_dir / "text_encoder_2",
+                device,
+                properties_for_text_encoder(properties, "lora_te2")
+            );
         } else {
             OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
         }
@@ -138,10 +128,11 @@ class Text2ImagePipeline::StableDiffusionXLPipeline : public Text2ImagePipeline::DiffusionPipeline {
     }
 
     void compile(const std::string& device, const ov::AnyMap& properties) override {
-        m_clip_text_encoder->compile(device, properties);
-        m_clip_text_encoder_with_projection->compile(device, properties);
+        m_clip_text_encoder->compile(device, properties_for_text_encoder(properties, "lora_te1"));
+        m_clip_text_encoder_with_projection->compile(device, properties_for_text_encoder(properties, "lora_te2"));
         m_unet->compile(device, properties);
         m_vae_decoder->compile(device, properties);
+        update_adapters_from_properties(properties, m_generation_config.adapters);
     }
 
     ov::Tensor generate(const std::string& positive_prompt,
@@ -371,6 +362,15 @@ class Text2ImagePipeline::StableDiffusionXLPipeline : public Text2ImagePipeline::DiffusionPipeline {
         OPENVINO_ASSERT(generation_config.negative_prompt_3.empty(), "Negative prompt 3 is not used by ", pipeline_name);
     }
 
+    ov::AnyMap properties_for_text_encoder(ov::AnyMap properties, const std::string& tensor_name_prefix) {
+        std::optional<AdapterConfig> adapters;
+        if(update_adapters_from_properties(properties, adapters) && !adapters->get_tensor_name_prefix()) {
+            adapters->set_tensor_name_prefix(tensor_name_prefix);
+            properties[ov::genai::adapters.name()] = *adapters;
+        }
+        return properties;
+    }
+
     std::shared_ptr<CLIPTextModel> m_clip_text_encoder;
     std::shared_ptr<CLIPTextModelWithProjection> m_clip_text_encoder_with_projection;
     std::shared_ptr<UNet2DConditionModel> m_unet;
diff --git a/src/cpp/src/utils.hpp b/src/cpp/src/utils.hpp
index c412b4b650..9adc46c87a 100644
--- a/src/cpp/src/utils.hpp
+++ b/src/cpp/src/utils.hpp
@@ -24,11 +24,14 @@ ov::Tensor extend_attention(ov::Tensor attention_mask);
 
 void update_position_ids(ov::Tensor&& position_ids, const ov::Tensor&& attention_mask);
 
+template <typename T> struct OmitOptional { using value = T; };
+template <typename T> struct OmitOptional<std::optional<T>> { using value = T; };
+
 template <typename T>
 void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& param) {
     auto it = config_map.find(name);
     if (it != config_map.end()) {
-        param = it->second.as<T>();
+        param = it->second.as<typename OmitOptional<T>::value>();
     }
 }
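
The `OmitOptional` trait is what makes the new optional fields work with `read_anymap_param`: for `T = std::optional<U>` the `ov::Any` is unwrapped as `U`, and assigning a `U` into the optional engages it. A self-contained illustration of the trait in plain C++17, independent of OpenVINO:

    #include <optional>
    #include <type_traits>

    template <typename T> struct OmitOptional { using value = T; };
    template <typename T> struct OmitOptional<std::optional<T>> { using value = T; };

    // The nested alias strips one level of std::optional, if any.
    static_assert(std::is_same_v<OmitOptional<int>::value, int>);
    static_assert(std::is_same_v<OmitOptional<std::optional<int>>::value, int>);

    int main() {
        std::optional<int> param;   // stand-in for an optional config field
        int extracted = 42;         // stand-in for it->second.as<...>()
        param = extracted;          // assignment engages the optional
        return param.value() == 42 ? 0 : 1;
    }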