From d17f7168f278ef98acfdc7ba1ac93e4c759a6402 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 13 Dec 2024 08:03:11 +0400 Subject: [PATCH] [Image generation] Added num_steps to callback (#1372) With image to image and inpainting, a user-passed `num_inference_steps` is scaled based on the `strength` parameter. So, we need to report the actual number of steps within `callback` CC @RyanMetcalfeInt8 --- samples/cpp/image_generation/README.md | 6 ++--- samples/python/image_generation/README.md | 6 ++--- .../image_generation/generation_config.hpp | 4 ++-- .../src/image_generation/flux_pipeline.hpp | 22 ++++++------------- .../stable_diffusion_3_pipeline.hpp | 21 ++++++++---------- .../stable_diffusion_pipeline.hpp | 13 +++++------ src/python/py_utils.cpp | 2 +- tools/llm_bench/llm_bench_utils/ov_utils.py | 2 +- 8 files changed, 31 insertions(+), 45 deletions(-) diff --git a/samples/cpp/image_generation/README.md b/samples/cpp/image_generation/README.md index 795bea8999..8a5cc5aa19 100644 --- a/samples/cpp/image_generation/README.md +++ b/samples/cpp/image_generation/README.md @@ -52,9 +52,9 @@ Please find the template of the callback usage below. 
```cpp ov::genai::Text2ImagePipeline pipe(models_path, device); -auto callback = [&](size_t step, ov::Tensor& intermediate_res) -> bool { - std::cout << "Image generation step: " << step << std::endl; - ov::Tensor img = pipe.decode(intermediate_res); // get intermediate image tensor +auto callback = [&](size_t step, size_t num_steps, ov::Tensor& latent) -> bool { + std::cout << "Image generation step: " << step << " / " << num_steps << std::endl; + ov::Tensor img = pipe.decode(latent); // get intermediate image tensor if (your_condition) // return true if you want to interrupt image generation return true; return false; diff --git a/samples/python/image_generation/README.md b/samples/python/image_generation/README.md index 4abe45b2b4..321f3f6d05 100644 --- a/samples/python/image_generation/README.md +++ b/samples/python/image_generation/README.md @@ -52,9 +52,9 @@ Please find the template of the callback usage below. ```python pipe = openvino_genai.Text2ImagePipeline(model_dir, device) -def callback(step, intermediate_res): - print("Image generation step: ", step) - image_tensor = pipe.decode(intermediate_res) # get intermediate image tensor +def callback(step, num_steps, latent): + print(f"Image generation step: {step} / {num_steps}") + image_tensor = pipe.decode(latent) # get intermediate image tensor if your_condition: # return True if you want to interrupt image generation return True return False diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp index 0b749ecd83..50e576466d 100644 --- a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp +++ b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp @@ -216,11 +216,11 @@ static constexpr ov::Property max_sequence_length{"max_sequence_length"}; /** * User callback for image generation pipelines, which is called within a pipeline with the following arguments: - * - Total number 
of inference steps. Note, that in case of 'strength' parameter, the number of inference steps is reduced linearly * - Current inference step + * - Total number of inference steps. Note, that in case of 'strength' parameter, the number of inference steps is reduced linearly * - Tensor representing current latent. Such latent can be converted to human-readable representation via image generation pipeline 'decode()' method */ -static constexpr ov::Property<std::function<bool(size_t, ov::Tensor&)>> callback{"callback"}; +static constexpr ov::Property<std::function<bool(size_t, size_t, ov::Tensor&)>> callback{"callback"}; /** * Function to pass 'ImageGenerationConfig' as property to 'generate()' call. diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp index 4cdac5bb1a..ac82bd0cab 100644 --- a/src/cpp/src/image_generation/flux_pipeline.hpp +++ b/src/cpp/src/image_generation/flux_pipeline.hpp @@ -326,9 +326,11 @@ class FluxPipeline : public DiffusionPipeline { m_custom_generation_config.strength = 1.0f; } - if (!initial_image) { - // in case of typical text to image generation, we need to ignore 'strength' - m_custom_generation_config.strength = 1.0f; + // Use callback if defined + std::function<bool(size_t, size_t, ov::Tensor&)> callback = nullptr; + auto callback_iter = properties.find(ov::genai::callback.name()); + if (callback_iter != properties.end()) { + callback = callback_iter->second.as<std::function<bool(size_t, size_t, ov::Tensor&)>>(); } const size_t vae_scale_factor = m_vae->get_vae_scale_factor(); @@ -355,14 +357,6 @@ class FluxPipeline : public DiffusionPipeline { m_scheduler->set_timesteps_with_sigma(sigmas, mu); std::vector<float> timesteps = m_scheduler->get_float_timesteps(); - // Use callback if defined - std::function<bool(size_t, ov::Tensor&)> callback; - auto callback_iter = properties.find(ov::genai::callback.name()); - bool do_callback = callback_iter != properties.end(); - if (do_callback) { - callback = callback_iter->second.as<std::function<bool(size_t, ov::Tensor&)>>(); - } - // 6.
Denoising loop ov::Tensor timestep(ov::element::f32, {1}); float* timestep_data = timestep.data<float>(); @@ -375,10 +369,8 @@ class FluxPipeline : public DiffusionPipeline { auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator); latents = scheduler_step_result["latent"]; - if (do_callback) { - if (callback(inference_step, latents)) { - return ov::Tensor(ov::element::u8, {}); + if (callback && callback(inference_step, timesteps.size(), latents)) { + return ov::Tensor(ov::element::u8, {}); } } diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index 4e9a70ec2d..3cdaa409d1 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -431,6 +431,13 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { generation_config.strength = 1.0f; } + // Use callback if defined + std::function<bool(size_t, size_t, ov::Tensor&)> callback = nullptr; + auto callback_iter = properties.find(ov::genai::callback.name()); + if (callback_iter != properties.end()) { + callback = callback_iter->second.as<std::function<bool(size_t, size_t, ov::Tensor&)>>(); + } + const auto& transformer_config = m_transformer->get_config(); const size_t vae_scale_factor = m_vae->get_vae_scale_factor(); const size_t batch_size_multiplier = do_classifier_free_guidance(generation_config.guidance_scale) @@ -467,14 +474,6 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { // 6.
Denoising loop ov::Tensor noisy_residual_tensor(ov::element::f32, {}); - // Use callback if defined - std::function<bool(size_t, ov::Tensor&)> callback; - auto callback_iter = properties.find(ov::genai::callback.name()); - bool do_callback = callback_iter != properties.end(); - if (do_callback) { - callback = callback_iter->second.as<std::function<bool(size_t, ov::Tensor&)>>(); - } - for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) { // concat the same latent twice along a batch dimension in case of CFG if (batch_size_multiplier > 1) { @@ -510,10 +509,8 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { auto scheduler_step_result = m_scheduler->step(noisy_residual_tensor, latent, inference_step, generation_config.generator); latent = scheduler_step_result["latent"]; - if (do_callback) { - if (callback(inference_step, latent)) { - return ov::Tensor(ov::element::u8, {}); + if (callback && callback(inference_step, timesteps.size(), latent)) { + return ov::Tensor(ov::element::u8, {}); } } diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp index 9dbdbac088..c53c9b7d25 100644 --- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp @@ -306,11 +306,10 @@ class StableDiffusionPipeline : public DiffusionPipeline { } // use callback if defined - std::function<bool(size_t, ov::Tensor&)> callback; + std::function<bool(size_t, size_t, ov::Tensor&)> callback = nullptr; auto callback_iter = properties.find(ov::genai::callback.name()); - bool do_callback = callback_iter != properties.end(); - if (do_callback) { - callback = callback_iter->second.as<std::function<bool(size_t, ov::Tensor&)>>(); + if (callback_iter != properties.end()) { + callback = callback_iter->second.as<std::function<bool(size_t, size_t, ov::Tensor&)>>(); } // Stable Diffusion pipeline @@ -400,10 +399,8 @@ class StableDiffusionPipeline : public DiffusionPipeline { const auto it = scheduler_step_result.find("denoised"); denoised = it != scheduler_step_result.end() ?
it->second : latent; - if (do_callback) { - if (callback(inference_step, denoised)) { - return ov::Tensor(ov::element::u8, {}); + if (callback && callback(inference_step, timesteps.size(), denoised)) { + return ov::Tensor(ov::element::u8, {}); } } diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 9d33318f0a..45a0c46174 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -280,7 +280,7 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) { } else if (py::isinstance(py_obj)) { return py::cast>(py_obj); } else if (py::isinstance<py::function>(py_obj) && property_name == "callback") { - return py::cast<std::function<bool(size_t, ov::Tensor&)>>(py_obj); + return py::cast<std::function<bool(size_t, size_t, ov::Tensor&)>>(py_obj); } else if ((py::isinstance(py_obj) || py::isinstance(py_obj) || py::isinstance(py_obj)) && property_name == "streamer") { auto streamer = py::cast(py_obj); return ov::genai::streamer(pystreamer_to_streamer(streamer)).second; diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py index c5fa422824..8a28fbe355 100644 --- a/tools/llm_bench/llm_bench_utils/ov_utils.py +++ b/tools/llm_bench/llm_bench_utils/ov_utils.py @@ -366,7 +366,7 @@ def __init__(self) -> types.NoneType: self.start_time = time.perf_counter() self.duration = -1 - def __call__(self, step, latents): + def __call__(self, step, num_steps, latents): self.iteration_time.append(time.perf_counter() - self.start_time) self.start_time = time.perf_counter() return False