[ImageGeneration] EulerAncestralDiscreteScheduler (#1407)

![image](https://github.com/user-attachments/assets/6b688510-50d9-4f32-b80d-cb8cfa0b4b79) CVS-156803 CVS-158965 --------- Co-authored-by: Ilya Lavrenov <[email protected]>
openvinotoolkit · Dec 18, 2024 · c4c7bcd · c4c7bcd
1 parent 5f3f334
commit c4c7bcd
Show file tree

Hide file tree

Showing 9 changed files with 337 additions and 4 deletions.
diff --git a/src/cpp/include/openvino/genai/image_generation/scheduler.hpp b/src/cpp/include/openvino/genai/image_generation/scheduler.hpp
@@ -19,7 +19,8 @@ class OPENVINO_GENAI_EXPORTS Scheduler {
         DDIM,
         EULER_DISCRETE,
         FLOW_MATCH_EULER_DISCRETE,
-        PNDM
+        PNDM,
+        EULER_ANCESTRAL_DISCRETE
     };
 
     static std::shared_ptr<Scheduler> from_config(const std::filesystem::path& scheduler_config_path,

diff --git a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp
@@ -0,0 +1,261 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <fstream>
+#include <functional>
+#include <iterator>
+#include <numeric>
+#include <random>
+
+#include "image_generation/schedulers/euler_ancestral_discrete.hpp"
+#include "image_generation/numpy_utils.hpp"
+
+namespace ov {
+namespace genai {
+
+/// Populates the scheduler configuration from the JSON file at `scheduler_config_path`.
+/// Every supported key is handed to `read_json_param` together with the field it targets.
+/// An OPENVINO_ASSERT fires when the file cannot be opened.
+EulerAncestralDiscreteScheduler::Config::Config(const std::filesystem::path& scheduler_config_path) {
+    std::ifstream config_stream(scheduler_config_path);
+    OPENVINO_ASSERT(config_stream.is_open(), "Failed to open ", scheduler_config_path);
+
+    using utils::read_json_param;
+    nlohmann::json json_config = nlohmann::json::parse(config_stream);
+
+    // Training-time noise schedule.
+    read_json_param(json_config, "num_train_timesteps", num_train_timesteps);
+    read_json_param(json_config, "beta_start", beta_start);
+    read_json_param(json_config, "beta_end", beta_end);
+    read_json_param(json_config, "beta_schedule", beta_schedule);
+    read_json_param(json_config, "trained_betas", trained_betas);
+
+    // Inference-time behaviour.
+    read_json_param(json_config, "steps_offset", steps_offset);
+    read_json_param(json_config, "prediction_type", prediction_type);
+    read_json_param(json_config, "timestep_spacing", timestep_spacing);
+    read_json_param(json_config, "rescale_betas_zero_snr", rescale_betas_zero_snr);
+}
+
+/// Convenience constructor: parses the file at `scheduler_config_path` into a Config
+/// and delegates to the Config-based constructor.
+EulerAncestralDiscreteScheduler::EulerAncestralDiscreteScheduler(const std::filesystem::path& scheduler_config_path)
+    : EulerAncestralDiscreteScheduler(Config{scheduler_config_path}) {}
+
+/// Builds the training-time schedule from `scheduler_config`.
+///
+/// Steps performed:
+///  1. choose the betas (explicit `trained_betas`, or a LINEAR / SCALED_LINEAR ramp);
+///  2. optionally pass them through `rescale_zero_terminal_snr`;
+///  3. accumulate alphas_cumprod[i] = prod_{j<=i} (1 - beta_j);
+///  4. derive sigma = sqrt((1 - a) / a) for every cumulative alpha, stored in reverse
+///     order and terminated by 0;
+///  5. fill the default timesteps num_train_timesteps-1 ... 0.
+///
+/// Throws (OPENVINO_THROW / OPENVINO_ASSERT) for an unsupported `beta_schedule` or when
+/// the resulting beta list is empty.
+EulerAncestralDiscreteScheduler::EulerAncestralDiscreteScheduler(const Config& scheduler_config): m_config(scheduler_config) {
+    using numpy_utils::linspace;
+
+    std::vector<float> betas;
+    if (!m_config.trained_betas.empty()) {
+        betas = m_config.trained_betas;
+    } else if (m_config.beta_schedule == BetaSchedule::LINEAR) {
+        betas = linspace<float>(m_config.beta_start, m_config.beta_end, m_config.num_train_timesteps);
+    } else if (m_config.beta_schedule == BetaSchedule::SCALED_LINEAR) {
+        // Linear in sqrt(beta) space, then squared back.
+        const float start = std::sqrt(m_config.beta_start);
+        const float end = std::sqrt(m_config.beta_end);
+        betas = linspace<float>(start, end, m_config.num_train_timesteps);
+        for (float& beta : betas) {
+            beta *= beta;
+        }
+    // TODO: else if beta_schedule == "squaredcos_cap_v2"
+    } else {
+        OPENVINO_THROW(
+            "'beta_schedule' must be one of 'LINEAR' or 'SCALED_LINEAR'. Please, add support of other types");
+    }
+
+    // Everything below indexes into / takes .back() of containers sized by 'betas'.
+    OPENVINO_ASSERT(!betas.empty(),
+                    "Scheduler requires a non-empty beta schedule: set 'num_train_timesteps' > 0 or provide 'trained_betas'");
+
+    if (m_config.rescale_betas_zero_snr) {
+        using numpy_utils::rescale_zero_terminal_snr;
+        rescale_zero_terminal_snr(betas);
+    }
+
+    // Running product instead of re-multiplying every prefix from scratch (was O(n^2)).
+    m_alphas_cumprod.reserve(betas.size());
+    float running_product = 1.0f;
+    for (const float beta : betas) {
+        running_product *= 1.0f - beta;
+        m_alphas_cumprod.push_back(running_product);
+    }
+
+    if (m_config.rescale_betas_zero_snr) {
+        // Pin the last cumulative alpha to 2^-24: a tiny non-zero value, so the sigma
+        // computed from it below stays finite.
+        m_alphas_cumprod.back() = std::pow(2, -24);
+    }
+
+    // Sigmas are stored from the last training step to the first, followed by a final 0.
+    m_sigmas.reserve(m_alphas_cumprod.size() + 1);
+    for (auto it = m_alphas_cumprod.rbegin(); it != m_alphas_cumprod.rend(); ++it) {
+        m_sigmas.push_back(std::sqrt((1.0f - *it) / *it));
+    }
+    m_sigmas.push_back(0.0f);
+
+    // settable values
+    const auto ascending_timesteps =
+        linspace<float>(0.0f, static_cast<float>(m_config.num_train_timesteps - 1), m_config.num_train_timesteps, true);
+    for (auto it = ascending_timesteps.rbegin(); it != ascending_timesteps.rend(); ++it) {
+        m_timesteps.push_back(static_cast<int64_t>(std::round(*it)));
+    }
+    // NOTE: m_num_inference_steps is a size_t, so -1 is stored as its maximum value.
+    m_num_inference_steps = -1;
+    m_step_index = -1;
+    m_begin_index = -1;
+    m_is_scale_input_called = false;
+}
+
+/// Recomputes the inference schedule.
+///
+/// @param num_inference_steps number of denoising steps; must be positive.
+/// @param strength            fraction of the schedule to run (image-to-image); values
+///                            outside [0, 1] are clamped. The first
+///                            `num_inference_steps - floor(num_inference_steps * strength)`
+///                            timesteps are dropped from the value returned by get_timesteps().
+///
+/// After the call:
+///  - m_schedule_timesteps holds the full, untrimmed list of timesteps;
+///  - m_timesteps holds the trimmed list;
+///  - m_sigmas holds one interpolated sigma per untrimmed timestep plus a trailing 0;
+///  - m_begin_index is the offset of the first kept timestep, m_step_index is reset to -1.
+void EulerAncestralDiscreteScheduler::set_timesteps(size_t num_inference_steps, float strength) {
+    // A zero step count would divide by zero in the LEADING / TRAILING branches.
+    OPENVINO_ASSERT(num_inference_steps > 0, "'num_inference_steps' must be positive");
+
+    m_timesteps.clear();
+    m_sigmas.clear();
+    m_step_index = m_begin_index = -1;
+    m_num_inference_steps = num_inference_steps;
+
+    switch (m_config.timestep_spacing) {
+    case TimestepSpacing::LINSPACE: {
+        using numpy_utils::linspace;
+        const float last_train_timestep = static_cast<float>(m_config.num_train_timesteps - 1);
+        const auto ascending = linspace<float>(0.0f, last_train_timestep, num_inference_steps, true);
+        for (auto it = ascending.rbegin(); it != ascending.rend(); ++it) {
+            m_timesteps.push_back(static_cast<int64_t>(std::round(*it)));
+        }
+        break;
+    }
+    case TimestepSpacing::LEADING: {
+        const size_t step_ratio = m_config.num_train_timesteps / num_inference_steps;
+        // Count down from num_inference_steps - 1 to 0 without relying on unsigned wraparound.
+        for (size_t i = num_inference_steps; i-- > 0;) {
+            m_timesteps.push_back(i * step_ratio + m_config.steps_offset);
+        }
+        break;
+    }
+    case TimestepSpacing::TRAILING: {
+        const float train_steps = static_cast<float>(m_config.num_train_timesteps);
+        const float step_ratio = train_steps / static_cast<float>(num_inference_steps);
+        // Evaluate each value as train_steps - k * step_ratio instead of repeatedly
+        // subtracting from a float counter: accumulated rounding error could otherwise
+        // yield one extra iteration (and a timestep of -1) for ratios that are not
+        // exactly representable.
+        for (size_t k = 0; k < num_inference_steps; ++k) {
+            const float value = train_steps - static_cast<float>(k) * step_ratio;
+            m_timesteps.push_back(static_cast<int64_t>(std::round(value)) - 1);
+        }
+        break;
+    }
+    default:
+        OPENVINO_THROW("Unsupported value for 'timestep_spacing'");
+    }
+
+    // Sigma for every training timestep, in ascending timestep order.
+    std::vector<float> train_sigmas;
+    train_sigmas.reserve(m_alphas_cumprod.size());
+    for (const float alpha_cumprod : m_alphas_cumprod) {
+        train_sigmas.push_back(std::sqrt((1.0f - alpha_cumprod) / alpha_cumprod));
+    }
+
+    // Interpolate the training sigmas at the selected timesteps, then append the final 0.
+    using numpy_utils::interp;
+    std::vector<size_t> x_data_points(train_sigmas.size());
+    std::iota(x_data_points.begin(), x_data_points.end(), 0);
+    m_sigmas = interp(m_timesteps, x_data_points, train_sigmas);
+    m_sigmas.push_back(0.0f);
+
+    // apply 'strength' used in image generation
+    // in diffusers, it's https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L650
+    {
+        // Clamp first: converting a negative float to size_t is undefined behaviour.
+        const float clamped_strength = std::clamp(strength, 0.0f, 1.0f);
+        const size_t init_timestep =
+            std::min<size_t>(static_cast<size_t>(num_inference_steps * clamped_strength), num_inference_steps);
+        // init_timestep <= num_inference_steps, so this cannot underflow.
+        const size_t t_start = num_inference_steps - init_timestep;
+        // keep original timesteps
+        m_schedule_timesteps = m_timesteps;
+        // while return patched ones by 'strength' parameter
+        m_timesteps = std::vector<int64_t>(m_timesteps.begin() + t_start, m_timesteps.end());
+        m_begin_index = static_cast<int>(t_start);
+    }
+}
+
+/// Performs one ancestral Euler step.
+///
+/// @param noise_pred     model output for the current step (read as float data).
+/// @param latents        current sample (read as float data, same element count as `noise_pred`).
+/// @param inference_step unused; progress is tracked internally through m_step_index.
+/// @param generator      source of the noise added at the end of the step; must not be null.
+/// @return map with "latent" (the next sample) and "denoised" (the predicted original sample).
+///
+/// Throws when the internal step index does not address a valid pair of consecutive sigmas
+/// (e.g. more step() calls than scheduled steps), when `generator` is null, when the tensor
+/// sizes differ, or when the configured prediction type is unsupported.
+std::map<std::string, ov::Tensor> EulerAncestralDiscreteScheduler::step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step, std::shared_ptr<Generator> generator) {
+    // noise_pred - model_output
+    // latents - sample
+
+    if (m_step_index == -1)
+        m_step_index = m_begin_index;
+
+    // Both m_sigmas[m_step_index] and m_sigmas[m_step_index + 1] are read below.
+    OPENVINO_ASSERT(m_step_index >= 0 && static_cast<size_t>(m_step_index) + 1 < m_sigmas.size(),
+                    "Scheduler step index ", m_step_index, " is out of range of the sigma schedule");
+    OPENVINO_ASSERT(generator != nullptr, "Generator must be provided for EulerAncestralDiscreteScheduler::step");
+    OPENVINO_ASSERT(noise_pred.get_size() == latents.get_size(),
+                    "'noise_pred' and 'latents' must have the same number of elements");
+
+    const float sigma = m_sigmas[m_step_index];
+    const float sigma_to = m_sigmas[m_step_index + 1];
+    const size_t element_count = noise_pred.get_size();
+
+    const float* model_output_data = noise_pred.data<float>();
+    const float* sample_data = latents.data<float>();
+
+    ov::Tensor pred_original_sample(noise_pred.get_element_type(), noise_pred.get_shape());
+    float* pred_original_sample_data = pred_original_sample.data<float>();
+
+    switch (m_config.prediction_type) {
+    case PredictionType::EPSILON:
+        for (size_t i = 0; i < element_count; ++i) {
+            pred_original_sample_data[i] = sample_data[i] - sigma * model_output_data[i];
+        }
+        break;
+    case PredictionType::V_PREDICTION: {
+        // Loop-invariant coefficients, kept in double like the original per-element expression.
+        const double sigma_sq_plus_one = static_cast<double>(sigma) * sigma + 1.0;
+        const double model_coeff = -sigma / std::sqrt(sigma_sq_plus_one);
+        for (size_t i = 0; i < element_count; ++i) {
+            pred_original_sample_data[i] =
+                static_cast<float>(model_output_data[i] * model_coeff + sample_data[i] / sigma_sq_plus_one);
+        }
+        break;
+    }
+    default:
+        OPENVINO_THROW("Unsupported value for 'PredictionType': must be one of `epsilon`, or `v_prediction`");
+    }
+
+    // Split the move from sigma to sigma_to into a deterministic part (down to sigma_down)
+    // and a stochastic part (fresh noise scaled by sigma_up).
+    const double sigma_from_sq = static_cast<double>(sigma) * sigma;
+    const double sigma_to_sq = static_cast<double>(sigma_to) * sigma_to;
+    const float sigma_up = static_cast<float>(std::sqrt(sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq));
+    // Clamp at 0 so rounding can never push the radicand negative and produce NaN.
+    const float sigma_down =
+        static_cast<float>(std::sqrt(std::max(0.0, sigma_to_sq - static_cast<double>(sigma_up) * sigma_up)));
+    const float dt = sigma_down - sigma;
+
+    ov::Tensor prev_sample = ov::Tensor(latents.get_element_type(), latents.get_shape());
+    float* prev_sample_data = prev_sample.data<float>();
+
+    ov::Tensor noise = generator->randn_tensor(noise_pred.get_shape());
+    const float* noise_data = noise.data<float>();
+
+    for (size_t i = 0; i < element_count; ++i) {
+        const float derivative = (sample_data[i] - pred_original_sample_data[i]) / sigma;
+        prev_sample_data[i] = (sample_data[i] + derivative * dt) + noise_data[i] * sigma_up;
+    }
+
+    m_step_index++;
+
+    return {{"latent", prev_sample}, {"denoised", pred_original_sample}};
+}
+
+/// Returns the position of the first entry in m_schedule_timesteps equal to `timestep`.
+/// Throws via OPENVINO_THROW when no entry matches.
+size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const{
+    size_t position = 0;
+    for (const int64_t scheduled : m_schedule_timesteps) {
+        if (scheduled == timestep) {
+            return position;
+        }
+        ++position;
+    }
+
+    OPENVINO_THROW("Failed to find index for timestep ", timestep);
+}
+
+/// Adds `noise`, scaled by the sigma scheduled for `latent_timestep`, to `init_latent` in place:
+/// init_latent[i] += sigma * noise[i]. Both tensors are read as float data.
+/// Throws (through _index_for_timestep) when `latent_timestep` is not in the schedule.
+void EulerAncestralDiscreteScheduler::add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const {
+    const float sigma = m_sigmas[_index_for_timestep(latent_timestep)];
+
+    float* latent_values = init_latent.data<float>();
+    const float* noise_values = noise.data<float>();
+
+    const size_t element_count = init_latent.get_size();
+    for (size_t idx = 0; idx < element_count; ++idx) {
+        latent_values[idx] += sigma * noise_values[idx];
+    }
+}
+
+/// Returns a copy of the current timestep list (m_timesteps).
+std::vector<int64_t> EulerAncestralDiscreteScheduler::get_timesteps() const {
+    return m_timesteps;
+}
+
+/// Divides every element of `sample` (float data, modified in place) by sqrt(sigma^2 + 1),
+/// where sigma is the value scheduled for the current internal step index.
+///
+/// @param sample         tensor to scale in place.
+/// @param inference_step unused; the internal step index (m_step_index) selects the sigma.
+///
+/// Throws when the step index does not address a valid sigma (for instance when the
+/// index is still -1 because no begin index has been set).
+void EulerAncestralDiscreteScheduler::scale_model_input(ov::Tensor sample, size_t inference_step) {
+    if (m_step_index == -1)
+        m_step_index = m_begin_index;
+
+    OPENVINO_ASSERT(m_step_index >= 0 && static_cast<size_t>(m_step_index) < m_sigmas.size(),
+                    "Scheduler step index ", m_step_index, " is out of range of the sigma schedule");
+
+    const float sigma = m_sigmas[m_step_index];
+    // The divisor does not depend on the element: compute it once, in double like the
+    // original per-element expression.
+    const double divisor = std::sqrt(static_cast<double>(sigma) * sigma + 1.0);
+
+    float* sample_data = sample.data<float>();
+    const size_t element_count = sample.get_size();
+    for (size_t i = 0; i < element_count; ++i) {
+        sample_data[i] = static_cast<float>(sample_data[i] / divisor);
+    }
+    m_is_scale_input_called = true;
+}
+
+/// Returns the scale for the initial noise: the largest scheduled sigma for LINSPACE and
+/// TRAILING spacing, and sqrt(max_sigma^2 + 1) for any other spacing.
+float EulerAncestralDiscreteScheduler::get_init_noise_sigma() const {
+    const float largest_sigma = *std::max_element(m_sigmas.begin(), m_sigmas.end());
+
+    switch (m_config.timestep_spacing) {
+    case TimestepSpacing::LINSPACE:
+    case TimestepSpacing::TRAILING:
+        return largest_sigma;
+    default:
+        return std::sqrt(std::pow(largest_sigma, 2) + 1);
+    }
+}
+
+} // namespace genai
+} // namespace ov
diff --git a/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp b/src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.hpp
@@ -0,0 +1,61 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <filesystem>
+#include <list>
+#include <string>
+
+#include "image_generation/schedulers/types.hpp"
+#include "image_generation/schedulers/ischeduler.hpp"
+
+namespace ov {
+namespace genai {
+
+/// Euler ancestral discrete scheduler: an IScheduler implementation whose step() adds
+/// freshly generated noise on every call.
+class EulerAncestralDiscreteScheduler : public IScheduler {
+public:
+    /// Scheduler parameters; the defaults below are used unless overridden.
+    struct Config {
+        int32_t num_train_timesteps = 1000;
+        float beta_start = 0.0001f, beta_end = 0.02f;
+        BetaSchedule beta_schedule = BetaSchedule::LINEAR;
+        // When non-empty, used as the betas instead of the beta_start / beta_end ramp.
+        std::vector<float> trained_betas = {};
+        // Added to every timestep produced with LEADING spacing.
+        size_t steps_offset = 0;
+        PredictionType prediction_type = PredictionType::EPSILON;
+        TimestepSpacing timestep_spacing = TimestepSpacing::LEADING;
+        bool rescale_betas_zero_snr = false;
+
+        Config() = default;
+        /// Reads the parameters from the JSON file at `scheduler_config_path`.
+        explicit Config(const std::filesystem::path& scheduler_config_path);
+    };
+
+    /// Constructs the scheduler from a JSON config file.
+    explicit EulerAncestralDiscreteScheduler(const std::filesystem::path& scheduler_config_path);
+    /// Constructs the scheduler from an in-memory configuration.
+    explicit EulerAncestralDiscreteScheduler(const Config& scheduler_config);
+
+    /// Rebuilds the timesteps and sigmas for `num_inference_steps`; `strength` selects how
+    /// many of the leading timesteps are skipped.
+    void set_timesteps(size_t num_inference_steps, float strength) override;
+
+    /// Returns the current (possibly trimmed by `strength`) list of timesteps.
+    std::vector<std::int64_t> get_timesteps() const override;
+
+    /// Returns the scale for the initial noise, derived from the largest sigma.
+    float get_init_noise_sigma() const override;
+
+    /// Scales `sample` in place by 1 / sqrt(sigma^2 + 1) for the current step.
+    void scale_model_input(ov::Tensor sample, size_t inference_step) override;
+
+    /// Runs one denoising step; returns a map with the "latent" and "denoised" tensors.
+    std::map<std::string, ov::Tensor> step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step, std::shared_ptr<Generator> generator) override;
+
+    /// Adds `noise`, scaled by the sigma of `latent_timestep`, to `init_latent` in place.
+    void add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const override;
+
+private:
+    Config m_config;
+
+    // Cumulative products of (1 - beta) and the sigmas derived from them.
+    std::vector<float> m_alphas_cumprod, m_sigmas;
+    // m_timesteps is what get_timesteps() returns; m_schedule_timesteps keeps the full,
+    // untrimmed list used to look up sigma indices in add_noise().
+    std::vector<int64_t> m_timesteps, m_schedule_timesteps;
+    size_t m_num_inference_steps;
+
+    // Index into m_sigmas of the current step and of the first step; -1 means "not set".
+    int m_step_index, m_begin_index;
+    bool m_is_scale_input_called;
+
+    /// Returns the index of `timestep` within m_schedule_timesteps; throws if absent.
+    size_t _index_for_timestep(int64_t timestep) const;
+};
+
+} // namespace genai
+} // namespace ov
diff --git a/src/cpp/src/image_generation/schedulers/scheduler.cpp b/src/cpp/src/image_generation/schedulers/scheduler.cpp
@@ -11,6 +11,7 @@
 #include "image_generation/schedulers/euler_discrete.hpp"
 #include "image_generation/schedulers/flow_match_euler_discrete.hpp"
 #include "image_generation/schedulers/pndm.hpp"
+#include "image_generation/schedulers/euler_ancestral_discrete.hpp"
 
 namespace ov {
 namespace genai {
@@ -41,6 +42,8 @@ std::shared_ptr<Scheduler> Scheduler::from_config(const std::filesystem::path& s
         scheduler = std::make_shared<FlowMatchEulerDiscreteScheduler>(scheduler_config_path);
     } else if (scheduler_type == Scheduler::Type::PNDM) {
         scheduler = std::make_shared<PNDMScheduler>(scheduler_config_path);
+    } else if (scheduler_type == Scheduler::Type::EULER_ANCESTRAL_DISCRETE) {
+        scheduler = std::make_shared<EulerAncestralDiscreteScheduler>(scheduler_config_path);
     } else {
         OPENVINO_THROW("Unsupported scheduler type '", scheduler_type, ". Please, manually create scheduler via supported one");
     }

diff --git a/src/cpp/src/image_generation/schedulers/types.cpp b/src/cpp/src/image_generation/schedulers/types.cpp
@@ -57,6 +57,8 @@ void read_json_param(const nlohmann::json& data, const std::string& name, Schedu
             param = Scheduler::FLOW_MATCH_EULER_DISCRETE;
         else if (scheduler_type_str == "PNDMScheduler")
             param = Scheduler::PNDM;
+        else if (scheduler_type_str == "EulerAncestralDiscreteScheduler")
+            param = Scheduler::EULER_ANCESTRAL_DISCRETE;
         else if (!scheduler_type_str.empty()) {
             OPENVINO_THROW("Unsupported value for 'scheduler' ", scheduler_type_str);
         }

diff --git a/src/docs/SUPPORTED_MODELS.md b/src/docs/SUPPORTED_MODELS.md
@@ -217,6 +217,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
         <ul>
           <li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9"><code>stabilityai/stable-diffusion-xl-base-0.9</code></a></li>
           <li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><code>stabilityai/stable-diffusion-xl-base-1.0</code></a></li>
+          <li><a href="https://huggingface.co/stabilityai/sdxl-turbo"><code>stabilityai/sdxl-turbo</code></a></li>
         </ul>
       </td>
     </tr>

diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -1343,15 +1343,18 @@ class Scheduler:
           FLOW_MATCH_EULER_DISCRETE
         
           PNDM
+        
+          EULER_ANCESTRAL_DISCRETE
         """
         AUTO: typing.ClassVar[Scheduler.Type]  # value = <Type.AUTO: 0>
         DDIM: typing.ClassVar[Scheduler.Type]  # value = <Type.DDIM: 3>
+        EULER_ANCESTRAL_DISCRETE: typing.ClassVar[Scheduler.Type]  # value = <Type.EULER_ANCESTRAL_DISCRETE: 7>
         EULER_DISCRETE: typing.ClassVar[Scheduler.Type]  # value = <Type.EULER_DISCRETE: 4>
         FLOW_MATCH_EULER_DISCRETE: typing.ClassVar[Scheduler.Type]  # value = <Type.FLOW_MATCH_EULER_DISCRETE: 5>
         LCM: typing.ClassVar[Scheduler.Type]  # value = <Type.LCM: 1>
         LMS_DISCRETE: typing.ClassVar[Scheduler.Type]  # value = <Type.LMS_DISCRETE: 2>
         PNDM: typing.ClassVar[Scheduler.Type]  # value = <Type.PNDM: 6>
-        __members__: typing.ClassVar[dict[str, Scheduler.Type]]  # value = {'AUTO': <Type.AUTO: 0>, 'LCM': <Type.LCM: 1>, 'LMS_DISCRETE': <Type.LMS_DISCRETE: 2>, 'DDIM': <Type.DDIM: 3>, 'EULER_DISCRETE': <Type.EULER_DISCRETE: 4>, 'FLOW_MATCH_EULER_DISCRETE': <Type.FLOW_MATCH_EULER_DISCRETE: 5>, 'PNDM': <Type.PNDM: 6>}
+        __members__: typing.ClassVar[dict[str, Scheduler.Type]]  # value = {'AUTO': <Type.AUTO: 0>, 'LCM': <Type.LCM: 1>, 'LMS_DISCRETE': <Type.LMS_DISCRETE: 2>, 'DDIM': <Type.DDIM: 3>, 'EULER_DISCRETE': <Type.EULER_DISCRETE: 4>, 'FLOW_MATCH_EULER_DISCRETE': <Type.FLOW_MATCH_EULER_DISCRETE: 5>, 'PNDM': <Type.PNDM: 6>, 'EULER_ANCESTRAL_DISCRETE': <Type.EULER_ANCESTRAL_DISCRETE: 7>}
         def __eq__(self, other: typing.Any) -> bool:
             ...
         def __getstate__(self) -> int:

diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp
@@ -198,7 +198,8 @@ void init_image_generation_pipelines(py::module_& m) {
         .value("DDIM", ov::genai::Scheduler::Type::DDIM)
         .value("EULER_DISCRETE", ov::genai::Scheduler::Type::EULER_DISCRETE)
         .value("FLOW_MATCH_EULER_DISCRETE", ov::genai::Scheduler::Type::FLOW_MATCH_EULER_DISCRETE)
-        .value("PNDM", ov::genai::Scheduler::Type::PNDM);
+        .value("PNDM", ov::genai::Scheduler::Type::PNDM)
+        .value("EULER_ANCESTRAL_DISCRETE", ov::genai::Scheduler::Type::EULER_ANCESTRAL_DISCRETE);
     image_generation_scheduler.def_static("from_config",
         &ov::genai::Scheduler::from_config,
         py::arg("scheduler_config_path"),
-Original file line number
+Diff line change
@@ Expand Up @@
             <ul>
               <li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9"><code>stabilityai/stable-diffusion-xl-base-0.9</code></a></li>
               <li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><code>stabilityai/stable-diffusion-xl-base-1.0</code></a></li>
+              <li><a href="https://huggingface.co/stabilityai/sdxl-turbo"><code>stabilityai/sdxl-turbo</code></a></li>
             </ul>
           </td>
         </tr>
@@ Expand Down @@