Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ImageGeneration] EulerAncestralDiscreteScheduler #1407

Merged
merged 3 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class OPENVINO_GENAI_EXPORTS Scheduler {
DDIM,
EULER_DISCRETE,
FLOW_MATCH_EULER_DISCRETE,
PNDM
PNDM,
EULER_ANCESTRAL_DISCRETE
};

static std::shared_ptr<Scheduler> from_config(const std::filesystem::path& scheduler_config_path,
Expand Down
261 changes: 261 additions & 0 deletions src/cpp/src/image_generation/schedulers/euler_ancestral_discrete.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <cassert>
#include <random>
#include <fstream>
#include <iterator>

#include "image_generation/schedulers/euler_ancestral_discrete.hpp"
#include "image_generation/numpy_utils.hpp"

namespace ov {
namespace genai {

/// Reads the scheduler settings from the JSON file at `scheduler_config_path`.
///
/// The file must be openable (checked with OPENVINO_ASSERT). Its contents are
/// parsed as JSON and every configuration field is passed to `read_json_param`
/// together with its JSON key.
/// NOTE(review): presumably `read_json_param` keeps the in-class default when a
/// key is absent - confirm against its implementation.
EulerAncestralDiscreteScheduler::Config::Config(const std::filesystem::path& scheduler_config_path) {
    std::ifstream config_stream(scheduler_config_path);
    OPENVINO_ASSERT(config_stream.is_open(), "Failed to open ", scheduler_config_path);

    using utils::read_json_param;
    const nlohmann::json config_json = nlohmann::json::parse(config_stream);

    // Training-time noise schedule.
    read_json_param(config_json, "num_train_timesteps", num_train_timesteps);
    read_json_param(config_json, "beta_start", beta_start);
    read_json_param(config_json, "beta_end", beta_end);
    read_json_param(config_json, "beta_schedule", beta_schedule);
    read_json_param(config_json, "trained_betas", trained_betas);

    // Inference-time options.
    read_json_param(config_json, "steps_offset", steps_offset);
    read_json_param(config_json, "prediction_type", prediction_type);
    read_json_param(config_json, "timestep_spacing", timestep_spacing);
    read_json_param(config_json, "rescale_betas_zero_snr", rescale_betas_zero_snr);
}

EulerAncestralDiscreteScheduler::EulerAncestralDiscreteScheduler(const std::filesystem::path& scheduler_config_path)
: EulerAncestralDiscreteScheduler(Config(scheduler_config_path)) {
}

/// Builds the scheduler state from an already-populated configuration.
///
/// Steps performed:
///  1. pick the betas (explicit `trained_betas`, or a LINEAR / SCALED_LINEAR
///     schedule between `beta_start` and `beta_end`);
///  2. optionally rescale them (`rescale_betas_zero_snr`);
///  3. store the cumulative products of `1 - beta` in m_alphas_cumprod;
///  4. store sigma = ((1 - a) / a)^0.5 for every cumulative product `a`, walked
///     from last to first, followed by a trailing 0;
///  5. fill m_timesteps with num_train_timesteps-1 ... 0 as a default schedule.
///
/// Throws via OPENVINO_THROW when `trained_betas` is empty and `beta_schedule`
/// is neither LINEAR nor SCALED_LINEAR.
EulerAncestralDiscreteScheduler::EulerAncestralDiscreteScheduler(const Config& scheduler_config): m_config(scheduler_config) {
    using numpy_utils::linspace;

    std::vector<float> betas;
    if (!m_config.trained_betas.empty()) {
        betas = m_config.trained_betas;
    } else if (m_config.beta_schedule == BetaSchedule::LINEAR) {
        betas = linspace<float>(m_config.beta_start, m_config.beta_end, m_config.num_train_timesteps);
    } else if (m_config.beta_schedule == BetaSchedule::SCALED_LINEAR) {
        // Interpolate linearly in sqrt(beta) space, then square back.
        float start = std::sqrt(m_config.beta_start);
        float end = std::sqrt(m_config.beta_end);
        betas = linspace<float>(start, end, m_config.num_train_timesteps);
        for (float& beta : betas) {
            beta *= beta;
        }
        // TODO: else if beta_schedule == "squaredcos_cap_v2"
    } else {
        OPENVINO_THROW(
            "'beta_schedule' must be one of 'LINEAR' or 'SCALED_LINEAR'. Please, add support of other types");
    }

    if (m_config.rescale_betas_zero_snr) {
        using numpy_utils::rescale_zero_terminal_snr;
        rescale_zero_terminal_snr(betas);
    }

    // Running product of alpha = 1 - beta. This performs the same float
    // multiplications, in the same order, as recomputing every prefix product
    // from scratch, but in O(n) instead of O(n^2).
    m_alphas_cumprod.reserve(betas.size());
    float alpha_cumprod = 1.0f;
    for (const float beta : betas) {
        alpha_cumprod *= 1.0f - beta;
        m_alphas_cumprod.push_back(alpha_cumprod);
    }

    // With zero-terminal-SNR rescaling the last cumulative product is replaced
    // by 2^-24. The emptiness check avoids calling back() on an empty vector.
    if (m_config.rescale_betas_zero_snr && !m_alphas_cumprod.empty()) {
        m_alphas_cumprod.back() = std::pow(2, -24);
    }

    // Sigmas are stored from the last cumulative product to the first, with a
    // final 0 appended.
    m_sigmas.reserve(m_alphas_cumprod.size() + 1);
    for (auto it = m_alphas_cumprod.rbegin(); it != m_alphas_cumprod.rend(); ++it) {
        float sigma = std::pow(((1 - (*it)) / (*it)), 0.5);
        m_sigmas.push_back(sigma);
    }
    m_sigmas.push_back(0);

    // setable values
    auto linspaced =
        linspace<float>(0.0f, static_cast<float>(m_config.num_train_timesteps - 1), m_config.num_train_timesteps, true);
    m_timesteps.reserve(linspaced.size());
    for (auto it = linspaced.rbegin(); it != linspaced.rend(); ++it) {
        m_timesteps.push_back(static_cast<int64_t>(std::round(*it)));
    }

    // -1 marks "not set yet"; set_timesteps() assigns the real values.
    m_num_inference_steps = -1;
    m_step_index = -1;
    m_begin_index = -1;
    m_is_scale_input_called = false;
}

// Rebuilds the inference schedule for `num_inference_steps` steps.
//
// Clears m_timesteps / m_sigmas, resets the step counters, selects timesteps
// according to m_config.timestep_spacing, derives one sigma per selected
// timestep (plus a trailing 0), and finally drops the leading timesteps that
// `strength` says to skip. The untrimmed schedule is kept in
// m_schedule_timesteps.
// Throws via OPENVINO_THROW for an unsupported 'timestep_spacing' value.
// NOTE(review): num_inference_steps == 0 is not guarded; the LEADING branch
// would then divide by zero - confirm callers never pass 0.
void EulerAncestralDiscreteScheduler::set_timesteps(size_t num_inference_steps, float strength) {
m_timesteps.clear();
m_sigmas.clear();
m_step_index = m_begin_index = -1;
m_num_inference_steps = num_inference_steps;
std::vector<float> sigmas;

switch (m_config.timestep_spacing) {
case TimestepSpacing::LINSPACE: {
// Evenly spaced values over [0, num_train_timesteps - 1], rounded and stored in descending order.
using numpy_utils::linspace;
float end = static_cast<float>(m_config.num_train_timesteps - 1);
auto linspaced = linspace<float>(0.0f, end, num_inference_steps, true);
for (auto it = linspaced.rbegin(); it != linspaced.rend(); ++it) {
m_timesteps.push_back(static_cast<int64_t>(std::round(*it)));
}
break;
}
case TimestepSpacing::LEADING: {
// Integer step ratio; timesteps are i * step_ratio + steps_offset for i = num_inference_steps-1 ... 0.
size_t step_ratio = m_config.num_train_timesteps / m_num_inference_steps;
// NOTE(review): `i` is unsigned, so `i != -1` compares against the maximum size_t value;
// the loop ends when `--i` wraps around after processing i == 0.
for (size_t i = num_inference_steps - 1; i != -1; --i) {
m_timesteps.push_back(i * step_ratio + m_config.steps_offset);
}
break;
}
case TimestepSpacing::TRAILING: {
// Walk down from num_train_timesteps in floating-point strides of step_ratio; each value is rounded and shifted by -1.
float step_ratio = static_cast<float>(m_config.num_train_timesteps) / static_cast<float>(m_num_inference_steps);
for (float i = m_config.num_train_timesteps; i > 0; i -= step_ratio) {
m_timesteps.push_back(static_cast<int64_t>(std::round(i)) - 1);
}
break;
}
default:
OPENVINO_THROW("Unsupported value for 'timestep_spacing'");
}

// One sigma per entry of m_alphas_cumprod, in forward order.
for (const float& i : m_alphas_cumprod) {
float sigma = std::pow(((1 - i) / i), 0.5);
sigmas.push_back(sigma);
}

// Evaluate the sigma table at the selected timesteps; x_data_points are the indices 0 .. sigmas.size()-1.
// NOTE(review): presumably numpy-style linear interpolation - confirm against numpy_utils::interp.
using numpy_utils::interp;
std::vector<size_t> x_data_points(sigmas.size());
std::iota(x_data_points.begin(), x_data_points.end(), 0);
m_sigmas = interp(m_timesteps, x_data_points, sigmas);
m_sigmas.push_back(0.0f);

// apply 'strength' used in image generation
// in diffusers, it's https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L650
{
// init_timestep is capped at num_inference_steps by std::min, so the subtraction below cannot underflow.
// NOTE(review): std::max<size_t>(..., 0) has no effect on unsigned operands; a negative `strength`
// is not handled before the float -> size_t conversion - confirm callers validate it.
size_t init_timestep = std::min<size_t>(num_inference_steps * strength, num_inference_steps);
size_t t_start = std::max<size_t>(num_inference_steps - init_timestep, 0);
// keep original timesteps
m_schedule_timesteps = m_timesteps;
// while return patched ones by 'strength' parameter
m_timesteps = std::vector<int64_t>(m_timesteps.begin() + t_start, m_timesteps.end());
// m_sigmas is left untrimmed; step() / scale_model_input() start reading it at m_begin_index.
m_begin_index = t_start;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have you tried that image2image works the same as in diffusers? when strength parameter < 1

Copy link
Contributor Author

@likholat likholat Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Input image:
image

prompt = "robotic racoon with wings"
strength=0.8
num_inference_steps=20

Result:
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For strength=1.0
image

}

/// Performs one Euler-ancestral denoising update.
///
/// @param noise_pred      model output for the current step (read as float data)
/// @param latents         current sample (read as float data)
/// @param inference_step  not used; progress is tracked through m_step_index
/// @param generator       source of the random noise added at the end of the step
/// @return map with "latent" (the next sample) and "denoised" (the predicted
///         original sample)
///
/// Throws via OPENVINO_THROW when the prediction type is unsupported or when
/// the internal step index does not address a valid (sigma, next sigma) pair,
/// e.g. because set_timesteps() has not been called.
/// Advances m_step_index by one on success.
std::map<std::string, ov::Tensor> EulerAncestralDiscreteScheduler::step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step, std::shared_ptr<Generator> generator) {
    // noise_pred - model_output
    // latents - sample

    if (m_step_index == -1)
        m_step_index = m_begin_index;

    // Both m_sigmas[m_step_index] and m_sigmas[m_step_index + 1] are read below.
    if (m_step_index < 0 || static_cast<size_t>(m_step_index) + 1 >= m_sigmas.size()) {
        OPENVINO_THROW("Scheduler step index ", m_step_index, " is out of range; call set_timesteps() before step()");
    }

    const float sigma = m_sigmas[m_step_index];
    const float sigma_to = m_sigmas[m_step_index + 1];

    // std::pow(float, int) yields double; the squares are kept in that type so
    // every expression below evaluates exactly as before.
    const auto sigma_sq = std::pow(sigma, 2);
    const auto sigma_to_sq = std::pow(sigma_to, 2);

    const float* model_output_data = noise_pred.data<float>();
    const float* sample_data = latents.data<float>();
    const size_t element_count = noise_pred.get_size();

    ov::Tensor pred_original_sample(noise_pred.get_element_type(), noise_pred.get_shape());
    float* pred_original_sample_data = pred_original_sample.data<float>();

    switch (m_config.prediction_type) {
    case PredictionType::EPSILON:
        for (size_t i = 0; i < element_count; ++i) {
            pred_original_sample_data[i] = sample_data[i] - sigma * model_output_data[i];
        }
        break;
    case PredictionType::V_PREDICTION: {
        // Loop-invariant factors, computed once instead of once per element.
        const auto sigma_sq_plus_one = sigma_sq + 1;
        const auto model_output_scale = -sigma / std::pow(sigma_sq_plus_one, 0.5);
        for (size_t i = 0; i < element_count; ++i) {
            pred_original_sample_data[i] = model_output_data[i] * model_output_scale +
                                           (sample_data[i] / sigma_sq_plus_one);
        }
        break;
    }
    default:
        OPENVINO_THROW("Unsupported value for 'PredictionType': must be one of `epsilon`, or `v_prediction`");
    }

    // Split the move from sigma to sigma_to into a deterministic part
    // (down to sigma_down) and a stochastic part of magnitude sigma_up.
    const float sigma_up = std::sqrt(sigma_to_sq * (sigma_sq - sigma_to_sq) / sigma_sq);
    const float sigma_down = std::sqrt(sigma_to_sq - std::pow(sigma_up, 2));
    const float dt = sigma_down - sigma;

    ov::Tensor prev_sample = ov::Tensor(latents.get_element_type(), latents.get_shape());
    float* prev_sample_data = prev_sample.data<float>();

    ov::Tensor noise = generator->randn_tensor(noise_pred.get_shape());
    const float* noise_data = noise.data<float>();

    for (size_t i = 0; i < prev_sample.get_size(); ++i) {
        float derivative = (sample_data[i] - pred_original_sample_data[i]) / sigma;
        prev_sample_data[i] = (sample_data[i] + derivative * dt) + noise_data[i] * sigma_up;
    }

    m_step_index++;

    return {{"latent", prev_sample}, {"denoised", pred_original_sample}};
}

/// Returns the position of the first entry of m_schedule_timesteps equal to
/// `timestep`. Throws via OPENVINO_THROW when no entry matches.
size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const{
    size_t position = 0;
    for (const auto& scheduled : m_schedule_timesteps) {
        if (scheduled == timestep) {
            return position;
        }
        ++position;
    }

    OPENVINO_THROW("Failed to find index for timestep ", timestep);
}

/// Adds `sigma * noise` element-wise to the float data of `init_latent`,
/// writing through the pointer returned by init_latent.data<float>().
/// `sigma` is the m_sigmas entry at the schedule position of `latent_timestep`;
/// the lookup throws when that timestep is not part of the schedule.
/// NOTE(review): assumes both tensors hold float data and that `noise` has at
/// least as many elements as `init_latent` - neither is checked here.
void EulerAncestralDiscreteScheduler::add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const {
    const float sigma = m_sigmas[_index_for_timestep(latent_timestep)];

    float* latent_values = init_latent.data<float>();
    const float* noise_values = noise.data<float>();
    const size_t element_count = init_latent.get_size();

    for (size_t idx = 0; idx < element_count; ++idx) {
        latent_values[idx] += sigma * noise_values[idx];
    }
}

// Returns a copy of m_timesteps, the timestep list currently held by the scheduler.
std::vector<int64_t> EulerAncestralDiscreteScheduler::get_timesteps() const {
return m_timesteps;
}

/// Divides every float element of `sample` in place by (sigma^2 + 1)^0.5, where
/// sigma is the m_sigmas entry at the current step index.
///
/// @param sample          tensor whose float data is scaled through data<float>()
/// @param inference_step  not used; the position comes from m_step_index
///
/// Initialises m_step_index from m_begin_index on first use and records that
/// scaling was performed (m_is_scale_input_called).
void EulerAncestralDiscreteScheduler::scale_model_input(ov::Tensor sample, size_t inference_step) {
    if (m_step_index == -1)
        m_step_index = m_begin_index;

    const float sigma = m_sigmas[m_step_index];

    // The divisor does not depend on the element, so it is computed once.
    // `auto` keeps the promoted type returned by std::pow, so each division
    // yields the same value as evaluating the expression per element.
    const auto divisor = std::pow((std::pow(sigma, 2) + 1), 0.5);

    float* sample_data = sample.data<float>();
    const size_t element_count = sample.get_size();
    for (size_t i = 0; i < element_count; ++i) {
        sample_data[i] /= divisor;
    }
    m_is_scale_input_called = true;
}

float EulerAncestralDiscreteScheduler::get_init_noise_sigma() const {
float max_sigma = *std::max_element(m_sigmas.begin(), m_sigmas.end());

if (m_config.timestep_spacing == TimestepSpacing::LINSPACE ||
m_config.timestep_spacing == TimestepSpacing::TRAILING) {
return max_sigma;
}

return std::sqrt(std::pow(max_sigma, 2) + 1);
}

} // namespace genai
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <filesystem>
#include <list>
#include <string>

#include "image_generation/schedulers/types.hpp"
#include "image_generation/schedulers/ischeduler.hpp"

namespace ov {
namespace genai {

/// Euler ancestral discrete scheduler for image generation pipelines.
/// Implements the IScheduler interface.
class EulerAncestralDiscreteScheduler : public IScheduler {
public:
    /// Scheduler settings; the defaults below apply unless overridden.
    struct Config {
        /// Number of entries in the training noise schedule.
        int32_t num_train_timesteps = 1000;
        /// First beta of the generated schedule.
        float beta_start = 0.0001f;
        /// Last beta of the generated schedule.
        float beta_end = 0.02f;
        /// How betas are spread between beta_start and beta_end.
        BetaSchedule beta_schedule = BetaSchedule::LINEAR;
        /// Explicit betas; when non-empty they replace the generated schedule.
        std::vector<float> trained_betas = {};
        /// Added to every timestep when LEADING spacing is used.
        size_t steps_offset = 0;
        /// What the model output represents.
        PredictionType prediction_type = PredictionType::EPSILON;
        /// How inference timesteps are picked from the training range.
        TimestepSpacing timestep_spacing = TimestepSpacing::LEADING;
        /// Whether betas are rescaled for zero terminal SNR.
        bool rescale_betas_zero_snr = false;

        Config() = default;
        /// Loads the settings from a JSON file.
        explicit Config(const std::filesystem::path& scheduler_config_path);
    };

    /// Creates a scheduler from a JSON configuration file.
    explicit EulerAncestralDiscreteScheduler(const std::filesystem::path& scheduler_config_path);
    /// Creates a scheduler from an in-memory configuration.
    explicit EulerAncestralDiscreteScheduler(const Config& scheduler_config);

    /// Rebuilds timesteps and sigmas for the requested number of steps and strength.
    void set_timesteps(size_t num_inference_steps, float strength) override;

    /// Returns the timesteps currently scheduled.
    std::vector<std::int64_t> get_timesteps() const override;

    /// Returns the sigma used to scale the initial noise.
    float get_init_noise_sigma() const override;

    /// Scales `sample` in place for the current step.
    void scale_model_input(ov::Tensor sample, size_t inference_step) override;

    /// Performs one denoising update; returns the "latent" and "denoised" tensors.
    std::map<std::string, ov::Tensor> step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step, std::shared_ptr<Generator> generator) override;

    /// Adds noise scaled by the sigma of `latent_timestep` to `init_latent`.
    void add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const override;

private:
    Config m_config;

    /// Cumulative products of (1 - beta).
    std::vector<float> m_alphas_cumprod;
    /// Sigma table of the current schedule, terminated by 0.
    std::vector<float> m_sigmas;
    /// Timesteps exposed to callers (after trimming by strength).
    std::vector<int64_t> m_timesteps;
    /// Full timestep schedule before trimming by strength.
    std::vector<int64_t> m_schedule_timesteps;
    size_t m_num_inference_steps;

    /// Index into m_sigmas for the next step; -1 until first use.
    int m_step_index;
    /// First index to use, derived from strength; -1 until set_timesteps().
    int m_begin_index;
    bool m_is_scale_input_called;

    /// Position of `timestep` in m_schedule_timesteps; throws when absent.
    size_t _index_for_timestep(int64_t timestep) const;
};

} // namespace genai
} // namespace ov
3 changes: 3 additions & 0 deletions src/cpp/src/image_generation/schedulers/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "image_generation/schedulers/euler_discrete.hpp"
#include "image_generation/schedulers/flow_match_euler_discrete.hpp"
#include "image_generation/schedulers/pndm.hpp"
#include "image_generation/schedulers/euler_ancestral_discrete.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -41,6 +42,8 @@ std::shared_ptr<Scheduler> Scheduler::from_config(const std::filesystem::path& s
scheduler = std::make_shared<FlowMatchEulerDiscreteScheduler>(scheduler_config_path);
} else if (scheduler_type == Scheduler::Type::PNDM) {
scheduler = std::make_shared<PNDMScheduler>(scheduler_config_path);
} else if (scheduler_type == Scheduler::Type::EULER_ANCESTRAL_DISCRETE) {
scheduler = std::make_shared<EulerAncestralDiscreteScheduler>(scheduler_config_path);
} else {
OPENVINO_THROW("Unsupported scheduler type '", scheduler_type, ". Please, manually create scheduler via supported one");
}
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/image_generation/schedulers/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ void read_json_param(const nlohmann::json& data, const std::string& name, Schedu
param = Scheduler::FLOW_MATCH_EULER_DISCRETE;
else if (scheduler_type_str == "PNDMScheduler")
param = Scheduler::PNDM;
else if (scheduler_type_str == "EulerAncestralDiscreteScheduler")
param = Scheduler::EULER_ANCESTRAL_DISCRETE;
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
else if (!scheduler_type_str.empty()) {
OPENVINO_THROW("Unsupported value for 'scheduler' ", scheduler_type_str);
}
Expand Down
1 change: 1 addition & 0 deletions src/docs/SUPPORTED_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
<ul>
<li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9"><code>stabilityai/stable-diffusion-xl-base-0.9</code></a></li>
<li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><code>stabilityai/stable-diffusion-xl-base-1.0</code></a></li>
<li><a href="https://huggingface.co/stabilityai/sdxl-turbo"><code>stabilityai/sdxl-turbo</code></a></li>
</ul>
</td>
</tr>
Expand Down
5 changes: 4 additions & 1 deletion src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1343,15 +1343,18 @@ class Scheduler:
FLOW_MATCH_EULER_DISCRETE

PNDM

EULER_ANCESTRAL_DISCRETE
"""
AUTO: typing.ClassVar[Scheduler.Type] # value = <Type.AUTO: 0>
DDIM: typing.ClassVar[Scheduler.Type] # value = <Type.DDIM: 3>
EULER_ANCESTRAL_DISCRETE: typing.ClassVar[Scheduler.Type] # value = <Type.EULER_ANCESTRAL_DISCRETE: 7>
EULER_DISCRETE: typing.ClassVar[Scheduler.Type] # value = <Type.EULER_DISCRETE: 4>
FLOW_MATCH_EULER_DISCRETE: typing.ClassVar[Scheduler.Type] # value = <Type.FLOW_MATCH_EULER_DISCRETE: 5>
LCM: typing.ClassVar[Scheduler.Type] # value = <Type.LCM: 1>
LMS_DISCRETE: typing.ClassVar[Scheduler.Type] # value = <Type.LMS_DISCRETE: 2>
PNDM: typing.ClassVar[Scheduler.Type] # value = <Type.PNDM: 6>
__members__: typing.ClassVar[dict[str, Scheduler.Type]] # value = {'AUTO': <Type.AUTO: 0>, 'LCM': <Type.LCM: 1>, 'LMS_DISCRETE': <Type.LMS_DISCRETE: 2>, 'DDIM': <Type.DDIM: 3>, 'EULER_DISCRETE': <Type.EULER_DISCRETE: 4>, 'FLOW_MATCH_EULER_DISCRETE': <Type.FLOW_MATCH_EULER_DISCRETE: 5>, 'PNDM': <Type.PNDM: 6>}
__members__: typing.ClassVar[dict[str, Scheduler.Type]] # value = {'AUTO': <Type.AUTO: 0>, 'LCM': <Type.LCM: 1>, 'LMS_DISCRETE': <Type.LMS_DISCRETE: 2>, 'DDIM': <Type.DDIM: 3>, 'EULER_DISCRETE': <Type.EULER_DISCRETE: 4>, 'FLOW_MATCH_EULER_DISCRETE': <Type.FLOW_MATCH_EULER_DISCRETE: 5>, 'PNDM': <Type.PNDM: 6>, 'EULER_ANCESTRAL_DISCRETE': <Type.EULER_ANCESTRAL_DISCRETE: 7>}
def __eq__(self, other: typing.Any) -> bool:
...
def __getstate__(self) -> int:
Expand Down
3 changes: 2 additions & 1 deletion src/python/py_image_generation_pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ void init_image_generation_pipelines(py::module_& m) {
.value("DDIM", ov::genai::Scheduler::Type::DDIM)
.value("EULER_DISCRETE", ov::genai::Scheduler::Type::EULER_DISCRETE)
.value("FLOW_MATCH_EULER_DISCRETE", ov::genai::Scheduler::Type::FLOW_MATCH_EULER_DISCRETE)
.value("PNDM", ov::genai::Scheduler::Type::PNDM);
.value("PNDM", ov::genai::Scheduler::Type::PNDM)
.value("EULER_ANCESTRAL_DISCRETE", ov::genai::Scheduler::Type::EULER_ANCESTRAL_DISCRETE);
image_generation_scheduler.def_static("from_config",
&ov::genai::Scheduler::from_config,
py::arg("scheduler_config_path"),
Expand Down
Loading
Loading