Skip to content

Commit

Permalink
Merge branch 'prompt_lookup' of github.com:iefode/openvino.genai into…
Browse files Browse the repository at this point in the history
… prompt_lookup
  • Loading branch information
iefode committed Dec 16, 2024
2 parents f4bb74b + 9f87e89 commit a1ba5be
Show file tree
Hide file tree
Showing 31 changed files with 507 additions and 185 deletions.
12 changes: 2 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,7 @@ import openvino_genai

device = 'CPU' # GPU can be used as well
pipe = openvino_genai.Text2ImagePipeline("./dreamlike_anime_1_0_ov/INT8", device)
image_tensor = pipe.generate(
"cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting",
width=512,
height=512,
num_inference_steps=20
)
image_tensor = pipe.generate("cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting")

image = Image.fromarray(image_tensor.data[0])
image.save("image.bmp")
Expand All @@ -218,10 +213,7 @@ int main(int argc, char* argv[]) {
const std::string device = "CPU"; // GPU can be used as well

ov::genai::Text2ImagePipeline pipe(models_path, device);
ov::Tensor image = pipe.generate(prompt,
ov::genai::width(512),
ov::genai::height(512),
ov::genai::num_inference_steps(20));
ov::Tensor image = pipe.generate(prompt);

imwrite("image.bmp", image, true);
}
Expand Down
11 changes: 4 additions & 7 deletions samples/cpp/image_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Users can change the sample code and play with the following generation paramete
- Apply multiple different LoRA adapters and mix them with different blending coefficients
- (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps


> [!NOTE]
> An image generated with HuggingFace / Optimum Intel is not the same as one generated by this C++ sample: C++ random generation with MT19937 produces results that differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor` (which uses `torch.Generator` inside). So, it's expected that the Diffusers and C++ versions produce different images, because the latent images are initialized differently.
## Download and convert the models and tokenizers

The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
Expand Down Expand Up @@ -88,13 +92,6 @@ With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)


## Note

- Image generated with HuggingFace / Optimum Intel is not the same generated by this C++ sample:

C++ random generation with MT19937 produces results that differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor`. So, it's expected that the Python and C++ versions produce different images, because the latent images are initialized differently. Users can implement their own random generator derived from `ov::genai::Generator` and pass it to the `Text2ImagePipeline::generate` method.

## Run text to image with multiple devices

The `heterogeneous_stable_diffusion` sample demonstrates how a Text2ImagePipeline object can be created from individual subcomponents - scheduler, text encoder, unet, & vae decoder. This approach gives fine-grained control over the devices used to execute each stage of the stable diffusion pipeline.
Expand Down
8 changes: 4 additions & 4 deletions samples/cpp/image_generation/lora_text2image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ int32_t main(int32_t argc, char* argv[]) try {

std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n";
ov::Tensor image = pipe.generate(prompt,
ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20));
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("lora.bmp", image, true);

std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n";
image = pipe.generate(prompt,
ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters
ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
ov::genai::width(512),
ov::genai::height(896),
ov::genai::num_inference_steps(20));
ov::genai::num_inference_steps(20),
ov::genai::rng_seed(42));
imwrite("baseline.bmp", image, true);

return EXIT_SUCCESS;
Expand Down
8 changes: 6 additions & 2 deletions samples/python/image_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Users can change the sample code and play with the following generation paramete
- Apply multiple different LoRA adapters and mix them with different blending coefficients
- (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps

> [!NOTE]
> OpenVINO GenAI is written in C++ and uses the `CppStdGenerator` random generator in image generation pipelines, while the Diffusers library uses `torch.Generator` under the hood.
> To get the same results as HuggingFace, pass a manually created `torch.Generator(device='cpu').manual_seed(seed)` to the Diffusers generation pipelines and `openvino_genai.TorchGenerator(seed)` to the OpenVINO GenAI pipelines as the value of the `generator` kwarg.
## Download and convert the models and tokenizers

The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
Expand All @@ -41,7 +45,7 @@ Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pi

Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting`

![](./text2image.bmp)
![](./../../cpp/image_generation/512x512.bmp)

### Run with callback

Expand Down Expand Up @@ -85,7 +89,7 @@ Check the difference:

With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)
![](./../../cpp/image_generation/lora.bmp) | ![](./../../cpp/image_generation/baseline.bmp)

## Run text to image with multiple devices

Expand Down
3 changes: 0 additions & 3 deletions samples/python/image_generation/baseline.bmp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ def main():
height=height,
guidance_scale=guidance_scale,
num_inference_steps=number_of_inference_steps_per_image,
num_images_per_prompt=1,
generator=openvino_genai.CppStdGenerator(42)
num_images_per_prompt=1
)

image = Image.fromarray(image_tensor.data[0])
Expand Down
3 changes: 0 additions & 3 deletions samples/python/image_generation/lora.bmp

This file was deleted.

24 changes: 5 additions & 19 deletions samples/python/image_generation/lora_text2image.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,6 @@

import openvino as ov
import openvino_genai
import numpy as np
import sys


class Generator(openvino_genai.Generator):
def __init__(self, seed, mu=0.0, sigma=1.0):
openvino_genai.Generator.__init__(self)
np.random.seed(seed)
self.mu = mu
self.sigma = sigma

def next(self):
return np.random.normal(self.mu, self.sigma)


def image_write(path: str, image_tensor: ov.Tensor):
from PIL import Image
Expand All @@ -46,23 +32,23 @@ def main():

# LoRA adapters passed to the constructor will be activated by default in next generates
pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)

print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp")
image = pipe.generate(prompt,
generator=Generator(42),
width=512,
height=896,
num_inference_steps=20)
num_inference_steps=20,
rng_seed=42)

image_write("lora.bmp", image)
print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp")
image = pipe.generate(prompt,
# passing adapters in generate overrides adapters set in the constructor; openvino_genai.AdapterConfig() means no adapters
adapters=openvino_genai.AdapterConfig(),
generator=Generator(42),
width=512,
height=896,
num_inference_steps=20
)
num_inference_steps=20,
rng_seed=42)
image_write("baseline.bmp", image)


Expand Down
3 changes: 0 additions & 3 deletions samples/python/image_generation/text2image.bmp

This file was deleted.

15 changes: 1 addition & 14 deletions samples/python/image_generation/text2image.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,6 @@

import openvino_genai
from PIL import Image
import numpy as np

class Generator(openvino_genai.Generator):
def __init__(self, seed, mu=0.0, sigma=1.0):
openvino_genai.Generator.__init__(self)
np.random.seed(seed)
self.mu = mu
self.sigma = sigma

def next(self):
return np.random.normal(self.mu, self.sigma)


def main():
Expand All @@ -33,9 +22,7 @@ def main():
width=512,
height=512,
num_inference_steps=20,
num_images_per_prompt=1,
generator=Generator(42) # openvino_genai.CppStdGenerator can be used to have same images as C++ sample
)
num_images_per_prompt=1)

image = Image.fromarray(image_tensor.data[0])
image.save("image.bmp")
Expand Down
8 changes: 4 additions & 4 deletions src/cpp/include/openvino/genai/generation_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ enum class StopCriteria { EARLY, HEURISTIC, NEVER };
* @param top_k the number of highest probability vocabulary tokens to keep for top-k-filtering.
* @param do_sample whether or not to use multinomial random sampling that add up to `top_p` or higher are kept.
* @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty.
* @param presence_penalty reduces absolute log prob if the token was generated at least once. Ignored for non continuous batching.
* @param frequency_penalty reduces absolute log prob as many times as the token was generated. Ignored for non continuous batching.
* @param rng_seed initializes random generator. Ignored for non continuous batching.
* @param presence_penalty reduces absolute log prob if the token was generated at least once.
* @param frequency_penalty reduces absolute log prob as many times as the token was generated.
* @param rng_seed initializes random generator.
*
* Assisting generation parameters:
* @param assistant_confidence_threshold the lower token probability of candidate to be validated by main model in case of dynamic strategy candidates number update.
Expand Down Expand Up @@ -179,7 +179,7 @@ static constexpr ov::Property<float> repetition_penalty{"repetition_penalty"};
static constexpr ov::Property<int64_t> eos_token_id{"eos_token_id"};
static constexpr ov::Property<float> presence_penalty{"presence_penalty"};
static constexpr ov::Property<float> frequency_penalty{"frequency_penalty"};
static constexpr ov::Property<size_t> rng_seed{"rng_seed"};
extern OPENVINO_GENAI_EXPORTS ov::Property<size_t> rng_seed;

static constexpr ov::Property<float> assistant_confidence_threshold{"assistant_confidence_threshold"};
static constexpr ov::Property<size_t> num_assistant_tokens{"num_assistant_tokens"};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ class OPENVINO_GENAI_EXPORTS Generator {
*/
virtual ov::Tensor randn_tensor(const ov::Shape& shape);

/**
* Sets a new initial seed value for the random generator
* @param new_seed A new seed value
*/
virtual void seed(size_t new_seed) = 0;

/**
* Default dtor defined to ensure working RTTI.
*/
Expand All @@ -58,9 +64,11 @@ class OPENVINO_GENAI_EXPORTS CppStdGenerator : public Generator {

virtual float next() override;

virtual void seed(size_t new_seed) override;

private:
std::mt19937 gen;
std::normal_distribution<float> normal;
std::mt19937 m_gen;
std::normal_distribution<float> m_normal;
};

/**
Expand All @@ -81,17 +89,25 @@ struct OPENVINO_GENAI_EXPORTS ImageGenerationConfig {
size_t num_images_per_prompt = 1;

/**
* Random generator to initial latents, add noise to initial images in case of image to image / inpainting pipelines
* Random generator to initialize latents, add noise to initial images in case of image to image / inpainting pipelines
* By default, random generator is initialized as `CppStdGenerator(generation_config.rng_seed)`
* @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
*/
std::shared_ptr<Generator> generator = nullptr;

/**
* Seed for random generator
* @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
*/
std::shared_ptr<Generator> generator = std::make_shared<CppStdGenerator>(42);
size_t rng_seed = 42;

float guidance_scale = 7.5f;
int64_t height = -1;
int64_t width = -1;
size_t num_inference_steps = 50;

/**
* Max sequence lenght for T4 encoder / tokenizer used in SD3 / FLUX models
* Max sequence length for T5 encoder / tokenizer used in SD3 / FLUX models
*/
int max_sequence_length = -1;

Expand Down Expand Up @@ -203,6 +219,12 @@ static constexpr ov::Property<float> strength{"strength"};
*/
static constexpr ov::Property<std::shared_ptr<Generator>> generator{"generator"};

/**
* Seed for random generator
* @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
*/
extern OPENVINO_GENAI_EXPORTS ov::Property<size_t> rng_seed;

/**
* This parameter limits max sequence length for T5 encoder for SD3 and FLUX models.
* T5 tokenizer output is padded with pad tokens to 'max_sequence_length' within a pipeline.
Expand Down
8 changes: 5 additions & 3 deletions src/cpp/src/continuous_batching_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,11 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::generate(const std::vector<o
}
if (streamer_ptr && generations.at(0)->can_read()) {
std::unordered_map<uint64_t, GenerationOutput> token = generations.at(0).get()->back();
OPENVINO_ASSERT(1 == token.size());
OPENVINO_ASSERT(1 == token.begin()->second.generated_ids.size());
continue_generation = !streamer_ptr->put(token.begin()->second.generated_ids.at(0));
for (const auto& gen_token : token.begin()->second.generated_ids) {
if (!streamer_ptr->put(gen_token)) {
break;
}
}
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/cpp/src/generation_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@
namespace ov {
namespace genai {

ov::Property<size_t> rng_seed{"rng_seed"};

GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
using utils::read_json_param;

std::ifstream f(json_path);
OPENVINO_ASSERT(f.is_open(), "Failed to open '", json_path, "' with generation config");

nlohmann::json data = nlohmann::json::parse(f);

read_json_param(data, "max_new_tokens", max_new_tokens);
read_json_param(data, "max_length", max_length);
// note that ignore_eos is not present in HF GenerationConfig
Expand Down Expand Up @@ -103,6 +105,9 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) {
read_anymap_param(config_map, "echo", echo);
read_anymap_param(config_map, "logprobs", logprobs);
read_anymap_param(config_map, "adapters", adapters);

// TODO: add support of 'generator' property similar to Image generation
read_anymap_param(config_map, "rng_seed", rng_seed);
}

size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const {
Expand Down
1 change: 0 additions & 1 deletion src/cpp/src/image_generation/flux_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#pragma once

#include <cassert>
#include <ctime>

#include "image_generation/diffusion_pipeline.hpp"
#include "image_generation/numpy_utils.hpp"
Expand Down
28 changes: 25 additions & 3 deletions src/cpp/src/image_generation/generation_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ ov::Tensor Generator::randn_tensor(const ov::Shape& shape) {
}

CppStdGenerator::CppStdGenerator(uint32_t seed)
: gen(seed), normal(0.0f, 1.0f) {
: m_gen(seed), m_normal(0.0f, 1.0f) {
}

float CppStdGenerator::next() {
return normal(gen);
return m_normal(m_gen);
}

void CppStdGenerator::seed(size_t new_seed) {
m_gen.seed(new_seed);
}

//
Expand All @@ -55,7 +59,6 @@ void ImageGenerationConfig::update_generation_config(const ov::AnyMap& propertie
read_anymap_param(properties, "negative_prompt_2", negative_prompt_2);
read_anymap_param(properties, "negative_prompt_3", negative_prompt_3);
read_anymap_param(properties, "num_images_per_prompt", num_images_per_prompt);
read_anymap_param(properties, "generator", generator);
read_anymap_param(properties, "guidance_scale", guidance_scale);
read_anymap_param(properties, "height", height);
read_anymap_param(properties, "width", width);
Expand All @@ -64,6 +67,25 @@ void ImageGenerationConfig::update_generation_config(const ov::AnyMap& propertie
read_anymap_param(properties, "adapters", adapters);
read_anymap_param(properties, "max_sequence_length", max_sequence_length);

// 'generator' has higher priority than 'seed' parameter
const bool have_generator_param = properties.find(ov::genai::generator.name()) != properties.end();
if (have_generator_param) {
read_anymap_param(properties, "generator", generator);
} else {
read_anymap_param(properties, "rng_seed", rng_seed);

// initialize random generator with a given seed value
if (!generator) {
generator = std::make_shared<CppStdGenerator>(rng_seed);
}

const bool have_rng_seed = properties.find(ov::genai::rng_seed.name()) != properties.end();
if (have_rng_seed) {
// we need to change the seed because the user has specified it manually
generator->seed(rng_seed);
}
}

validate();
}

Expand Down
Loading

0 comments on commit a1ba5be

Please sign in to comment.