Merge branch 'prompt_lookup' of github.com:iefode/openvino.genai into…

… prompt_lookup
openvinotoolkit · Dec 16, 2024 · a1ba5be · a1ba5be
2 parents f4bb74b + 9f87e89
commit a1ba5be
Show file tree

Hide file tree

Showing 31 changed files with 507 additions and 185 deletions.
diff --git a/README.md b/README.md
@@ -194,12 +194,7 @@ import openvino_genai
 
 device = 'CPU'  # GPU can be used as well
 pipe = openvino_genai.Text2ImagePipeline("./dreamlike_anime_1_0_ov/INT8", device)
-image_tensor = pipe.generate(
-    "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting",
-    width=512,
-    height=512,
-    num_inference_steps=20
-)
+image_tensor = pipe.generate("cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting")
 
 image = Image.fromarray(image_tensor.data[0])
 image.save("image.bmp")
@@ -218,10 +213,7 @@ int main(int argc, char* argv[]) {
    const std::string device = "CPU";  // GPU can be used as well
 
    ov::genai::Text2ImagePipeline pipe(models_path, device);
-   ov::Tensor image = pipe.generate(prompt,
-        ov::genai::width(512),
-        ov::genai::height(512),
-        ov::genai::num_inference_steps(20));
+   ov::Tensor image = pipe.generate(prompt);
 
    imwrite("image.bmp", image, true);
 }

diff --git a/samples/cpp/image_generation/README.md b/samples/cpp/image_generation/README.md
@@ -20,6 +20,10 @@ Users can change the sample code and play with the following generation paramete
 - Apply multiple different LoRA adapters and mix them with different blending coefficients
 - (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps
 
+
+> [!NOTE]
+> Images generated with HuggingFace / Optimum Intel are not the same as those generated by this C++ sample: C++ random generation with MT19937 produces different values than `numpy.random.randn()` and `diffusers.utils.randn_tensor` (which uses `torch.Generator` internally). So, it is expected that the Diffusers and C++ versions produce different images, because the latent images are initialized differently.
+
 ## Download and convert the models and tokenizers
 
 The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
@@ -88,13 +92,6 @@ With adapter | Without adapter
 :---:|:---:
 ![](./lora.bmp) | ![](./baseline.bmp)
 
-
-## Note
-
-- Image generated with HuggingFace / Optimum Intel is not the same generated by this C++ sample:
-
-C++ random generation with MT19937 results differ from `numpy.random.randn()` and `diffusers.utils.randn_tensor`. So, it's expected that image generated by Python and C++ versions provide different images, because latent images are initialize differently. Users can implement their own random generator derived from `ov::genai::Generator` and pass it to `Text2ImagePipeline::generate` method.
-
 ## Run text to image with multiple devices
 
 The `heterogeneous_stable_diffusion` sample demonstrates how a Text2ImagePipeline object can be created from individual subcomponents - scheduler, text encoder, unet, & vae decoder. This approach gives fine-grained control over the devices used to execute each stage of the stable diffusion pipeline.

diff --git a/samples/cpp/image_generation/lora_text2image.cpp b/samples/cpp/image_generation/lora_text2image.cpp
@@ -24,19 +24,19 @@ int32_t main(int32_t argc, char* argv[]) try {
 
     std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n";
     ov::Tensor image = pipe.generate(prompt,
-        ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
         ov::genai::width(512),
         ov::genai::height(896),
-        ov::genai::num_inference_steps(20));
+        ov::genai::num_inference_steps(20),
+        ov::genai::rng_seed(42));
     imwrite("lora.bmp", image, true);
 
     std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n";
     image = pipe.generate(prompt,
         ov::genai::adapters(),  // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters
-        ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
         ov::genai::width(512),
         ov::genai::height(896),
-        ov::genai::num_inference_steps(20));
+        ov::genai::num_inference_steps(20),
+        ov::genai::rng_seed(42));
     imwrite("baseline.bmp", image, true);
 
     return EXIT_SUCCESS;

diff --git a/samples/python/image_generation/README.md b/samples/python/image_generation/README.md
@@ -20,6 +20,10 @@ Users can change the sample code and play with the following generation paramete
 - Apply multiple different LoRA adapters and mix them with different blending coefficients
 - (Image to image and inpainting) Play with `strength` parameter to control how initial image is noised and reduce number of inference steps
 
+> [!NOTE]  
+> OpenVINO GenAI is written in C++ and uses the `CppStdGenerator` random generator in image generation pipelines, while the Diffusers library uses `torch.Generator` under the hood.
+> To get the same results as HuggingFace, pass a manually created `torch.Generator(device='cpu').manual_seed(seed)` to Diffusers generation pipelines and `openvino_genai.TorchGenerator(seed)` to OpenVINO GenAI pipelines as the value of the `generator` kwarg.
+
 ## Download and convert the models and tokenizers
 
 The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.
@@ -41,7 +45,7 @@ Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pi
 
 Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting`
 
-   ![](./text2image.bmp)
+   ![](./../../cpp/image_generation/512x512.bmp)
 
 ### Run with callback
 
@@ -85,7 +89,7 @@ Check the difference:
 
 With adapter | Without adapter
 :---:|:---:
-![](./lora.bmp) | ![](./baseline.bmp)
+![](./../../cpp/image_generation/lora.bmp) | ![](./../../cpp/image_generation/baseline.bmp)
 
 ## Run text to image with multiple devices
 

diff --git a/samples/python/image_generation/baseline.bmp b/samples/python/image_generation/baseline.bmp
diff --git a/samples/python/image_generation/heterogeneous_stable_diffusion.py b/samples/python/image_generation/heterogeneous_stable_diffusion.py
@@ -101,8 +101,7 @@ def main():
             height=height,
             guidance_scale=guidance_scale,
             num_inference_steps=number_of_inference_steps_per_image,
-            num_images_per_prompt=1,
-            generator=openvino_genai.CppStdGenerator(42)
+            num_images_per_prompt=1
         )
 
         image = Image.fromarray(image_tensor.data[0])

diff --git a/samples/python/image_generation/lora.bmp b/samples/python/image_generation/lora.bmp
diff --git a/samples/python/image_generation/lora_text2image.py b/samples/python/image_generation/lora_text2image.py
@@ -6,20 +6,6 @@
 
 import openvino as ov
 import openvino_genai
-import numpy as np
-import sys
-
-
-class Generator(openvino_genai.Generator):
-    def __init__(self, seed, mu=0.0, sigma=1.0):
-        openvino_genai.Generator.__init__(self)
-        np.random.seed(seed)
-        self.mu = mu
-        self.sigma = sigma
-
-    def next(self):
-        return np.random.normal(self.mu, self.sigma)
-
 
 def image_write(path: str, image_tensor: ov.Tensor):
     from PIL import Image
@@ -46,23 +32,23 @@ def main():
 
     # LoRA adapters passed to the constructor will be activated by default in next generates
     pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)
+
     print("Generating image with LoRA adapters applied, resulting image will be in lora.bmp")
     image = pipe.generate(prompt,
-                          generator=Generator(42),
                           width=512,
                           height=896,
-                          num_inference_steps=20)
+                          num_inference_steps=20,
+                          rng_seed=42)
 
     image_write("lora.bmp", image)
     print("Generating image without LoRA adapters applied, resulting image will be in baseline.bmp")
     image = pipe.generate(prompt,
                           # passing adapters in generate overrides adapters set in the constructor; openvino_genai.AdapterConfig() means no adapters
                           adapters=openvino_genai.AdapterConfig(),
-                          generator=Generator(42),
                           width=512,
                           height=896,
-                          num_inference_steps=20
-                          )
+                          num_inference_steps=20,
+                          rng_seed=42)
     image_write("baseline.bmp", image)
 
 

diff --git a/samples/python/image_generation/text2image.bmp b/samples/python/image_generation/text2image.bmp
diff --git a/samples/python/image_generation/text2image.py b/samples/python/image_generation/text2image.py
@@ -6,17 +6,6 @@
 
 import openvino_genai
 from PIL import Image
-import numpy as np
-
-class Generator(openvino_genai.Generator):
-    def __init__(self, seed, mu=0.0, sigma=1.0):
-        openvino_genai.Generator.__init__(self)
-        np.random.seed(seed)
-        self.mu = mu
-        self.sigma = sigma
-
-    def next(self):
-        return np.random.normal(self.mu, self.sigma)
 
 
 def main():
@@ -33,9 +22,7 @@ def main():
         width=512,
         height=512,
         num_inference_steps=20,
-        num_images_per_prompt=1,
-        generator=Generator(42)  # openvino_genai.CppStdGenerator can be used to have same images as C++ sample
-    )
+        num_images_per_prompt=1)
 
     image = Image.fromarray(image_tensor.data[0])
     image.save("image.bmp")

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -67,9 +67,9 @@ enum class StopCriteria { EARLY, HEURISTIC, NEVER };
  * @param top_k the number of highest probability vocabulary tokens to keep for top-k-filtering.
  * @param do_sample whether or not to use multinomial random sampling that add up to `top_p` or higher are kept.
  * @param repetition_penalty the parameter for repetition penalty. 1.0 means no penalty.
- * @param presence_penalty reduces absolute log prob if the token was generated at least once. Ignored for non continuous batching.
- * @param frequency_penalty reduces absolute log prob as many times as the token was generated. Ignored for non continuous batching.
- * @param rng_seed initializes random generator. Ignored for non continuous batching.
+ * @param presence_penalty reduces absolute log prob if the token was generated at least once.
+ * @param frequency_penalty reduces absolute log prob as many times as the token was generated.
+ * @param rng_seed initializes random generator.
  *
  * Assisting generation parameters:
  * @param assistant_confidence_threshold the lower token probability of candidate to be validated by main model in case of dynamic strategy candidates number update.
@@ -179,7 +179,7 @@ static constexpr ov::Property<float> repetition_penalty{"repetition_penalty"};
 static constexpr ov::Property<int64_t> eos_token_id{"eos_token_id"};
 static constexpr ov::Property<float> presence_penalty{"presence_penalty"};
 static constexpr ov::Property<float> frequency_penalty{"frequency_penalty"};
-static constexpr ov::Property<size_t> rng_seed{"rng_seed"};
+extern OPENVINO_GENAI_EXPORTS ov::Property<size_t> rng_seed;
 
 static constexpr ov::Property<float> assistant_confidence_threshold{"assistant_confidence_threshold"};
 static constexpr ov::Property<size_t> num_assistant_tokens{"num_assistant_tokens"};

diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp
@@ -39,6 +39,12 @@ class OPENVINO_GENAI_EXPORTS Generator {
      */
     virtual ov::Tensor randn_tensor(const ov::Shape& shape);
 
+    /**
+     * Sets a new initial seed value to random generator
+     * @param new_seed A new seed value
+     */
+    virtual void seed(size_t new_seed) = 0;
+
     /**
      * Default dtor defined to ensure working RTTI.
      */
@@ -58,9 +64,11 @@ class OPENVINO_GENAI_EXPORTS CppStdGenerator : public Generator {
 
     virtual float next() override;
 
+    virtual void seed(size_t new_seed) override;
+
 private:
-    std::mt19937 gen;
-    std::normal_distribution<float> normal;
+    std::mt19937 m_gen;
+    std::normal_distribution<float> m_normal;
 };
 
 /**
@@ -81,17 +89,25 @@ struct OPENVINO_GENAI_EXPORTS ImageGenerationConfig {
     size_t num_images_per_prompt = 1;
 
     /**
-     * Random generator to initial latents, add noise to initial images in case of image to image / inpainting pipelines
+     * Random generator to initialize latents, add noise to initial images in case of image to image / inpainting pipelines
+     * By default, random generator is initialized as `CppStdGenerator(generation_config.rng_seed)`
+     * @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
+     */
+    std::shared_ptr<Generator> generator = nullptr;
+
+    /**
+     * Seed for random generator
+     * @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
      */
-    std::shared_ptr<Generator> generator = std::make_shared<CppStdGenerator>(42);
+    size_t rng_seed = 42;
 
     float guidance_scale = 7.5f;
     int64_t height = -1;
     int64_t width = -1;
     size_t num_inference_steps = 50;
 
     /**
-     * Max sequence lenght for T4 encoder / tokenizer used in SD3 / FLUX models
+     * Max sequence length for T5 encoder / tokenizer used in SD3 / FLUX models
      */
     int max_sequence_length = -1;
 
@@ -203,6 +219,12 @@ static constexpr ov::Property<float> strength{"strength"};
  */
 static constexpr ov::Property<std::shared_ptr<Generator>> generator{"generator"};
 
+/**
+ * Seed for random generator
+ * @note If `generator` is specified, it has higher priority than `rng_seed` parameter.
+ */
+extern OPENVINO_GENAI_EXPORTS ov::Property<size_t> rng_seed;
+
 /**
  * This parameters limits max sequence length for T5 encoder for SD3 and FLUX models.
  * T5 tokenizer output is padded with pad tokens to 'max_sequence_length' within a pipeline.

diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp
@@ -287,9 +287,11 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::generate(const std::vector<o
         }
         if (streamer_ptr && generations.at(0)->can_read()) {
             std::unordered_map<uint64_t, GenerationOutput> token = generations.at(0).get()->back();
-            OPENVINO_ASSERT(1 == token.size());
-            OPENVINO_ASSERT(1 == token.begin()->second.generated_ids.size());
-            continue_generation = !streamer_ptr->put(token.begin()->second.generated_ids.at(0));
+            // Forward every newly generated token to the streamer. As in the replaced
+            // single-token code, a true result from put() means generation must stop:
+            // record that in continue_generation and stop feeding further tokens.
+            for (const auto& gen_token : token.begin()->second.generated_ids) {
+                continue_generation = !streamer_ptr->put(gen_token);
+                if (!continue_generation) {
+                    break;
+                }
+            }
         }
     }
 

diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
@@ -14,14 +14,16 @@
 namespace ov {
 namespace genai {
 
+ov::Property<size_t> rng_seed{"rng_seed"};
+
 GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
     using utils::read_json_param;
 
     std::ifstream f(json_path);
     OPENVINO_ASSERT(f.is_open(), "Failed to open '", json_path, "' with generation config");
 
     nlohmann::json data = nlohmann::json::parse(f);
-    
+
     read_json_param(data, "max_new_tokens", max_new_tokens);
     read_json_param(data, "max_length", max_length);
     // note that ignore_eos is not present in HF GenerationConfig
@@ -103,6 +105,9 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) {
     read_anymap_param(config_map, "echo", echo);
     read_anymap_param(config_map, "logprobs", logprobs);
     read_anymap_param(config_map, "adapters", adapters);
+
+    // TODO: add support of 'generator' property similar to Image generation
+    read_anymap_param(config_map, "rng_seed", rng_seed);
 }
 
 size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const {

diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp
@@ -4,7 +4,6 @@
 #pragma once
 
 #include <cassert>
-#include <ctime>
 
 #include "image_generation/diffusion_pipeline.hpp"
 #include "image_generation/numpy_utils.hpp"

diff --git a/src/cpp/src/image_generation/generation_config.cpp b/src/cpp/src/image_generation/generation_config.cpp
@@ -27,11 +27,15 @@ ov::Tensor Generator::randn_tensor(const ov::Shape& shape) {
 }
 
 CppStdGenerator::CppStdGenerator(uint32_t seed)
-    : gen(seed), normal(0.0f, 1.0f) {
+    : m_gen(seed), m_normal(0.0f, 1.0f) {
 }
 
 float CppStdGenerator::next() {
-    return normal(gen);
+    return m_normal(m_gen);
+}
+
+// Re-seeds the generator so that subsequent next() calls restart the random sequence.
+// @param new_seed New seed value; narrowed to uint32_t, the seed type taken by the constructor.
+void CppStdGenerator::seed(size_t new_seed) {
+    m_gen.seed(static_cast<uint32_t>(new_seed));
+    // std::normal_distribution may keep a value computed from the previous engine state;
+    // reset() discards it so the output depends only on the new seed.
+    m_normal.reset();
 }
 
 //
@@ -55,7 +59,6 @@ void ImageGenerationConfig::update_generation_config(const ov::AnyMap& propertie
     read_anymap_param(properties, "negative_prompt_2", negative_prompt_2);
     read_anymap_param(properties, "negative_prompt_3", negative_prompt_3);
     read_anymap_param(properties, "num_images_per_prompt", num_images_per_prompt);
-    read_anymap_param(properties, "generator", generator);
     read_anymap_param(properties, "guidance_scale", guidance_scale);
     read_anymap_param(properties, "height", height);
     read_anymap_param(properties, "width", width);
@@ -64,6 +67,25 @@ void ImageGenerationConfig::update_generation_config(const ov::AnyMap& propertie
     read_anymap_param(properties, "adapters", adapters);
     read_anymap_param(properties, "max_sequence_length", max_sequence_length);
 
+    // 'generator' has higher priority than 'rng_seed' parameter
+    const bool have_generator_param = properties.find(ov::genai::generator.name()) != properties.end();
+    if (have_generator_param) {
+        read_anymap_param(properties, "generator", generator);
+    } else {
+        read_anymap_param(properties, "rng_seed", rng_seed);
+
+        // initialize random generator with a given seed value
+        if (!generator) {
+            generator = std::make_shared<CppStdGenerator>(rng_seed);
+        }
+
+        const bool have_rng_seed = properties.find(ov::genai::rng_seed.name()) != properties.end();
+        if (have_rng_seed) {
+            // we need to change the seed because the user has specified it explicitly
+            generator->seed(rng_seed);
+        }
+    }
+
     validate();
 }