From 987ecd783b88ecc9a764eb3b8f4f0dddf248eee6 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Dec 2024 11:03:25 +0100 Subject: [PATCH] Tests for generation config --- .../openvino/genai/generation_config.hpp | 28 ++- src/cpp/src/generation_config.cpp | 224 +++++++++++------- src/cpp/src/whisper_generation_config.cpp | 4 + .../openvino_genai/py_openvino_genai.pyi | 24 +- .../py_continuous_batching_pipeline.cpp | 8 +- src/python/py_generation_config.cpp | 7 +- src/python/py_image_generation_pipelines.cpp | 12 +- src/python/py_llm_pipeline.cpp | 9 +- src/python/py_utils.cpp | 5 +- src/python/py_vlm_pipeline.cpp | 6 +- src/python/py_whisper_pipeline.cpp | 12 +- tests/cpp/CMakeLists.txt | 4 +- tests/cpp/generate_config.cpp | 143 ----------- tests/python_tests/ov_genai_test_utils.py | 15 +- tests/python_tests/test_generation_config.py | 96 ++++++++ tests/python_tests/test_llm_pipeline.py | 50 +--- tests/python_tests/test_tokenizer.py | 4 +- 17 files changed, 328 insertions(+), 323 deletions(-) delete mode 100644 tests/cpp/generate_config.cpp create mode 100644 tests/python_tests/test_generation_config.py diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 4ea75e94c5..164ff29131 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -93,15 +93,22 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { bool echo = false; size_t logprobs = 0; + // EOS special token + int64_t eos_token_id = -1; std::set stop_strings; // Default setting in vLLM (and OpenAI API) is not to include stop string in the output bool include_stop_str_in_output = false; std::set stop_token_ids; + // penalties (not used in beam search) + float repetition_penalty = 1.0f; + float presence_penalty = 0.0; + float frequency_penalty = 0.0f; + // Beam search specific size_t num_beam_groups = 1; size_t num_beams = 1; - float diversity_penalty = 1.0f; + float diversity_penalty = 0.0f; float length_penalty = 1.0f; size_t num_return_sequences = 1; size_t no_repeat_ngram_size = std::numeric_limits::max(); @@ -112,9 +119,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { float top_p = 1.0f; size_t top_k = std::numeric_limits::max(); bool do_sample = false; - float repetition_penalty = 1.0f; - float presence_penalty = 0.0; - float frequency_penalty = 0.0f; size_t rng_seed = 0; // Assisting generation parameters @@ -122,9 +126,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { size_t num_assistant_tokens = 0; size_t max_ngram_size = 0; - // EOS special token - int64_t eos_token_id = -1; - std::optional adapters; /** @brief sets eos_token_id to tokenizer_eos_token_id if eos_token_id is less than 0. @@ -136,11 +137,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { bool is_greedy_decoding() const; bool is_beam_search() const; bool is_multinomial() const; - OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2025.0.0 release") - bool is_speculative_decoding() const; bool is_assisting_generation() const; bool is_prompt_lookup() const; - void update_generation_config(const ov::AnyMap& config_map); + + OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. 
This method will be removed in 2026.0.0 release") + bool is_speculative_decoding() const; + + void update_generation_config(const ov::AnyMap& properties); template util::EnableIfAllStringAny update_generation_config(Properties&&... properties) { @@ -187,8 +190,13 @@ static constexpr ov::Property assistant_confidence_threshold{"assistant_c static constexpr ov::Property num_assistant_tokens{"num_assistant_tokens"}; // Predefined Configs + +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig beam_search(); +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig greedy(); +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig multinomial(); + } // namespace genai } // namespace ov diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 4ff184547e..29d7119a43 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -24,6 +24,7 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { nlohmann::json data = nlohmann::json::parse(f); + read_json_param(data, "eos_token_id", eos_token_id); read_json_param(data, "max_new_tokens", max_new_tokens); read_json_param(data, "max_length", max_length); // note that ignore_eos is not present in HF GenerationConfig @@ -34,26 +35,26 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { read_json_param(data, "include_stop_str_in_output", include_stop_str_in_output); // note that stop_token_ids is not present in HF GenerationConfig read_json_param(data, "stop_token_ids", stop_token_ids); + + // note that echo is not present in HF GenerationConfig + read_json_param(data, "echo", echo); + // note that logprobs is not present in HF GenerationConfig + read_json_param(data, "logprobs", logprobs); + + // penalties + read_json_param(data, "repetition_penalty", repetition_penalty); + // note that frequency_penalty is not present in HF GenerationConfig + read_json_param(data, "frequency_penalty", frequency_penalty); + // note that presence_penalty is not present in HF GenerationConfig + read_json_param(data, "presence_penalty", presence_penalty); + + // beam search read_json_param(data, "num_beam_groups", num_beam_groups); read_json_param(data, "num_beams", num_beams); read_json_param(data, "diversity_penalty", diversity_penalty); read_json_param(data, "length_penalty", length_penalty); read_json_param(data, "num_return_sequences", num_return_sequences); read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size); - read_json_param(data, "temperature", temperature); - read_json_param(data, "top_p", top_p); - read_json_param(data, "top_k", top_k); - read_json_param(data, "do_sample", do_sample); - read_json_param(data, "repetition_penalty", repetition_penalty); - read_json_param(data, "eos_token_id", eos_token_id); - // note that echo is not present in HF GenerationConfig - read_json_param(data, "echo", echo); - // note that logprobs is not present in HF GenerationConfig - read_json_param(data, "logprobs", logprobs); - - // append EOS to stop_token_ids - if (eos_token_id != -1) - set_eos_token_id(eos_token_id); if (data.contains("early_stopping")) { auto field_type = 
data["early_stopping"].type(); @@ -65,6 +66,23 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { stop_criteria = StopCriteria::HEURISTIC; } } + + // multinomial + read_json_param(data, "do_sample", do_sample); + read_json_param(data, "temperature", temperature); + read_json_param(data, "top_p", top_p); + read_json_param(data, "top_k", top_k); + + // assistant generation + read_json_param(data, "assistant_confidence_threshold", assistant_confidence_threshold); + read_json_param(data, "num_assistant_tokens", num_assistant_tokens); + read_json_param(data, "max_ngram_size", max_ngram_size); + + // append EOS to stop_token_ids + if (eos_token_id != -1) + set_eos_token_id(eos_token_id); + + validate(); } void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) { @@ -79,35 +97,52 @@ void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) { stop_token_ids.insert(eos_token_id); } -void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) { +void GenerationConfig::update_generation_config(const ov::AnyMap& properties) { using utils::read_anymap_param; - read_anymap_param(config_map, "max_new_tokens", max_new_tokens); - read_anymap_param(config_map, "max_length", max_length); - read_anymap_param(config_map, "ignore_eos", ignore_eos); - read_anymap_param(config_map, "min_new_tokens", min_new_tokens); - read_anymap_param(config_map, "stop_strings", stop_strings); - read_anymap_param(config_map, "include_stop_str_in_output", include_stop_str_in_output); - read_anymap_param(config_map, "stop_token_ids", stop_token_ids); - read_anymap_param(config_map, "num_beam_groups", num_beam_groups); - read_anymap_param(config_map, "num_beams", num_beams); - read_anymap_param(config_map, "diversity_penalty", diversity_penalty); - read_anymap_param(config_map, "length_penalty", length_penalty); - read_anymap_param(config_map, "num_return_sequences", num_return_sequences); - read_anymap_param(config_map, "no_repeat_ngram_size", no_repeat_ngram_size); - read_anymap_param(config_map, "stop_criteria", stop_criteria); - read_anymap_param(config_map, "temperature", temperature); - read_anymap_param(config_map, "top_p", top_p); - read_anymap_param(config_map, "top_k", top_k); - read_anymap_param(config_map, "do_sample", do_sample); - read_anymap_param(config_map, "repetition_penalty", repetition_penalty); - read_anymap_param(config_map, "eos_token_id", eos_token_id); - read_anymap_param(config_map, "echo", echo); - read_anymap_param(config_map, "logprobs", logprobs); - read_anymap_param(config_map, "adapters", adapters); + // stop conditions + read_anymap_param(properties, "eos_token_id", eos_token_id); + read_anymap_param(properties, "max_new_tokens", max_new_tokens); + read_anymap_param(properties, "max_length", max_length); + read_anymap_param(properties, "ignore_eos", ignore_eos); + read_anymap_param(properties, "min_new_tokens", min_new_tokens); + read_anymap_param(properties, "stop_strings", stop_strings); + read_anymap_param(properties, "include_stop_str_in_output", include_stop_str_in_output); + read_anymap_param(properties, "stop_token_ids", stop_token_ids); + + // generic + read_anymap_param(properties, "echo", echo); + read_anymap_param(properties, "logprobs", logprobs); + read_anymap_param(properties, "num_return_sequences", num_return_sequences); + read_anymap_param(properties, "adapters", adapters); + // penalties + read_anymap_param(properties, "frequency_penalty", frequency_penalty); + read_anymap_param(properties, 
"presence_penalty", presence_penalty); + read_anymap_param(properties, "repetition_penalty", repetition_penalty); + + // beam search + read_anymap_param(properties, "num_beam_groups", num_beam_groups); + read_anymap_param(properties, "num_beams", num_beams); + read_anymap_param(properties, "diversity_penalty", diversity_penalty); + read_anymap_param(properties, "length_penalty", length_penalty); + read_anymap_param(properties, "stop_criteria", stop_criteria); + read_anymap_param(properties, "no_repeat_ngram_size", no_repeat_ngram_size); + + // multinomial + read_anymap_param(properties, "do_sample", do_sample); + read_anymap_param(properties, "temperature", temperature); + read_anymap_param(properties, "top_p", top_p); + read_anymap_param(properties, "top_k", top_k); // TODO: add support of 'generator' property similar to Image generation - read_anymap_param(config_map, "rng_seed", rng_seed); + read_anymap_param(properties, "rng_seed", rng_seed); + + // assistant generation + read_anymap_param(properties, "assistant_confidence_threshold", assistant_confidence_threshold); + read_anymap_param(properties, "num_assistant_tokens", num_assistant_tokens); + read_anymap_param(properties, "max_ngram_size", max_ngram_size); + + validate(); } size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const { @@ -136,69 +171,90 @@ bool GenerationConfig::is_speculative_decoding() const { } bool GenerationConfig::is_assisting_generation() const { - return (assistant_confidence_threshold > 0 || num_assistant_tokens > 0); + return assistant_confidence_threshold > 0 || num_assistant_tokens > 0; } bool GenerationConfig::is_prompt_lookup() const { - return (max_ngram_size > 0 && num_assistant_tokens > 0); + return max_ngram_size > 0 && num_assistant_tokens > 0; } void GenerationConfig::validate() const { + OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0"); + + // Stop conditions + OPENVINO_ASSERT(eos_token_id == -1 || stop_token_ids.find(eos_token_id) != stop_token_ids.end(), "'stop_token_ids' must contain 'eos_token_id'. Please, call 'set_eos_token_id' with 'eos_token_id' value"); - OPENVINO_ASSERT(!do_sample || num_beams == 1, - "Beam search with sampling is not supported yet. 
" - "Please either set do_sample=false to use beam search " - "or set num_beams=1 if you with to use multinomial sampling."); - OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0"); + auto stop_token_ids_it = std::find_if(stop_token_ids.begin(), stop_token_ids.end(), [] (int64_t stop_token_id) -> bool { + return stop_token_id < 0; + }); + OPENVINO_ASSERT(stop_token_ids_it == stop_token_ids.end(), "'stop_token_ids' must be non-negative, but it contains a value ", *stop_token_ids_it); + + OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, + "ignore_eos is true, in this case either 'max_new_tokens', or 'max_length' should be defined."); + + OPENVINO_ASSERT(eos_token_id != -1 || !stop_token_ids.empty() || !stop_strings.empty() || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, + "Either 'eos_token_id', or 'stop_token_ids', or ''stop_strings'', or 'max_new_tokens', or 'max_length' should be defined."); + OPENVINO_ASSERT(max_new_tokens > 0 || (max_new_tokens == 0 && echo), "'max_new_tokens' must be greater than 0, if `echo` is set, 0 is also accepted"); OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens"); - OPENVINO_ASSERT( - num_beams % num_beam_groups == 0, - "number of beams should be divisible by number of groups" - ); - - // max_new_tokens has priority over max_length - // if max_new_tokens is defined no need to check max_length - OPENVINO_ASSERT(max_new_tokens != SIZE_MAX || max_length > 0, - "'max_length' must be greater than 0 or 'max_new_tokens' should be defined"); - - OPENVINO_ASSERT(!do_sample || top_k > 0, - "top_k must be a strictly positive, but got ", - top_k); - OPENVINO_ASSERT(!do_sample || (top_p > 0 && top_p <= 1.0f), - "top_p must be a positive float > 0 and < 1, but got ", - top_p); - OPENVINO_ASSERT(!do_sample || temperature > 0, - "Temperature must be a strictly positive float, but got ", - temperature); - - OPENVINO_ASSERT(repetition_penalty > 0, - "Repetition penalty must be a strictly positive float, but got ", - repetition_penalty); - - OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, - "ignore_eos == true, in this case either 'max_new_tokens', or 'max_length' should be defined."); - OPENVINO_ASSERT(eos_token_id != -1 || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, - "Either 'eos_token_id', or 'max_new_tokens', or 'max_length' should be defined."); + // Sampling strategies + + OPENVINO_ASSERT(num_return_sequences == 1 || (is_multinomial() || is_beam_search()), + "'num_return_sequences' can be more than 1 only in case of beam search or multinomial sampling, but got ", num_return_sequences); + + // generic penalties, but not supported by beam search currently + if (!is_beam_search()) { + OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "'frequence_penalty' penalty must be within [-2.0; 2.0], but got ", frequency_penalty); + OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "'presence_penalty' penalty must be within [-2.0; 2.0], but got ", presence_penalty); + OPENVINO_ASSERT(repetition_penalty > 0.0f, "'repetition_penalty' must be a strictly positive float, but got ", repetition_penalty); + } else { + OPENVINO_ASSERT(frequency_penalty == 0.0f, "'frequency_penalty' is not currently supported by beam search and should be 0.0f, but got ", frequency_penalty); + OPENVINO_ASSERT(presence_penalty == 0.0f, "'presence_penalty' is not currently supported by beam 
search and should be 0.0f, but got ", presence_penalty); + OPENVINO_ASSERT(repetition_penalty == 1.0f, "'repetition_penalty' is not currently supported by beam search and should be 1.0f, but got ", repetition_penalty); + } + + if (is_multinomial()) { + OPENVINO_ASSERT(top_k > 0, "When 'do_sample' is true, top_k must be strictly positive, but got ", top_k); + OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be within (0, 1], but got ", top_p); + OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature); + } else { + // parameters requiring multinomial + OPENVINO_ASSERT(top_k == std::numeric_limits<size_t>::max(), "When 'do_sample' is false, top_k must be max of size_t, but got ", top_k); + OPENVINO_ASSERT(top_p == 1.0f, "When 'do_sample' is false, top_p must be 1.0f, but got ", top_p); + OPENVINO_ASSERT(temperature == 1.0f, "When 'do_sample' is false, temperature must be 1.0f, but got ", temperature); + } + if (is_beam_search()) { - OPENVINO_ASSERT(no_repeat_ngram_size > 0, "no_repeat_ngram_size must be positive"); + OPENVINO_ASSERT(num_beams % num_beam_groups == 0, "'num_beams' (", num_beams, ") should be divisible by 'num_beam_groups' (", num_beam_groups, ")"); + OPENVINO_ASSERT(num_beams >= num_return_sequences, "'num_beams' (", num_beams, ") must be greater than or equal to 'num_return_sequences' (", num_return_sequences, ")"); + + OPENVINO_ASSERT(!do_sample, + "Beam search with sampling is not supported yet. " + "Please either set do_sample=false to use beam search " + "or set num_beams=1 if you wish to use multinomial sampling."); + + OPENVINO_ASSERT(no_repeat_ngram_size > 0, "'no_repeat_ngram_size' must be positive"); if (num_beam_groups > 1) { - OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, it it fallbacks to non-grouped beam search"); + OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, otherwise it falls back to non-grouped beam search"); + } else { + OPENVINO_ASSERT(diversity_penalty == 0.0f, "For beam search 'diversity_penalty' is applicable only when grouped beam search is used, but got 'num_beam_groups' == 1"); } } else { - OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "frequence_penalty penalty must be a [-2; +2]"); - OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "presence_penalty penalty must be a [-2; +2]"); + // parameters requiring beam search + OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups); + OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size); + OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is supported only by beam search sampling and should keep its default value 0.0f, but got ", diversity_penalty, ", while 'num_beams' is set to 1"); + OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is supported only by beam search sampling and should keep its default value 1.0f, but got ", length_penalty, ", while 'num_beams' is set to 1"); } + + // assistant generation + if (is_assisting_generation()) { - if (assistant_confidence_threshold != 0.f) { - OPENVINO_ASSERT(num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` 
and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); - OPENVINO_ASSERT(!is_prompt_lookup(), "Parameters `assistant_confidence_threshold` cannot be used while Prompt Lookup decoding"); - } else { - OPENVINO_ASSERT(num_assistant_tokens > 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); - }; + OPENVINO_ASSERT(!is_beam_search() && num_return_sequences == 1, "Beam search and parallel sampling are not compatible with assistant generation"); + OPENVINO_ASSERT(assistant_confidence_threshold == 0.0f || num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); } } diff --git a/src/cpp/src/whisper_generation_config.cpp b/src/cpp/src/whisper_generation_config.cpp index beb663caaf..20207d30cf 100644 --- a/src/cpp/src/whisper_generation_config.cpp +++ b/src/cpp/src/whisper_generation_config.cpp @@ -40,6 +40,8 @@ WhisperGenerationConfig::WhisperGenerationConfig(const std::filesystem::path& js } read_json_param(data, "lang_to_id", lang_to_id); + + validate(); } void WhisperGenerationConfig::set_eos_token_id(int64_t tokenizer_eos_token_id) { @@ -76,6 +78,8 @@ void WhisperGenerationConfig::update_generation_config(const ov::AnyMap& config_ read_anymap_param(config_map, "return_timestamps", return_timestamps); read_anymap_param(config_map, "initial_prompt", initial_prompt); read_anymap_param(config_map, "hotwords", hotwords); + + validate(); } size_t WhisperGenerationConfig::get_max_new_tokens(size_t prompt_length) const { diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 8510a8389f..ea9ad1bbf3 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -367,16 +367,16 @@ class ContinuousBatchingPipeline: def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}) -> None: ... @typing.overload - def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, sampling_params: GenerationConfig) -> GenerationHandle: + def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, generation_config: GenerationConfig) -> GenerationHandle: ... @typing.overload - def add_request(self, request_id: int, prompt: str, sampling_params: GenerationConfig) -> GenerationHandle: + def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle: ... @typing.overload - def generate(self, input_ids: list[openvino._pyopenvino.Tensor], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]: + def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]: ... @typing.overload - def generate(self, prompts: list[str], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]: + def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]: ... def get_config(self) -> GenerationConfig: ... @@ -613,7 +613,9 @@ class GenerationConfig: ... 
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None: ... - def update_generation_config(self, config_map: dict[str, openvino._pyopenvino.OVAny]) -> None: + def update_generation_config(self, **kwargs) -> None: + ... + def validate(self) -> None: ... class GenerationFinishReason: """ @@ -826,7 +828,7 @@ class Image2ImagePipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -927,7 +929,7 @@ class InpaintingPipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -1615,7 +1617,7 @@ class Text2ImagePipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -1865,9 +1867,9 @@ class VLMPipeline: ... def get_tokenizer(self) -> Tokenizer: ... - def set_chat_template(self, new_template: str) -> None: + def set_chat_template(self, chat_template: str) -> None: ... - def set_generation_config(self, new_config: GenerationConfig) -> None: + def set_generation_config(self, config: GenerationConfig) -> None: ... def start_chat(self, system_message: str = '') -> None: ... @@ -2043,6 +2045,8 @@ class WhisperGenerationConfig: ... def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None: ... + def update_generation_config(self, **kwargs) -> None: + ... 
class WhisperPerfMetrics(PerfMetrics): """ diff --git a/src/python/py_continuous_batching_pipeline.cpp b/src/python/py_continuous_batching_pipeline.cpp index be7a72481f..2b48e4d44d 100644 --- a/src/python/py_continuous_batching_pipeline.cpp +++ b/src/python/py_continuous_batching_pipeline.cpp @@ -235,22 +235,22 @@ void init_continuous_batching_pipeline(py::module_& m) { .def("get_tokenizer", &ContinuousBatchingPipeline::get_tokenizer) .def("get_config", &ContinuousBatchingPipeline::get_config) .def("get_metrics", &ContinuousBatchingPipeline::get_metrics) - .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("sampling_params")) - .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("sampling_params")) + .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("generation_config")) + .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("generation_config")) .def("step", &ContinuousBatchingPipeline::step) .def("has_non_finished_requests", &ContinuousBatchingPipeline::has_non_finished_requests) .def( "generate", py::overload_cast&, const std::vector&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate), py::arg("input_ids"), - py::arg("sampling_params"), + py::arg("generation_config"), py::arg("streamer") = std::monostate{} ) .def( "generate", py::overload_cast&, const std::vector&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate), py::arg("prompts"), - py::arg("sampling_params"), + py::arg("generation_config"), py::arg("streamer") = std::monostate{} ); } diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp index f49bcf29bd..c7517400f3 100644 --- a/src/python/py_generation_config.cpp +++ b/src/python/py_generation_config.cpp @@ -120,5 +120,10 @@ void init_generation_config(py::module_& m) { .def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding) .def("is_assisting_generation", &GenerationConfig::is_assisting_generation) .def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup) - .def("update_generation_config", static_cast(&ov::genai::GenerationConfig::update_generation_config), py::arg("config_map")); + .def("validate", &GenerationConfig::validate) + .def("update_generation_config", []( + ov::genai::GenerationConfig config, + const py::kwargs& kwargs) { + config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + }); } diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 311f3f3760..754c6b6a38 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -255,8 +255,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). 
kwargs: Text2ImagePipeline properties )") - .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config) - .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::Text2ImagePipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::Text2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::Text2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) @@ -323,8 +323,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Image2ImagePipeline properties )") - .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config) - .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::Image2ImagePipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::Image2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) @@ -386,8 +386,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: InpaintingPipeline properties )") - .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config) - .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::InpaintingPipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::InpaintingPipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::InpaintingPipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) diff --git a/src/python/py_llm_pipeline.cpp b/src/python/py_llm_pipeline.cpp index b1d5136253..7360975a0b 100644 --- a/src/python/py_llm_pipeline.cpp +++ b/src/python/py_llm_pipeline.cpp @@ -53,15 +53,10 @@ py::object call_common_generate( const pyutils::PyBindStreamerVariant& py_streamer, const py::kwargs& kwargs ) { - ov::genai::GenerationConfig default_config; - if (config.has_value()) { - default_config = *config; - } else { - default_config = pipe.get_generation_config(); - } + ov::genai::GenerationConfig default_config = config.has_value() ? 
*config : pipe.get_generation_config(); auto updated_config = pyutils::update_config_from_kwargs(default_config, kwargs); + py::object results; - EncodedInputs tensor_data; StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer); // Call suitable generate overload for each type of input. diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 45a0c46174..34522409ea 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -358,7 +358,10 @@ ov::genai::OptionalGenerationConfig update_config_from_kwargs(const ov::genai::O ov::genai::GenerationConfig res_config; if(config.has_value()) res_config = *config; - res_config.update_generation_config(kwargs_to_any_map(kwargs)); + + if (!kwargs.empty()) + res_config.update_generation_config(kwargs_to_any_map(kwargs)); + return res_config; } diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp index 340cb3da62..b0cfa0a42a 100644 --- a/src/python/py_vlm_pipeline.cpp +++ b/src/python/py_vlm_pipeline.cpp @@ -150,10 +150,10 @@ void init_vlm_pipeline(py::module_& m) { .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "") .def("finish_chat", &ov::genai::VLMPipeline::finish_chat) - .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("new_template")) + .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("chat_template")) .def("get_tokenizer", &ov::genai::VLMPipeline::get_tokenizer) - .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config) - .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("new_config")) + .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("config")) .def( "generate", [](ov::genai::VLMPipeline& pipe, diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp index cd42dcf58d..05087eeacb 100644 --- a/src/python/py_whisper_pipeline.cpp +++ b/src/python/py_whisper_pipeline.cpp @@ -187,7 +187,10 @@ OptionalWhisperGenerationConfig update_whisper_config_from_kwargs(const Optional WhisperGenerationConfig res_config; if (config.has_value()) res_config = *config; - res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + + if (!kwargs.empty()) + res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + return res_config; } @@ -295,7 +298,12 @@ void init_whisper_pipeline(py::module_& m) { .def_readwrite("return_timestamps", &WhisperGenerationConfig::return_timestamps) .def_readwrite("initial_prompt", &WhisperGenerationConfig::initial_prompt) .def_readwrite("hotwords", &WhisperGenerationConfig::hotwords) - .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")); + .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")) + .def("update_generation_config", []( + ov::genai::WhisperGenerationConfig config, + const py::kwargs& kwargs) { + config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + });; py::class_(m, "WhisperRawPerfMetrics", raw_perf_metrics_docstring) .def(py::init<>()) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 093cd993de..b8c2e625c5 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -25,8 +25,8 @@ file(GLOB src_files "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/sequence_group.cpp" 
"${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/continuous_batching*.cpp" "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/text_callback_streamer.cpp") -add_executable(${TEST_TARGET_NAME} ${tests_src} - block_allocator.cpp) +add_executable(${TEST_TARGET_NAME} ${tests_src}) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE openvino::genai gtest_main) target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src") target_sources(${TEST_TARGET_NAME} PRIVATE ${src_files}) diff --git a/tests/cpp/generate_config.cpp b/tests/cpp/generate_config.cpp deleted file mode 100644 index 974fd499f8..0000000000 --- a/tests/cpp/generate_config.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include -#include -#include "openvino/genai/generation_config.hpp" - - -using namespace ov::genai; - -TEST(GenerationConfigTest, invalid_temperature) { - GenerationConfig config; - config.max_new_tokens = 20; - config.temperature = -0.1; - config.do_sample = true; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_temperature) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.temperature = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_top_p) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.top_p = -0.5; - EXPECT_THROW(config.validate(), ov::Exception); - config.top_p = 1.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_top_p) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.top_p = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_repeatition_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.repetition_penalty = -3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.repetition_penalty = -0.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_repeatition_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.repetition_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.repetition_penalty = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_presence_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.presence_penalty = 3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.presence_penalty = -3.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_presence_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.presence_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.presence_penalty = -2.0; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_frequency_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.frequency_penalty = 3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.frequency_penalty = -3.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_frequency_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.frequency_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.frequency_penalty = -2.0; - 
EXPECT_NO_THROW(config.validate()); -} - -ov::genai::GenerationConfig speculative_decoding_multinomial() { - auto speculative_decoding_multinomial_config = ov::genai::multinomial(); - speculative_decoding_multinomial_config.num_assistant_tokens = 5; - return speculative_decoding_multinomial_config; -} - -ov::genai::GenerationConfig speculative_decoding_greedy() { - auto speculative_decoding_greedy_config = ov::genai::greedy(); - speculative_decoding_greedy_config.assistant_confidence_threshold = 0.4f; - return speculative_decoding_greedy_config; -} - -TEST(GenerationConfigTest, invalid_static_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0.2; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_static_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_dynamic_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0.5; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_dynamic_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.assistant_confidence_threshold = 0.5; - config.num_assistant_tokens = 0; - EXPECT_NO_THROW(config.validate()); -} diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index 3fc89cb8a7..9e8e4681f9 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -111,7 +111,7 @@ def read_model(params, **tokenizer_kwargs): path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(path, 'CPU', **{'ENABLE_MMAP': False}), + ov_genai.LLMPipeline(path, 'CPU', ENABLE_MMAP=False), ) @@ -139,7 +139,7 @@ def model_tmp_path(tmpdir_factory): @pytest.fixture(scope="module") -def model_tokenizers_path_tmp_path(tmpdir_factory): +def model_tokenizers_tmp_path(tmpdir_factory): model_id, path, _, _, _ = read_model(get_models_list()[0]) temp_path = tmpdir_factory.mktemp(model_id.replace('/', '_')) @@ -180,10 +180,15 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): for config_json, config_name in configs: with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - return ov_genai.LLMPipeline(temp_path, 'CPU') + + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU') + + for _, config_name in configs: + os.remove(temp_path / config_name) + + return ov_pipe @functools.lru_cache(1) def get_continuous_batching(path): - scheduler_config = ov_genai.SchedulerConfig() - return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', **{"scheduler_config": scheduler_config}) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig()) diff --git a/tests/python_tests/test_generation_config.py b/tests/python_tests/test_generation_config.py new file mode 100644 index 0000000000..28477c58a3 --- /dev/null +++ b/tests/python_tests/test_generation_config.py @@ -0,0 +1,96 @@ +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino_genai import GenerationConfig +import pytest + +configs = [ + # stop conditions + dict(max_new_tokens=12), + dict(max_length=12), + dict(stop_token_ids={2}), + dict(eos_token_id=1, stop_token_ids={1}), + 
dict(stop_strings={"a", "b"}), + dict(ignore_eos=True, max_new_tokens=10), + dict(ignore_eos=True, max_length=10), + dict(max_new_tokens=0, echo=True), + dict(min_new_tokens=1, max_new_tokens=1), + # multinomial + dict(max_new_tokens=1, do_sample=True, num_return_sequences=2), + dict(max_new_tokens=1, do_sample=True, top_k=1), + dict(max_new_tokens=1, do_sample=True, top_p=0.5), + dict(max_new_tokens=1, do_sample=True, temperature=0.5), + # beam search + dict(max_new_tokens=1, num_beams=2), + dict(max_new_tokens=1, num_beams=2, num_return_sequences=1), + dict(max_new_tokens=1, num_beams=2, num_return_sequences=2), + dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=1.0), + dict(max_new_tokens=1, num_beams=4, length_penalty=1.0), + dict(max_new_tokens=1, num_beams=4, no_repeat_ngram_size=2), + # assistant generation + dict(max_new_tokens=1, assistant_confidence_threshold=0.5), + dict(max_new_tokens=1, num_assistant_tokens=2), + dict(max_new_tokens=1, num_assistant_tokens=2, max_ngram_size=2), # prompt lookup +] +@pytest.mark.parametrize("generation_config", configs) +@pytest.mark.precommit +@pytest.mark.nightly +def test_valid_configs(generation_config): + config = GenerationConfig(**generation_config) + config.update_generation_config(**generation_config) + config.validate() + + +invalid_configs = [ + dict(num_return_sequences=0), # no reason to run with empty output + dict(num_return_sequences=2), # beam search or multinomial sampling is required + # stop conditions + dict(), + dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id' + dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id' + dict(ignore_eos=True), # no 'max_new_tokens', no 'max_length' with 'ignore_eos' + dict(stop_token_ids={-1}), # value in 'stop_token_ids' must be non-negative + dict(max_new_tokens=0), # 'max_new_tokens' cannot be 0, unless 'echo' is True + dict(max_new_tokens=10, min_new_tokens=20), # 'max_new_tokens' must be >= 'min_new_tokens' + # penalties + dict(max_new_tokens=1, repetition_penalty=-1.0), # invalid repetition_penalty + dict(max_new_tokens=1, presence_penalty=-3.0), # invalid presence_penalty + dict(max_new_tokens=1, frequency_penalty=3.0), # invalid frequency_penalty + # multinomial sampling + dict(max_new_tokens=1, do_sample=True, top_k=-1), # 'top_k' must be > 0 when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, top_p=1.1), # 'top_p' must be within (0, 1] when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, top_p=0), # 'top_p' must be within (0, 1] when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, temperature=-1.0), # invalid temperature + # parameters requiring multinomial sampling + dict(max_new_tokens=1, top_k=1), # requires do_sample=True + dict(max_new_tokens=1, top_p=0.5), # requires do_sample=True + dict(max_new_tokens=1, temperature=2.0), # requires do_sample=True + # beam search + dict(max_new_tokens=1, num_beams=2, num_return_sequences=3), # 'num_beams' must be >= 'num_return_sequences' + dict(max_new_tokens=1, num_beams=3, num_beam_groups=2), # 'num_beams' must be divisible by 'num_beam_groups' + dict(max_new_tokens=1, num_beams=3, do_sample=True), # beam sampling is not supported + dict(max_new_tokens=1, num_beams=3, no_repeat_ngram_size=0), # invalid 'no_repeat_ngram_size' + dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=0.0), # 'diversity_penalty' cannot be zero (the default) for grouped beam search + dict(max_new_tokens=1, num_beams=4, diversity_penalty=1.0), # 
'diversity_penalty' is used only for grouped beam search + dict(max_new_tokens=1, num_beams=2, frequency_penalty=1.0), # 'frequency_penalty' is not supported by beam search + dict(max_new_tokens=1, num_beams=2, presence_penalty=1.0), # 'presence_penalty' is not supported by beam search + dict(max_new_tokens=1, num_beams=2, repetition_penalty=0.0), # 'repetition_penalty' is not supported by beam search + # parameters requiring beam search + dict(max_new_tokens=1, num_beam_groups=2), # requiring beam search + dict(max_new_tokens=1, no_repeat_ngram_size=2), # requiring beam search + dict(max_new_tokens=1, diversity_penalty=1.0), # requiring beam search + dict(max_new_tokens=1, length_penalty=2), # requiring beam search + # assistant generation + dict(max_new_tokens=1, num_assistant_tokens=2, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group + dict(max_new_tokens=1, assistant_confidence_threshold=1.0, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group + dict(max_new_tokens=1, num_assistant_tokens=2, num_beams=2), # beam search is not compatible with assistant generation + dict(max_new_tokens=1, assistant_confidence_threshold=1.0, num_assistant_tokens=2) # 'assistant_confidence_threshold' and 'num_assistant_tokens' are mutually exclusive + # TODO: add tests for invalid properties +] +@pytest.mark.parametrize("generation_config", invalid_configs) +@pytest.mark.precommit +@pytest.mark.nightly +def test_invalid_generation_configs_throws(generation_config): + config = GenerationConfig() + with pytest.raises(RuntimeError): + config.update_generation_config(**generation_config) diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 9f00996a58..e0def3b433 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -18,7 +18,6 @@ get_chat_models_list, model_tmp_path, STOP_CRITERIA_MAP, - get_continuous_batching, ) @@ -492,30 +491,9 @@ def test_operator_with_streamer_kwargs_batch_throws(): ov_pipe('', num_beams=2, streamer=printer) # -# Tests on generation configs (invalid cases and handling within LLMPipeline) +# Tests on generation configs handling # -invalid_configs = [ - dict(num_beam_groups=3, num_beams=15, do_sample=True), - # TODO: CVS-158682 eos_token_id is still read from tiny-random-phi3 and we cannot modify RTInfo in tests - # dict(do_sample=True), # no eos_token_id no max_new_tokens, no max_len - dict(eos_token_id=42, ignore_eos=True), # no max_new_tokens, no max_len with ignore_eos - dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty - dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp - dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p - dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k -] -@pytest.mark.parametrize("generation_config", invalid_configs) -@pytest.mark.precommit -@pytest.mark.nightly -def test_invalid_generation_configs_throws(model_tmp_path, generation_config): - model_id, temp_path = model_tmp_path - config_json = {} - ov_pipe = load_genai_pipe_with_configs([(config_json, "config.json")], temp_path) - with pytest.raises(RuntimeError): - ov_pipe.generate('blah blah', **generation_config) - - @pytest.mark.precommit @pytest.mark.nightly def 
test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): @@ -529,28 +507,14 @@ def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): assert 37 == ov_pipe.get_generation_config().eos_token_id -invalid_py_configs = [ - dict(num_beam_groups=3, num_beams=15, do_sample=True), - # TODO: Currently unexpected params do not cause exceptions. Need to implement it in c++ and return this test - # dict(unexisting_key_name=True), # no eos_token_id no max_new_tokens, no max_len - dict(eos_token_id=42, ignore_eos=True), # no max_new_tokens, no max_len with ignore_eos - dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty - dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp - dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p - dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k -] @pytest.mark.precommit @pytest.mark.nightly -@pytest.mark.parametrize("generation_config", invalid_py_configs) -def test_python_generation_config_validation_throws(model_tmp_path, generation_config): - model_id, temp_path = model_tmp_path - ov_pipe = load_genai_pipe_with_configs([({"eos_token_id": 37}, "config.json")], temp_path) - - # 'unexisting_key_name' key validity is checked in pybind and ValueError will be returned - # instead of RuntimeError, which is returned when GenerationConfig values are validated - return_exception_type = ValueError if 'unexisting_key_name' in generation_config else RuntimeError - with pytest.raises(return_exception_type): - ov_pipe.set_generation_config(ov_genai.GenerationConfig(**generation_config)) +def test_pipeline_validates_generation_config(): + model_id, path = 'katuni4ka/tiny-random-phi3', Path('tiny-random-phi3') + ov_pipe = read_model((model_id, path))[4] + invalid_generation_config = dict(num_beam_groups=3, num_beams=15, do_sample=True) # beam sample is not supported + with pytest.raises(RuntimeError): + ov_pipe.generate("dummy prompt", **invalid_generation_config) # # Work with Unicode in Python API diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index 0c2a106d50..6c27edcd71 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -265,7 +265,7 @@ def test_load_special_tokens_from_special_tokens_map_json(model_tmp_path): @pytest.mark.precommit @pytest.mark.nightly @pytest.mark.skip(reason="CVS-158682 - RTInfo is not modified in tests for unknown reasons") -def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tmp_path): +def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_tmp_path): # special_tokens_map is not available # but tokenize_config.json exists # will load both string and integer representations @@ -280,7 +280,7 @@ def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tm "eos_token": "", } - tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_path_tmp_path[1]) + tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_tmp_path[1]) assert tok.get_pad_token() == tok_config_json['pad_token'] assert tok.get_bos_token() == tok_config_json['bos_token'] assert tok.get_eos_token() == tok_config_json['eos_token']
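Reviewer note: below is a minimal, illustrative usage sketch of the Python API exercised by this patch. It mirrors the new tests in tests/python_tests/test_generation_config.py and assumes an openvino_genai build that already includes these bindings; the parameter values are arbitrary examples, not recommendations.

    from openvino_genai import GenerationConfig

    # Valid multinomial-sampling config: keyword arguments are applied through
    # update_generation_config(), which after this patch ends with an internal validate() call.
    config = GenerationConfig(max_new_tokens=32, do_sample=True, top_k=50, top_p=0.9, temperature=0.7)
    config.validate()  # does not raise for a consistent config

    # Invalid combination: beam search together with do_sample=True is rejected,
    # matching the dict(max_new_tokens=1, num_beams=3, do_sample=True) case in the new tests.
    bad = GenerationConfig()  # a default-constructed config is not validated until it is updated
    try:
        bad.update_generation_config(max_new_tokens=1, num_beams=3, do_sample=True)
    except RuntimeError as err:
        print(f"rejected as expected: {err}")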