From 987ecd783b88ecc9a764eb3b8f4f0dddf248eee6 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 27 Dec 2024 11:03:25 +0100 Subject: [PATCH] Tests for generation config --- .../openvino/genai/generation_config.hpp | 28 ++- src/cpp/src/generation_config.cpp | 224 +++++++++++------- src/cpp/src/whisper_generation_config.cpp | 4 + .../openvino_genai/py_openvino_genai.pyi | 24 +- .../py_continuous_batching_pipeline.cpp | 8 +- src/python/py_generation_config.cpp | 7 +- src/python/py_image_generation_pipelines.cpp | 12 +- src/python/py_llm_pipeline.cpp | 9 +- src/python/py_utils.cpp | 5 +- src/python/py_vlm_pipeline.cpp | 6 +- src/python/py_whisper_pipeline.cpp | 12 +- tests/cpp/CMakeLists.txt | 4 +- tests/cpp/generate_config.cpp | 143 ----------- tests/python_tests/ov_genai_test_utils.py | 15 +- tests/python_tests/test_generation_config.py | 96 ++++++++ tests/python_tests/test_llm_pipeline.py | 50 +--- tests/python_tests/test_tokenizer.py | 4 +- 17 files changed, 328 insertions(+), 323 deletions(-) delete mode 100644 tests/cpp/generate_config.cpp create mode 100644 tests/python_tests/test_generation_config.py diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 4ea75e94c5..164ff29131 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -93,15 +93,22 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { bool echo = false; size_t logprobs = 0; + // EOS special token + int64_t eos_token_id = -1; std::set stop_strings; // Default setting in vLLM (and OpenAI API) is not to include stop string in the output bool include_stop_str_in_output = false; std::set stop_token_ids; + // penalties (not used in beam search) + float repetition_penalty = 1.0f; + float presence_penalty = 0.0; + float frequency_penalty = 0.0f; + // Beam search specific size_t num_beam_groups = 1; size_t num_beams = 1; - float diversity_penalty = 1.0f; + float diversity_penalty = 0.0f; float length_penalty = 1.0f; size_t num_return_sequences = 1; size_t no_repeat_ngram_size = std::numeric_limits::max(); @@ -112,9 +119,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { float top_p = 1.0f; size_t top_k = std::numeric_limits::max(); bool do_sample = false; - float repetition_penalty = 1.0f; - float presence_penalty = 0.0; - float frequency_penalty = 0.0f; size_t rng_seed = 0; // Assisting generation parameters @@ -122,9 +126,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { size_t num_assistant_tokens = 0; size_t max_ngram_size = 0; - // EOS special token - int64_t eos_token_id = -1; - std::optional adapters; /** @brief sets eos_token_id to tokenizer_eos_token_id if eos_token_id is less than 0. @@ -136,11 +137,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig { bool is_greedy_decoding() const; bool is_beam_search() const; bool is_multinomial() const; - OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2025.0.0 release") - bool is_speculative_decoding() const; bool is_assisting_generation() const; bool is_prompt_lookup() const; - void update_generation_config(const ov::AnyMap& config_map); + + OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. 
This method will be removed in 2026.0.0 release") + bool is_speculative_decoding() const; + + void update_generation_config(const ov::AnyMap& properties); template util::EnableIfAllStringAny update_generation_config(Properties&&... properties) { @@ -187,8 +190,13 @@ static constexpr ov::Property assistant_confidence_threshold{"assistant_c static constexpr ov::Property num_assistant_tokens{"num_assistant_tokens"}; // Predefined Configs + +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig beam_search(); +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig greedy(); +OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release") OPENVINO_GENAI_EXPORTS GenerationConfig multinomial(); + } // namespace genai } // namespace ov diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index 4ff184547e..29d7119a43 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -24,6 +24,7 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { nlohmann::json data = nlohmann::json::parse(f); + read_json_param(data, "eos_token_id", eos_token_id); read_json_param(data, "max_new_tokens", max_new_tokens); read_json_param(data, "max_length", max_length); // note that ignore_eos is not present in HF GenerationConfig @@ -34,26 +35,26 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { read_json_param(data, "include_stop_str_in_output", include_stop_str_in_output); // note that stop_token_ids is not present in HF GenerationConfig read_json_param(data, "stop_token_ids", stop_token_ids); + + // note that echo is not present in HF GenerationConfig + read_json_param(data, "echo", echo); + // note that logprobs is not present in HF GenerationConfig + read_json_param(data, "logprobs", logprobs); + + // penalties + read_json_param(data, "repetition_penalty", repetition_penalty); + // note that frequency_penalty is not present in HF GenerationConfig + read_json_param(data, "frequency_penalty", frequency_penalty); + // note that presence_penalty is not present in HF GenerationConfig + read_json_param(data, "presence_penalty", presence_penalty); + + // beam search read_json_param(data, "num_beam_groups", num_beam_groups); read_json_param(data, "num_beams", num_beams); read_json_param(data, "diversity_penalty", diversity_penalty); read_json_param(data, "length_penalty", length_penalty); read_json_param(data, "num_return_sequences", num_return_sequences); read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size); - read_json_param(data, "temperature", temperature); - read_json_param(data, "top_p", top_p); - read_json_param(data, "top_k", top_k); - read_json_param(data, "do_sample", do_sample); - read_json_param(data, "repetition_penalty", repetition_penalty); - read_json_param(data, "eos_token_id", eos_token_id); - // note that echo is not present in HF GenerationConfig - read_json_param(data, "echo", echo); - // note that logprobs is not present in HF GenerationConfig - read_json_param(data, "logprobs", logprobs); - - // append EOS to stop_token_ids - if (eos_token_id != -1) - set_eos_token_id(eos_token_id); if (data.contains("early_stopping")) { auto field_type = 
data["early_stopping"].type(); @@ -65,6 +66,23 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) { stop_criteria = StopCriteria::HEURISTIC; } } + + // multinomial + read_json_param(data, "do_sample", do_sample); + read_json_param(data, "temperature", temperature); + read_json_param(data, "top_p", top_p); + read_json_param(data, "top_k", top_k); + + // assistant generation + read_json_param(data, "assistant_confidence_threshold", assistant_confidence_threshold); + read_json_param(data, "num_assistant_tokens", num_assistant_tokens); + read_json_param(data, "max_ngram_size", max_ngram_size); + + // append EOS to stop_token_ids + if (eos_token_id != -1) + set_eos_token_id(eos_token_id); + + validate(); } void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) { @@ -79,35 +97,52 @@ void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) { stop_token_ids.insert(eos_token_id); } -void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) { +void GenerationConfig::update_generation_config(const ov::AnyMap& properties) { using utils::read_anymap_param; - read_anymap_param(config_map, "max_new_tokens", max_new_tokens); - read_anymap_param(config_map, "max_length", max_length); - read_anymap_param(config_map, "ignore_eos", ignore_eos); - read_anymap_param(config_map, "min_new_tokens", min_new_tokens); - read_anymap_param(config_map, "stop_strings", stop_strings); - read_anymap_param(config_map, "include_stop_str_in_output", include_stop_str_in_output); - read_anymap_param(config_map, "stop_token_ids", stop_token_ids); - read_anymap_param(config_map, "num_beam_groups", num_beam_groups); - read_anymap_param(config_map, "num_beams", num_beams); - read_anymap_param(config_map, "diversity_penalty", diversity_penalty); - read_anymap_param(config_map, "length_penalty", length_penalty); - read_anymap_param(config_map, "num_return_sequences", num_return_sequences); - read_anymap_param(config_map, "no_repeat_ngram_size", no_repeat_ngram_size); - read_anymap_param(config_map, "stop_criteria", stop_criteria); - read_anymap_param(config_map, "temperature", temperature); - read_anymap_param(config_map, "top_p", top_p); - read_anymap_param(config_map, "top_k", top_k); - read_anymap_param(config_map, "do_sample", do_sample); - read_anymap_param(config_map, "repetition_penalty", repetition_penalty); - read_anymap_param(config_map, "eos_token_id", eos_token_id); - read_anymap_param(config_map, "echo", echo); - read_anymap_param(config_map, "logprobs", logprobs); - read_anymap_param(config_map, "adapters", adapters); + // stop conditions + read_anymap_param(properties, "eos_token_id", eos_token_id); + read_anymap_param(properties, "max_new_tokens", max_new_tokens); + read_anymap_param(properties, "max_length", max_length); + read_anymap_param(properties, "ignore_eos", ignore_eos); + read_anymap_param(properties, "min_new_tokens", min_new_tokens); + read_anymap_param(properties, "stop_strings", stop_strings); + read_anymap_param(properties, "include_stop_str_in_output", include_stop_str_in_output); + read_anymap_param(properties, "stop_token_ids", stop_token_ids); + + // generic + read_anymap_param(properties, "echo", echo); + read_anymap_param(properties, "logprobs", logprobs); + read_anymap_param(properties, "num_return_sequences", num_return_sequences); + read_anymap_param(properties, "adapters", adapters); + // penalties + read_anymap_param(properties, "frequency_penalty", frequency_penalty); + read_anymap_param(properties, 
"presence_penalty", presence_penalty); + read_anymap_param(properties, "repetition_penalty", repetition_penalty); + + // beam search + read_anymap_param(properties, "num_beam_groups", num_beam_groups); + read_anymap_param(properties, "num_beams", num_beams); + read_anymap_param(properties, "diversity_penalty", diversity_penalty); + read_anymap_param(properties, "length_penalty", length_penalty); + read_anymap_param(properties, "stop_criteria", stop_criteria); + read_anymap_param(properties, "no_repeat_ngram_size", no_repeat_ngram_size); + + // multinomial + read_anymap_param(properties, "do_sample", do_sample); + read_anymap_param(properties, "temperature", temperature); + read_anymap_param(properties, "top_p", top_p); + read_anymap_param(properties, "top_k", top_k); // TODO: add support of 'generator' property similar to Image generation - read_anymap_param(config_map, "rng_seed", rng_seed); + read_anymap_param(properties, "rng_seed", rng_seed); + + // assistant generation + read_anymap_param(properties, "assistant_confidence_threshold", assistant_confidence_threshold); + read_anymap_param(properties, "num_assistant_tokens", num_assistant_tokens); + read_anymap_param(properties, "max_ngram_size", max_ngram_size); + + validate(); } size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const { @@ -136,69 +171,90 @@ bool GenerationConfig::is_speculative_decoding() const { } bool GenerationConfig::is_assisting_generation() const { - return (assistant_confidence_threshold > 0 || num_assistant_tokens > 0); + return assistant_confidence_threshold > 0 || num_assistant_tokens > 0; } bool GenerationConfig::is_prompt_lookup() const { - return (max_ngram_size > 0 && num_assistant_tokens > 0); + return max_ngram_size > 0 && num_assistant_tokens > 0; } void GenerationConfig::validate() const { + OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0"); + + // Stop conditions + OPENVINO_ASSERT(eos_token_id == -1 || stop_token_ids.find(eos_token_id) != stop_token_ids.end(), "'stop_token_ids' must contain 'eos_token_id'. Please, call 'set_eos_token_id' with 'eos_token_id' value"); - OPENVINO_ASSERT(!do_sample || num_beams == 1, - "Beam search with sampling is not supported yet. 
" - "Please either set do_sample=false to use beam search " - "or set num_beams=1 if you with to use multinomial sampling."); - OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0"); + auto stop_token_ids_it = std::find_if(stop_token_ids.begin(), stop_token_ids.end(), [] (int64_t stop_token_id) -> bool { + return stop_token_id < 0; + }); + OPENVINO_ASSERT(stop_token_ids_it == stop_token_ids.end(), "'stop_token_ids' must be non-negative, but it contains a value ", *stop_token_ids_it); + + OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, + "ignore_eos is true, in this case either 'max_new_tokens', or 'max_length' should be defined."); + + OPENVINO_ASSERT(eos_token_id != -1 || !stop_token_ids.empty() || !stop_strings.empty() || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, + "Either 'eos_token_id', or 'stop_token_ids', or ''stop_strings'', or 'max_new_tokens', or 'max_length' should be defined."); + OPENVINO_ASSERT(max_new_tokens > 0 || (max_new_tokens == 0 && echo), "'max_new_tokens' must be greater than 0, if `echo` is set, 0 is also accepted"); OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens"); - OPENVINO_ASSERT( - num_beams % num_beam_groups == 0, - "number of beams should be divisible by number of groups" - ); - - // max_new_tokens has priority over max_length - // if max_new_tokens is defined no need to check max_length - OPENVINO_ASSERT(max_new_tokens != SIZE_MAX || max_length > 0, - "'max_length' must be greater than 0 or 'max_new_tokens' should be defined"); - - OPENVINO_ASSERT(!do_sample || top_k > 0, - "top_k must be a strictly positive, but got ", - top_k); - OPENVINO_ASSERT(!do_sample || (top_p > 0 && top_p <= 1.0f), - "top_p must be a positive float > 0 and < 1, but got ", - top_p); - OPENVINO_ASSERT(!do_sample || temperature > 0, - "Temperature must be a strictly positive float, but got ", - temperature); - - OPENVINO_ASSERT(repetition_penalty > 0, - "Repetition penalty must be a strictly positive float, but got ", - repetition_penalty); - - OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, - "ignore_eos == true, in this case either 'max_new_tokens', or 'max_length' should be defined."); - OPENVINO_ASSERT(eos_token_id != -1 || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX, - "Either 'eos_token_id', or 'max_new_tokens', or 'max_length' should be defined."); + // Sampling strategies + + OPENVINO_ASSERT(num_return_sequences == 1 || (is_multinomial() || is_beam_search()), + "'num_return_sequences' can be more than 1 only in case of beam search or multinomial sampling, but got ", num_return_sequences); + + // generic penalties, but not supported by beam search currently + if (!is_beam_search()) { + OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "'frequence_penalty' penalty must be within [-2.0; 2.0], but got ", frequency_penalty); + OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "'presence_penalty' penalty must be within [-2.0; 2.0], but got ", presence_penalty); + OPENVINO_ASSERT(repetition_penalty > 0.0f, "'repetition_penalty' must be a strictly positive float, but got ", repetition_penalty); + } else { + OPENVINO_ASSERT(frequency_penalty == 0.0f, "'frequency_penalty' is not currently supported by beam search and should be 0.0f, but got ", frequency_penalty); + OPENVINO_ASSERT(presence_penalty == 0.0f, "'presence_penalty' is not currently supported by beam 
search and should be 0.0f, but got ", presence_penalty); + OPENVINO_ASSERT(repetition_penalty == 1.0f, "'repetition_penalty' is not currently supported by beam search and should be 1.0f, but got ", repetition_penalty); + } + + if (is_multinomial()) { + OPENVINO_ASSERT(top_k > 0, "When 'do_sample' is true, top_k must be strictly positive, but got ", top_k); + OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be within (0, 1], but got ", top_p); + OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature); + } else { + // parameters requiring multinomial + OPENVINO_ASSERT(top_k == std::numeric_limits<size_t>::max(), "When 'do_sample' is false, top_k must be max of size_t, but got ", top_k); + OPENVINO_ASSERT(top_p == 1.0f, "When 'do_sample' is false, top_p must be 1.0f, but got ", top_p); + OPENVINO_ASSERT(temperature == 1.0f, "When 'do_sample' is false, temperature must be 1.0f, but got ", temperature); + } + if (is_beam_search()) { - OPENVINO_ASSERT(no_repeat_ngram_size > 0, "no_repeat_ngram_size must be positive"); + OPENVINO_ASSERT(num_beams % num_beam_groups == 0, "'num_beams' (", num_beams, ") should be divisible by 'num_beam_groups' (", num_beam_groups, ")"); + OPENVINO_ASSERT(num_beams >= num_return_sequences, "'num_beams' (", num_beams, ") must be greater than or equal to 'num_return_sequences' (", num_return_sequences, ")"); + + OPENVINO_ASSERT(!do_sample, + "Beam search with sampling is not supported yet. " + "Please either set do_sample=false to use beam search " + "or set num_beams=1 if you wish to use multinomial sampling."); + + OPENVINO_ASSERT(no_repeat_ngram_size > 0, "'no_repeat_ngram_size' must be positive"); if (num_beam_groups > 1) { - OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, it it fallbacks to non-grouped beam search"); + OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, otherwise it falls back to non-grouped beam search"); + } else { + OPENVINO_ASSERT(diversity_penalty == 0.0f, "For beam search 'diversity_penalty' is applicable only when grouped beam search is used, but got 'num_beam_groups' == 1"); } } else { - OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "frequence_penalty penalty must be a [-2; +2]"); - OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "presence_penalty penalty must be a [-2; +2]"); + // parameters requiring beam search + OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups); + OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size); + OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is supported only by beam search sampling and should keep its default value 0.0f, but got ", diversity_penalty, ", while 'num_beams' is set to 1"); + OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is supported only by beam search sampling and should keep its default value 1.0f, but got ", length_penalty, ", while 'num_beams' is set to 1"); } + + // assistant generation + if (is_assisting_generation()) { - if (assistant_confidence_threshold != 0.f) { - OPENVINO_ASSERT(num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` 
and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); - OPENVINO_ASSERT(!is_prompt_lookup(), "Parameters `assistant_confidence_threshold` cannot be used while Prompt Lookup decoding"); - } else { - OPENVINO_ASSERT(num_assistant_tokens > 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); - }; + OPENVINO_ASSERT(!is_beam_search() && num_return_sequences == 1, "Beam search and parallel sampling are not compatible with assistant generation"); + OPENVINO_ASSERT(assistant_confidence_threshold == 0.0f || num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`"); } } diff --git a/src/cpp/src/whisper_generation_config.cpp b/src/cpp/src/whisper_generation_config.cpp index beb663caaf..20207d30cf 100644 --- a/src/cpp/src/whisper_generation_config.cpp +++ b/src/cpp/src/whisper_generation_config.cpp @@ -40,6 +40,8 @@ WhisperGenerationConfig::WhisperGenerationConfig(const std::filesystem::path& js } read_json_param(data, "lang_to_id", lang_to_id); + + validate(); } void WhisperGenerationConfig::set_eos_token_id(int64_t tokenizer_eos_token_id) { @@ -76,6 +78,8 @@ void WhisperGenerationConfig::update_generation_config(const ov::AnyMap& config_ read_anymap_param(config_map, "return_timestamps", return_timestamps); read_anymap_param(config_map, "initial_prompt", initial_prompt); read_anymap_param(config_map, "hotwords", hotwords); + + validate(); } size_t WhisperGenerationConfig::get_max_new_tokens(size_t prompt_length) const { diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 8510a8389f..ea9ad1bbf3 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -367,16 +367,16 @@ class ContinuousBatchingPipeline: def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}) -> None: ... @typing.overload - def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, sampling_params: GenerationConfig) -> GenerationHandle: + def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, generation_config: GenerationConfig) -> GenerationHandle: ... @typing.overload - def add_request(self, request_id: int, prompt: str, sampling_params: GenerationConfig) -> GenerationHandle: + def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle: ... @typing.overload - def generate(self, input_ids: list[openvino._pyopenvino.Tensor], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]: + def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]: ... @typing.overload - def generate(self, prompts: list[str], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]: + def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]: ... def get_config(self) -> GenerationConfig: ... @@ -613,7 +613,9 @@ class GenerationConfig: ... 
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None: ... - def update_generation_config(self, config_map: dict[str, openvino._pyopenvino.OVAny]) -> None: + def update_generation_config(self, **kwargs) -> None: + ... + def validate(self) -> None: ... class GenerationFinishReason: """ @@ -826,7 +828,7 @@ class Image2ImagePipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -927,7 +929,7 @@ class InpaintingPipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -1615,7 +1617,7 @@ class Text2ImagePipeline: ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... - def set_generation_config(self, generation_config: ImageGenerationConfig) -> None: + def set_generation_config(self, config: ImageGenerationConfig) -> None: ... def set_scheduler(self, scheduler: Scheduler) -> None: ... @@ -1865,9 +1867,9 @@ class VLMPipeline: ... def get_tokenizer(self) -> Tokenizer: ... - def set_chat_template(self, new_template: str) -> None: + def set_chat_template(self, chat_template: str) -> None: ... - def set_generation_config(self, new_config: GenerationConfig) -> None: + def set_generation_config(self, config: GenerationConfig) -> None: ... def start_chat(self, system_message: str = '') -> None: ... @@ -2043,6 +2045,8 @@ class WhisperGenerationConfig: ... def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None: ... + def update_generation_config(self, **kwargs) -> None: + ... 
class WhisperPerfMetrics(PerfMetrics): """ diff --git a/src/python/py_continuous_batching_pipeline.cpp b/src/python/py_continuous_batching_pipeline.cpp index be7a72481f..2b48e4d44d 100644 --- a/src/python/py_continuous_batching_pipeline.cpp +++ b/src/python/py_continuous_batching_pipeline.cpp @@ -235,22 +235,22 @@ void init_continuous_batching_pipeline(py::module_& m) { .def("get_tokenizer", &ContinuousBatchingPipeline::get_tokenizer) .def("get_config", &ContinuousBatchingPipeline::get_config) .def("get_metrics", &ContinuousBatchingPipeline::get_metrics) - .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("sampling_params")) - .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("sampling_params")) + .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("generation_config")) + .def("add_request", py::overload_cast(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("generation_config")) .def("step", &ContinuousBatchingPipeline::step) .def("has_non_finished_requests", &ContinuousBatchingPipeline::has_non_finished_requests) .def( "generate", py::overload_cast&, const std::vector&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate), py::arg("input_ids"), - py::arg("sampling_params"), + py::arg("generation_config"), py::arg("streamer") = std::monostate{} ) .def( "generate", py::overload_cast&, const std::vector&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate), py::arg("prompts"), - py::arg("sampling_params"), + py::arg("generation_config"), py::arg("streamer") = std::monostate{} ); } diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp index f49bcf29bd..c7517400f3 100644 --- a/src/python/py_generation_config.cpp +++ b/src/python/py_generation_config.cpp @@ -120,5 +120,10 @@ void init_generation_config(py::module_& m) { .def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding) .def("is_assisting_generation", &GenerationConfig::is_assisting_generation) .def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup) - .def("update_generation_config", static_cast(&ov::genai::GenerationConfig::update_generation_config), py::arg("config_map")); + .def("validate", &GenerationConfig::validate) + .def("update_generation_config", []( + ov::genai::GenerationConfig config, + const py::kwargs& kwargs) { + config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + }); } diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 311f3f3760..754c6b6a38 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -255,8 +255,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). 
kwargs: Text2ImagePipeline properties )") - .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config) - .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::Text2ImagePipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::Text2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::Text2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) @@ -323,8 +323,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Image2ImagePipeline properties )") - .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config) - .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::Image2ImagePipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::Image2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) @@ -386,8 +386,8 @@ void init_image_generation_pipelines(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: InpaintingPipeline properties )") - .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config) - .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("generation_config")) + .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("config")) .def("set_scheduler", &ov::genai::InpaintingPipeline::set_scheduler, py::arg("scheduler")) .def("reshape", &ov::genai::InpaintingPipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale")) .def_static("stable_diffusion", &ov::genai::InpaintingPipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae")) diff --git a/src/python/py_llm_pipeline.cpp b/src/python/py_llm_pipeline.cpp index b1d5136253..7360975a0b 100644 --- a/src/python/py_llm_pipeline.cpp +++ b/src/python/py_llm_pipeline.cpp @@ -53,15 +53,10 @@ py::object call_common_generate( const pyutils::PyBindStreamerVariant& py_streamer, const py::kwargs& kwargs ) { - ov::genai::GenerationConfig default_config; - if (config.has_value()) { - default_config = *config; - } else { - default_config = pipe.get_generation_config(); - } + ov::genai::GenerationConfig default_config = config.has_value() ? 
*config : pipe.get_generation_config(); auto updated_config = pyutils::update_config_from_kwargs(default_config, kwargs); + py::object results; - EncodedInputs tensor_data; StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer); // Call suitable generate overload for each type of input. diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 45a0c46174..34522409ea 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -358,7 +358,10 @@ ov::genai::OptionalGenerationConfig update_config_from_kwargs(const ov::genai::O ov::genai::GenerationConfig res_config; if(config.has_value()) res_config = *config; - res_config.update_generation_config(kwargs_to_any_map(kwargs)); + + if (!kwargs.empty()) + res_config.update_generation_config(kwargs_to_any_map(kwargs)); + return res_config; } diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp index 340cb3da62..b0cfa0a42a 100644 --- a/src/python/py_vlm_pipeline.cpp +++ b/src/python/py_vlm_pipeline.cpp @@ -150,10 +150,10 @@ void init_vlm_pipeline(py::module_& m) { .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "") .def("finish_chat", &ov::genai::VLMPipeline::finish_chat) - .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("new_template")) + .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("chat_template")) .def("get_tokenizer", &ov::genai::VLMPipeline::get_tokenizer) - .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config) - .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("new_config")) + .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config, py::return_value_policy::copy) + .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("config")) .def( "generate", [](ov::genai::VLMPipeline& pipe, diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp index cd42dcf58d..05087eeacb 100644 --- a/src/python/py_whisper_pipeline.cpp +++ b/src/python/py_whisper_pipeline.cpp @@ -187,7 +187,10 @@ OptionalWhisperGenerationConfig update_whisper_config_from_kwargs(const Optional WhisperGenerationConfig res_config; if (config.has_value()) res_config = *config; - res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + + if (!kwargs.empty()) + res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + return res_config; } @@ -295,7 +298,12 @@ void init_whisper_pipeline(py::module_& m) { .def_readwrite("return_timestamps", &WhisperGenerationConfig::return_timestamps) .def_readwrite("initial_prompt", &WhisperGenerationConfig::initial_prompt) .def_readwrite("hotwords", &WhisperGenerationConfig::hotwords) - .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")); + .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")) + .def("update_generation_config", []( + ov::genai::WhisperGenerationConfig config, + const py::kwargs& kwargs) { + config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); + });; py::class_(m, "WhisperRawPerfMetrics", raw_perf_metrics_docstring) .def(py::init<>()) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index 093cd993de..b8c2e625c5 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -25,8 +25,8 @@ file(GLOB src_files "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/sequence_group.cpp" 
"${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/continuous_batching*.cpp" "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/text_callback_streamer.cpp") -add_executable(${TEST_TARGET_NAME} ${tests_src} - block_allocator.cpp) +add_executable(${TEST_TARGET_NAME} ${tests_src}) + target_link_libraries(${TEST_TARGET_NAME} PRIVATE openvino::genai gtest_main) target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src") target_sources(${TEST_TARGET_NAME} PRIVATE ${src_files}) diff --git a/tests/cpp/generate_config.cpp b/tests/cpp/generate_config.cpp deleted file mode 100644 index 974fd499f8..0000000000 --- a/tests/cpp/generate_config.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include -#include -#include "openvino/genai/generation_config.hpp" - - -using namespace ov::genai; - -TEST(GenerationConfigTest, invalid_temperature) { - GenerationConfig config; - config.max_new_tokens = 20; - config.temperature = -0.1; - config.do_sample = true; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_temperature) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.temperature = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_top_p) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.top_p = -0.5; - EXPECT_THROW(config.validate(), ov::Exception); - config.top_p = 1.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_top_p) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.top_p = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_repeatition_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.repetition_penalty = -3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.repetition_penalty = -0.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_repeatition_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.repetition_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.repetition_penalty = 0.1; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_presence_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.presence_penalty = 3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.presence_penalty = -3.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_presence_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.presence_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.presence_penalty = -2.0; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_frequency_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.frequency_penalty = 3.0; - EXPECT_THROW(config.validate(), ov::Exception); - config.frequency_penalty = -3.1; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_frequency_penalty) { - GenerationConfig config; - config.max_new_tokens = 20; - config.do_sample = true; - config.frequency_penalty = 1.8; - EXPECT_NO_THROW(config.validate()); - config.frequency_penalty = -2.0; - 
EXPECT_NO_THROW(config.validate()); -} - -ov::genai::GenerationConfig speculative_decoding_multinomial() { - auto speculative_decoding_multinomial_config = ov::genai::multinomial(); - speculative_decoding_multinomial_config.num_assistant_tokens = 5; - return speculative_decoding_multinomial_config; -} - -ov::genai::GenerationConfig speculative_decoding_greedy() { - auto speculative_decoding_greedy_config = ov::genai::greedy(); - speculative_decoding_greedy_config.assistant_confidence_threshold = 0.4f; - return speculative_decoding_greedy_config; -} - -TEST(GenerationConfigTest, invalid_static_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0.2; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_static_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0; - EXPECT_NO_THROW(config.validate()); -} - -TEST(GenerationConfigTest, invalid_dynamic_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.num_assistant_tokens = 5; - config.assistant_confidence_threshold = 0.5; - EXPECT_THROW(config.validate(), ov::Exception); -} - -TEST(GenerationConfigTest, valid_dynamic_spec_decoding) { - GenerationConfig config = speculative_decoding_greedy(); - config.assistant_confidence_threshold = 0.5; - config.num_assistant_tokens = 0; - EXPECT_NO_THROW(config.validate()); -} diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py index 3fc89cb8a7..9e8e4681f9 100644 --- a/tests/python_tests/ov_genai_test_utils.py +++ b/tests/python_tests/ov_genai_test_utils.py @@ -111,7 +111,7 @@ def read_model(params, **tokenizer_kwargs): path, hf_tokenizer, opt_model, - ov_genai.LLMPipeline(path, 'CPU', **{'ENABLE_MMAP': False}), + ov_genai.LLMPipeline(path, 'CPU', ENABLE_MMAP=False), ) @@ -139,7 +139,7 @@ def model_tmp_path(tmpdir_factory): @pytest.fixture(scope="module") -def model_tokenizers_path_tmp_path(tmpdir_factory): +def model_tokenizers_tmp_path(tmpdir_factory): model_id, path, _, _, _ = read_model(get_models_list()[0]) temp_path = tmpdir_factory.mktemp(model_id.replace('/', '_')) @@ -180,10 +180,15 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path): for config_json, config_name in configs: with (temp_path / config_name).open('w') as f: json.dump(config_json, f) - return ov_genai.LLMPipeline(temp_path, 'CPU') + + ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU') + + for _, config_name in configs: + os.remove(temp_path / config_name) + + return ov_pipe @functools.lru_cache(1) def get_continuous_batching(path): - scheduler_config = ov_genai.SchedulerConfig() - return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', **{"scheduler_config": scheduler_config}) + return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig()) diff --git a/tests/python_tests/test_generation_config.py b/tests/python_tests/test_generation_config.py new file mode 100644 index 0000000000..28477c58a3 --- /dev/null +++ b/tests/python_tests/test_generation_config.py @@ -0,0 +1,96 @@ +# Copyright (C) 2023-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino_genai import GenerationConfig +import pytest + +configs = [ + # stop conditions + dict(max_new_tokens=12), + dict(max_length=12), + dict(stop_token_ids={2}), + dict(eos_token_id=1, stop_token_ids={1}), + 
dict(stop_strings={"a", "b"}), + dict(ignore_eos=True, max_new_tokens=10), + dict(ignore_eos=True, max_length=10), + dict(max_new_tokens=0, echo=True), + dict(min_new_tokens=1, max_new_tokens=1), + # multinomial + dict(max_new_tokens=1, do_sample=True, num_return_sequences=2), + dict(max_new_tokens=1, do_sample=True, top_k=1), + dict(max_new_tokens=1, do_sample=True, top_p=0.5), + dict(max_new_tokens=1, do_sample=True, temperature=0.5), + # beam search + dict(max_new_tokens=1, num_beams=2), + dict(max_new_tokens=1, num_beams=2, num_return_sequences=1), + dict(max_new_tokens=1, num_beams=2, num_return_sequences=2), + dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=1.0), + dict(max_new_tokens=1, num_beams=4, length_penalty=1.0), + dict(max_new_tokens=1, num_beams=4, no_repeat_ngram_size=2), + # assistant generation + dict(max_new_tokens=1, assistant_confidence_threshold=0.5), + dict(max_new_tokens=1, num_assistant_tokens=2), + dict(max_new_tokens=1, num_assistant_tokens=2, max_ngram_size=2), # prompt lookup +] +@pytest.mark.parametrize("generation_config", configs) +@pytest.mark.precommit +@pytest.mark.nightly +def test_valid_configs(generation_config): + config = GenerationConfig(**generation_config) + config.update_generation_config(**generation_config) + config.validate() + + +invalid_configs = [ + dict(num_return_sequences=0), # no reason to run with empty output + dict(num_return_sequences=2), # beam search or multinomial sampling is required + # stop conditions + dict(), + dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id' + dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id' + dict(ignore_eos=True), # no 'max_new_tokens', no 'max_length' with 'ignore_eos' + dict(stop_token_ids={-1}), # value in 'stop_token_ids' must be non-negative + dict(max_new_tokens=0), # 'max_new_tokens' cannot be 0, unless 'echo' is True + dict(max_new_tokens=10, min_new_tokens=20), # 'max_new_tokens' must be >= 'min_new_tokens' + # penalties + dict(max_new_tokens=1, repetition_penalty=-1.0), # invalid repetition_penalty + dict(max_new_tokens=1, presence_penalty=-3.0), # invalid presence_penalty + dict(max_new_tokens=1, frequency_penalty=3.0), # invalid frequency_penalty + # multinomial sampling + dict(max_new_tokens=1, do_sample=True, top_k=-1), # 'top_k' must be > 0 when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, top_p=1.1), # 'top_p' must be within (0, 1] when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, top_p=0), # 'top_p' must be within (0, 1] when 'do_sample' is True + dict(max_new_tokens=1, do_sample=True, temperature=-1.0), # invalid temperature + # parameters requiring multinomial sampling + dict(max_new_tokens=1, top_k=1), # requires do_sample=True + dict(max_new_tokens=1, top_p=0.5), # requires do_sample=True + dict(max_new_tokens=1, temperature=2.0), # requires do_sample=True + # beam search + dict(max_new_tokens=1, num_beams=2, num_return_sequences=3), # 'num_beams' must be >= 'num_return_sequences' + dict(max_new_tokens=1, num_beams=3, num_beam_groups=2), # 'num_beams' must be divisible by 'num_beam_groups' + dict(max_new_tokens=1, num_beams=3, do_sample=True), # beam sampling is not supported + dict(max_new_tokens=1, num_beams=3, no_repeat_ngram_size=0), # invalid 'no_repeat_ngram_size' + dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=0.0), # 'diversity_penalty' cannot be zero (the default) for grouped beam search + dict(max_new_tokens=1, num_beams=4, diversity_penalty=1.0), # 
'diversity_penalty' is used only for grouped beam search + dict(max_new_tokens=1, num_beams=2, frequency_penalty=1.0), # 'frequency_penalty' is not supported by beam search + dict(max_new_tokens=1, num_beams=2, presence_penalty=1.0), # 'presence_penalty' is not supported by beam search + dict(max_new_tokens=1, num_beams=2, repetition_penalty=0.0), # 'repetition_penalty' is not supported by beam search + # parameters requiring beam search + dict(max_new_tokens=1, num_beam_groups=2), # requiring beam search + dict(max_new_tokens=1, no_repeat_ngram_size=2), # requiring beam search + dict(max_new_tokens=1, diversity_penalty=1.0), # requiring beam search + dict(max_new_tokens=1, length_penalty=2), # requiring beam search + # assistant generation + dict(max_new_tokens=1, num_assistant_tokens=2, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group + dict(max_new_tokens=1, assistant_confidence_threshold=1.0, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group + dict(max_new_tokens=1, num_assistant_tokens=2, num_beams=2), # beam search is not compatible with assistant generation + dict(max_new_tokens=1, assistant_confidence_threshold=1.0, num_assistant_tokens=2) # 'assistant_confidence_threshold' and 'num_assistant_tokens' are mutually exclusive + # TODO: add tests for invalid properties +] +@pytest.mark.parametrize("generation_config", invalid_configs) +@pytest.mark.precommit +@pytest.mark.nightly +def test_invalid_generation_configs_throws(generation_config): + config = GenerationConfig() + with pytest.raises(RuntimeError): + config.update_generation_config(**generation_config) diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py index 9f00996a58..e0def3b433 100644 --- a/tests/python_tests/test_llm_pipeline.py +++ b/tests/python_tests/test_llm_pipeline.py @@ -18,7 +18,6 @@ get_chat_models_list, model_tmp_path, STOP_CRITERIA_MAP, - get_continuous_batching, ) @@ -492,30 +491,9 @@ def test_operator_with_streamer_kwargs_batch_throws(): ov_pipe('', num_beams=2, streamer=printer) # -# Tests on generation configs (invalid cases and handling within LLMPipeline) +# Tests on generation configs handling # -invalid_configs = [ - dict(num_beam_groups=3, num_beams=15, do_sample=True), - # TODO: CVS-158682 eos_token_id is still read from tiny-random-phi3 and we cannot modify RTInfo in tests - # dict(do_sample=True), # no eos_token_id no max_new_tokens, no max_len - dict(eos_token_id=42, ignore_eos=True), # no max_new_tokens, no max_len with ignore_eos - dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty - dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp - dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p - dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k -] -@pytest.mark.parametrize("generation_config", invalid_configs) -@pytest.mark.precommit -@pytest.mark.nightly -def test_invalid_generation_configs_throws(model_tmp_path, generation_config): - model_id, temp_path = model_tmp_path - config_json = {} - ov_pipe = load_genai_pipe_with_configs([(config_json, "config.json")], temp_path) - with pytest.raises(RuntimeError): - ov_pipe.generate('blah blah', **generation_config) - - @pytest.mark.precommit @pytest.mark.nightly def 
test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): @@ -529,28 +507,14 @@ def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path): assert 37 == ov_pipe.get_generation_config().eos_token_id -invalid_py_configs = [ - dict(num_beam_groups=3, num_beams=15, do_sample=True), - # TODO: Currently unexpected params do not cause exceptions. Need to implement it in c++ and return this test - # dict(unexisting_key_name=True), # no eos_token_id no max_new_tokens, no max_len - dict(eos_token_id=42, ignore_eos=True), # no max_new_tokens, no max_len with ignore_eos - dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty - dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp - dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p - dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k -] @pytest.mark.precommit @pytest.mark.nightly -@pytest.mark.parametrize("generation_config", invalid_py_configs) -def test_python_generation_config_validation_throws(model_tmp_path, generation_config): - model_id, temp_path = model_tmp_path - ov_pipe = load_genai_pipe_with_configs([({"eos_token_id": 37}, "config.json")], temp_path) - - # 'unexisting_key_name' key validity is checked in pybind and ValueError will be returned - # instead of RuntimeError, which is returned when GenerationConfig values are validated - return_exception_type = ValueError if 'unexisting_key_name' in generation_config else RuntimeError - with pytest.raises(return_exception_type): - ov_pipe.set_generation_config(ov_genai.GenerationConfig(**generation_config)) +def test_pipeline_validates_generation_config(): + model_id, path = 'katuni4ka/tiny-random-phi3', Path('tiny-random-phi3') + ov_pipe = read_model((model_id, path))[4] + invalid_generation_config = dict(num_beam_groups=3, num_beams=15, do_sample=True) # beam sample is not supported + with pytest.raises(RuntimeError): + ov_pipe.generate("dummy prompt", **invalid_generation_config) # # Work with Unicode in Python API diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index 0c2a106d50..6c27edcd71 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -265,7 +265,7 @@ def test_load_special_tokens_from_special_tokens_map_json(model_tmp_path): @pytest.mark.precommit @pytest.mark.nightly @pytest.mark.skip(reason="CVS-158682 - RTInfo is not modified in tests for unknown reasons") -def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tmp_path): +def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_tmp_path): # special_tokens_map is not available # but tokenize_config.json exists # will load both string and integer representations @@ -280,7 +280,7 @@ def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tm "eos_token": "", } - tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_path_tmp_path[1]) + tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_tmp_path[1]) assert tok.get_pad_token() == tok_config_json['pad_token'] assert tok.get_bos_token() == tok_config_json['bos_token'] assert tok.get_eos_token() == tok_config_json['eos_token']
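Reviewer note: below is a minimal, illustrative usage sketch of the Python API exercised by this patch. It mirrors the new tests in tests/python_tests/test_generation_config.py and assumes an openvino_genai build that already includes these bindings; the parameter values are arbitrary examples, not recommendations.

    from openvino_genai import GenerationConfig

    # Valid multinomial-sampling config: keyword arguments are applied through
    # update_generation_config(), which after this patch ends with an internal validate() call.
    config = GenerationConfig(max_new_tokens=32, do_sample=True, top_k=50, top_p=0.9, temperature=0.7)
    config.validate()  # does not raise for a consistent config

    # Invalid combination: beam search together with do_sample=True is rejected,
    # matching the dict(max_new_tokens=1, num_beams=3, do_sample=True) case in the new tests.
    bad = GenerationConfig()  # a default-constructed config is not validated until it is updated
    try:
        bad.update_generation_config(max_new_tokens=1, num_beams=3, do_sample=True)
    except RuntimeError as err:
        print(f"rejected as expected: {err}")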