Tests for generation config

ilya-lavrenov committed Dec 27, 2024
1 parent 82b44fa commit 987ecd7
Showing 17 changed files with 328 additions and 323 deletions.
28 changes: 18 additions & 10 deletions src/cpp/include/openvino/genai/generation_config.hpp
@@ -93,15 +93,22 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
bool echo = false;
size_t logprobs = 0;

+ // EOS special token
+ int64_t eos_token_id = -1;
std::set<std::string> stop_strings;
// Default setting in vLLM (and OpenAI API) is not to include stop string in the output
bool include_stop_str_in_output = false;
std::set<int64_t> stop_token_ids;

+ // penalties (not used in beam search)
+ float repetition_penalty = 1.0f;
+ float presence_penalty = 0.0;
+ float frequency_penalty = 0.0f;

// Beam search specific
size_t num_beam_groups = 1;
size_t num_beams = 1;
- float diversity_penalty = 1.0f;
+ float diversity_penalty = 0.0f;
float length_penalty = 1.0f;
size_t num_return_sequences = 1;
size_t no_repeat_ngram_size = std::numeric_limits<size_t>::max();
@@ -112,19 +119,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
float top_p = 1.0f;
size_t top_k = std::numeric_limits<size_t>::max();
bool do_sample = false;
- float repetition_penalty = 1.0f;
- float presence_penalty = 0.0;
- float frequency_penalty = 0.0f;
size_t rng_seed = 0;

// Assisting generation parameters
float assistant_confidence_threshold = 0.f;
size_t num_assistant_tokens = 0;
size_t max_ngram_size = 0;

- // EOS special token
- int64_t eos_token_id = -1;

std::optional<AdapterConfig> adapters;

/** @brief sets eos_token_id to tokenizer_eos_token_id if eos_token_id is less than 0.
@@ -136,11 +137,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
bool is_greedy_decoding() const;
bool is_beam_search() const;
bool is_multinomial() const;
OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2025.0.0 release")
bool is_speculative_decoding() const;
bool is_assisting_generation() const;
bool is_prompt_lookup() const;
- void update_generation_config(const ov::AnyMap& config_map);

+ OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
+ bool is_speculative_decoding() const;

+ void update_generation_config(const ov::AnyMap& properties);

template <typename... Properties>
util::EnableIfAllStringAny<void, Properties...> update_generation_config(Properties&&... properties) {
@@ -187,8 +190,13 @@ static constexpr ov::Property<float> assistant_confidence_threshold{"assistant_c
static constexpr ov::Property<size_t> num_assistant_tokens{"num_assistant_tokens"};

// Predefined Configs

+ OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
OPENVINO_GENAI_EXPORTS GenerationConfig beam_search();
+ OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
OPENVINO_GENAI_EXPORTS GenerationConfig greedy();
+ OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
OPENVINO_GENAI_EXPORTS GenerationConfig multinomial();

} // namespace genai
} // namespace ov
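Taken together, the header change regroups GenerationConfig fields by function (stop criteria, penalties, beam search, sampling, assisted generation), changes the diversity_penalty default from 1.0f to 0.0f, and deprecates the predefined beam_search(), greedy() and multinomial() configs in favor of individual parameters. A minimal Python sketch of the now-preferred style, assuming a model exported to model_dir (path, prompt and values are illustrative):

```python
import openvino_genai

# Build a beam-search setup from individual fields instead of the
# deprecated predefined beam_search() config.
config = openvino_genai.GenerationConfig()
config.max_new_tokens = 64
config.num_beams = 4              # num_beams > 1 selects beam search
config.num_beam_groups = 2
config.diversity_penalty = 1.0    # set explicitly; the new default is 0.0
config.num_return_sequences = 2

pipe = openvino_genai.LLMPipeline("model_dir", "CPU")
print(pipe.generate("The weather today is", config))
```

Note that group beam search setups which relied on the old implicit diversity_penalty of 1.0 must now set it explicitly.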
224 changes: 140 additions & 84 deletions src/cpp/src/generation_config.cpp

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/cpp/src/whisper_generation_config.cpp
@@ -40,6 +40,8 @@ WhisperGenerationConfig::WhisperGenerationConfig(const std::filesystem::path& js
}

read_json_param(data, "lang_to_id", lang_to_id);

+ validate();
}

void WhisperGenerationConfig::set_eos_token_id(int64_t tokenizer_eos_token_id) {
@@ -76,6 +78,8 @@ void WhisperGenerationConfig::update_generation_config(const ov::AnyMap& config_
read_anymap_param(config_map, "return_timestamps", return_timestamps);
read_anymap_param(config_map, "initial_prompt", initial_prompt);
read_anymap_param(config_map, "hotwords", hotwords);

+ validate();
}

size_t WhisperGenerationConfig::get_max_new_tokens(size_t prompt_length) const {
24 changes: 14 additions & 10 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -367,16 +367,16 @@ class ContinuousBatchingPipeline:
def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}) -> None:
...
@typing.overload
- def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, sampling_params: GenerationConfig) -> GenerationHandle:
+ def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, generation_config: GenerationConfig) -> GenerationHandle:
...
@typing.overload
- def add_request(self, request_id: int, prompt: str, sampling_params: GenerationConfig) -> GenerationHandle:
+ def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle:
...
@typing.overload
- def generate(self, input_ids: list[openvino._pyopenvino.Tensor], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
+ def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
...
@typing.overload
- def generate(self, prompts: list[str], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]:
+ def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]:
...
def get_config(self) -> GenerationConfig:
...
@@ -613,7 +613,9 @@ class GenerationConfig:
...
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
...
- def update_generation_config(self, config_map: dict[str, openvino._pyopenvino.OVAny]) -> None:
+ def update_generation_config(self, **kwargs) -> None:
...
+ def validate(self) -> None:
...
class GenerationFinishReason:
"""
@@ -826,7 +828,7 @@ class Image2ImagePipeline:
...
def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
...
- def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+ def set_generation_config(self, config: ImageGenerationConfig) -> None:
...
def set_scheduler(self, scheduler: Scheduler) -> None:
...
@@ -927,7 +929,7 @@ class InpaintingPipeline:
...
def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
...
- def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+ def set_generation_config(self, config: ImageGenerationConfig) -> None:
...
def set_scheduler(self, scheduler: Scheduler) -> None:
...
@@ -1615,7 +1617,7 @@ class Text2ImagePipeline:
...
def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
...
- def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+ def set_generation_config(self, config: ImageGenerationConfig) -> None:
...
def set_scheduler(self, scheduler: Scheduler) -> None:
...
@@ -1865,9 +1867,9 @@ class VLMPipeline:
...
def get_tokenizer(self) -> Tokenizer:
...
- def set_chat_template(self, new_template: str) -> None:
+ def set_chat_template(self, chat_template: str) -> None:
...
- def set_generation_config(self, new_config: GenerationConfig) -> None:
+ def set_generation_config(self, config: GenerationConfig) -> None:
...
def start_chat(self, system_message: str = '') -> None:
...
@@ -2043,6 +2045,8 @@ class WhisperGenerationConfig:
...
def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
...
+ def update_generation_config(self, **kwargs) -> None:
+ ...
class WhisperPerfMetrics(PerfMetrics):
"""
8 changes: 4 additions & 4 deletions src/python/py_continuous_batching_pipeline.cpp
@@ -235,22 +235,22 @@ void init_continuous_batching_pipeline(py::module_& m) {
.def("get_tokenizer", &ContinuousBatchingPipeline::get_tokenizer)
.def("get_config", &ContinuousBatchingPipeline::get_config)
.def("get_metrics", &ContinuousBatchingPipeline::get_metrics)
.def("add_request", py::overload_cast<uint64_t, const ov::Tensor&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("sampling_params"))
.def("add_request", py::overload_cast<uint64_t, const std::string&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("sampling_params"))
.def("add_request", py::overload_cast<uint64_t, const ov::Tensor&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("generation_config"))
.def("add_request", py::overload_cast<uint64_t, const std::string&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("generation_config"))
.def("step", &ContinuousBatchingPipeline::step)
.def("has_non_finished_requests", &ContinuousBatchingPipeline::has_non_finished_requests)
.def(
"generate",
py::overload_cast<const std::vector<ov::Tensor>&, const std::vector<ov::genai::GenerationConfig>&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate),
py::arg("input_ids"),
py::arg("sampling_params"),
py::arg("generation_config"),
py::arg("streamer") = std::monostate{}
)
.def(
"generate",
py::overload_cast<const std::vector<std::string>&, const std::vector<ov::genai::GenerationConfig>&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate),
py::arg("prompts"),
py::arg("sampling_params"),
py::arg("generation_config"),
py::arg("streamer") = std::monostate{}
);
}
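For downstream callers, the visible effect of this rename is that keyword arguments spelled `sampling_params=` stop working on add_request() and generate(). A short sketch, assuming the path-plus-scheduler constructor overload and an exported model in model_dir (paths and prompt are illustrative):

```python
import openvino_genai

scheduler_config = openvino_genai.SchedulerConfig()
pipe = openvino_genai.ContinuousBatchingPipeline("model_dir", scheduler_config, "CPU")

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 32

# Previously: pipe.generate(prompts, sampling_params=[config]); now a TypeError.
results = pipe.generate(["Hello, OpenVINO!"], generation_config=[config])
print(results[0].m_generation_ids)
```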
7 changes: 6 additions & 1 deletion src/python/py_generation_config.cpp
@@ -120,5 +120,10 @@ void init_generation_config(py::module_& m) {
.def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding)
.def("is_assisting_generation", &GenerationConfig::is_assisting_generation)
.def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup)
.def("update_generation_config", static_cast<void (GenerationConfig::*)(const ov::AnyMap&)>(&ov::genai::GenerationConfig::update_generation_config), py::arg("config_map"));
.def("validate", &GenerationConfig::validate)
.def("update_generation_config", [](
ov::genai::GenerationConfig config,
const py::kwargs& kwargs) {
config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
});
}
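On the Python side this exposes validate() directly and turns update_generation_config into a plain kwargs call. A minimal sketch, assuming validation reports inconsistent settings as an exception:

```python
import openvino_genai

config = openvino_genai.GenerationConfig()

# Merge keyword arguments into the config in one call...
config.update_generation_config(max_new_tokens=128, do_sample=True, top_p=0.9)

# ...and check the combined settings explicitly.
config.validate()  # assumed to raise (e.g. RuntimeError) on an invalid combination
```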
12 changes: 6 additions & 6 deletions src/python/py_image_generation_pipelines.cpp
@@ -255,8 +255,8 @@ void init_image_generation_pipelines(py::module_& m) {
device (str): Device to run the model on (e.g., CPU, GPU).
kwargs: Text2ImagePipeline properties
)")
.def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config)
.def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("generation_config"))
.def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config, py::return_value_policy::copy)
.def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("config"))
.def("set_scheduler", &ov::genai::Text2ImagePipeline::set_scheduler, py::arg("scheduler"))
.def("reshape", &ov::genai::Text2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
.def_static("stable_diffusion", &ov::genai::Text2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
@@ -323,8 +323,8 @@ void init_image_generation_pipelines(py::module_& m) {
device (str): Device to run the model on (e.g., CPU, GPU).
kwargs: Image2ImagePipeline properties
)")
.def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config)
.def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("generation_config"))
.def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config, py::return_value_policy::copy)
.def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("config"))
.def("set_scheduler", &ov::genai::Image2ImagePipeline::set_scheduler, py::arg("scheduler"))
.def("reshape", &ov::genai::Image2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
.def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
@@ -386,8 +386,8 @@ void init_image_generation_pipelines(py::module_& m) {
device (str): Device to run the model on (e.g., CPU, GPU).
kwargs: InpaintingPipeline properties
)")
.def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config)
.def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("generation_config"))
.def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config, py::return_value_policy::copy)
.def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("config"))
.def("set_scheduler", &ov::genai::InpaintingPipeline::set_scheduler, py::arg("scheduler"))
.def("reshape", &ov::genai::InpaintingPipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
.def_static("stable_diffusion", &ov::genai::InpaintingPipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
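With py::return_value_policy::copy, get_generation_config() now hands back a detached copy in all three image pipelines, so edits must be written back through set_generation_config(config). A sketch assuming a Stable Diffusion export in sd_model_dir (path and values are illustrative):

```python
import openvino_genai

pipe = openvino_genai.Text2ImagePipeline("sd_model_dir", "CPU")

config = pipe.get_generation_config()  # a copy, not a live reference
config.num_images_per_prompt = 2
config.guidance_scale = 7.5
pipe.set_generation_config(config)     # write the modified copy back

image_tensor = pipe.generate("a photo of a red fox", width=512, height=512)
```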
9 changes: 2 additions & 7 deletions src/python/py_llm_pipeline.cpp
@@ -53,15 +53,10 @@ py::object call_common_generate(
const pyutils::PyBindStreamerVariant& py_streamer,
const py::kwargs& kwargs
) {
- ov::genai::GenerationConfig default_config;
- if (config.has_value()) {
- default_config = *config;
- } else {
- default_config = pipe.get_generation_config();
- }
+ ov::genai::GenerationConfig default_config = config.has_value() ? *config : pipe.get_generation_config();
auto updated_config = pyutils::update_config_from_kwargs(default_config, kwargs);

py::object results;
EncodedInputs tensor_data;
StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer);

// Call suitable generate overload for each type of input.
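The simplification above keeps the behavior: kwargs passed to generate() are merged into either the explicit config argument or the pipeline's stored config. A usage sketch (model path is an illustrative assumption):

```python
import openvino_genai

pipe = openvino_genai.LLMPipeline("model_dir", "CPU")

# No explicit GenerationConfig: the kwargs below update a copy of the
# pipeline's stored config before generation.
print(pipe.generate("Explain beam search in one sentence.", max_new_tokens=48))
```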
5 changes: 4 additions & 1 deletion src/python/py_utils.cpp
@@ -358,7 +358,10 @@ ov::genai::OptionalGenerationConfig update_config_from_kwargs(const ov::genai::O
ov::genai::GenerationConfig res_config;
if(config.has_value())
res_config = *config;
- res_config.update_generation_config(kwargs_to_any_map(kwargs));

+ if (!kwargs.empty())
+     res_config.update_generation_config(kwargs_to_any_map(kwargs));

return res_config;
}

6 changes: 3 additions & 3 deletions src/python/py_vlm_pipeline.cpp
@@ -150,10 +150,10 @@ void init_vlm_pipeline(py::module_& m) {

.def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
.def("finish_chat", &ov::genai::VLMPipeline::finish_chat)
.def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("new_template"))
.def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("chat_template"))
.def("get_tokenizer", &ov::genai::VLMPipeline::get_tokenizer)
.def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config)
.def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("new_config"))
.def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config, py::return_value_policy::copy)
.def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("config"))
.def(
"generate",
[](ov::genai::VLMPipeline& pipe,
12 changes: 10 additions & 2 deletions src/python/py_whisper_pipeline.cpp
@@ -187,7 +187,10 @@ OptionalWhisperGenerationConfig update_whisper_config_from_kwargs(const Optional
WhisperGenerationConfig res_config;
if (config.has_value())
res_config = *config;
- res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));

+ if (!kwargs.empty())
+     res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));

return res_config;
}

@@ -295,7 +298,12 @@ void init_whisper_pipeline(py::module_& m) {
.def_readwrite("return_timestamps", &WhisperGenerationConfig::return_timestamps)
.def_readwrite("initial_prompt", &WhisperGenerationConfig::initial_prompt)
.def_readwrite("hotwords", &WhisperGenerationConfig::hotwords)
.def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"));
.def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
.def("update_generation_config", [](
ov::genai::WhisperGenerationConfig config,
const py::kwargs& kwargs) {
config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
});;

py::class_<WhisperRawPerfMetrics>(m, "WhisperRawPerfMetrics", raw_perf_metrics_docstring)
.def(py::init<>())
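WhisperGenerationConfig gets the same kwargs-based update_generation_config, and since validate() is now called inside the C++ update (see whisper_generation_config.cpp above), inconsistent settings surface at update time. A sketch assuming a Whisper export in whisper_dir and 16 kHz mono float samples:

```python
import openvino_genai

pipe = openvino_genai.WhisperPipeline("whisper_dir", "CPU")

config = pipe.get_generation_config()
config.update_generation_config(return_timestamps=True, max_new_tokens=100)

raw_speech = [0.0] * 16000  # one second of silence, a stand-in for real audio
result = pipe.generate(raw_speech, config)
print(result.texts[0])
```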
4 changes: 2 additions & 2 deletions tests/cpp/CMakeLists.txt
@@ -25,8 +25,8 @@ file(GLOB src_files "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/sequence_group.cpp"
"${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/continuous_batching*.cpp"
"${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/text_callback_streamer.cpp")

- add_executable(${TEST_TARGET_NAME} ${tests_src}
- block_allocator.cpp)
+ add_executable(${TEST_TARGET_NAME} ${tests_src})

target_link_libraries(${TEST_TARGET_NAME} PRIVATE openvino::genai gtest_main)
target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src")
target_sources(${TEST_TARGET_NAME} PRIVATE ${src_files})