From ac7d39ffe66b04a52df69ad7950b4d7963d7f681 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 21 Nov 2024 22:08:05 +0100 Subject: [PATCH 1/6] parametrize decode in Tokenizers --- src/cpp/include/openvino/genai/tokenizer.hpp | 45 +++++++++- .../src/make_combine_segments_stateful.cpp | 44 ++++++++++ .../src/make_combine_segments_stateful.hpp | 37 +++++++++ src/cpp/src/tokenizer.cpp | 82 +++++++++++-------- src/python/py_tokenizer.cpp | 24 ++++-- tests/python_tests/test_chat_generate_api.py | 21 +++++ 6 files changed, 208 insertions(+), 45 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index bcb8da68a3..e90e9c80de 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens vector storing tokens + * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} * @return sequence string */ - std::string decode(std::vector tokens); - + std::string decode(std::vector tokens, const ov::AnyMap& detokenization_params = {}); + + /** + * @brief decode sequence of tokens + * @param tokens vector storing tokens + * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) + * @return sequence string + */ + template + util::EnableIfAllStringAny decode(std::vector& tokens, Properties&&... properties) { + return decode(tokens, AnyMap{std::forward(properties)...}); + } + /** * @brief decode tokens. * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] + * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} + * @return vector of std::string, with size = batch_size + */ + std::vector decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {}); + + /** + * @brief decode sequence of tokens + * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] + * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) * @return vector of std::string, with size = batch_size */ - std::vector decode(ov::Tensor tokens); + template + util::EnableIfAllStringAny, Properties...> decode(ov::Tensor tokens, Properties&&... properties) { + return decode(tokens, AnyMap{std::forward(properties)...}); + } /** * @brief batched decoding of tokens. * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size + * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} * @return vector of std::string, with size equal to batch_size */ - std::vector decode(std::vector> tokens); + std::vector decode(std::vector> tokens, const ov::AnyMap& detokenization_params = {}); + + /** + * @brief decode sequence of tokens + * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] + * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) + * @return vector of std::string, with size = batch_size + */ + template + util::EnableIfAllStringAny, Properties...> decode(std::vector> tokens, Properties&&... properties) { + return decode(tokens, AnyMap{std::forward(properties)...}); + } /** * @brief Embeds input prompts with special tags for a chat scenario. @@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { }; static constexpr ov::Property add_special_tokens{"add_special_tokens"}; +static constexpr ov::Property skip_special_tokens{"skip_special_tokens"}; } // namespace genai } // namespace ov diff --git a/src/cpp/src/make_combine_segments_stateful.cpp b/src/cpp/src/make_combine_segments_stateful.cpp index 2285c172dc..26c58b8fca 100644 --- a/src/cpp/src/make_combine_segments_stateful.cpp +++ b/src/cpp/src/make_combine_segments_stateful.cpp @@ -4,6 +4,8 @@ #include "make_combine_segments_stateful.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/select.hpp" +#include "openvino/op/slice.hpp" +#include "openvino/op/multiply.hpp" #include "openvino/op/read_value.hpp" #include "openvino/op/assign.hpp" @@ -44,3 +46,45 @@ bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr model->add_variables({variable}); return true; } + +bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr& model) { + + std::shared_ptr vocab_decoder_node; + for (auto node: model->get_ordered_ops()) { + if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) { + vocab_decoder_node = node; + } + } + auto val = vocab_decoder_node->input_value(4); + auto val_type = vocab_decoder_node->input_value(4).get_element_type(); + + if (!vocab_decoder_node || !vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) { + return false; + } + + std::shared_ptr skip_tokens_const = std::dynamic_pointer_cast(vocab_decoder_node->get_input_node_shared_ptr(4)); + if (!skip_tokens_const) { + return false; + } + + + auto start_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{0}); + auto int_max_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits::max()}); + auto one_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{1}); + + // By default, INT_MAX will multiply with 1 and all skip_tokens will be selected. + op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID}; + auto variable = std::make_shared(var_info); + auto read_value = std::make_shared(one_const, variable); + // if flag is set, then slice up to the int_max which means skip all tokens. + auto stop = std::make_shared(int_max_const, read_value); + + std::shared_ptr slice_node = std::make_shared(skip_tokens_const, start_const, stop, one_const); + + vocab_decoder_node->input(4).replace_source_output(slice_node->output(0)); + + auto assign = std::make_shared(read_value, variable); + model->add_sinks({assign}); + model->add_variables({variable}); + return true; +} \ No newline at end of file diff --git a/src/cpp/src/make_combine_segments_stateful.hpp b/src/cpp/src/make_combine_segments_stateful.hpp index 6365497140..307c6199c8 100644 --- a/src/cpp/src/make_combine_segments_stateful.hpp +++ b/src/cpp/src/make_combine_segments_stateful.hpp @@ -38,7 +38,44 @@ class MakeCombineSegmentsSatateful : public ov::pass::ModelPass { bool run_on_model(const std::shared_ptr& model) override; }; +/** + * @brief This pass modifies tokenizer ov::Model so that special tokens adding will be + * enabled or disabled depending on stateful value. + * + * +--------------+ + * | DefaultMode | + * +--------------+ + * | + * v + * +------------+ +-----------+ + * | ReadValue | | INT_MAX | + * +------------+ +-----------+ + * \ / + * \ / + * v v + * +--------------------+ +---------+ +---------+ + * | Const with tokens | | start | | Mul | + * +--------------------+ +---------+ +---------+ + * \ | / + * \ | / + * v v v + * +-----------------+ + * | Slice | + * +-----------------+ + * | + * v + * +----------------------+ + * | VocabDecoder | + * +----------------------+ +**/ +class MakeVocabDecoderSatateful : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("MakeVocabDecoderSatateful", "0"); + bool run_on_model(const std::shared_ptr& model) override; +}; + const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens"; +const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens"; } // namespace genai } // namespace ov diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index f52417a94e..78b94915dd 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -10,6 +10,7 @@ #include #include +#include "openvino/pass/visualize_tree.hpp" #include "openvino/pass/manager.hpp" #include "openvino/runtime/core.hpp" #include "openvino/genai/tokenizer.hpp" @@ -73,7 +74,8 @@ class Tokenizer::TokenizerImpl { std::unique_ptr> m_ireq_queue_detokenizer; // To change the adding special tokens mode we use a statefull subgraph, // this flag holds the current state value of the CompiledModel. - bool m_add_special_tokens = true; + bool m_add_special_tokens = true; + bool m_skip_special_tokens = false; bool m_older_than_24_5 = false; int64_t m_pad_token_id = -1; @@ -86,11 +88,16 @@ class Tokenizer::TokenizerImpl { std::string m_chat_template = {}; - void set_state_if_necessary(CircularBufferQueueElementGuard& infer_request_guard, bool add_special_tokens) { + void set_state_if_necessary(CircularBufferQueueElementGuard& infer_request_guard, const ov::AnyMap& params) { + bool add_special_tokens_flag = true; + bool skip_special_tokens_flag = false; + ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag); + ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag); + // If user requested add_special_tokens mode different from the current one, // need to set state variable. // If requested mode matches the stored state set, then don't touch states. - if (add_special_tokens == m_add_special_tokens) { + if (add_special_tokens_flag == m_add_special_tokens && skip_special_tokens_flag == m_skip_special_tokens) { return; } if (m_older_than_24_5) { @@ -100,19 +107,23 @@ class Tokenizer::TokenizerImpl { return; } - // auto states = m_ireq_queue_tokenizer->get(0).query_state(); + // add_special_tokens is managed by Select op with a bool input. ov::Tensor add_special_tensor = ov::Tensor(ov::element::boolean, {}); - *add_special_tensor.data() = add_special_tokens; + *add_special_tensor.data() = add_special_tokens_flag; + + // skip_special_tokens is managed by multiplication with a number, therefore i32. + ov::Tensor skip_special_tensor = ov::Tensor(ov::element::i32, {1}); + *skip_special_tensor.data() = skip_special_tokens_flag; for (auto& state: infer_request_guard.get().query_state()) { - if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) == std::string::npos) { - // It's not add_special_tokens flag state. - continue; + if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) != std::string::npos) { + state.set_state(add_special_tensor); + } else if (state.get_name().find(ov::genai::SKIP_SPECIAL_TOKENS_VAR_ID) != std::string::npos) { + state.set_state(skip_special_tensor); } - state.set_state(add_special_tensor); - break; } - m_add_special_tokens = add_special_tokens; + m_add_special_tokens = add_special_tokens_flag; + m_skip_special_tokens = skip_special_tokens_flag; } TokenizerImpl() = default; @@ -135,15 +146,25 @@ class Tokenizer::TokenizerImpl { auto device = "CPU"; // currently openvino_tokenizer supports only CPU auto ov_tokenizer = core.read_model(tokenizer_path / "openvino_tokenizer.xml"); + std::shared_ptr ov_detokenizer; + if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) { + ov_detokenizer = core.read_model(tokenizer_path / "openvino_detokenizer.xml"); + } m_older_than_24_5 = ov_tokenizer->get_rt_info().count("openvino_tokenizers_version") != 1; - ov::pass::Manager manager; - manager.register_pass(); - manager.run_passes(ov_tokenizer); + ov::pass::Manager manager_tok; + manager_tok.register_pass(); + manager_tok.run_passes(ov_tokenizer); + + ov::pass::Manager manager_detok; + manager_detok.register_pass("before.svg"); + manager_detok.register_pass(); + manager_detok.register_pass("after.svg"); + manager_detok.run_passes(ov_detokenizer); m_tokenizer = core.compile_model(ov_tokenizer, device, properties); if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) { - m_detokenizer = core.compile_model(tokenizer_path / "openvino_detokenizer.xml", device, properties); + m_detokenizer = core.compile_model(ov_detokenizer, device, properties); } @@ -298,11 +319,8 @@ class Tokenizer::TokenizerImpl { } TokenizedInputs encode(std::string prompt, const ov::AnyMap& tokenization_params = {}) { - bool add_special_tokens_flag = true; - ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag); - CircularBufferQueueElementGuard infer_request_guard(this->m_ireq_queue_tokenizer.get()); - set_state_if_necessary(infer_request_guard, add_special_tokens_flag); + set_state_if_necessary(infer_request_guard, tokenization_params); size_t batch_size = 1; infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {batch_size}, &prompt}); infer_request_guard.get().start_async(); @@ -316,11 +334,8 @@ class Tokenizer::TokenizerImpl { TokenizedInputs encode(std::vector& prompts, const ov::AnyMap& tokenization_params = {}) { TokenizedInputs unpadded; { - bool add_special_tokens_flag = true; - ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag); - CircularBufferQueueElementGuard infer_request_guard(this->m_ireq_queue_tokenizer.get()); - set_state_if_necessary(infer_request_guard, add_special_tokens_flag); + set_state_if_necessary(infer_request_guard, tokenization_params); infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()}); auto size_ = infer_request_guard.get().get_input_tensor().get_shape(); infer_request_guard.get().start_async(); @@ -343,10 +358,11 @@ class Tokenizer::TokenizerImpl { return {input_ids_, attention_mask_}; } - std::string decode(std::vector tokens) { + std::string decode(std::vector tokens, const ov::AnyMap& detokenization_params = {}) { OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available"); CircularBufferQueueElementGuard infer_request_guard(this->m_ireq_queue_detokenizer.get()); + set_state_if_necessary(infer_request_guard, detokenization_params); size_t batch_size = 1; infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::i64, {batch_size, tokens.size()}, tokens.data()}); infer_request_guard.get().start_async(); @@ -354,12 +370,13 @@ class Tokenizer::TokenizerImpl { return infer_request_guard.get().get_output_tensor().data()[0]; } - std::vector decode(ov::Tensor tokens) { + std::vector decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {}) { OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available"); OPENVINO_ASSERT(tokens.get_element_type() == ov::element::i64, "tokens tensor element type should be an i64"); OPENVINO_ASSERT(tokens.get_shape().size() == 2, "tokens tensor should of rank 2 with shape [batch_size, seq_len]"); CircularBufferQueueElementGuard infer_request_guard(this->m_ireq_queue_detokenizer.get()); + set_state_if_necessary(infer_request_guard, detokenization_params); infer_request_guard.get().set_input_tensor(tokens); infer_request_guard.get().start_async(); infer_request_guard.get().wait(); @@ -369,7 +386,7 @@ class Tokenizer::TokenizerImpl { return std::vector(res_data, res_data + res.get_shape()[0]); } - std::vector decode(std::vector> lines) { + std::vector decode(std::vector> lines, const ov::AnyMap& detokenization_params = {}) { OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available"); auto compare_lengths = [](const std::vector& a, const std::vector& b) { @@ -388,6 +405,7 @@ class Tokenizer::TokenizerImpl { } CircularBufferQueueElementGuard infer_request_guard(this->m_ireq_queue_detokenizer.get()); + set_state_if_necessary(infer_request_guard, detokenization_params); infer_request_guard.get().set_input_tensor(tokens); infer_request_guard.get().start_async(); infer_request_guard.get().wait(); @@ -517,16 +535,16 @@ TokenizedInputs Tokenizer::encode(std::initializer_list& text, cons return encode(std::vector(text.begin(), text.end()), tokenization_params); } -std::string Tokenizer::decode(std::vector tokens) { - return m_pimpl->decode(tokens); +std::string Tokenizer::decode(std::vector tokens, const ov::AnyMap& detokenization_params) { + return m_pimpl->decode(tokens, detokenization_params); } -std::vector Tokenizer::decode(ov::Tensor tokens) { - return m_pimpl->decode(tokens); +std::vector Tokenizer::decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params) { + return m_pimpl->decode(tokens, detokenization_params); } -std::vector Tokenizer::decode(std::vector> lines) { - return m_pimpl->decode(lines); +std::vector Tokenizer::decode(std::vector> lines, const ov::AnyMap& detokenization_params) { + return m_pimpl->decode(lines, detokenization_params); } int64_t Tokenizer::get_bos_token_id() const { diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp index 2ccccff4c0..db4643a65c 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -63,27 +63,33 @@ void init_tokenizer(py::module_& m) { .def( "decode", - [](Tokenizer& tok, std::vector& tokens) -> py::str { - return pyutils::handle_utf8(tok.decode(tokens)); + [](Tokenizer& tok, std::vector& tokens, bool skip_special_tokens) -> py::str { + ov::AnyMap detokenization_params; + detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; + return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode a sequence into a string prompt.)" ) .def( "decode", - [](Tokenizer& tok, ov::Tensor& tokens) -> py::typing::List { - return pyutils::handle_utf8(tok.decode(tokens)); + [](Tokenizer& tok, ov::Tensor& tokens, bool skip_special_tokens) -> py::typing::List { + ov::AnyMap detokenization_params; + detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; + return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode tensor into a list of string prompts.)") .def( "decode", - [](Tokenizer& tok, std::vector>& tokens) -> py::typing::List { - return pyutils::handle_utf8(tok.decode(tokens)); + [](Tokenizer& tok, std::vector>& tokens, bool skip_special_tokens) -> py::typing::List { + ov::AnyMap detokenization_params; + detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; + return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode a batch of tokens into a list of string prompt.)") .def("apply_chat_template", [](Tokenizer& tok, diff --git a/tests/python_tests/test_chat_generate_api.py b/tests/python_tests/test_chat_generate_api.py index 25d0798994..a87a2c7555 100644 --- a/tests/python_tests/test_chat_generate_api.py +++ b/tests/python_tests/test_chat_generate_api.py @@ -217,3 +217,24 @@ def test_add_special_tokens(add_special_tokens, prompt): res_genai = genai_tokenzier.encode(prompt, add_special_tokens).input_ids.data res_hf = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"] assert np.all(res_genai == res_hf) + +@pytest.mark.precommit +@pytest.mark.nightly +@pytest.mark.parametrize("add_special_tokens", [True, False]) +@pytest.mark.parametrize("skip_special_tokens", [True, False]) +@pytest.mark.parametrize("prompt", prompts) +def test_add_special_tokens(add_special_tokens, skip_special_tokens, prompt): + import numpy as np + model_descr = get_chat_models_list()[0] + model_id, path, hf_tokenizer, model_opt, pipe = read_model((model_descr[0], model_descr[1] / '_test_chat')) + genai_tokenizer = pipe.get_tokenizer() + + # Calling encode with add_special_tokens will set state flag. + res_genai = genai_tokenizer.encode(prompt, add_special_tokens).input_ids.data + res_hf = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"] + assert np.all(res_genai == res_hf) + + # Decode with skip_special_tokens + decoded_genai = genai_tokenizer.decode(res_genai, skip_special_tokens=skip_special_tokens) + decoded_hf = hf_tokenizer.decode(res_hf[0], skip_special_tokens=skip_special_tokens) + assert decoded_genai == decoded_hf From e46466d94124aa73daa34a91cf94a7e0ce4e1265 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 21 Nov 2024 22:10:47 +0100 Subject: [PATCH 2/6] rename pass --- .github/labeler.yml | 4 ++-- ...ine_segments_stateful.cpp => make_tokenizer_stateful.cpp} | 2 +- ...ine_segments_stateful.hpp => make_tokenizer_stateful.hpp} | 0 src/cpp/src/tokenizer.cpp | 5 +---- 4 files changed, 4 insertions(+), 7 deletions(-) rename src/cpp/src/{make_combine_segments_stateful.cpp => make_tokenizer_stateful.cpp} (98%) rename src/cpp/src/{make_combine_segments_stateful.hpp => make_tokenizer_stateful.hpp} (100%) diff --git a/.github/labeler.yml b/.github/labeler.yml index c5d0db312c..c162f6aff4 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -8,8 +8,8 @@ - 'src/cpp/src/tokenizers_path.hpp' - 'src/cpp/src/circular_buffer_queue.hpp' - 'src/cpp/src/synchronized_queue.hpp' -- 'src/cpp/src/make_combine_segments_stateful.cpp' -- 'src/cpp/src/make_combine_segments_stateful.hpp' +- 'src/cpp/src/make_tokenizer_stateful.cpp' +- 'src/cpp/src/make_tokenizer_stateful.hpp' - 'src/python/py_tokenizer.cpp' - 'thirdparty/openvino_tokenizers' - 'tests/python_tests/tokenizer_configs.py' diff --git a/src/cpp/src/make_combine_segments_stateful.cpp b/src/cpp/src/make_tokenizer_stateful.cpp similarity index 98% rename from src/cpp/src/make_combine_segments_stateful.cpp rename to src/cpp/src/make_tokenizer_stateful.cpp index 26c58b8fca..538a935e56 100644 --- a/src/cpp/src/make_combine_segments_stateful.cpp +++ b/src/cpp/src/make_tokenizer_stateful.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2023-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -#include "make_combine_segments_stateful.hpp" +#include "make_tokenizer_stateful.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/select.hpp" #include "openvino/op/slice.hpp" diff --git a/src/cpp/src/make_combine_segments_stateful.hpp b/src/cpp/src/make_tokenizer_stateful.hpp similarity index 100% rename from src/cpp/src/make_combine_segments_stateful.hpp rename to src/cpp/src/make_tokenizer_stateful.hpp diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index 78b94915dd..fc6ba75d90 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -10,12 +10,11 @@ #include #include -#include "openvino/pass/visualize_tree.hpp" #include "openvino/pass/manager.hpp" #include "openvino/runtime/core.hpp" #include "openvino/genai/tokenizer.hpp" -#include "make_combine_segments_stateful.hpp" +#include "make_tokenizer_stateful.hpp" #include "tokenizers_path.hpp" #include "circular_buffer_queue.hpp" #include "json_utils.hpp" @@ -157,9 +156,7 @@ class Tokenizer::TokenizerImpl { manager_tok.run_passes(ov_tokenizer); ov::pass::Manager manager_detok; - manager_detok.register_pass("before.svg"); manager_detok.register_pass(); - manager_detok.register_pass("after.svg"); manager_detok.run_passes(ov_detokenizer); m_tokenizer = core.compile_model(ov_tokenizer, device, properties); From 4529dec255b603d711a479f1a90c4cbec9ae3ebf Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 22 Nov 2024 10:49:11 +0100 Subject: [PATCH 3/6] fix typos --- src/cpp/include/openvino/genai/tokenizer.hpp | 28 ++++++++++---------- src/cpp/src/make_tokenizer_stateful.cpp | 17 +++++------- src/python/py_tokenizer.cpp | 6 ++--- tests/python_tests/test_chat_generate_api.py | 3 ++- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index e90e9c80de..8d2d63ea80 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -47,7 +47,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief encode a single prompt * @param prompt std::string with input prompt - * @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false} + * @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false} * @return pair of [input_ids, attention_mask] */ TokenizedInputs encode(const std::string prompt, const ov::AnyMap& tokenization_params = {}); @@ -55,7 +55,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief encode batch of prompts. Left padding will be applied by default * @param prompts vector storing batch of prompts - * @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false} + * @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false} * @return pair of [input_ids, attention_mask] */ TokenizedInputs encode(std::vector& prompt, const ov::AnyMap& tokenization_params = {}); @@ -87,7 +87,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens vector storing tokens - * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} + * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return sequence string */ std::string decode(std::vector tokens, const ov::AnyMap& detokenization_params = {}); @@ -95,18 +95,18 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens vector storing tokens - * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) + * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) * @return sequence string */ template - util::EnableIfAllStringAny decode(std::vector& tokens, Properties&&... properties) { - return decode(tokens, AnyMap{std::forward(properties)...}); + util::EnableIfAllStringAny decode(std::vector& tokens, Properties&&... detokenization_params) { + return decode(tokens, AnyMap{std::forward(detokenization_params)...}); } /** * @brief decode tokens. * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] - * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} + * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return vector of std::string, with size = batch_size */ std::vector decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {}); @@ -114,18 +114,18 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] - * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) + * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) * @return vector of std::string, with size = batch_size */ template - util::EnableIfAllStringAny, Properties...> decode(ov::Tensor tokens, Properties&&... properties) { - return decode(tokens, AnyMap{std::forward(properties)...}); + util::EnableIfAllStringAny, Properties...> decode(ov::Tensor tokens, Properties&&... detokenization_params) { + return decode(tokens, AnyMap{std::forward(detokenization_params)...}); } /** * @brief batched decoding of tokens. * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size - * @param tokenization_params AnyMap with detokenization parameters, e.g. {'skip_special_tokens', false} + * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return vector of std::string, with size equal to batch_size */ std::vector decode(std::vector> tokens, const ov::AnyMap& detokenization_params = {}); @@ -133,12 +133,12 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] - * @param tokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) + * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true) * @return vector of std::string, with size = batch_size */ template - util::EnableIfAllStringAny, Properties...> decode(std::vector> tokens, Properties&&... properties) { - return decode(tokens, AnyMap{std::forward(properties)...}); + util::EnableIfAllStringAny, Properties...> decode(std::vector> tokens, Properties&&... detokenization_params) { + return decode(tokens, AnyMap{std::forward(detokenization_params)...}); } /** diff --git a/src/cpp/src/make_tokenizer_stateful.cpp b/src/cpp/src/make_tokenizer_stateful.cpp index 538a935e56..3551e713c9 100644 --- a/src/cpp/src/make_tokenizer_stateful.cpp +++ b/src/cpp/src/make_tokenizer_stateful.cpp @@ -48,25 +48,20 @@ bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr } bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr& model) { - std::shared_ptr vocab_decoder_node; for (auto node: model->get_ordered_ops()) { - if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) { + if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) vocab_decoder_node = node; - } } - auto val = vocab_decoder_node->input_value(4); - auto val_type = vocab_decoder_node->input_value(4).get_element_type(); - if (!vocab_decoder_node || !vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) { + if (!vocab_decoder_node || vocab_decoder_node->get_input_size() < 5) + return false; + if (!vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) return false; - } std::shared_ptr skip_tokens_const = std::dynamic_pointer_cast(vocab_decoder_node->get_input_node_shared_ptr(4)); - if (!skip_tokens_const) { + if (!skip_tokens_const) return false; - } - auto start_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{0}); auto int_max_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits::max()}); @@ -87,4 +82,4 @@ bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptradd_sinks({assign}); model->add_variables({variable}); return true; -} \ No newline at end of file +} diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp index db4643a65c..dae2ffe775 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -68,7 +68,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = true, + py::arg("tokens"), py::arg("skip_special_tokens") = false, R"(Decode a sequence into a string prompt.)" ) @@ -79,7 +79,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = true, + py::arg("tokens"), py::arg("skip_special_tokens") = false, R"(Decode tensor into a list of string prompts.)") .def( @@ -89,7 +89,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = true, + py::arg("tokens"), py::arg("skip_special_tokens") = false, R"(Decode a batch of tokens into a list of string prompt.)") .def("apply_chat_template", [](Tokenizer& tok, diff --git a/tests/python_tests/test_chat_generate_api.py b/tests/python_tests/test_chat_generate_api.py index a87a2c7555..efd1d87416 100644 --- a/tests/python_tests/test_chat_generate_api.py +++ b/tests/python_tests/test_chat_generate_api.py @@ -219,6 +219,7 @@ def test_add_special_tokens(add_special_tokens, prompt): assert np.all(res_genai == res_hf) @pytest.mark.precommit +@pytest.mark.xfail(reason="Need to turn them back on when openvino_tokenizers will be updated.") @pytest.mark.nightly @pytest.mark.parametrize("add_special_tokens", [True, False]) @pytest.mark.parametrize("skip_special_tokens", [True, False]) @@ -235,6 +236,6 @@ def test_add_special_tokens(add_special_tokens, skip_special_tokens, prompt): assert np.all(res_genai == res_hf) # Decode with skip_special_tokens - decoded_genai = genai_tokenizer.decode(res_genai, skip_special_tokens=skip_special_tokens) + decoded_genai = genai_tokenizer.decode(res_genai, skip_special_tokens=skip_special_tokens)[0] decoded_hf = hf_tokenizer.decode(res_hf[0], skip_special_tokens=skip_special_tokens) assert decoded_genai == decoded_hf From 21037497e6958c7df020131d77984a953a4beb08 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 25 Nov 2024 12:09:04 +0100 Subject: [PATCH 4/6] align with the openvino_tokenizers --- src/cpp/include/openvino/genai/tokenizer.hpp | 6 +++--- src/cpp/src/make_tokenizer_stateful.cpp | 13 +++++++++---- src/cpp/src/tokenizer.cpp | 5 ++--- src/python/py_tokenizer.cpp | 6 +++--- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index 8d2d63ea80..36f63d2b5e 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -87,7 +87,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode sequence of tokens * @param tokens vector storing tokens - * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} + * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return sequence string */ std::string decode(std::vector tokens, const ov::AnyMap& detokenization_params = {}); @@ -106,7 +106,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief decode tokens. * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len] - * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} + * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return vector of std::string, with size = batch_size */ std::vector decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {}); @@ -125,7 +125,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { /** * @brief batched decoding of tokens. * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size - * @param tokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} + * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false} * @return vector of std::string, with size equal to batch_size */ std::vector decode(std::vector> tokens, const ov::AnyMap& detokenization_params = {}); diff --git a/src/cpp/src/make_tokenizer_stateful.cpp b/src/cpp/src/make_tokenizer_stateful.cpp index 3551e713c9..4685b0e715 100644 --- a/src/cpp/src/make_tokenizer_stateful.cpp +++ b/src/cpp/src/make_tokenizer_stateful.cpp @@ -60,7 +60,8 @@ bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr skip_tokens_const = std::dynamic_pointer_cast(vocab_decoder_node->get_input_node_shared_ptr(4)); - if (!skip_tokens_const) + std::shared_ptr skip_tokens_slice = std::dynamic_pointer_cast(vocab_decoder_node->get_input_node_shared_ptr(4)); + if (!skip_tokens_const && !skip_tokens_slice) return false; auto start_const = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{0}); @@ -74,10 +75,14 @@ bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr(int_max_const, read_value); - std::shared_ptr slice_node = std::make_shared(skip_tokens_const, start_const, stop, one_const); + // If already has slice just replace the stop input. + if (skip_tokens_slice) { + skip_tokens_slice->input(2).replace_source_output(stop); + } else { + std::shared_ptr slice_node = std::make_shared(skip_tokens_const, start_const, stop, one_const); + vocab_decoder_node->input(4).replace_source_output(slice_node->output(0)); + } - vocab_decoder_node->input(4).replace_source_output(slice_node->output(0)); - auto assign = std::make_shared(read_value, variable); model->add_sinks({assign}); model->add_variables({variable}); diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index fc6ba75d90..d0a472a40f 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -74,7 +74,7 @@ class Tokenizer::TokenizerImpl { // To change the adding special tokens mode we use a statefull subgraph, // this flag holds the current state value of the CompiledModel. bool m_add_special_tokens = true; - bool m_skip_special_tokens = false; + bool m_skip_special_tokens = true; bool m_older_than_24_5 = false; int64_t m_pad_token_id = -1; @@ -89,7 +89,7 @@ class Tokenizer::TokenizerImpl { void set_state_if_necessary(CircularBufferQueueElementGuard& infer_request_guard, const ov::AnyMap& params) { bool add_special_tokens_flag = true; - bool skip_special_tokens_flag = false; + bool skip_special_tokens_flag = true; ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag); ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag); @@ -164,7 +164,6 @@ class Tokenizer::TokenizerImpl { m_detokenizer = core.compile_model(ov_detokenizer, device, properties); } - const size_t INFER_REQUEST_QUEUE_SIZE = m_tokenizer.get_property(ov::optimal_number_of_infer_requests); m_ireq_queue_tokenizer = std::make_unique>( INFER_REQUEST_QUEUE_SIZE, diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp index dae2ffe775..db4643a65c 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -68,7 +68,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = false, + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode a sequence into a string prompt.)" ) @@ -79,7 +79,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = false, + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode tensor into a list of string prompts.)") .def( @@ -89,7 +89,7 @@ void init_tokenizer(py::module_& m) { detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens; return pyutils::handle_utf8(tok.decode(tokens, detokenization_params)); }, - py::arg("tokens"), py::arg("skip_special_tokens") = false, + py::arg("tokens"), py::arg("skip_special_tokens") = true, R"(Decode a batch of tokens into a list of string prompt.)") .def("apply_chat_template", [](Tokenizer& tok, From d26233b172d60063e50257058513a560e8e591b1 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 25 Nov 2024 12:56:37 +0100 Subject: [PATCH 5/6] update signature --- src/python/openvino_genai/py_openvino_genai.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index df290a9744..5e4d2dd7b2 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1303,17 +1303,17 @@ class Tokenizer: Embeds input prompts with special tags for a chat scenario. """ @typing.overload - def decode(self, tokens: list[int]) -> str: + def decode(self, tokens: list[int], skip_special_tokens: bool = True) -> str: """ Decode a sequence into a string prompt. """ @typing.overload - def decode(self, tokens: openvino._pyopenvino.Tensor) -> list[str]: + def decode(self, tokens: openvino._pyopenvino.Tensor, skip_special_tokens: bool = True) -> list[str]: """ Decode tensor into a list of string prompts. """ @typing.overload - def decode(self, tokens: list[list[int]]) -> list[str]: + def decode(self, tokens: list[list[int]], skip_special_tokens: bool = True) -> list[str]: """ Decode a batch of tokens into a list of string prompt. """ From 111bb5bb2afe5b6cc4b01ea935ed7af38c6075de Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 26 Nov 2024 10:45:37 +0100 Subject: [PATCH 6/6] add barier for AnyMap key names, apply review comments --- src/cpp/src/tokenizer.cpp | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp index d0a472a40f..41f9a6abd4 100644 --- a/src/cpp/src/tokenizer.cpp +++ b/src/cpp/src/tokenizer.cpp @@ -55,6 +55,14 @@ ov::genai::TokenizedInputs pad_left(ov::Tensor& input_ids, ov::Tensor& attention return {input_ids, attention_mask}; } +void check_arguments(const ov::AnyMap& parameters, std::set allowed_argnames) { + for (const auto& [key, value] : parameters) { + if (allowed_argnames.find(key) == allowed_argnames.end()) { + OPENVINO_THROW("unacceptable parameter key: " + key); + } + } +} + constexpr char bos_token_key_name[] = "bos_token"; constexpr char eos_token_key_name[] = "eos_token"; constexpr char pad_token_key_name[] = "pad_token"; @@ -88,8 +96,8 @@ class Tokenizer::TokenizerImpl { std::string m_chat_template = {}; void set_state_if_necessary(CircularBufferQueueElementGuard& infer_request_guard, const ov::AnyMap& params) { - bool add_special_tokens_flag = true; - bool skip_special_tokens_flag = true; + bool add_special_tokens_flag = m_add_special_tokens; + bool skip_special_tokens_flag = m_skip_special_tokens; ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag); ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag); @@ -145,7 +153,7 @@ class Tokenizer::TokenizerImpl { auto device = "CPU"; // currently openvino_tokenizer supports only CPU auto ov_tokenizer = core.read_model(tokenizer_path / "openvino_tokenizer.xml"); - std::shared_ptr ov_detokenizer; + std::shared_ptr ov_detokenizer = nullptr; if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) { ov_detokenizer = core.read_model(tokenizer_path / "openvino_detokenizer.xml"); } @@ -155,12 +163,11 @@ class Tokenizer::TokenizerImpl { manager_tok.register_pass(); manager_tok.run_passes(ov_tokenizer); - ov::pass::Manager manager_detok; - manager_detok.register_pass(); - manager_detok.run_passes(ov_detokenizer); - m_tokenizer = core.compile_model(ov_tokenizer, device, properties); - if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) { + if (ov_detokenizer) { + ov::pass::Manager manager_detok; + manager_detok.register_pass(); + manager_detok.run_passes(ov_detokenizer); m_detokenizer = core.compile_model(ov_detokenizer, device, properties); } @@ -516,30 +523,37 @@ Tokenizer::Tokenizer(const std::filesystem::path& tokenizer_path, const ov::AnyM } TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) { + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()}); return m_pimpl->encode(std::move(prompt), tokenization_params); } TokenizedInputs Tokenizer::encode(std::vector& prompts, const ov::AnyMap& tokenization_params) { + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()}); return m_pimpl->encode(prompts, tokenization_params); } TokenizedInputs Tokenizer::encode(std::vector&& prompts, const ov::AnyMap& tokenization_params) { + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()}); return m_pimpl->encode(prompts, tokenization_params); } TokenizedInputs Tokenizer::encode(std::initializer_list& text, const ov::AnyMap& tokenization_params) { + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()}); return encode(std::vector(text.begin(), text.end()), tokenization_params); } std::string Tokenizer::decode(std::vector tokens, const ov::AnyMap& detokenization_params) { + check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()}); return m_pimpl->decode(tokens, detokenization_params); } std::vector Tokenizer::decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params) { + check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()}); return m_pimpl->decode(tokens, detokenization_params); } std::vector Tokenizer::decode(std::vector> lines, const ov::AnyMap& detokenization_params) { + check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()}); return m_pimpl->decode(lines, detokenization_params); }