diff --git a/.github/labeler.yml b/.github/labeler.yml
index c5d0db312c..c162f6aff4 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -8,8 +8,8 @@
 - 'src/cpp/src/tokenizers_path.hpp'
 - 'src/cpp/src/circular_buffer_queue.hpp'
 - 'src/cpp/src/synchronized_queue.hpp'
-- 'src/cpp/src/make_combine_segments_stateful.cpp'
-- 'src/cpp/src/make_combine_segments_stateful.hpp'
+- 'src/cpp/src/make_tokenizer_stateful.cpp'
+- 'src/cpp/src/make_tokenizer_stateful.hpp'
 - 'src/python/py_tokenizer.cpp'
 - 'thirdparty/openvino_tokenizers'
 - 'tests/python_tests/tokenizer_configs.py'
diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp
index bcb8da68a3..36f63d2b5e 100644
--- a/src/cpp/include/openvino/genai/tokenizer.hpp
+++ b/src/cpp/include/openvino/genai/tokenizer.hpp
@@ -47,7 +47,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     /**
     * @brief encode a single prompt
     * @param prompt std::string with input prompt
-    * @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
+    * @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
     * @return pair of [input_ids, attention_mask]
     */
     TokenizedInputs encode(const std::string prompt, const ov::AnyMap& tokenization_params = {});
@@ -55,7 +55,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     /**
     * @brief encode batch of prompts. Left padding will be applied by default
     * @param prompts vector storing batch of prompts
-    * @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
+    * @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
     * @return pair of [input_ids, attention_mask]
     */
     TokenizedInputs encode(std::vector<std::string>& prompt, const ov::AnyMap& tokenization_params = {});
@@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     /**
     * @brief decode sequence of tokens
     * @param tokens vector storing tokens
+    * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
     * @return sequence string
     */
-    std::string decode(std::vector<int64_t> tokens);
-
+    std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {});
+
+    /**
+    * @brief decode sequence of tokens
+    * @param tokens vector storing tokens
+    * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
+    * @return sequence string
+    */
+    template <typename... Properties>
+    util::EnableIfAllStringAny<std::string, Properties...> decode(std::vector<int64_t>& tokens, Properties&&... detokenization_params) {
+        return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
+    }
+
     /**
     * @brief decode tokens.
     * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
+    * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
+    * @return vector of std::string, with size = batch_size
+    */
+    std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {});
+
+    /**
+    * @brief decode sequence of tokens
+    * @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
+    * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
     * @return vector of std::string, with size = batch_size
     */
-    std::vector<std::string> decode(ov::Tensor tokens);
+    template <typename... Properties>
+    util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(ov::Tensor tokens, Properties&&... detokenization_params) {
+        return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
+    }
 
     /**
     * @brief batched decoding of tokens.
     * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
+    * @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
     * @return vector of std::string, with size equal to batch_size
     */
-    std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
+    std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens, const ov::AnyMap& detokenization_params = {});
+
+    /**
+    * @brief batched decoding of tokens
+    * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
+    * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
+    * @return vector of std::string, with size equal to batch_size
+    */
+    template <typename... Properties>
+    util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(std::vector<std::vector<int64_t>> tokens, Properties&&... detokenization_params) {
+        return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
+    }
 
     /**
     * @brief Embeds input prompts with special tags for a chat scenario.
@@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
 };
 
 static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
+static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};
 
 }  // namespace genai
 }  // namespace ov
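A minimal usage sketch of the two equivalent calling styles this header now exposes, the AnyMap form and the property form; the tokenizer directory path and token ids are hypothetical:

```cpp
#include <string>
#include <vector>
#include "openvino/genai/tokenizer.hpp"

int main() {
    ov::genai::Tokenizer tokenizer("path/to/tokenizer_dir");  // hypothetical path
    std::vector<int64_t> tokens = {1, 15043, 2};              // hypothetical token ids

    // AnyMap style: keys are validated against the allowed set at the API boundary.
    std::string kept = tokenizer.decode(tokens, {{"skip_special_tokens", false}});

    // Property style, forwarded into an AnyMap by the template overload above.
    std::string stripped = tokenizer.decode(tokens, ov::genai::skip_special_tokens(true));
}
```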
diff --git a/src/cpp/src/make_combine_segments_stateful.cpp b/src/cpp/src/make_combine_segments_stateful.cpp
deleted file mode 100644
index 2285c172dc..0000000000
--- a/src/cpp/src/make_combine_segments_stateful.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) 2023-2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "make_combine_segments_stateful.hpp"
-#include "openvino/op/constant.hpp"
-#include "openvino/op/select.hpp"
-#include "openvino/op/read_value.hpp"
-#include "openvino/op/assign.hpp"
-
-
-using namespace ov;
-using namespace ov::op;
-
-bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
-
-    std::shared_ptr<ov::Node> combine_seg_node;
-    for (auto node: model->get_ordered_ops()) {
-        if (strcmp(node->get_type_info().name, "CombineSegments") == 0) {
-            combine_seg_node = node;
-        }
-    }
-    if (!combine_seg_node || combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
-        return false;
-    }
-
-    std::shared_ptr<v0::Constant> input_1_const = std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
-    if (!input_1_const) {
-        return false;
-    }
-
-    op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
-    auto variable = std::make_shared<op::util::Variable>(var_info);
-
-    // Default mode is add_special_tokens.
-    auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
-    auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
-    auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
-    auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
-    combine_seg_node->input(1).replace_source_output(select_node->output(0));
-
-    auto assign = std::make_shared<v6::Assign>(read_value, variable);
-
-    model->add_sinks({assign});
-    model->add_variables({variable});
-    return true;
-}
diff --git a/src/cpp/src/make_combine_segments_stateful.hpp b/src/cpp/src/make_combine_segments_stateful.hpp
deleted file mode 100644
index 6365497140..0000000000
--- a/src/cpp/src/make_combine_segments_stateful.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (C) 2023-2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include "openvino/op/constant.hpp"
-#include "openvino/pass/pass.hpp"
-
-namespace ov {
-namespace genai {
-
-/**
- * @brief This pass modifies tokenizer ov::Model so that special tokens adding will be
- *  enabled or disabled depending on stateful value.
- *
- *           +--------------+
- *           | DefaultMode  |
- *           +--------------+
- *                  |
- *                  |
- *                  v
- *   +------------+   +--------+   +------------------+
- *   | ReadValue  |   |  ends  |   | const value = 0  |
- *   +------------+   +--------+   +------------------+
- *          \             |                /
- *           \            |               /
- *            v           v              v
- *               +-----------------+
- *               |     Select      |
- *               +-----------------+
- *                        |
- *                        v
- *            +-------------------------+
- *            |     CombineSegments     |
- *            +-------------------------+
-**/
-class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
-public:
-    OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
-    bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
-};
-
-const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
-
-} // namespace genai
-} // namespace ov
diff --git a/src/cpp/src/make_tokenizer_stateful.cpp b/src/cpp/src/make_tokenizer_stateful.cpp
new file mode 100644
index 0000000000..4685b0e715
--- /dev/null
+++ b/src/cpp/src/make_tokenizer_stateful.cpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "make_tokenizer_stateful.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/select.hpp"
+#include "openvino/op/slice.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/read_value.hpp"
+#include "openvino/op/assign.hpp"
+
+
+using namespace ov;
+using namespace ov::op;
+
+bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
+
+    std::shared_ptr<ov::Node> combine_seg_node;
+    for (auto node: model->get_ordered_ops()) {
+        if (strcmp(node->get_type_info().name, "CombineSegments") == 0) {
+            combine_seg_node = node;
+        }
+    }
+    if (!combine_seg_node || combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
+        return false;
+    }
+
+    std::shared_ptr<v0::Constant> input_1_const = std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
+    if (!input_1_const) {
+        return false;
+    }
+
+    op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
+    auto variable = std::make_shared<op::util::Variable>(var_info);
+
+    // Default mode is add_special_tokens.
+    auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
+    auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
+    auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
+    auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
+    combine_seg_node->input(1).replace_source_output(select_node->output(0));
+
+    auto assign = std::make_shared<v6::Assign>(read_value, variable);
+
+    model->add_sinks({assign});
+    model->add_variables({variable});
+    return true;
+}
+
+bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
+    std::shared_ptr<ov::Node> vocab_decoder_node;
+    for (auto node: model->get_ordered_ops()) {
+        if (strcmp(node->get_type_info().name, "VocabDecoder") == 0)
+            vocab_decoder_node = node;
+    }
+
+    if (!vocab_decoder_node || vocab_decoder_node->get_input_size() < 5)
+        return false;
+    if (!vocab_decoder_node->input_value(4).get_element_type().is_integral_number())
+        return false;
+
+    std::shared_ptr<v0::Constant> skip_tokens_const = std::dynamic_pointer_cast<v0::Constant>(vocab_decoder_node->get_input_node_shared_ptr(4));
+    std::shared_ptr<v8::Slice> skip_tokens_slice = std::dynamic_pointer_cast<v8::Slice>(vocab_decoder_node->get_input_node_shared_ptr(4));
+    if (!skip_tokens_const && !skip_tokens_slice)
+        return false;
+
+    auto start_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{0});
+    auto int_max_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits<int>::max()});
+    auto one_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{1});
+
+    // By default, INT_MAX will multiply with 1 and all skip_tokens will be selected.
+    op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID};
+    auto variable = std::make_shared<op::util::Variable>(var_info);
+    auto read_value = std::make_shared<v6::ReadValue>(one_const, variable);
+    // If the flag is set, slice up to INT_MAX, which means skip all special tokens.
+    auto stop = std::make_shared<v1::Multiply>(int_max_const, read_value);
+
+    // If the input already has a Slice, just replace its stop input.
+    if (skip_tokens_slice) {
+        skip_tokens_slice->input(2).replace_source_output(stop);
+    } else {
+        std::shared_ptr<v8::Slice> slice_node = std::make_shared<v8::Slice>(skip_tokens_const, start_const, stop, one_const);
+        vocab_decoder_node->input(4).replace_source_output(slice_node->output(0));
+    }
+
+    auto assign = std::make_shared<v6::Assign>(read_value, variable);
+    model->add_sinks({assign});
+    model->add_variables({variable});
+    return true;
+}
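The arithmetic behind the new pass: the skip list feeding VocabDecoder is sliced with stop = INT_MAX * flag, so flag == 1 keeps every skip token and flag == 0 yields an empty list. A standalone sketch of that logic (an illustration, not the pass itself):

```cpp
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

// stop = INT_MAX * flag: flag == 1 selects all skip tokens, flag == 0 selects none.
std::vector<int32_t> select_skip_tokens(const std::vector<int32_t>& skip_tokens, int32_t flag) {
    int64_t stop = static_cast<int64_t>(std::numeric_limits<int32_t>::max()) * flag;
    int64_t end = std::min<int64_t>(stop, static_cast<int64_t>(skip_tokens.size()));
    return {skip_tokens.begin(), skip_tokens.begin() + end};
}
```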
diff --git a/src/cpp/src/make_tokenizer_stateful.hpp b/src/cpp/src/make_tokenizer_stateful.hpp
new file mode 100644
index 0000000000..307c6199c8
--- /dev/null
+++ b/src/cpp/src/make_tokenizer_stateful.hpp
@@ -0,0 +1,81 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/op/constant.hpp"
+#include "openvino/pass/pass.hpp"
+
+namespace ov {
+namespace genai {
+
+/**
+ * @brief This pass modifies tokenizer ov::Model so that special tokens adding will be
+ *  enabled or disabled depending on stateful value.
+ *
+ *           +--------------+
+ *           | DefaultMode  |
+ *           +--------------+
+ *                  |
+ *                  |
+ *                  v
+ *   +------------+   +--------+   +------------------+
+ *   | ReadValue  |   |  ends  |   | const value = 0  |
+ *   +------------+   +--------+   +------------------+
+ *          \             |                /
+ *           \            |               /
+ *            v           v              v
+ *               +-----------------+
+ *               |     Select      |
+ *               +-----------------+
+ *                        |
+ *                        v
+ *            +-------------------------+
+ *            |     CombineSegments     |
+ *            +-------------------------+
+**/
+class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
+    bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
+};
+
+/**
+ * @brief This pass modifies tokenizer ov::Model so that special tokens skipping will be
+ *  enabled or disabled depending on stateful value.
+ *
+ *                              +--------------+
+ *                              | DefaultMode  |
+ *                              +--------------+
+ *                                     |
+ *                                     v
+ *                              +------------+   +-----------+
+ *                              | ReadValue  |   |  INT_MAX  |
+ *                              +------------+   +-----------+
+ *                                        \           /
+ *                                         \         /
+ *                                          v       v
+ *   +--------------------+   +---------+   +---------+
+ *   | Const with tokens  |   |  start  |   |   Mul   |
+ *   +--------------------+   +---------+   +---------+
+ *              \                  |             /
+ *               \                 |            /
+ *                v                v           v
+ *                     +-----------------+
+ *                     |      Slice      |
+ *                     +-----------------+
+ *                              |
+ *                              v
+ *                   +----------------------+
+ *                   |     VocabDecoder     |
+ *                   +----------------------+
+**/
+class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
+public:
+    OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
+    bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
+};
+
+const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
+const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens";
+
+}  // namespace genai
+}  // namespace ov
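For context, this is how such a model pass is driven, mirroring the registration that tokenizer.cpp below performs; the model path here is hypothetical:

```cpp
#include "make_tokenizer_stateful.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/runtime/core.hpp"

void make_stateful_example() {
    ov::Core core;
    auto model = core.read_model("openvino_tokenizer.xml");  // hypothetical path

    ov::pass::Manager manager;
    manager.register_pass<ov::genai::MakeCombineSegmentsSatateful>();
    manager.run_passes(model);
    // After the pass, the compiled model exposes an "add_special_tokens"
    // variable that can be flipped per request via InferRequest::query_state().
}
```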
diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp
index f52417a94e..41f9a6abd4 100644
--- a/src/cpp/src/tokenizer.cpp
+++ b/src/cpp/src/tokenizer.cpp
@@ -14,7 +14,7 @@
 #include "openvino/runtime/core.hpp"
 #include "openvino/genai/tokenizer.hpp"
 
-#include "make_combine_segments_stateful.hpp"
+#include "make_tokenizer_stateful.hpp"
 #include "tokenizers_path.hpp"
 #include "circular_buffer_queue.hpp"
 #include "json_utils.hpp"
@@ -55,6 +55,14 @@ ov::genai::TokenizedInputs pad_left(ov::Tensor& input_ids, ov::Tensor& attention
     return {input_ids, attention_mask};
 }
 
+void check_arguments(const ov::AnyMap& parameters, std::set<std::string> allowed_argnames) {
+    for (const auto& [key, value] : parameters) {
+        if (allowed_argnames.find(key) == allowed_argnames.end()) {
+            OPENVINO_THROW("unacceptable parameter key: " + key);
+        }
+    }
+}
+
 constexpr char bos_token_key_name[] = "bos_token";
 constexpr char eos_token_key_name[] = "eos_token";
 constexpr char pad_token_key_name[] = "pad_token";
@@ -73,7 +81,8 @@ class Tokenizer::TokenizerImpl {
     std::unique_ptr<CircularBufferQueue<ov::InferRequest>> m_ireq_queue_detokenizer;
     // To change the adding special tokens mode we use a statefull subgraph,
     // this flag holds the current state value of the CompiledModel.
-    bool m_add_special_tokens = true;
+    bool m_add_special_tokens = true;
+    bool m_skip_special_tokens = true;
     bool m_older_than_24_5 = false;
 
     int64_t m_pad_token_id = -1;
@@ -86,11 +95,16 @@ class Tokenizer::TokenizerImpl {
 
     std::string m_chat_template = {};
 
-    void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, bool add_special_tokens) {
+    void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, const ov::AnyMap& params) {
+        bool add_special_tokens_flag = m_add_special_tokens;
+        bool skip_special_tokens_flag = m_skip_special_tokens;
+        ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag);
+        ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag);
+
         // If user requested add_special_tokens mode different from the current one,
         // need to set state variable.
         // If requested mode matches the stored state set, then don't touch states.
-        if (add_special_tokens == m_add_special_tokens) {
+        if (add_special_tokens_flag == m_add_special_tokens && skip_special_tokens_flag == m_skip_special_tokens) {
             return;
         }
         if (m_older_than_24_5) {
@@ -100,19 +114,23 @@ class Tokenizer::TokenizerImpl {
             return;
         }
 
-        // auto states = m_ireq_queue_tokenizer->get(0).query_state();
+        // add_special_tokens is managed by Select op with a bool input.
         ov::Tensor add_special_tensor = ov::Tensor(ov::element::boolean, {});
-        *add_special_tensor.data<bool>() = add_special_tokens;
+        *add_special_tensor.data<bool>() = add_special_tokens_flag;
+
+        // skip_special_tokens is managed by multiplication with a number, therefore i32.
+        ov::Tensor skip_special_tensor = ov::Tensor(ov::element::i32, {1});
+        *skip_special_tensor.data<int>() = skip_special_tokens_flag;
 
         for (auto& state: infer_request_guard.get().query_state()) {
-            if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) == std::string::npos) {
-                // It's not add_special_tokens flag state.
-                continue;
+            if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
+                state.set_state(add_special_tensor);
+            } else if (state.get_name().find(ov::genai::SKIP_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
+                state.set_state(skip_special_tensor);
             }
-            state.set_state(add_special_tensor);
-            break;
         }
-        m_add_special_tokens = add_special_tokens;
+        m_add_special_tokens = add_special_tokens_flag;
+        m_skip_special_tokens = skip_special_tokens_flag;
     }
 
     TokenizerImpl() = default;
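set_state_if_necessary relies on utils::read_anymap_param to overwrite the cached flag only when the caller actually passed the key; a minimal sketch of that assumed behavior (the helper name here is hypothetical, the real one lives in json_utils.hpp):

```cpp
#include <string>
#include "openvino/core/any.hpp"

// Assumed behavior of ov::genai::utils::read_anymap_param: leave `out`
// untouched unless `key` is present in the map.
template <typename T>
void read_anymap_param_sketch(const ov::AnyMap& params, const std::string& key, T& out) {
    auto it = params.find(key);
    if (it != params.end()) {
        out = it->second.as<T>();
    }
}
```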
@@ -135,18 +153,24 @@ class Tokenizer::TokenizerImpl {
         auto device = "CPU"; // currently openvino_tokenizer supports only CPU
 
         auto ov_tokenizer = core.read_model(tokenizer_path / "openvino_tokenizer.xml");
+        std::shared_ptr<ov::Model> ov_detokenizer = nullptr;
+        if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) {
+            ov_detokenizer = core.read_model(tokenizer_path / "openvino_detokenizer.xml");
+        }
 
         m_older_than_24_5 = ov_tokenizer->get_rt_info().count("openvino_tokenizers_version") != 1;
 
-        ov::pass::Manager manager;
-        manager.register_pass<MakeCombineSegmentsSatateful>();
-        manager.run_passes(ov_tokenizer);
+        ov::pass::Manager manager_tok;
+        manager_tok.register_pass<MakeCombineSegmentsSatateful>();
+        manager_tok.run_passes(ov_tokenizer);
         m_tokenizer = core.compile_model(ov_tokenizer, device, properties);
 
-        if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) {
-            m_detokenizer = core.compile_model(tokenizer_path / "openvino_detokenizer.xml", device, properties);
+        if (ov_detokenizer) {
+            ov::pass::Manager manager_detok;
+            manager_detok.register_pass<MakeVocabDecoderSatateful>();
+            manager_detok.run_passes(ov_detokenizer);
+            m_detokenizer = core.compile_model(ov_detokenizer, device, properties);
         }
 
-
         const size_t INFER_REQUEST_QUEUE_SIZE = m_tokenizer.get_property(ov::optimal_number_of_infer_requests);
         m_ireq_queue_tokenizer = std::make_unique<CircularBufferQueue<ov::InferRequest>>(
             INFER_REQUEST_QUEUE_SIZE,
@@ -298,11 +322,8 @@ class Tokenizer::TokenizerImpl {
     }
 
     TokenizedInputs encode(std::string prompt, const ov::AnyMap& tokenization_params = {}) {
-        bool add_special_tokens_flag = true;
-        ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag);
-
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_tokenizer.get());
-        set_state_if_necessary(infer_request_guard, add_special_tokens_flag);
+        set_state_if_necessary(infer_request_guard, tokenization_params);
         size_t batch_size = 1;
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {batch_size}, &prompt});
         infer_request_guard.get().start_async();
@@ -316,11 +337,8 @@ class Tokenizer::TokenizerImpl {
     TokenizedInputs encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params = {}) {
         TokenizedInputs unpadded;
         {
-            bool add_special_tokens_flag = true;
-            ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag);
-
             CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_tokenizer.get());
-            set_state_if_necessary(infer_request_guard, add_special_tokens_flag);
+            set_state_if_necessary(infer_request_guard, tokenization_params);
             infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()});
             auto size_ = infer_request_guard.get().get_input_tensor().get_shape();
             infer_request_guard.get().start_async();
@@ -343,10 +361,11 @@ class Tokenizer::TokenizerImpl {
         return {input_ids_, attention_mask_};
     }
 
-    std::string decode(std::vector<int64_t> tokens) {
+    std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {}) {
         OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available");
 
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_detokenizer.get());
+        set_state_if_necessary(infer_request_guard, detokenization_params);
         size_t batch_size = 1;
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::i64, {batch_size, tokens.size()}, tokens.data()});
         infer_request_guard.get().start_async();
@@ -354,12 +373,13 @@ class Tokenizer::TokenizerImpl {
         return infer_request_guard.get().get_output_tensor().data<std::string>()[0];
     }
 
-    std::vector<std::string> decode(ov::Tensor tokens) {
+    std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {}) {
         OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available");
         OPENVINO_ASSERT(tokens.get_element_type() == ov::element::i64, "tokens tensor element type should be an i64");
         OPENVINO_ASSERT(tokens.get_shape().size() == 2, "tokens tensor should of rank 2 with shape [batch_size, seq_len]");
 
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_detokenizer.get());
+        set_state_if_necessary(infer_request_guard, detokenization_params);
         infer_request_guard.get().set_input_tensor(tokens);
         infer_request_guard.get().start_async();
         infer_request_guard.get().wait();
@@ -369,7 +389,7 @@ class Tokenizer::TokenizerImpl {
         return std::vector<std::string>(res_data, res_data + res.get_shape()[0]);
     }
 
-    std::vector<std::string> decode(std::vector<std::vector<int64_t>> lines) {
+    std::vector<std::string> decode(std::vector<std::vector<int64_t>> lines, const ov::AnyMap& detokenization_params = {}) {
         OPENVINO_ASSERT(m_detokenizer, "Detokenize model has not been provided. Tokenizer::decode is not available");
 
         auto compare_lengths = [](const std::vector<int64_t>& a, const std::vector<int64_t>& b) {
@@ -388,6 +408,7 @@ class Tokenizer::TokenizerImpl {
         }
 
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_detokenizer.get());
+        set_state_if_necessary(infer_request_guard, detokenization_params);
         infer_request_guard.get().set_input_tensor(tokens);
         infer_request_guard.get().start_async();
         infer_request_guard.get().wait();
@@ -502,31 +523,38 @@ Tokenizer::Tokenizer(const std::filesystem::path& tokenizer_path, const ov::AnyM
 }
 
 TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) {
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
     return m_pimpl->encode(std::move(prompt), tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params) {
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
     return m_pimpl->encode(prompts, tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts, const ov::AnyMap& tokenization_params) {
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
     return m_pimpl->encode(prompts, tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text, const ov::AnyMap& tokenization_params) {
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
     return encode(std::vector<std::string>(text.begin(), text.end()), tokenization_params);
 }
 
-std::string Tokenizer::decode(std::vector<int64_t> tokens) {
-    return m_pimpl->decode(tokens);
+std::string Tokenizer::decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params) {
+    check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()});
+    return m_pimpl->decode(tokens, detokenization_params);
 }
 
-std::vector<std::string> Tokenizer::decode(ov::Tensor tokens) {
-    return m_pimpl->decode(tokens);
+std::vector<std::string> Tokenizer::decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params) {
+    check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()});
+    return m_pimpl->decode(tokens, detokenization_params);
 }
 
-std::vector<std::string> Tokenizer::decode(std::vector<std::vector<int64_t>> lines) {
-    return m_pimpl->decode(lines);
+std::vector<std::string> Tokenizer::decode(std::vector<std::vector<int64_t>> lines, const ov::AnyMap& detokenization_params) {
+    check_arguments(detokenization_params, {ov::genai::skip_special_tokens.name()});
+    return m_pimpl->decode(lines, detokenization_params);
 }
 
 int64_t Tokenizer::get_bos_token_id() const {
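With check_arguments guarding each public entry point, passing a knob the endpoint does not accept now fails fast instead of being silently ignored; a hedged sketch (path and token ids hypothetical):

```cpp
#include <vector>
#include "openvino/core/except.hpp"
#include "openvino/genai/tokenizer.hpp"

void reject_unknown_keys() {
    ov::genai::Tokenizer tokenizer("path/to/tokenizer_dir");  // hypothetical path
    std::vector<int64_t> ids = {1, 2, 3};                     // hypothetical token ids
    try {
        // add_special_tokens is an encode-side knob, so decode rejects it.
        tokenizer.decode(ids, {{"add_special_tokens", false}});
    } catch (const ov::Exception&) {
        // Thrown by OPENVINO_THROW: "unacceptable parameter key: add_special_tokens"
    }
}
```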
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index df290a9744..5e4d2dd7b2 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -1303,17 +1303,17 @@ class Tokenizer:
         Embeds input prompts with special tags for a chat scenario.
         """
     @typing.overload
-    def decode(self, tokens: list[int]) -> str:
+    def decode(self, tokens: list[int], skip_special_tokens: bool = True) -> str:
         """
         Decode a sequence into a string prompt.
         """
     @typing.overload
-    def decode(self, tokens: openvino._pyopenvino.Tensor) -> list[str]:
+    def decode(self, tokens: openvino._pyopenvino.Tensor, skip_special_tokens: bool = True) -> list[str]:
         """
         Decode tensor into a list of string prompts.
         """
     @typing.overload
-    def decode(self, tokens: list[list[int]]) -> list[str]:
+    def decode(self, tokens: list[list[int]], skip_special_tokens: bool = True) -> list[str]:
         """
         Decode a batch of tokens into a list of string prompt.
         """
diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp
index 2ccccff4c0..db4643a65c 100644
--- a/src/python/py_tokenizer.cpp
+++ b/src/python/py_tokenizer.cpp
@@ -63,27 +63,33 @@ void init_tokenizer(py::module_& m) {
 
         .def(
             "decode",
-            [](Tokenizer& tok, std::vector<int64_t>& tokens) -> py::str {
-                return pyutils::handle_utf8(tok.decode(tokens));
+            [](Tokenizer& tok, std::vector<int64_t>& tokens, bool skip_special_tokens) -> py::str {
+                ov::AnyMap detokenization_params;
+                detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
+                return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
             },
-            py::arg("tokens"),
+            py::arg("tokens"), py::arg("skip_special_tokens") = true,
             R"(Decode a sequence into a string prompt.)"
         )
 
         .def(
             "decode",
-            [](Tokenizer& tok, ov::Tensor& tokens) -> py::typing::List<py::str> {
-                return pyutils::handle_utf8(tok.decode(tokens));
+            [](Tokenizer& tok, ov::Tensor& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
+                ov::AnyMap detokenization_params;
+                detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
+                return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
             },
-            py::arg("tokens"),
+            py::arg("tokens"), py::arg("skip_special_tokens") = true,
             R"(Decode tensor into a list of string prompts.)")
 
         .def(
             "decode",
-            [](Tokenizer& tok, std::vector<std::vector<int64_t>>& tokens) -> py::typing::List<py::str> {
-                return pyutils::handle_utf8(tok.decode(tokens));
+            [](Tokenizer& tok, std::vector<std::vector<int64_t>>& tokens, bool skip_special_tokens) -> py::typing::List<py::str> {
+                ov::AnyMap detokenization_params;
+                detokenization_params[ov::genai::skip_special_tokens.name()] = skip_special_tokens;
+                return pyutils::handle_utf8(tok.decode(tokens, detokenization_params));
             },
-            py::arg("tokens"),
+            py::arg("tokens"), py::arg("skip_special_tokens") = true,
             R"(Decode a batch of tokens into a list of string prompt.)")
.def("apply_chat_template", [](Tokenizer& tok, diff --git a/tests/python_tests/test_chat_generate_api.py b/tests/python_tests/test_chat_generate_api.py index 25d0798994..efd1d87416 100644 --- a/tests/python_tests/test_chat_generate_api.py +++ b/tests/python_tests/test_chat_generate_api.py @@ -217,3 +217,25 @@ def test_add_special_tokens(add_special_tokens, prompt): res_genai = genai_tokenzier.encode(prompt, add_special_tokens).input_ids.data res_hf = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"] assert np.all(res_genai == res_hf) + +@pytest.mark.precommit +@pytest.mark.xfail(reason="Need to turn them back on when openvino_tokenizers will be updated.") +@pytest.mark.nightly +@pytest.mark.parametrize("add_special_tokens", [True, False]) +@pytest.mark.parametrize("skip_special_tokens", [True, False]) +@pytest.mark.parametrize("prompt", prompts) +def test_add_special_tokens(add_special_tokens, skip_special_tokens, prompt): + import numpy as np + model_descr = get_chat_models_list()[0] + model_id, path, hf_tokenizer, model_opt, pipe = read_model((model_descr[0], model_descr[1] / '_test_chat')) + genai_tokenizer = pipe.get_tokenizer() + + # Calling encode with add_special_tokens will set state flag. + res_genai = genai_tokenizer.encode(prompt, add_special_tokens).input_ids.data + res_hf = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens)["input_ids"] + assert np.all(res_genai == res_hf) + + # Decode with skip_special_tokens + decoded_genai = genai_tokenizer.decode(res_genai, skip_special_tokens=skip_special_tokens)[0] + decoded_hf = hf_tokenizer.decode(res_hf[0], skip_special_tokens=skip_special_tokens) + assert decoded_genai == decoded_hf