Skip to content

Commit

Permalink
Add parametrization for the detokenization/decoding (#1246)
Browse files Browse the repository at this point in the history
![image](https://github.com/user-attachments/assets/6b08793a-b279-42e3-908c-8708d4fa9d41)

Tokenizers IRs should be converted after
openvinotoolkit/openvino_tokenizers#325 is
merged

Ticket CVS-154151
  • Loading branch information
andrei-kochin authored Nov 27, 2024
2 parents 3da2aeb + 139d4cc commit 5ee41ec
Show file tree
Hide file tree
Showing 10 changed files with 319 additions and 145 deletions.
4 changes: 2 additions & 2 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
- 'src/cpp/src/tokenizers_path.hpp'
- 'src/cpp/src/circular_buffer_queue.hpp'
- 'src/cpp/src/synchronized_queue.hpp'
- 'src/cpp/src/make_combine_segments_stateful.cpp'
- 'src/cpp/src/make_combine_segments_stateful.hpp'
- 'src/cpp/src/make_tokenizer_stateful.cpp'
- 'src/cpp/src/make_tokenizer_stateful.hpp'
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
Expand Down
49 changes: 43 additions & 6 deletions src/cpp/include/openvino/genai/tokenizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode a single prompt
* @param prompt std::string with input prompt
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(const std::string prompt, const ov::AnyMap& tokenization_params = {});

/**
* @brief encode batch of prompts. Left padding will be applied by default
* @param prompts vector storing batch of prompts
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(std::vector<std::string>& prompt, const ov::AnyMap& tokenization_params = {});
Expand Down Expand Up @@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return sequence string
*/
std::string decode(std::vector<int64_t> tokens);

std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return sequence string
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> decode(std::vector<int64_t>& tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief decode tokens.
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens);
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(ov::Tensor tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief batched decoding of tokens.
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size equal to batch_size
*/
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief batched decoding of tokens with detokenization properties
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size equal to batch_size
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(std::vector<std::vector<int64_t>> tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief Embeds input prompts with special tags for a chat scenario.
Expand Down Expand Up @@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
};

static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};

} // namespace genai
} // namespace ov
46 changes: 0 additions & 46 deletions src/cpp/src/make_combine_segments_stateful.cpp

This file was deleted.

44 changes: 0 additions & 44 deletions src/cpp/src/make_combine_segments_stateful.hpp

This file was deleted.

90 changes: 90 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "make_tokenizer_stateful.hpp"

#include <cstring>
#include <limits>
#include <memory>
#include <vector>

#include "openvino/op/assign.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/select.hpp"
#include "openvino/op/slice.hpp"


using namespace ov;
using namespace ov::op;

// Makes special-token insertion in the tokenizer model controllable at runtime:
// input(1) of CombineSegments is routed through Select(state, original, 0).
// Returns true if the model was modified, false if the pass does not apply.
bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // Find the CombineSegments node; if several exist, the last one in
    // topological order wins.  Iterate by const reference — get_ordered_ops()
    // yields shared_ptrs and copying each one costs atomic refcount traffic.
    std::shared_ptr<ov::Node> combine_seg_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (strcmp(node->get_type_info().name, "CombineSegments") == 0) {
            combine_seg_node = node;
        }
    }
    // The pass only applies when input(1) is present and carries i32 data.
    if (!combine_seg_node || combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
        return false;
    }

    std::shared_ptr<v0::Constant> input_1_const =
        std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
    if (!input_1_const) {
        return false;
    }

    // Scalar boolean state variable that toggles special-token insertion.
    op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);

    // Default mode is add_special_tokens == true.
    auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
    auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
    auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
    // Select(flag, original_const, 0): when the state is false the original
    // constant is replaced by zero, disabling special tokens.
    auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
    combine_seg_node->input(1).replace_source_output(select_node->output(0));

    // Assign closes the ReadValue/Assign pair so the state persists between calls.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);

    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}

// Makes skipping of special tokens in the detokenizer model controllable at
// runtime: the skip-tokens list feeding VocabDecoder's input(4) is sliced with
// a stop value of INT_MAX * state, so state==1 keeps the full list and
// state==0 yields an empty list.  Returns true if the model was modified.
bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // Find the VocabDecoder node; if several exist, the last one in topological
    // order wins.  Iterate by const reference to avoid shared_ptr copies.
    std::shared_ptr<ov::Node> vocab_decoder_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) {
            vocab_decoder_node = node;
        }
    }

    // The pass only applies when input(4) exists and holds integral token ids.
    if (!vocab_decoder_node || vocab_decoder_node->get_input_size() < 5) {
        return false;
    }
    if (!vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) {
        return false;
    }

    // Input(4) must be either a plain Constant or an existing Slice over one.
    auto skip_tokens_const = std::dynamic_pointer_cast<v0::Constant>(vocab_decoder_node->get_input_node_shared_ptr(4));
    auto skip_tokens_slice = std::dynamic_pointer_cast<v8::Slice>(vocab_decoder_node->get_input_node_shared_ptr(4));
    if (!skip_tokens_const && !skip_tokens_slice) {
        return false;
    }

    auto start_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{0});
    auto int_max_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits<int>::max()});
    auto one_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{1});

    // By default the state is 1, so INT_MAX * 1 selects all skip_tokens.
    op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);
    auto read_value = std::make_shared<v6::ReadValue>(one_const, variable);
    // Slice stop = INT_MAX * state: state==1 keeps every skip token,
    // state==0 makes the slice empty so nothing is skipped.
    auto stop = std::make_shared<v1::Multiply>(int_max_const, read_value);

    if (skip_tokens_slice) {
        // A Slice already feeds input(4) — just re-wire its stop input.
        skip_tokens_slice->input(2).replace_source_output(stop);
    } else {
        // Otherwise insert a fresh Slice over the skip-tokens constant.
        auto slice_node = std::make_shared<v8::Slice>(skip_tokens_const, start_const, stop, one_const);
        vocab_decoder_node->input(4).replace_source_output(slice_node->output(0));
    }

    // Close the ReadValue/Assign state pair and register it on the model.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);
    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}
81 changes: 81 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/op/constant.hpp"
#include "openvino/pass/pass.hpp"

namespace ov {
namespace genai {

/**
* @brief This pass modifies a tokenizer ov::Model so that adding of special
* tokens can be enabled or disabled at runtime through the model state
* (variable id: "add_special_tokens"; the default state adds them).
* The CombineSegments input is routed through Select(state, original, 0).
*
* NOTE(review): "Satateful" is a misspelling of "Stateful"; the name is part
* of the public API and the RTTI string, so renaming requires a coordinated,
* backward-compatible change — confirm before fixing.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* |
* v
* +--------------+ +--------+ +------------------+
* | ReadValue | | ends | | const value = 0 |
* +--------------+ +--------+ +------------------+
* \ | /
* \ | /
* v v v
* +--------------+
* | Select |
* +--------------+
* |
* v
* +-------------------------+
* | CombineSegments |
* +-------------------------+
**/
class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

/**
* @brief This pass modifies a detokenizer ov::Model so that skipping of
* special tokens can be enabled or disabled at runtime through the model
* state (variable id: "skip_special_tokens"). The skip-tokens input of
* VocabDecoder is sliced with stop = INT_MAX * state: the default state of 1
* selects all skip tokens, while 0 yields an empty list so nothing is skipped.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* v
* +------------+ +-----------+
* | ReadValue | | INT_MAX |
* +------------+ +-----------+
* \ /
* \ /
* v v
* +--------------------+ +---------+ +---------+
* | Const with tokens | | start | | Mul |
* +--------------------+ +---------+ +---------+
* \ | /
* \ | /
* v v v
* +-----------------+
* | Slice |
* +-----------------+
* |
* v
* +----------------------+
* | VocabDecoder |
* +----------------------+
**/
class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

// State-variable identifiers shared between the passes above and the runtime
// code that reads/writes the corresponding model states.
// `inline` (C++17) gives a single program-wide definition; a plain
// namespace-scope `const std::string` in a header has internal linkage and
// produces one dynamically-initialized copy per translation unit.
inline const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
inline const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens";

} // namespace genai
} // namespace ov
Loading

0 comments on commit 5ee41ec

Please sign in to comment.