Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parametrization for the detokenization/decoding #1246

Merged
merged 7 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
- 'src/cpp/src/tokenizers_path.hpp'
- 'src/cpp/src/circular_buffer_queue.hpp'
- 'src/cpp/src/synchronized_queue.hpp'
- 'src/cpp/src/make_combine_segments_stateful.cpp'
- 'src/cpp/src/make_combine_segments_stateful.hpp'
- 'src/cpp/src/make_tokenizer_stateful.cpp'
- 'src/cpp/src/make_tokenizer_stateful.hpp'
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
Expand Down
49 changes: 43 additions & 6 deletions src/cpp/include/openvino/genai/tokenizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode a single prompt
* @param prompt std::string with input prompt
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(const std::string prompt, const ov::AnyMap& tokenization_params = {});

/**
* @brief encode batch of prompts. Left padding will be applied by default
* @param prompts vector storing batch of prompts
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(std::vector<std::string>& prompt, const ov::AnyMap& tokenization_params = {});
Expand Down Expand Up @@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return sequence string
*/
std::string decode(std::vector<int64_t> tokens);

std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {});

/**
 * @brief decode sequence of tokens
 * @param tokens vector storing tokens; taken by const reference so that const vectors and
 *        temporaries are accepted, like in the AnyMap-based overload
 * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
 * @return sequence string
 */
template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> decode(const std::vector<int64_t>& tokens, Properties&&... detokenization_params) {
    // Pack the properties into an AnyMap and delegate to the AnyMap-based overload.
    return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief decode tokens.
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens);
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(ov::Tensor tokens, Properties&&... detokenization_params) {
    // Collect the individual properties into a map, then hand over to the AnyMap-based overload.
    const AnyMap packed_params{std::forward<Properties>(detokenization_params)...};
    return decode(tokens, packed_params);
}

/**
* @brief batched decoding of tokens.
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size equal to batch_size
*/
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens, const ov::AnyMap& detokenization_params = {});

/**
 * @brief batched decoding of tokens.
 * @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
 * @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
 * @return vector of std::string, with size equal to batch_size
 */
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(std::vector<std::vector<int64_t>> tokens, Properties&&... detokenization_params) {
    // Collect the individual properties into a map, then hand over to the AnyMap-based overload.
    const AnyMap packed_params{std::forward<Properties>(detokenization_params)...};
    return decode(tokens, packed_params);
}

/**
* @brief Embeds input prompts with special tags for a chat scenario.
Expand Down Expand Up @@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
};

// Boolean property with the key "add_special_tokens"; the encode() docs list this key
// as an example tokenization parameter.
static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
// Boolean property with the key "skip_special_tokens"; the decode() docs list it as an
// example detokenization parameter, e.g. ov::genai::skip_special_tokens(true).
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};

} // namespace genai
} // namespace ov
46 changes: 0 additions & 46 deletions src/cpp/src/make_combine_segments_stateful.cpp

This file was deleted.

44 changes: 0 additions & 44 deletions src/cpp/src/make_combine_segments_stateful.hpp

This file was deleted.

90 changes: 90 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "make_tokenizer_stateful.hpp"

#include <cstring>
#include <limits>
#include <memory>
#include <vector>

#include "openvino/op/assign.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/select.hpp"
#include "openvino/op/slice.hpp"


using namespace ov;
using namespace ov::op;

/**
 * @brief Rewires the CombineSegments node so that its input 1 can be switched at runtime
 * through a boolean state variable.
 *
 * Input 1 must be an i32 Constant. It is replaced with
 * Select(ReadValue(state), <original constant>, 0): the original constant is used while the
 * state is true and 0 is used while it is false. The state variable is identified by
 * ADD_SPECIAL_TOKENS_VAR_ID and is initialized to true.
 *
 * @param model model to modify in place
 * @return true if the model was modified, false if no suitable CombineSegments node was found
 */
bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // If several CombineSegments nodes exist, the last one returned by get_ordered_ops() is used.
    std::shared_ptr<ov::Node> combine_seg_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (std::strcmp(node->get_type_info().name, "CombineSegments") == 0) {
            combine_seg_node = node;
        }
    }
    // Input 1 is accessed below, so make sure the node really has it.
    if (!combine_seg_node || combine_seg_node->get_input_size() < 2) {
        return false;
    }
    if (combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
        return false;
    }

    // Only a constant input can be swapped for the Select below.
    std::shared_ptr<v0::Constant> input_1_const =
        std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
    if (!input_1_const) {
        return false;
    }

    op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);

    // Default mode is add_special_tokens.
    auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
    auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
    auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
    // state == true -> original constant, state == false -> 0.
    auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
    combine_seg_node->input(1).replace_source_output(select_node->output(0));

    // The Assign sink and the variable must be registered on the model to make it stateful.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);

    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}

/**
 * @brief Makes input 4 of the VocabDecoder node (handled here as the list of tokens to skip)
 * switchable at runtime through an i32 state variable.
 *
 * Input 4 must have an integral element type and be produced by either a Constant or a Slice.
 * The pass builds stop = INT_MAX * ReadValue(state) and uses it as the stop input of a Slice
 * over that list: with state == 1 (the default) the whole list is kept, with state == 0 the
 * slice is empty. The state variable is identified by SKIP_SPECIAL_TOKENS_VAR_ID.
 *
 * @param model model to modify in place
 * @return true if the model was modified, false if no suitable VocabDecoder node was found
 */
bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // If several VocabDecoder nodes exist, the last one returned by get_ordered_ops() is used.
    std::shared_ptr<ov::Node> vocab_decoder_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (std::strcmp(node->get_type_info().name, "VocabDecoder") == 0)
            vocab_decoder_node = node;
    }

    if (!vocab_decoder_node || vocab_decoder_node->get_input_size() < 5)
        return false;
    if (!vocab_decoder_node->input_value(4).get_element_type().is_integral_number())
        return false;

    // The producer of input 4 must be either a Constant or an already existing Slice.
    const auto skip_tokens_source = vocab_decoder_node->get_input_node_shared_ptr(4);
    std::shared_ptr<v0::Constant> skip_tokens_const = std::dynamic_pointer_cast<v0::Constant>(skip_tokens_source);
    std::shared_ptr<v8::Slice> skip_tokens_slice = std::dynamic_pointer_cast<v8::Slice>(skip_tokens_source);
    if (!skip_tokens_const && !skip_tokens_slice)
        return false;

    auto int_max_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits<int>::max()});
    auto one_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{1});

    // The state defaults to 1, so by default stop == INT_MAX and the whole token list is selected.
    op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);
    auto read_value = std::make_shared<v6::ReadValue>(one_const, variable);
    // stop = INT_MAX * state: state 1 slices up to INT_MAX (full list), state 0 gives an empty slice.
    // NOTE(review): the state is presumably only ever 0 or 1; other values would overflow i32 here.
    auto stop = std::make_shared<v1::Multiply>(int_max_const, read_value);

    if (skip_tokens_slice) {
        // A Slice is already in place: only its stop input needs to be replaced.
        skip_tokens_slice->input(2).replace_source_output(stop);
    } else {
        // Insert Slice(tokens, start = 0, stop, step = 1) between the constant and VocabDecoder.
        auto start_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{0});
        std::shared_ptr<v8::Slice> slice_node = std::make_shared<v8::Slice>(skip_tokens_const, start_const, stop, one_const);
        vocab_decoder_node->input(4).replace_source_output(slice_node->output(0));
    }

    // The Assign sink and the variable must be registered on the model to make it stateful.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);
    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}
81 changes: 81 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/op/constant.hpp"
#include "openvino/pass/pass.hpp"

namespace ov {
namespace genai {

/**
 * @brief This pass modifies a tokenizer ov::Model so that adding special tokens is
 * enabled or disabled depending on the value of a state variable.
 *
 * Input 1 of CombineSegments ("ends" in the diagram) is routed through a Select controlled
 * by a ReadValue: the original constant is used when the state is true, the constant 0
 * otherwise. The ReadValue is initialized from DefaultMode (true).
 *
 *          +--------------+
 *          | DefaultMode  |
 *          +--------------+
 *                 |
 *                 |
 *                 v
 *          +--------------+   +--------+   +------------------+
 *          |  ReadValue   |   |  ends  |   | const value = 0  |
 *          +--------------+   +--------+   +------------------+
 *                     \           |           /
 *                      \          |          /
 *                       v         v         v
 *                        +--------------+
 *                        |    Select    |
 *                        +--------------+
 *                               |
 *                               v
 *                  +-------------------------+
 *                  |     CombineSegments     |
 *                  +-------------------------+
 **/
class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
// Returns true when the model was modified, false when no matching node was found.
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

/**
 * @brief This pass modifies an ov::Model containing a VocabDecoder node so that skipping
 * special tokens is enabled or disabled depending on the value of a state variable.
 *
 * The token list fed to VocabDecoder is sliced with stop = INT_MAX * ReadValue. The
 * ReadValue is initialized from DefaultMode (1), so by default the whole list is kept;
 * a state value of 0 produces an empty slice.
 *
 *                                 +--------------+
 *                                 | DefaultMode  |
 *                                 +--------------+
 *                                        |
 *                                        v
 *                                 +------------+    +-----------+
 *                                 | ReadValue  |    |  INT_MAX  |
 *                                 +------------+    +-----------+
 *                                          \          /
 *                                           \        /
 *                                            v      v
 *  +--------------------+   +---------+   +---------+
 *  | Const with tokens  |   |  start  |   |   Mul   |
 *  +--------------------+   +---------+   +---------+
 *                 \              |            /
 *                  \             |           /
 *                   v            v          v
 *                      +-----------------+
 *                      |      Slice      |
 *                      +-----------------+
 *                               |
 *                               v
 *                    +----------------------+
 *                    |     VocabDecoder     |
 *                    +----------------------+
 **/
class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
// Returns true when the model was modified, false when no matching node was found.
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

// Identifiers of the state variables created by the two passes declared in this header.
// Declared `inline` so that all translation units including this header share a single
// definition instead of each getting its own dynamically initialized copy.
inline const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
inline const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens";

} // namespace genai
} // namespace ov
Loading
Loading