Skip to content

Commit

Permalink
Add parametrization for the detokenization/decoding (#1246)
Browse files Browse the repository at this point in the history
![image](https://github.com/user-attachments/assets/6b08793a-b279-42e3-908c-8708d4fa9d41)

Tokenizers IRs should be converted after
openvinotoolkit/openvino_tokenizers#325 is
merged

Ticket CVS-154151
  • Loading branch information
andrei-kochin authored Nov 27, 2024
2 parents 3da2aeb + 139d4cc commit 5ee41ec
Show file tree
Hide file tree
Showing 10 changed files with 319 additions and 145 deletions.
4 changes: 2 additions & 2 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
- 'src/cpp/src/tokenizers_path.hpp'
- 'src/cpp/src/circular_buffer_queue.hpp'
- 'src/cpp/src/synchronized_queue.hpp'
- 'src/cpp/src/make_combine_segments_stateful.cpp'
- 'src/cpp/src/make_combine_segments_stateful.hpp'
- 'src/cpp/src/make_tokenizer_stateful.cpp'
- 'src/cpp/src/make_tokenizer_stateful.hpp'
- 'src/python/py_tokenizer.cpp'
- 'thirdparty/openvino_tokenizers'
- 'tests/python_tests/tokenizer_configs.py'
Expand Down
49 changes: 43 additions & 6 deletions src/cpp/include/openvino/genai/tokenizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode a single prompt
* @param prompt std::string with input prompt
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(const std::string prompt, const ov::AnyMap& tokenization_params = {});

/**
* @brief encode batch of prompts. Left padding will be applied by default
* @param prompts vector storing batch of prompts
* @param tokenization_params AnyMap with tokenization parameters, e.g. {'add_special_tokens', false}
* @param tokenization_params AnyMap with tokenization parameters, e.g. {"add_special_tokens", false}
* @return pair of [input_ids, attention_mask]
*/
TokenizedInputs encode(std::vector<std::string>& prompt, const ov::AnyMap& tokenization_params = {});
Expand Down Expand Up @@ -87,23 +87,59 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return sequence string
*/
std::string decode(std::vector<int64_t> tokens);

std::string decode(std::vector<int64_t> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens vector storing tokens
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return sequence string
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::string, Properties...> decode(std::vector<int64_t>& tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief decode tokens.
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief decode sequence of tokens
* @param tokens ov::Tensor with tokens with shape [batch_size, seq_len]
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size = batch_size
*/
std::vector<std::string> decode(ov::Tensor tokens);
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(ov::Tensor tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief batched decoding of tokens.
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params AnyMap with detokenization parameters, e.g. {"skip_special_tokens", false}
* @return vector of std::string, with size equal to batch_size
*/
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens);
std::vector<std::string> decode(std::vector<std::vector<int64_t>> tokens, const ov::AnyMap& detokenization_params = {});

/**
* @brief batched decoding of tokens with detokenization properties
* @param tokens vector of vectors with tokens, tokens.size() is equal to batch_size
* @param detokenization_params detokenization parameters, e.g. ov::genai::skip_special_tokens(true)
* @return vector of std::string, with size equal to batch_size
*/
template <typename... Properties>
util::EnableIfAllStringAny<std::vector<std::string>, Properties...> decode(std::vector<std::vector<int64_t>> tokens, Properties&&... detokenization_params) {
return decode(tokens, AnyMap{std::forward<Properties>(detokenization_params)...});
}

/**
* @brief Embeds input prompts with special tags for a chat scenario.
Expand Down Expand Up @@ -143,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
};

static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};

} // namespace genai
} // namespace ov
46 changes: 0 additions & 46 deletions src/cpp/src/make_combine_segments_stateful.cpp

This file was deleted.

44 changes: 0 additions & 44 deletions src/cpp/src/make_combine_segments_stateful.hpp

This file was deleted.

90 changes: 90 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "make_tokenizer_stateful.hpp"

#include <cstring>
#include <limits>
#include <memory>
#include <vector>

#include "openvino/op/assign.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/select.hpp"
#include "openvino/op/slice.hpp"


using namespace ov;
using namespace ov::op;

// Makes special-token insertion in the tokenizer model controllable at runtime:
// input(1) of CombineSegments is routed through Select(state, original, 0).
// Returns true if the model was modified, false if the pass does not apply.
bool ov::genai::MakeCombineSegmentsSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // Find the CombineSegments node; if several exist, the last one in
    // topological order wins.  Iterate by const reference — get_ordered_ops()
    // yields shared_ptrs and copying each one costs atomic refcount traffic.
    std::shared_ptr<ov::Node> combine_seg_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (strcmp(node->get_type_info().name, "CombineSegments") == 0) {
            combine_seg_node = node;
        }
    }
    // The pass only applies when input(1) is present and carries i32 data.
    if (!combine_seg_node || combine_seg_node->input_value(1).get_element_type() != ov::element::i32) {
        return false;
    }

    std::shared_ptr<v0::Constant> input_1_const =
        std::dynamic_pointer_cast<v0::Constant>(combine_seg_node->get_input_node_shared_ptr(1));
    if (!input_1_const) {
        return false;
    }

    // Scalar boolean state variable that toggles special-token insertion.
    op::util::VariableInfo var_info{ov::Shape{}, ov::element::boolean, ADD_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);

    // Default mode is add_special_tokens == true.
    auto default_mode_const = std::make_shared<v0::Constant>(ov::element::boolean, ov::Shape{}, std::vector{true});
    auto read_value = std::make_shared<v6::ReadValue>(default_mode_const, variable);
    auto zero_constant = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{}, std::vector{0});
    // Select(flag, original_const, 0): when the state is false the original
    // constant is replaced by zero, disabling special tokens.
    auto select_node = std::make_shared<v1::Select>(read_value, input_1_const, zero_constant);
    combine_seg_node->input(1).replace_source_output(select_node->output(0));

    // Assign closes the ReadValue/Assign pair so the state persists between calls.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);

    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}

// Makes skipping of special tokens in the detokenizer model controllable at
// runtime: the skip-tokens list feeding VocabDecoder's input(4) is sliced with
// a stop value of INT_MAX * state, so state==1 keeps the full list and
// state==0 yields an empty list.  Returns true if the model was modified.
bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr<ov::Model>& model) {
    // Find the VocabDecoder node; if several exist, the last one in topological
    // order wins.  Iterate by const reference to avoid shared_ptr copies.
    std::shared_ptr<ov::Node> vocab_decoder_node;
    for (const auto& node : model->get_ordered_ops()) {
        if (strcmp(node->get_type_info().name, "VocabDecoder") == 0) {
            vocab_decoder_node = node;
        }
    }

    // The pass only applies when input(4) exists and holds integral token ids.
    if (!vocab_decoder_node || vocab_decoder_node->get_input_size() < 5) {
        return false;
    }
    if (!vocab_decoder_node->input_value(4).get_element_type().is_integral_number()) {
        return false;
    }

    // Input(4) must be either a plain Constant or an existing Slice over one.
    auto skip_tokens_const = std::dynamic_pointer_cast<v0::Constant>(vocab_decoder_node->get_input_node_shared_ptr(4));
    auto skip_tokens_slice = std::dynamic_pointer_cast<v8::Slice>(vocab_decoder_node->get_input_node_shared_ptr(4));
    if (!skip_tokens_const && !skip_tokens_slice) {
        return false;
    }

    auto start_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{0});
    auto int_max_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{std::numeric_limits<int>::max()});
    auto one_const = std::make_shared<v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector{1});

    // By default the state is 1, so INT_MAX * 1 selects all skip_tokens.
    op::util::VariableInfo var_info{ov::Shape{1}, ov::element::i32, SKIP_SPECIAL_TOKENS_VAR_ID};
    auto variable = std::make_shared<op::util::Variable>(var_info);
    auto read_value = std::make_shared<v6::ReadValue>(one_const, variable);
    // Slice stop = INT_MAX * state: state==1 keeps every skip token,
    // state==0 makes the slice empty so nothing is skipped.
    auto stop = std::make_shared<v1::Multiply>(int_max_const, read_value);

    if (skip_tokens_slice) {
        // A Slice already feeds input(4) — just re-wire its stop input.
        skip_tokens_slice->input(2).replace_source_output(stop);
    } else {
        // Otherwise insert a fresh Slice over the skip-tokens constant.
        auto slice_node = std::make_shared<v8::Slice>(skip_tokens_const, start_const, stop, one_const);
        vocab_decoder_node->input(4).replace_source_output(slice_node->output(0));
    }

    // Close the ReadValue/Assign state pair and register it on the model.
    auto assign = std::make_shared<v6::Assign>(read_value, variable);
    model->add_sinks({assign});
    model->add_variables({variable});
    return true;
}
81 changes: 81 additions & 0 deletions src/cpp/src/make_tokenizer_stateful.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/op/constant.hpp"
#include "openvino/pass/pass.hpp"

namespace ov {
namespace genai {

/**
* @brief This pass modifies a tokenizer ov::Model so that adding of special
* tokens can be enabled or disabled at runtime through the model state
* (variable id: "add_special_tokens"; the default state adds them).
* The CombineSegments input is routed through Select(state, original, 0).
*
* NOTE(review): "Satateful" is a misspelling of "Stateful"; the name is part
* of the public API and the RTTI string, so renaming requires a coordinated,
* backward-compatible change — confirm before fixing.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* |
* v
* +--------------+ +--------+ +------------------+
* | ReadValue | | ends | | const value = 0 |
* +--------------+ +--------+ +------------------+
* \ | /
* \ | /
* v v v
* +--------------+
* | Select |
* +--------------+
* |
* v
* +-------------------------+
* | CombineSegments |
* +-------------------------+
**/
class MakeCombineSegmentsSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeCombineSegmentsSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

/**
* @brief This pass modifies a detokenizer ov::Model so that skipping of
* special tokens can be enabled or disabled at runtime through the model
* state (variable id: "skip_special_tokens"). The skip-tokens input of
* VocabDecoder is sliced with stop = INT_MAX * state: the default state of 1
* selects all skip tokens, while 0 yields an empty list so nothing is skipped.
*
* +--------------+
* | DefaultMode |
* +--------------+
* |
* v
* +------------+ +-----------+
* | ReadValue | | INT_MAX |
* +------------+ +-----------+
* \ /
* \ /
* v v
* +--------------------+ +---------+ +---------+
* | Const with tokens | | start | | Mul |
* +--------------------+ +---------+ +---------+
* \ | /
* \ | /
* v v v
* +-----------------+
* | Slice |
* +-----------------+
* |
* v
* +----------------------+
* | VocabDecoder |
* +----------------------+
**/
class MakeVocabDecoderSatateful : public ov::pass::ModelPass {
public:
OPENVINO_RTTI("MakeVocabDecoderSatateful", "0");
bool run_on_model(const std::shared_ptr<ov::Model>& model) override;
};

// State-variable identifiers shared between the passes above and the runtime
// code that reads/writes the corresponding model states.
// `inline` (C++17) gives a single program-wide definition; a plain
// namespace-scope `const std::string` in a header has internal linkage and
// produces one dynamically-initialized copy per translation unit.
inline const std::string ADD_SPECIAL_TOKENS_VAR_ID = "add_special_tokens";
inline const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens";

} // namespace genai
} // namespace ov
Loading

0 comments on commit 5ee41ec

Please sign in to comment.