[TF FE] Extend conversion for RaggedTensorToTensor in case rowids format and introduce Equal for 1D string tensors (openvinotoolkit#70)

* [TF FE] Extend conversion for RaggedTensorToTensor in case rowids format and introduce Equal for 1D string tensors
  Signed-off-by: Kazantsev, Roman <[email protected]>
* Fix conversion of Equal operation
  Signed-off-by: Kazantsev, Roman <[email protected]>
* Fix RaggedToRagged operation
  Signed-off-by: Kazantsev, Roman <[email protected]>
* Fix RaggedToRagged operation extension
  Signed-off-by: Kazantsev, Roman <[email protected]>
* Fix conversion for RaggedTensorToTensor operation
  Signed-off-by: Kazantsev, Roman <[email protected]>
---------
Signed-off-by: Kazantsev, Roman <[email protected]>
apaniukov · Mar 12, 2024 · 4db4de6 · 4db4de6
1 parent fbb2e0c
commit 4db4de6
Show file tree

Hide file tree

Showing 8 changed files with 315 additions and 25 deletions.
diff --git a/src/equal_str.cpp b/src/equal_str.cpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "equal_str.hpp"
+#include "utils.hpp"
+
+using namespace ov;
+
+
+// Validates the six inputs of EqualStr and declares its single output.
+// Inputs 0 and 3 are the begins, inputs 1 and 4 are the ends of the two operands;
+// the element types of inputs 2 and 5 are not checked here.
+void EqualStr::validate_and_infer_types() {
+    OPENVINO_ASSERT(get_input_size() == 6);
+
+    // first operand
+    const auto& begins_type1 = get_input_element_type(0);
+    const auto& ends_type1 = get_input_element_type(1);
+    // second operand
+    const auto& begins_type2 = get_input_element_type(3);
+    const auto& ends_type2 = get_input_element_type(4);
+
+    OPENVINO_ASSERT(begins_type1 == element::i32 && begins_type2 == element::i32,
+        "Expected an i32 begins for string tensor representation.");
+    OPENVINO_ASSERT(ends_type1 == element::i32 && ends_type2 == element::i32,
+        "Expected an i32 ends for string tensor representation.");
+
+    // the result is a 1D boolean tensor whose length is left dynamic at this stage
+    const PartialShape output_shape({ Dimension::dynamic() });
+    set_output_type(0, element::boolean, output_shape);
+}
+
+// Element-wise equality of two 1D string tensors given in unpacked form.
+//
+// inputs[0], inputs[1], inputs[2] - begins (i32), ends (i32) and chars (u8) of the first operand
+// inputs[3], inputs[4], inputs[5] - the same triple for the second operand
+// outputs[0]                      - boolean tensor, reshaped here to the broadcast length
+//
+// An operand that holds exactly one string is broadcast against the other operand.
+// Any other length mismatch is rejected: falling back to element 0 would yield wrong
+// results and, for an empty operand, read past the end of its begins/ends buffers.
+// Always returns true.
+bool EqualStr::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const {
+    const auto begins1 = inputs[0].data<const int32_t>();
+    const auto ends1 = inputs[1].data<const int32_t>();
+    const auto chars1 = inputs[2].data<const uint8_t>();
+    const auto begins2 = inputs[3].data<const int32_t>();
+    const auto ends2 = inputs[4].data<const int32_t>();
+    const auto chars2 = inputs[5].data<const uint8_t>();
+
+    const size_t num_elems1 = inputs[0].get_size();
+    const size_t num_elems2 = inputs[3].get_size();
+    OPENVINO_ASSERT(num_elems1 == num_elems2 || num_elems1 == 1 || num_elems2 == 1,
+        "Expected string tensors of equal length or one of them with a single element for broadcasting.");
+
+    // a single-element operand adopts the length of the other one (which may be zero)
+    const size_t num_elems = (num_elems1 == 1) ? num_elems2 : num_elems1;
+    outputs[0].set_shape(ov::Shape{ num_elems });
+    const auto result = outputs[0].data<bool>();
+
+    for (size_t idx = 0; idx < num_elems; ++idx) {
+        // handle indices due to broadcasting case
+        const size_t idx1 = (num_elems1 == 1) ? 0 : idx;
+        const size_t idx2 = (num_elems2 == 1) ? 0 : idx;
+
+        const uint8_t* const first1 = chars1 + begins1[idx1];
+        const uint8_t* const last1 = chars1 + ends1[idx1];
+        const uint8_t* const first2 = chars2 + begins2[idx2];
+        const uint8_t* const last2 = chars2 + ends2[idx2];
+
+        // compare in place instead of copying both strings into temporary vectors:
+        // byte lengths must match first, then the bytes themselves
+        result[idx] = (last1 - first1 == last2 - first2) && std::equal(first1, last1, first2);
+    }
+    return true;
+}
diff --git a/src/equal_str.hpp b/src/equal_str.hpp
@@ -0,0 +1,38 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/op/op.hpp>
+
+// EqualStr compares two unpacked string tensors and outputs 1D boolean tensor
+// The operation is only applicable if output shape of string tensor corresponds to 1D tensor
+//
+// The operation expects six inputs: (begins, ends, chars) of the first string tensor
+// followed by (begins, ends, chars) of the second one.
+class EqualStr : public ov::op::Op {
+public:
+    OPENVINO_OP("EqualStr");
+
+    EqualStr() = default;
+
+    // The inputs are taken by const reference so that the vector is not copied
+    // once more before it is handed over to the base class.
+    EqualStr(const ov::OutputVector& inputs)
+        : ov::op::Op(inputs) {
+        constructor_validate_and_infer_types();
+    }
+
+    // Checks the number and element types of inputs and sets the output type.
+    void validate_and_infer_types() override;
+
+    // Creates a new EqualStr node connected to the given inputs.
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& inputs) const override {
+        return std::make_shared<EqualStr>(inputs);
+    }
+
+    // The operation has no attributes, so there is nothing to visit.
+    bool visit_attributes(ov::AttributeVisitor& /*visitor*/) override {
+        return true;
+    }
+
+    // Reports that evaluate() is implemented for this operation.
+    bool has_evaluate() const override {
+        return true;
+    }
+
+    // Computes the element-wise comparison on the provided tensors.
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override;
+};
diff --git a/src/ov_extension.cpp b/src/ov_extension.cpp
@@ -22,7 +22,8 @@
     std::make_shared<ov::frontend::tensorflow::ConversionExtension>("LookupTableFind", translate_lookup_table_find_op),                          \
     std::make_shared<ov::frontend::tensorflow::ConversionExtension>("LookupTableFindV2", translate_lookup_table_find_op),                        \
     std::make_shared<ov::frontend::tensorflow::ConversionExtension>("StringSplitV2", translate_string_split),                                    \
-    std::make_shared<ov::frontend::tensorflow::ConversionExtension>("RaggedTensorToTensor", translate_ragged_tensor_to_tensor)
+    std::make_shared<ov::frontend::tensorflow::ConversionExtension>("RaggedTensorToTensor", translate_ragged_tensor_to_tensor),                  \
+    std::make_shared<ov::frontend::tensorflow::ConversionExtension>("Equal", translate_equal)
 #else
 #define OPENVINO_TOKENIZERS_TENSORFLOW_CONVERSION_EXTENSIONS
 #endif
@@ -35,6 +36,7 @@ OPENVINO_CREATE_EXTENSIONS(
             std::make_shared<ov::OpExtension<RaggedTensorPack>>(),
             std::make_shared<ov::OpExtension<StringTensorUnpack>>(),
             std::make_shared<ov::OpExtension<CaseFold>>(),
+            std::make_shared<ov::OpExtension<EqualStr>>(),
             std::make_shared<ov::OpExtension<NormalizeUnicode>>(),
             std::make_shared<ov::OpExtension<RegexNormalization>>(),
             std::make_shared<ov::OpExtension<RegexSplit>>(),
@@ -44,6 +46,7 @@ OPENVINO_CREATE_EXTENSIONS(
             std::make_shared<ov::OpExtension<CombineSegments>>(),
             std::make_shared<ov::OpExtension<RaggedToDense>>(),
             std::make_shared<ov::OpExtension<RaggedToSparse>>(),
+            std::make_shared<ov::OpExtension<RaggedToRagged>>(),
             std::make_shared<ov::OpExtension<VocabEncoder>>(),
             std::make_shared<ov::OpExtension<VocabDecoder>>(),
             std::make_shared<ov::OpExtension<CharsToBytes>>(),

diff --git a/src/ragged_to_ragged.cpp b/src/ragged_to_ragged.cpp
@@ -0,0 +1,82 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <openvino/op/constant.hpp>
+
+#include "ragged_to_ragged.hpp"
+#include "utils.hpp"
+
+using namespace ov;
+using op::v0::Constant;
+
+// Checks that the operation has exactly two i32 inputs (row ids and first dimension size)
+// and declares two 1D outputs of dynamic length with the element type of input 0.
+void RaggedToRagged::validate_and_infer_types() {
+    OPENVINO_ASSERT(get_input_size() == 2);
+
+    const auto& rowids_type = get_input_element_type(0);
+    const auto& first_dim_size_type = get_input_element_type(1);
+
+    OPENVINO_ASSERT(rowids_type == element::i32, "Expected an i32 rowids tensor ragged representation.");
+    OPENVINO_ASSERT(first_dim_size_type == element::i32, "Expected an i32 first dim size tensor ragged representation.");
+
+    // both outputs share the same type and shape
+    const PartialShape dynamic_1d({ Dimension::dynamic() });
+    for (size_t port = 0; port < 2; ++port) {
+        set_output_type(port, get_input_element_type(0), dynamic_1d);
+    }
+}
+
+
+// Converts a ragged tensor from the row ids format into the begins-ends format.
+//
+// inputs[0]  - i32 row id of every value
+// inputs[1]  - i32 tensor whose first element is the number of rows (batch size)
+// outputs[0] - begins, one element per row
+// outputs[1] - ends, one element per row
+//
+// Every maximal run of equal row ids [start, stop) yields begins[row] = start and
+// ends[row] = stop; rows without values get an empty range. Processing stops at the
+// first row id that does not fit into the batch, and the values from that point on
+// are not assigned to any row.
+// NOTE(review): row ids are presumably sorted in non-decreasing order, as in the
+// example documented for this operation - confirm against callers.
+// Throws if a negative row id is met before processing stops or if the first dimension
+// size is missing or negative.
+bool RaggedToRagged::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const {
+    const auto rowids = inputs[0].data<const int32_t>();
+    const auto rowids_size = static_cast<int32_t>(inputs[0].get_size());
+
+    OPENVINO_ASSERT(inputs[1].get_size() > 0, "first dim size tensor must not be empty");
+    // kept signed so that comparisons with row ids do not mix signed and unsigned values
+    const int32_t batch_size = inputs[1].data<const int32_t>()[0];
+    OPENVINO_ASSERT(0 <= batch_size, "first dim size must be non-negative");
+
+    const ov::Shape output_shape{ static_cast<size_t>(batch_size) };
+    outputs[0].set_shape(output_shape);
+    outputs[1].set_shape(output_shape);
+
+    auto begins = outputs[0].data<int32_t>();
+    auto ends = outputs[1].data<int32_t>();
+
+    // row id of the run that is currently open, -1 while no run has been seen
+    int32_t open_row = -1;
+    // index of the first value of the open run
+    int32_t run_start = 0;
+    // number of leading values that were distributed between rows of the batch
+    int32_t consumed = rowids_size;
+
+    for (int32_t rowids_idx = 0; rowids_idx < rowids_size; ++rowids_idx) {
+        const int32_t curr_row_id = rowids[rowids_idx];
+        OPENVINO_ASSERT(0 <= curr_row_id, "row id must be non-negative");
+        if (curr_row_id >= batch_size) {
+            // the open run ends here; it is closed after the loop using this position
+            consumed = rowids_idx;
+            break;
+        }
+        if (curr_row_id == open_row) {
+            continue;
+        }
+
+        // a new run starts: close the previous one
+        if (open_row != -1) {
+            begins[open_row] = run_start;
+            ends[open_row] = rowids_idx;
+        }
+        // rows skipped between the previous and the current run are empty
+        for (int32_t gap_row = open_row + 1; gap_row < curr_row_id; ++gap_row) {
+            begins[gap_row] = rowids_idx;
+            ends[gap_row] = rowids_idx;
+        }
+
+        run_start = rowids_idx;
+        open_row = curr_row_id;
+    }
+
+    // close the last run; this must also happen when the loop was left early,
+    // otherwise begins/ends of that row would stay unwritten
+    if (open_row != -1) {
+        begins[open_row] = run_start;
+        ends[open_row] = consumed;
+    }
+
+    // all remaining rows of the batch are empty
+    for (int32_t batch_idx = open_row + 1; batch_idx < batch_size; ++batch_idx) {
+        begins[batch_idx] = consumed;
+        ends[batch_idx] = consumed;
+    }
+
+    return true;
+}
diff --git a/src/ragged_to_ragged.hpp b/src/ragged_to_ragged.hpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <openvino/op/op.hpp>
+
+// RaggedToRagged converts a ragged tensor described by row ids into begins-ends form.
+// It takes two inputs: the row id of every value and the size of the first dimension.
+// Each row id tells which row the value at the same position belongs to.
+// Example: for rowids = [0, 0, 2, 3, 3, 3] and first_dims_size = 5 the outputs are
+// begins = [0, 2, 2, 3, 6]
+// ends   = [2, 2, 3, 6, 6]
+class RaggedToRagged : public ov::op::Op {
+public:
+    OPENVINO_OP("RaggedToRagged");
+
+    RaggedToRagged() = default;
+
+    RaggedToRagged(const ov::OutputVector& arguments)
+        : ov::op::Op(arguments) {
+        constructor_validate_and_infer_types();
+    }
+
+    // Creates a new RaggedToRagged node connected to the given inputs.
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& inputs) const override {
+        auto cloned = std::make_shared<RaggedToRagged>(inputs);
+        return cloned;
+    }
+
+    // Checks input types and sets types and shapes of both outputs.
+    void validate_and_infer_types() override;
+
+    // Nothing to visit: the method only reports success.
+    bool visit_attributes(ov::AttributeVisitor& visitor) override {
+        return true;
+    }
+
+    // Reports that evaluate() is implemented for this operation.
+    bool has_evaluate() const override {
+        return true;
+    }
+
+    // Computes begins and ends from the row ids.
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override;
+};
diff --git a/src/tensorflow_translators.cpp b/src/tensorflow_translators.cpp
@@ -14,9 +14,11 @@
 #include "string_tensor_unpack.hpp"
 #include "sentence_piece.hpp"
 #include "case_fold.hpp"
+#include "equal_str.hpp"
 #include "normalize_unicode.hpp"
 #include "ragged_to_dense.hpp"
 #include "ragged_to_sparse.hpp"
+#include "ragged_to_ragged.hpp"
 #include "regex_normalization.hpp"
 #include "regex_split.hpp"
 #include "vocab_encoder.hpp"
@@ -328,15 +330,14 @@ NamedOutputVector translate_string_split(const ov::frontend::NodeContext& node)
 ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeContext& node) {
     auto node_name = node.get_name();
     auto node_input_size = node.get_input_size();
-    TENSORFLOW_OP_VALIDATION(node, node_input_size == 4, "[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only with one row partition tensor");
+    TENSORFLOW_OP_VALIDATION(node, node_input_size == 4 || node_input_size == 5,
+        "[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only with one row partition tensor");
     auto shape = node.get_input(0);
     auto values = node.get_input(1);
     auto default_value = node.get_input(2);
-    auto row_partition_tensor = node.get_input(3);
     auto row_partition_types = node.get_attribute<std::vector<std::string>>("row_partition_types");
-    TENSORFLOW_OP_VALIDATION(node, row_partition_types.size() == 1,
-        "[TensorFlow Frontend] inconsistent model: RaggedTensorToTensor must have one row_partition_type");
-    TENSORFLOW_OP_VALIDATION(node, row_partition_types[0] == "ROW_SPLITS",
+    TENSORFLOW_OP_VALIDATION(node, (row_partition_types == std::vector<std::string>{"ROW_SPLITS"}) ||
+        (row_partition_types == std::vector<std::string>{"FIRST_DIM_SIZE", "VALUE_ROWIDS"}),
         "[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only for ROW_SPLITS type");
     // currently we support only shape for 2D tensor in output
     // for example, shape can be equal to [2, 5] or [-1, 8]
@@ -345,30 +346,68 @@ ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeConte
 
     // since begins, ends and target shape are expected to be of int32 type
     shape = std::make_shared<Convert>(shape, ov::element::i32);
-    row_partition_tensor = std::make_shared<Convert>(row_partition_tensor, ov::element::i32);
-
-    // compute vectors of begins and ends
-    auto rpt_shape = std::make_shared<ShapeOf>(row_partition_tensor, ov::element::i32)->output(0);
-    auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
-    auto rpt_shape_minus_one = std::make_shared<Subtract>(rpt_shape, const_one);
-    auto begins_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
-    auto ends_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
-    auto step = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
-    auto begins = std::make_shared<Slice>(row_partition_tensor, begins_start, rpt_shape_minus_one, step);
-    auto ends = std::make_shared<Slice>(row_partition_tensor, ends_start, rpt_shape, step);
-
-    // since shape can contain -1 dimension that means dimension size will be defined automatically
-    // such shape must be adjusted based on other inputs to RaggedTensorToTensor
-    // compute the longest row in a tensor
-    auto longest_row_size = std::make_shared<Subtract>(ends, begins)->output(0);
-    auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
-    longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);
+
+    ov::Output<ov::Node> begins, ends;
+    ov::Output<ov::Node> longest_batch, longest_row_size;
+    if (row_partition_types == std::vector<std::string>{"ROW_SPLITS"}) {
+        auto row_partition_tensor = node.get_input(3);
+        row_partition_tensor = std::make_shared<Convert>(row_partition_tensor, ov::element::i32);
+
+        // compute vectors of begins and ends
+        auto rpt_shape = std::make_shared<ShapeOf>(row_partition_tensor, ov::element::i32)->output(0);
+        auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
+        auto rpt_shape_minus_one = std::make_shared<Subtract>(rpt_shape, const_one)->output(0);
+        auto begins_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
+        auto ends_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
+        auto step = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
+        begins = std::make_shared<Slice>(row_partition_tensor, begins_start, rpt_shape_minus_one, step);
+        ends = std::make_shared<Slice>(row_partition_tensor, ends_start, rpt_shape, step);
+        longest_batch = rpt_shape_minus_one;
+
+        // since shape can contain -1 dimension that means dimension size will be defined automatically
+        // such shape must be adjusted based on other inputs to RaggedTensorToTensor
+        // compute the longest row in a tensor
+        longest_row_size = std::make_shared<Subtract>(ends, begins)->output(0);
+        auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
+        longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);
+    }
+    else {
+        auto first_dim_size = node.get_input(3);
+        auto value_rowids = node.get_input(4);
+
+        first_dim_size = std::make_shared<Convert>(first_dim_size, ov::element::i32);
+        auto new_first_dim_size_shape = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
+        first_dim_size = std::make_shared<Reshape>(first_dim_size, new_first_dim_size_shape, false);
+        value_rowids = std::make_shared<Convert>(value_rowids, ov::element::i32);
+
+        auto ragged_to_ragged = std::make_shared<RaggedToRagged>(ov::OutputVector{ value_rowids , first_dim_size });
+        begins = ragged_to_ragged->output(0);
+        ends = ragged_to_ragged->output(1);
+        longest_batch = first_dim_size;
+
+        // compute longest_row_size
+        auto scalar_shape = std::make_shared<Constant>(ov::element::i32, Shape{ 0 }, std::vector<int32_t>{});
+        first_dim_size = std::make_shared<Reshape>(first_dim_size, scalar_shape, false);
+        auto const_zero = std::make_shared<Constant>(ov::element::i32, Shape{}, 0);
+        auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
+        auto range_row_ids = std::make_shared<Range>(const_zero, first_dim_size, const_one, ov::element::i32)->output(0);
+        auto unsqueeze_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1)->output(0);
+        range_row_ids = std::make_shared<Unsqueeze>(range_row_ids, unsqueeze_axis);
+        unsqueeze_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0)->output(0);
+        value_rowids = std::make_shared<Unsqueeze>(value_rowids, unsqueeze_axis);
+        auto mask = std::make_shared<Equal>(range_row_ids, value_rowids)->output(0);
+        mask = std::make_shared<Select>(mask, const_one, const_zero);
+        auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1)->output(0);
+        longest_row_size = std::make_shared<ReduceSum>(mask, reduce_axis, false);
+        reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0)->output(0);
+        longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);
+    }
 
     auto ragged_to_dense = std::make_shared<RaggedToDense>(ov::OutputVector{ begins, ends, values, longest_row_size, default_value })->output(0);
 
     // adjust shape value since it can contain -1 value that means a dimension must be deduced based on minimal dimension size
     // to store output tensor
-    auto replace_shape = std::make_shared<Concat>(ov::OutputVector{ rpt_shape_minus_one, longest_row_size }, 0)->output(0);
+    auto replace_shape = std::make_shared<Concat>(ov::OutputVector{ longest_batch, longest_row_size }, 0)->output(0);
     auto const_zero = std::make_shared<Constant>(ov::element::i32, Shape{}, 0);
     auto shape_less_zero = std::make_shared<Less>(shape, const_zero);
     shape = std::make_shared<Select>(shape_less_zero, replace_shape, shape);
@@ -386,3 +425,31 @@ ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeConte
 
     return { result_dense_tensor };
 }
+
+// Converts TensorFlow Equal operation.
+// If at least one operand has string element type, both operands are passed through
+// pre_translate_string_tensor_input and compared with EqualStr; otherwise the regular
+// Equal operation is created. The resulting node and its output are named after the
+// original node.
+ov::OutputVector translate_equal(const ov::frontend::NodeContext& node) {
+    const auto node_name = node.get_name();
+    const auto node_input_size = node.get_input_size();
+    TENSORFLOW_OP_VALIDATION(node, node_input_size == 2,
+        "[TensorFlow Frontend] inconsistent model: Equal must have two inputs");
+
+    auto lhs = node.get_input(0);
+    auto rhs = node.get_input(1);
+    const bool has_string_operand = lhs.get_element_type() == ov::element::string ||
+                                    rhs.get_element_type() == ov::element::string;
+
+    std::shared_ptr<ov::Node> comparison;
+    if (!has_string_operand) {
+        comparison = std::make_shared<Equal>(lhs, rhs);
+    }
+    else {
+        // outputs of the first unpacked operand go first, those of the second one follow
+        ov::OutputVector operands = pre_translate_string_tensor_input(lhs);
+        const ov::OutputVector rhs_unpacked = pre_translate_string_tensor_input(rhs);
+        operands.insert(operands.end(), rhs_unpacked.begin(), rhs_unpacked.end());
+        comparison = std::make_shared<EqualStr>(operands);
+    }
+
+    comparison->set_friendly_name(node_name);
+    ov::Output<ov::Node> result = comparison->output(0);
+    result.set_names({ node_name + ":0" });
+
+    return { result };
+}