Skip to content

Commit

Permalink
[TF FE] Extend conversion for RaggedTensorToTensor in case rowids for…
Browse files Browse the repository at this point in the history
…mat and introduce Equal for 1D string tensors (openvinotoolkit#70)

* [TF FE] Extend conversion of RaggedTensorToTensor to the rowids format and introduce Equal for 1D string tensors

Signed-off-by: Kazantsev, Roman <[email protected]>

* Fix conversion of Equal operation

Signed-off-by: Kazantsev, Roman <[email protected]>

* Fix RaggedToRagged operation

Signed-off-by: Kazantsev, Roman <[email protected]>

* Fix RaggedToRagged operation extension

Signed-off-by: Kazantsev, Roman <[email protected]>

* Fix conversion for RaggedTensorToTensor operation

Signed-off-by: Kazantsev, Roman <[email protected]>

---------

Signed-off-by: Kazantsev, Roman <[email protected]>
  • Loading branch information
rkazants authored Mar 12, 2024
1 parent fbb2e0c commit 4db4de6
Show file tree
Hide file tree
Showing 8 changed files with 315 additions and 25 deletions.
56 changes: 56 additions & 0 deletions src/equal_str.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "equal_str.hpp"
#include "utils.hpp"

using namespace ov;


void EqualStr::validate_and_infer_types() {
OPENVINO_ASSERT(get_input_size() == 6);

auto begins_type1 = this->get_input_element_type(0);
auto ends_type1 = this->get_input_element_type(1);
auto begins_type2 = this->get_input_element_type(3);
auto ends_type2 = this->get_input_element_type(4);

OPENVINO_ASSERT(begins_type1 == element::i32 && begins_type2 == element::i32,
"Expected an i32 begins for string tensor representation.");
OPENVINO_ASSERT(ends_type1 == element::i32 && ends_type2 == element::i32,
"Expected an i32 ends for string tensor representation.");

set_output_type(0, ov::element::boolean, PartialShape({ Dimension::dynamic() }));
}

// Compares two unpacked 1D string tensors element by element.
//
// inputs:  [0] begins1 (i32), [1] ends1 (i32), [2] chars1 (u8),
//          [3] begins2 (i32), [4] ends2 (i32), [5] chars2 (u8)
// outputs: [0] boolean tensor; result[i] is true when the i-th strings are byte-wise identical
//
// An operand that holds exactly one string is broadcast against the other operand.
// Throws (via OPENVINO_ASSERT) when the element counts cannot be broadcast
// or when a [begin, end) range does not lie inside its chars buffer.
bool EqualStr::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const {
    const auto begins1 = inputs[0].data<const int32_t>();
    const auto ends1 = inputs[1].data<const int32_t>();
    const auto chars1 = inputs[2].data<const uint8_t>();
    const auto begins2 = inputs[3].data<const int32_t>();
    const auto ends2 = inputs[4].data<const int32_t>();
    const auto chars2 = inputs[5].data<const uint8_t>();

    const size_t chars1_size = inputs[2].get_size();
    const size_t chars2_size = inputs[5].get_size();

    const size_t num_elems1 = inputs[0].get_size();
    const size_t num_elems2 = inputs[3].get_size();
    OPENVINO_ASSERT(num_elems1 == num_elems2 || num_elems1 == 1 || num_elems2 == 1,
                    "EqualStr expects string tensors of the same size or one of them to contain a single element.");

    // a single-element operand is stretched to the size of the other one;
    // this also yields an empty output (and no reads at all) when the other operand is empty
    const size_t num_elems = (num_elems1 == 1) ? num_elems2 : num_elems1;
    outputs[0].set_shape(ov::Shape{ num_elems });
    auto result = outputs[0].data<bool>();

    for (size_t idx = 0; idx < num_elems; ++idx) {
        // the broadcast operand always contributes its only element
        const size_t idx1 = (num_elems1 == 1) ? 0 : idx;
        const size_t idx2 = (num_elems2 == 1) ? 0 : idx;

        const int32_t begin1 = begins1[idx1];
        const int32_t end1 = ends1[idx1];
        const int32_t begin2 = begins2[idx2];
        const int32_t end2 = ends2[idx2];

        // reject ranges that would make the pointer arithmetic below leave the chars buffers
        OPENVINO_ASSERT(0 <= begin1 && begin1 <= end1 && static_cast<size_t>(end1) <= chars1_size,
                        "EqualStr: invalid begins/ends for the first string tensor.");
        OPENVINO_ASSERT(0 <= begin2 && begin2 <= end2 && static_cast<size_t>(end2) <= chars2_size,
                        "EqualStr: invalid begins/ends for the second string tensor.");

        // compare in place instead of copying both strings into temporary vectors
        const bool same_length = (end1 - begin1) == (end2 - begin2);
        result[idx] = same_length && std::equal(chars1 + begin1, chars1 + end1, chars2 + begin2);
    }
    return true;
}
38 changes: 38 additions & 0 deletions src/equal_str.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <openvino/op/op.hpp>

// EqualStr compares two unpacked string tensors and outputs 1D boolean tensor
// The operation is only applicable if output shape of string tensor corresponds to 1D tensor
class EqualStr : public ov::op::Op {
public:
OPENVINO_OP("EqualStr");

EqualStr() = default;

EqualStr(ov::OutputVector inputs)
: ov::op::Op(inputs) {
constructor_validate_and_infer_types();
}

void validate_and_infer_types() override;

std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& inputs) const override {
auto result = std::make_shared<EqualStr>(inputs);
return result;
}

bool visit_attributes(ov::AttributeVisitor& visitor) override {
return true;
}

bool has_evaluate() const override {
return true;
}

bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override;
};
5 changes: 4 additions & 1 deletion src/ov_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("LookupTableFind", translate_lookup_table_find_op), \
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("LookupTableFindV2", translate_lookup_table_find_op), \
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("StringSplitV2", translate_string_split), \
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("RaggedTensorToTensor", translate_ragged_tensor_to_tensor)
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("RaggedTensorToTensor", translate_ragged_tensor_to_tensor), \
std::make_shared<ov::frontend::tensorflow::ConversionExtension>("Equal", translate_equal)
#else
#define OPENVINO_TOKENIZERS_TENSORFLOW_CONVERSION_EXTENSIONS
#endif
Expand All @@ -35,6 +36,7 @@ OPENVINO_CREATE_EXTENSIONS(
std::make_shared<ov::OpExtension<RaggedTensorPack>>(),
std::make_shared<ov::OpExtension<StringTensorUnpack>>(),
std::make_shared<ov::OpExtension<CaseFold>>(),
std::make_shared<ov::OpExtension<EqualStr>>(),
std::make_shared<ov::OpExtension<NormalizeUnicode>>(),
std::make_shared<ov::OpExtension<RegexNormalization>>(),
std::make_shared<ov::OpExtension<RegexSplit>>(),
Expand All @@ -44,6 +46,7 @@ OPENVINO_CREATE_EXTENSIONS(
std::make_shared<ov::OpExtension<CombineSegments>>(),
std::make_shared<ov::OpExtension<RaggedToDense>>(),
std::make_shared<ov::OpExtension<RaggedToSparse>>(),
std::make_shared<ov::OpExtension<RaggedToRagged>>(),
std::make_shared<ov::OpExtension<VocabEncoder>>(),
std::make_shared<ov::OpExtension<VocabDecoder>>(),
std::make_shared<ov::OpExtension<CharsToBytes>>(),
Expand Down
82 changes: 82 additions & 0 deletions src/ragged_to_ragged.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <openvino/op/constant.hpp>

#include "ragged_to_ragged.hpp"
#include "utils.hpp"

using namespace ov;
using op::v0::Constant;

void RaggedToRagged::validate_and_infer_types() {
OPENVINO_ASSERT(get_input_size() == 2);

auto rowids_type = this->get_input_element_type(0);
auto first_dim_size_type = this->get_input_element_type(1);

OPENVINO_ASSERT(rowids_type == element::i32, "Expected an i32 rowids tensor ragged representation.");
OPENVINO_ASSERT(first_dim_size_type == element::i32, "Expected an i32 first dim size tensor ragged representation.");

set_output_type(0, get_input_element_type(0), PartialShape({ Dimension::dynamic() }));
set_output_type(1, get_input_element_type(0), PartialShape({ Dimension::dynamic() }));
}


// Converts a ragged tensor from the rowids format to the begins-ends format.
//
// inputs:  [0] rowids (i32) - for every value, the id of the row it belongs to
//          [1] first dim size (i32) - its first element is the number of rows
// outputs: [0] begins (i32), [1] ends (i32) - row r owns the values [begins[r], ends[r])
//
// Rows without values get begins[r] == ends[r]. Values whose row id is not below
// the number of rows are ignored. Every output element is always written.
// Throws (via OPENVINO_ASSERT) on a negative row id, a negative number of rows,
// or row ids that are not sorted in non-decreasing order.
bool RaggedToRagged::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const {
    const auto rowids = inputs[0].data<const int32_t>();
    const auto rowids_size = static_cast<int32_t>(inputs[0].get_size());
    const auto first_dim_size = inputs[1].data<const int32_t>();

    OPENVINO_ASSERT(inputs[1].get_size() > 0, "first dim size tensor must not be empty");
    const int32_t batch_size = first_dim_size[0];
    // a negative value would otherwise be converted to a huge unsigned dimension
    OPENVINO_ASSERT(0 <= batch_size, "first dim size must be non-negative");

    const auto rows_count = static_cast<size_t>(batch_size);
    outputs[0].set_shape(ov::Shape{ rows_count });
    outputs[1].set_shape(ov::Shape{ rows_count });

    auto begins = outputs[0].data<int32_t>();
    auto ends = outputs[1].data<int32_t>();

    // value_idx walks over rowids exactly once; each row consumes the run of values carrying its id
    int32_t value_idx = 0;
    for (int32_t row = 0; row < batch_size; ++row) {
        begins[row] = value_idx;
        while (value_idx < rowids_size) {
            const int32_t row_id = rowids[value_idx];
            OPENVINO_ASSERT(0 <= row_id, "row id must be non-negative");
            OPENVINO_ASSERT(row <= row_id, "row ids must be sorted in non-decreasing order");
            if (row_id != row) {
                // the value belongs to a later row (or to no row at all if row_id >= batch_size)
                break;
            }
            ++value_idx;
        }
        ends[row] = value_idx;
    }

    return true;
}
41 changes: 41 additions & 0 deletions src/ragged_to_ragged.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <openvino/op/op.hpp>

// Operation that transforms ragged tensor from rowids format to begins-ends format
// value_rowids just defines to which row each value from values vector belongs
// for example, rowids = [0, 0, 2, 3, 3, 3] and first_dims_size = 5
// it corresponds to ragged tensor with
// begins = [0, 2, 2, 3, 6]
// ends = [2, 2, 3, 6, 6]
class RaggedToRagged : public ov::op::Op {
public:
OPENVINO_OP("RaggedToRagged");

RaggedToRagged() = default;

RaggedToRagged(const ov::OutputVector& arguments) :
ov::op::Op(arguments) {
constructor_validate_and_infer_types();
}

void validate_and_infer_types() override;

std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& inputs) const override {
return std::make_shared<RaggedToRagged>(inputs);
}

bool visit_attributes(ov::AttributeVisitor& visitor) override {
return true;
}

bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override;

bool has_evaluate() const override {
return true;
}
};
115 changes: 91 additions & 24 deletions src/tensorflow_translators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
#include "string_tensor_unpack.hpp"
#include "sentence_piece.hpp"
#include "case_fold.hpp"
#include "equal_str.hpp"
#include "normalize_unicode.hpp"
#include "ragged_to_dense.hpp"
#include "ragged_to_sparse.hpp"
#include "ragged_to_ragged.hpp"
#include "regex_normalization.hpp"
#include "regex_split.hpp"
#include "vocab_encoder.hpp"
Expand Down Expand Up @@ -328,15 +330,14 @@ NamedOutputVector translate_string_split(const ov::frontend::NodeContext& node)
ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeContext& node) {
auto node_name = node.get_name();
auto node_input_size = node.get_input_size();
TENSORFLOW_OP_VALIDATION(node, node_input_size == 4, "[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only with one row partition tensor");
TENSORFLOW_OP_VALIDATION(node, node_input_size == 4 || node_input_size == 5,
"[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only with one row partition tensor");
auto shape = node.get_input(0);
auto values = node.get_input(1);
auto default_value = node.get_input(2);
auto row_partition_tensor = node.get_input(3);
auto row_partition_types = node.get_attribute<std::vector<std::string>>("row_partition_types");
TENSORFLOW_OP_VALIDATION(node, row_partition_types.size() == 1,
"[TensorFlow Frontend] inconsistent model: RaggedTensorToTensor must have one row_partition_type");
TENSORFLOW_OP_VALIDATION(node, row_partition_types[0] == "ROW_SPLITS",
TENSORFLOW_OP_VALIDATION(node, (row_partition_types == std::vector<std::string>{"ROW_SPLITS"}) ||
(row_partition_types == std::vector<std::string>{"FIRST_DIM_SIZE", "VALUE_ROWIDS"}),
"[TensorFlow Frontend] internal error: RaggedTensorToTensor is supported only for ROW_SPLITS type");
// currently we support only shape for 2D tensor in output
// for example, shape can be equal to [2, 5] or [-1, 8]
Expand All @@ -345,30 +346,68 @@ ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeConte

// since begins, ends and target shape are expected to be of int32 type
shape = std::make_shared<Convert>(shape, ov::element::i32);
row_partition_tensor = std::make_shared<Convert>(row_partition_tensor, ov::element::i32);

// compute vectors of begins and ends
auto rpt_shape = std::make_shared<ShapeOf>(row_partition_tensor, ov::element::i32)->output(0);
auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
auto rpt_shape_minus_one = std::make_shared<Subtract>(rpt_shape, const_one);
auto begins_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
auto ends_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
auto step = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
auto begins = std::make_shared<Slice>(row_partition_tensor, begins_start, rpt_shape_minus_one, step);
auto ends = std::make_shared<Slice>(row_partition_tensor, ends_start, rpt_shape, step);

// since shape can contain -1 dimension that means dimension size will be defined automatically
// such shape must be adjusted based on other inputs to RaggedTensorToTensor
// compute the longest row in a tensor
auto longest_row_size = std::make_shared<Subtract>(ends, begins)->output(0);
auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);

ov::Output<ov::Node> begins, ends;
ov::Output<ov::Node> longest_batch, longest_row_size;
if (row_partition_types == std::vector<std::string>{"ROW_SPLITS"}) {
auto row_partition_tensor = node.get_input(3);
row_partition_tensor = std::make_shared<Convert>(row_partition_tensor, ov::element::i32);

// compute vectors of begins and ends
auto rpt_shape = std::make_shared<ShapeOf>(row_partition_tensor, ov::element::i32)->output(0);
auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
auto rpt_shape_minus_one = std::make_shared<Subtract>(rpt_shape, const_one)->output(0);
auto begins_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
auto ends_start = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
auto step = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
begins = std::make_shared<Slice>(row_partition_tensor, begins_start, rpt_shape_minus_one, step);
ends = std::make_shared<Slice>(row_partition_tensor, ends_start, rpt_shape, step);
longest_batch = rpt_shape_minus_one;

// since shape can contain -1 dimension that means dimension size will be defined automatically
// such shape must be adjusted based on other inputs to RaggedTensorToTensor
// compute the longest row in a tensor
longest_row_size = std::make_shared<Subtract>(ends, begins)->output(0);
auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0);
longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);
}
else {
auto first_dim_size = node.get_input(3);
auto value_rowids = node.get_input(4);

first_dim_size = std::make_shared<Convert>(first_dim_size, ov::element::i32);
auto new_first_dim_size_shape = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1);
first_dim_size = std::make_shared<Reshape>(first_dim_size, new_first_dim_size_shape, false);
value_rowids = std::make_shared<Convert>(value_rowids, ov::element::i32);

auto ragged_to_ragged = std::make_shared<RaggedToRagged>(ov::OutputVector{ value_rowids , first_dim_size });
begins = ragged_to_ragged->output(0);
ends = ragged_to_ragged->output(1);
longest_batch = first_dim_size;

// compute longest_row_size
auto scalar_shape = std::make_shared<Constant>(ov::element::i32, Shape{ 0 }, std::vector<int32_t>{});
first_dim_size = std::make_shared<Reshape>(first_dim_size, scalar_shape, false);
auto const_zero = std::make_shared<Constant>(ov::element::i32, Shape{}, 0);
auto const_one = std::make_shared<Constant>(ov::element::i32, Shape{}, 1);
auto range_row_ids = std::make_shared<Range>(const_zero, first_dim_size, const_one, ov::element::i32)->output(0);
auto unsqueeze_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1)->output(0);
range_row_ids = std::make_shared<Unsqueeze>(range_row_ids, unsqueeze_axis);
unsqueeze_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0)->output(0);
value_rowids = std::make_shared<Unsqueeze>(value_rowids, unsqueeze_axis);
auto mask = std::make_shared<Equal>(range_row_ids, value_rowids)->output(0);
mask = std::make_shared<Select>(mask, const_one, const_zero);
auto reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 1)->output(0);
longest_row_size = std::make_shared<ReduceSum>(mask, reduce_axis, false);
reduce_axis = std::make_shared<Constant>(ov::element::i32, Shape{ 1 }, 0)->output(0);
longest_row_size = std::make_shared<ReduceMax>(longest_row_size, reduce_axis, true);
}

auto ragged_to_dense = std::make_shared<RaggedToDense>(ov::OutputVector{ begins, ends, values, longest_row_size, default_value })->output(0);

// adjust shape value since it can contain -1 value that means a dimension must be deduced based on minimal dimension size
// to store output tensor
auto replace_shape = std::make_shared<Concat>(ov::OutputVector{ rpt_shape_minus_one, longest_row_size }, 0)->output(0);
auto replace_shape = std::make_shared<Concat>(ov::OutputVector{ longest_batch, longest_row_size }, 0)->output(0);
auto const_zero = std::make_shared<Constant>(ov::element::i32, Shape{}, 0);
auto shape_less_zero = std::make_shared<Less>(shape, const_zero);
shape = std::make_shared<Select>(shape_less_zero, replace_shape, shape);
Expand All @@ -386,3 +425,31 @@ ov::OutputVector translate_ragged_tensor_to_tensor(const ov::frontend::NodeConte

return { result_dense_tensor };
}

// Translates the TensorFlow Equal operation.
// When at least one input has the string element type, both inputs are passed through
// pre_translate_string_tensor_input (presumably unpacking them into begins/ends/chars - confirm
// against its definition) and compared with EqualStr; otherwise the regular Equal is used.
// Returns a single output named "<node name>:0".
ov::OutputVector translate_equal(const ov::frontend::NodeContext& node) {
    auto node_name = node.get_name();
    auto node_input_size = node.get_input_size();
    TENSORFLOW_OP_VALIDATION(node, node_input_size == 2,
                             "[TensorFlow Frontend] inconsistent model: Equal must have two inputs");
    auto lhs = node.get_input(0);
    auto rhs = node.get_input(1);

    const bool compares_strings = lhs.get_element_type() == ov::element::string ||
                                  rhs.get_element_type() == ov::element::string;

    ov::Output<ov::Node> equal_output;
    if (!compares_strings) {
        equal_output = std::make_shared<Equal>(lhs, rhs)->output(0);
    }
    else {
        // the outputs produced for the first operand are followed by those of the second one
        ov::OutputVector unpacked = pre_translate_string_tensor_input(lhs);
        const ov::OutputVector unpacked_rhs = pre_translate_string_tensor_input(rhs);
        unpacked.insert(unpacked.end(), unpacked_rhs.begin(), unpacked_rhs.end());

        equal_output = std::make_shared<EqualStr>(unpacked)->output(0);
    }

    // keep the original TensorFlow node name on both the node and its output tensor
    equal_output.get_node_shared_ptr()->set_friendly_name(node_name);
    equal_output.set_names({ node_name + ":0" });

    return { equal_output };
}
Loading

0 comments on commit 4db4de6

Please sign in to comment.