Add a flag to set the default number of XNNPACK threads
XNNPACK defaults to 1 thread on gLinux runners. This is very slow for desktop runner apps, where #cores/2 is usually a good value, but (a) it is risky to change this default and (b) there are situations where a higher thread count is not wanted.

PiperOrigin-RevId: 683230250
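
For reference, a minimal sketch of how the new flag might be used. The binary name and helper function below are hypothetical; the flag name and its precedence (explicit calculator options override the flag, which overrides the platform default) come from this commit.

```cpp
// On the command line (binary name is hypothetical):
//   ./object_detection_app --xnnpack_default_num_threads=4
//
// Or programmatically, before the InferenceCalculator is initialized:
#include "absl/flags/flag.h"
#include "mediapipe/calculators/tensor/inference_calculator_utils.h"

void UseFourXnnpackThreadsByDefault() {
  // Overrides the platform-derived default; an explicit
  // xnnpack.num_threads in the calculator options still takes precedence.
  absl::SetFlag(&FLAGS_xnnpack_default_num_threads, 4);
}
```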
MediaPipe Team authored and copybara-github committed Oct 7, 2024
1 parent fd68417 commit 876bdd5
Showing 4 changed files with 116 additions and 41 deletions.
6 changes: 6 additions & 0 deletions mediapipe/calculators/tensor/BUILD
@@ -453,6 +453,7 @@ cc_library_with_tflite(
"//mediapipe/framework/port:status",
"//mediapipe/framework/stream_handler:fixed_size_input_stream_handler",
"//mediapipe/framework/tool:subgraph_expansion",
"//mediapipe/util:cpu_util",
"@com_google_absl//absl/memory",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
@@ -816,6 +817,8 @@ cc_library(
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/log:absl_log",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
@@ -841,9 +844,12 @@ cc_test(
name = "inference_calculator_utils_test",
srcs = ["inference_calculator_utils_test.cc"],
deps = [
":inference_calculator_cc_proto",
":inference_calculator_utils",
"//mediapipe/framework/formats:tensor",
"//mediapipe/framework/port:gtest_main",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:absl_check",
"@com_google_absl//absl/status",
"@com_google_absl//absl/types:span",
12 changes: 10 additions & 2 deletions mediapipe/calculators/tensor/inference_calculator_utils.cc
@@ -20,6 +20,7 @@
#include <string>
#include <vector>

#include "absl/flags/flag.h"
#include "absl/log/absl_log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
@@ -38,15 +39,22 @@
#include "tensorflow/lite/portable_type_to_tflitetype.h"
#include "tensorflow/lite/string_util.h"

ABSL_FLAG(int, xnnpack_default_num_threads, 0,
"Default number of xnnpack threads to use. If unset, determines a "
"good default number based on the platform.");

#if !defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)
#include "mediapipe/util/cpu_util.h"
#endif // !__EMSCRIPTEN__ || __EMSCRIPTEN_PTHREADS__

namespace mediapipe {

namespace {

int GetXnnpackDefaultNumThreads() {
int default_from_flag = absl::GetFlag(FLAGS_xnnpack_default_num_threads);
if (default_from_flag > 0) {
return default_from_flag;
}
#if defined(MEDIAPIPE_ANDROID) || defined(MEDIAPIPE_IOS) || \
defined(__EMSCRIPTEN_PTHREADS__)
constexpr int kMinNumThreadsByDefault = 1;
@@ -216,7 +224,7 @@ absl::Status CopyTfLiteTensorToTensor<char>(const TfLiteTensor& tflite_tensor,
} // namespace

int GetXnnpackNumThreads(
const bool opts_has_delegate,
bool opts_has_delegate,
const mediapipe::InferenceCalculatorOptions::Delegate& opts_delegate) {
static constexpr int kDefaultNumThreads = -1;
if (opts_has_delegate && opts_delegate.has_xnnpack() &&
8 changes: 6 additions & 2 deletions mediapipe/calculators/tensor/inference_calculator_utils.h
@@ -18,6 +18,7 @@
#include <cstddef>
#include <cstdint>

#include "absl/flags/declare.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
@@ -27,13 +28,16 @@
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/util.h"

ABSL_DECLARE_FLAG(int, xnnpack_default_num_threads);

namespace mediapipe {

// Returns number of threads to configure XNNPACK delegate with.
// Returns user provided value if specified. Otherwise, tries to choose optimal
// number of threads depending on the device.
// number of threads depending on the device. The default can be overridden by
// setting the --xnnpack_default_num_threads flag.
int GetXnnpackNumThreads(
const bool opts_has_delegate,
bool opts_has_delegate,
const mediapipe::InferenceCalculatorOptions::Delegate& opts_delegate);

absl::Status CopyCpuInputIntoTfLiteTensor(const Tensor& input_tensor,
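
To show how a caller might consume the helper declared above when building an XNNPACK delegate, here is a minimal sketch following common TFLite usage. It is not how the MediaPipe inference calculator itself wires the delegate; `MakeXnnpackDelegate` is a hypothetical helper, while `GetXnnpackNumThreads` and the flag precedence come from this commit.

```cpp
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/calculators/tensor/inference_calculator_utils.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"

// Builds an XNNPACK delegate whose thread count follows the documented
// precedence: an explicit num_threads in the delegate options wins,
// otherwise a positive --xnnpack_default_num_threads, otherwise the
// platform heuristic.
TfLiteDelegate* MakeXnnpackDelegate(
    bool opts_has_delegate,
    const mediapipe::InferenceCalculatorOptions::Delegate& opts_delegate) {
  TfLiteXNNPackDelegateOptions xnnpack_opts =
      TfLiteXNNPackDelegateOptionsDefault();
  xnnpack_opts.num_threads =
      mediapipe::GetXnnpackNumThreads(opts_has_delegate, opts_delegate);
  // The caller owns the returned delegate and should release it with
  // TfLiteXNNPackDelegateDelete() once the interpreter no longer needs it.
  return TfLiteXNNPackDelegateCreate(&xnnpack_opts);
}
```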
131 changes: 94 additions & 37 deletions mediapipe/calculators/tensor/inference_calculator_utils_test.cc
@@ -23,9 +23,12 @@
#include <utility>
#include <vector>

#include "absl/flags/flag.h"
#include "absl/log/absl_check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/types/span.h"
#include "mediapipe/calculators/tensor/inference_calculator.pb.h"
#include "mediapipe/framework/formats/tensor.h"
#include "mediapipe/framework/port/gmock.h"
#include "mediapipe/framework/port/gtest.h"
@@ -40,6 +43,8 @@
namespace mediapipe {
namespace {

constexpr int kDefaultNumXnnpackThreads = 1;

using ElementType = ::mediapipe::Tensor::ElementType;
using ::testing::ElementsAreArray;
using ::testing::HasSubstr;
@@ -103,8 +108,60 @@ std::vector<char> TfLiteInputTensorData<char>(const Interpreter& interpreter,
return std::vector<char>(str.begin(), str.end());
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt32) {
class InferenceCalculatorUtilsTest : public ::testing::Test {
protected:
void TearDown() override {
absl::SetFlag(&FLAGS_xnnpack_default_num_threads, 0);
}
};

TEST_F(InferenceCalculatorUtilsTest, GetXnnpackNumThreadsReturnsDefault) {
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/false,
/*opts_delegate=*/{}),
kDefaultNumXnnpackThreads);
}

TEST_F(InferenceCalculatorUtilsTest, GetXnnpackNumThreadsReturnsSetDefault) {
absl::SetFlag(&FLAGS_xnnpack_default_num_threads, 42);
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/false,
/*opts_delegate=*/{}),
42);
}

TEST_F(InferenceCalculatorUtilsTest,
GetXnnpackNumThreadsReturnsDefaultIfHasDelegateIsTrueButUnset) {
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/true,
/*opts_delegate=*/{}),
kDefaultNumXnnpackThreads);
}

TEST_F(InferenceCalculatorUtilsTest,
GetXnnpackNumThreadsReturnsDefaultIfThreadsNotSpecified) {
mediapipe::InferenceCalculatorOptions::Delegate opts_delegate;
opts_delegate.mutable_xnnpack();
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/true, opts_delegate),
kDefaultNumXnnpackThreads);
}

TEST_F(InferenceCalculatorUtilsTest,
GetXnnpackNumThreadsReturnsSetNumberOfThreads) {
absl::SetFlag(&FLAGS_xnnpack_default_num_threads, 42);
mediapipe::InferenceCalculatorOptions::Delegate opts_delegate;
opts_delegate.mutable_xnnpack()->set_num_threads(43);
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/true, opts_delegate),
43);
}

TEST_F(InferenceCalculatorUtilsTest,
GetXnnpackNumThreadsReturnsDefaultIfHasDelegateIsFalse) {
mediapipe::InferenceCalculatorOptions::Delegate opts_delegate;
opts_delegate.mutable_xnnpack()->set_num_threads(44);
EXPECT_EQ(GetXnnpackNumThreads(/*opts_has_delegate=*/false, opts_delegate),
kDefaultNumXnnpackThreads);
}

TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt32) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteInt32, tensor_len, tensor_index,
@@ -120,8 +177,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt64) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt64) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteInt64, tensor_len, tensor_index,
@@ -137,8 +194,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForUInt8) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForUInt8) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteUInt8, tensor_len, tensor_index,
@@ -154,8 +211,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt8) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForInt8) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteInt8, tensor_len, tensor_index,
@@ -171,8 +228,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForFloat32) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForFloat32) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteFloat32, tensor_len, tensor_index,
@@ -188,8 +245,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForString) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorWorksCorrectlyForString) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteString, tensor_len, tensor_index,
@@ -205,8 +262,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorTypeMismatch) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorTypeMismatch) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
AddInterpreterInput(kTfLiteInt32, tensor_len, tensor_index,
@@ -223,8 +280,8 @@ TEST(InferenceCalculatorUtilsTest,
HasSubstr("Input and interpreter tensor type do not match"));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorSizeMismatch) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorSizeMismatch) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 5;
AddInterpreterInput(kTfLiteFloat32, tensor_len, tensor_index,
@@ -241,8 +298,8 @@ TEST(InferenceCalculatorUtilsTest,
HasSubstr("TfLiteTensor and Tensor sizes do not match"));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorNullBuffer) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorNullBuffer) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
// Make TFLite interpreter's buffer null.
@@ -259,8 +316,8 @@ TEST(InferenceCalculatorUtilsTest,
EXPECT_THAT(status.message(), HasSubstr("TfLiteTensor data is null"));
}

TEST(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorUnsupportedType) {
TEST_F(InferenceCalculatorUtilsTest,
CopyCpuInputIntoInterpreterTensorUnsupportedType) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;

@@ -283,8 +340,8 @@ TEST(InferenceCalculatorUtilsTest,
EXPECT_THAT(status.message(), HasSubstr("Unsupported input data type:"));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputWorksCorrectlyForFloat32) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputWorksCorrectlyForFloat32) {
std::vector<float> values{100.f, 200.f, 300.f, 400.f, 500.f, 600.f};

tflite::CastOpModel m({TensorType_INT32, {2, 3}},
@@ -300,8 +357,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputWorksCorrectlyForString) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputWorksCorrectlyForString) {
std::vector<char> values{'a', 'b', 'c', 'd'};
int values_len = values.size();
Tensor tensor(ElementType::kChar, Tensor::Shape({values_len}));
@@ -323,8 +380,8 @@ TEST(InferenceCalculatorUtilsTest,
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputTypeMismatch) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputTypeMismatch) {
std::vector<float> values{100.f, 200.f, 300.f, 400.f, 500.f, 600.f};

tflite::CastOpModel m({TensorType_INT32, {2, 3}},
@@ -341,8 +398,8 @@ TEST(InferenceCalculatorUtilsTest,
HasSubstr("Output and TfLiteTensor types do not match"));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputSizeMismatch) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputSizeMismatch) {
std::vector<float> values{100.f, 200.f, 300.f, 400.f, 500.f, 600.f};

tflite::CastOpModel m({TensorType_INT32, {2, 3}},
@@ -359,8 +416,8 @@ TEST(InferenceCalculatorUtilsTest,
HasSubstr("TfLiteTensor and Tensor shape do not match"));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputNullBuffer) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputNullBuffer) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;
// Make TFLite interpreter's buffer null.
@@ -378,8 +435,8 @@ TEST(InferenceCalculatorUtilsTest,
HasSubstr("TfLiteTensor tensor buffer is null"));
}

TEST(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputUnsupportedType) {
TEST_F(InferenceCalculatorUtilsTest,
CopyInterpreterTensorIntoCpuOutputUnsupportedType) {
tflite::Interpreter interpreter;
int tensor_index, tensor_len = 4;

@@ -402,7 +459,7 @@ TEST(InferenceCalculatorUtilsTest,
EXPECT_THAT(status.message(), HasSubstr("Unsupported output data type:"));
}

TEST(InferenceCalculatorUtilsTest, ConvertTfLiteTensorToFloat32) {
TEST_F(InferenceCalculatorUtilsTest, ConvertTfLiteTensorToFloat32) {
const std::vector<float> expected_values{100.f, 200.f, 300.f,
400.f, 500.f, 600.f};

@@ -420,7 +477,7 @@ TEST(InferenceCalculatorUtilsTest, ConvertTfLiteTensorToFloat32) {
ElementsAreArray(expected_values));
}

TEST(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuWriteView) {
TEST_F(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuWriteView) {
tflite::Interpreter interpreter;
int tensor_index, tensor_size = 4;
AddInterpreterInput(kTfLiteInt32, tensor_size, tensor_index,
@@ -449,7 +506,7 @@ TEST(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuWriteView) {
}
}

TEST(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuReadView) {
TEST_F(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuReadView) {
tflite::Interpreter interpreter;
int tensor_index, tensor_size = 4;
AddInterpreterInput(kTfLiteInt32, tensor_size, tensor_index,
@@ -470,7 +527,7 @@ TEST(InferenceCalculatorUtilsTest, ShouldSetCustomAllocatorForCpuReadView) {
ElementsAreArray(values));
}

TEST(InferenceCalculatorUtilsTest, ShouldConfirmTfLiteMemoryAlignment) {
TEST_F(InferenceCalculatorUtilsTest, ShouldConfirmTfLiteMemoryAlignment) {
std::vector<int32_t> values{1, 2, 3, 4};
int values_len = values.size();
Tensor tensor(ElementType::kInt32, Tensor::Shape({values_len}),
@@ -481,7 +538,7 @@ TEST(InferenceCalculatorUtilsTest, ShouldConfirmTfLiteMemoryAlignment) {
EXPECT_TRUE(IsAlignedWithTFLiteDefaultAlignment(read_view.buffer<int32_t>()));
}

TEST(InferenceCalculatorUtilsTest, ShouldNotConfirmTfLiteMemoryAlignment) {
TEST_F(InferenceCalculatorUtilsTest, ShouldNotConfirmTfLiteMemoryAlignment) {
std::vector<int32_t> values{1, 2, 3, 4};
int values_len = values.size();
Tensor tensor(ElementType::kInt32, Tensor::Shape({values_len}),
