diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h index 6d9102c668..700ae3db13 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer.h @@ -79,7 +79,7 @@ struct GestureRecognizerOptions { // true for the lifetime of the callback function. // // A caller is responsible for closing gesture recognizer result. - typedef void (*result_callback_fn)(GestureRecognizerResult* result, + typedef void (*result_callback_fn)(const GestureRecognizerResult* result, const MpImage& image, int64_t timestamp_ms, char* error_msg); result_callback_fn result_callback; diff --git a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc index 06a46b2199..cd256ae910 100644 --- a/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc +++ b/mediapipe/tasks/c/vision/gesture_recognizer/gesture_recognizer_test.cc @@ -47,7 +47,7 @@ std::string GetFullPath(absl::string_view file_name) { return JoinPath("./", kTestDataDirectory, file_name); } -void MatchesGestureRecognizerResult(GestureRecognizerResult* result, +void MatchesGestureRecognizerResult(const GestureRecognizerResult* result, const float score_precision, const float landmark_precision) { // Expects to have the same number of hands detected. @@ -188,7 +188,7 @@ TEST(GestureRecognizerTest, VideoModeTest) { // timestamp is greater than the previous one. 
struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(GestureRecognizerResult* recognizer_result, + static void Fn(const GestureRecognizerResult* recognizer_result, const MpImage& image, int64_t timestamp, char* error_msg) { ASSERT_NE(recognizer_result, nullptr); ASSERT_EQ(error_msg, nullptr); diff --git a/mediapipe/tasks/c/vision/image_classifier/BUILD b/mediapipe/tasks/c/vision/image_classifier/BUILD index b1930fb0ed..edd70bf81f 100644 --- a/mediapipe/tasks/c/vision/image_classifier/BUILD +++ b/mediapipe/tasks/c/vision/image_classifier/BUILD @@ -58,6 +58,7 @@ cc_test( "//mediapipe/framework/formats:image", "//mediapipe/framework/port:gtest", "//mediapipe/tasks/c/components/containers:category", + "//mediapipe/tasks/c/vision/core:common", "//mediapipe/tasks/cc/vision/utils:image_utils", "@com_google_absl//absl/flags:flag", "@com_google_absl//absl/strings", diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h index 2a1691d3c4..9cc1dcbe7d 100644 --- a/mediapipe/tasks/c/vision/image_classifier/image_classifier.h +++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier.h @@ -59,7 +59,7 @@ struct ImageClassifierOptions { // true for the lifetime of the callback function. // // A caller is responsible for closing image classifier result. - typedef void (*result_callback_fn)(ImageClassifierResult* result, + typedef void (*result_callback_fn)(const ImageClassifierResult* result, const MpImage& image, int64_t timestamp_ms, char* error_msg); result_callback_fn result_callback; diff --git a/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc b/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc index 2b0114dc64..9e16f6bae2 100644 --- a/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc +++ b/mediapipe/tasks/c/vision/image_classifier/image_classifier_test.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "mediapipe/framework/port/gmock.h" #include "mediapipe/framework/port/gtest.h" #include "mediapipe/tasks/c/components/containers/category.h" +#include "mediapipe/tasks/c/vision/core/common.h" #include "mediapipe/tasks/cc/vision/utils/image_utils.h" namespace { @@ -142,8 +143,8 @@ TEST(ImageClassifierTest, VideoModeTest) { // timestamp is greater than the previous one. struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(ImageClassifierResult* classifier_result, const MpImage& image, - int64_t timestamp, char* error_msg) { + static void Fn(const ImageClassifierResult* classifier_result, + const MpImage& image, int64_t timestamp, char* error_msg) { ASSERT_NE(classifier_result, nullptr); ASSERT_EQ(error_msg, nullptr); EXPECT_EQ( diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h index 809c7f2f87..7f71dd72f8 100644 --- a/mediapipe/tasks/c/vision/image_embedder/image_embedder.h +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder.h @@ -61,7 +61,7 @@ struct ImageEmbedderOptions { // is true for the lifetime of the callback function. // // A caller is responsible for closing image embedder result. - typedef void (*result_callback_fn)(ImageEmbedderResult* result, + typedef void (*result_callback_fn)(const ImageEmbedderResult* result, const MpImage& image, int64_t timestamp_ms, char* error_msg); result_callback_fn result_callback; diff --git a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc index 5daeac9494..0de26c19e3 100644 --- a/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc +++ b/mediapipe/tasks/c/vision/image_embedder/image_embedder_test.cc @@ -199,8 +199,8 @@ TEST(ImageEmbedderTest, VideoModeTest) { // timestamp is greater than the previous one. 
struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(ImageEmbedderResult* embedder_result, const MpImage& image, - int64_t timestamp, char* error_msg) { + static void Fn(const ImageEmbedderResult* embedder_result, + const MpImage& image, int64_t timestamp, char* error_msg) { ASSERT_NE(embedder_result, nullptr); ASSERT_EQ(error_msg, nullptr); CheckMobileNetV3Result(*embedder_result, false); diff --git a/mediapipe/tasks/c/vision/object_detector/object_detector.h b/mediapipe/tasks/c/vision/object_detector/object_detector.h index e14523a496..cbadf942ff 100644 --- a/mediapipe/tasks/c/vision/object_detector/object_detector.h +++ b/mediapipe/tasks/c/vision/object_detector/object_detector.h @@ -81,7 +81,7 @@ struct ObjectDetectorOptions { // true for the lifetime of the callback function. // // A caller is responsible for closing object detector result. - typedef void (*result_callback_fn)(ObjectDetectorResult* result, + typedef void (*result_callback_fn)(const ObjectDetectorResult* result, const MpImage& image, int64_t timestamp_ms, char* error_msg); result_callback_fn result_callback; diff --git a/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc b/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc index 8e53fa5c9b..677ddd29b1 100644 --- a/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc +++ b/mediapipe/tasks/c/vision/object_detector/object_detector_test.cc @@ -139,8 +139,8 @@ TEST(ObjectDetectorTest, VideoModeTest) { // timestamp is greater than the previous one. 
struct LiveStreamModeCallback { static int64_t last_timestamp; - static void Fn(ObjectDetectorResult* detector_result, const MpImage& image, - int64_t timestamp, char* error_msg) { + static void Fn(const ObjectDetectorResult* detector_result, + const MpImage& image, int64_t timestamp, char* error_msg) { ASSERT_NE(detector_result, nullptr); ASSERT_EQ(error_msg, nullptr); EXPECT_EQ(detector_result->detections_count, 3); diff --git a/mediapipe/tasks/c/vision/pose_landmarker/BUILD b/mediapipe/tasks/c/vision/pose_landmarker/BUILD new file mode 100644 index 0000000000..f78f6ee482 --- /dev/null +++ b/mediapipe/tasks/c/vision/pose_landmarker/BUILD @@ -0,0 +1,151 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +cc_library( + name = "pose_landmarker_result", + hdrs = ["pose_landmarker_result.h"], + visibility = ["//visibility:public"], + deps = [ + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + ], +) + +cc_library( + name = "pose_landmarker_result_converter", + srcs = ["pose_landmarker_result_converter.cc"], + hdrs = ["pose_landmarker_result_converter.h"], + deps = [ + ":pose_landmarker_result", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/components/containers:landmark_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_result", + ], +) + +cc_test( + name = "pose_landmarker_result_converter_test", + srcs = ["pose_landmarker_result_converter_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + ], + linkstatic = 1, + deps = [ + ":pose_landmarker_result", + ":pose_landmarker_result_converter", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/framework/port:gtest_main", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/cc/components/containers:landmark", + "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "pose_landmarker_lib", + srcs = ["pose_landmarker.cc"], + hdrs = ["pose_landmarker.h"], + visibility = ["//visibility:public"], + deps = [ + ":pose_landmarker_result", + ":pose_landmarker_result_converter", + 
"//mediapipe/framework/formats:image", + "//mediapipe/framework/formats:image_frame", + "//mediapipe/tasks/c/core:base_options", + "//mediapipe/tasks/c/core:base_options_converter", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/core:running_mode", + "//mediapipe/tasks/cc/vision/pose_landmarker", + "//mediapipe/tasks/cc/vision/pose_landmarker:pose_landmarker_result", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/log:absl_log", + "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", + ], + alwayslink = 1, +) + +cc_test( + name = "pose_landmarker_test", + srcs = ["pose_landmarker_test.cc"], + data = [ + "//mediapipe/framework/formats:image_frame_opencv", + "//mediapipe/framework/port:opencv_core", + "//mediapipe/framework/port:opencv_imgproc", + "//mediapipe/tasks/testdata/vision:test_images", + "//mediapipe/tasks/testdata/vision:test_models", + ], + linkstatic = 1, + deps = [ + ":pose_landmarker_lib", + ":pose_landmarker_result", + "//mediapipe/framework/deps:file_path", + "//mediapipe/framework/formats:image", + "//mediapipe/framework/port:gtest", + "//mediapipe/tasks/c/components/containers:landmark", + "//mediapipe/tasks/c/vision/core:common", + "//mediapipe/tasks/cc/vision/utils:image_utils", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +# bazel build -c opt --linkopt -s --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/pose_landmarker:libpose_landmarker.so +cc_binary( + name = "libpose_landmarker.so", + linkopts = [ + "-Wl,-soname=libpose_landmarker.so", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":pose_landmarker_lib"], +) + +# bazel build --config darwin_arm64 -c opt --strip always --define MEDIAPIPE_DISABLE_GPU=1 \ +# //mediapipe/tasks/c/vision/pose_landmarker:libpose_landmarker.dylib +cc_binary( + 
name = "libpose_landmarker.dylib", + linkopts = [ + "-Wl,-install_name,libpose_landmarker.dylib", + "-fvisibility=hidden", + ], + linkshared = True, + tags = [ + "manual", + "nobuilder", + "notap", + ], + deps = [":pose_landmarker_lib"], +) diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.cc b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.cc new file mode 100644 index 0000000000..9485f017d8 --- /dev/null +++ b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.cc @@ -0,0 +1,285 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.h" + +#include +#include +#include +#include + +#include "absl/log/absl_log.h" +#include "absl/status/status.h" +#include "absl/status/statusor.h" +#include "mediapipe/framework/formats/image.h" +#include "mediapipe/framework/formats/image_frame.h" +#include "mediapipe/tasks/c/core/base_options_converter.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h" +#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h" +#include "mediapipe/tasks/cc/vision/core/running_mode.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarker.h" +#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarker_result.h" +#include "mediapipe/tasks/cc/vision/utils/image_utils.h" + +namespace mediapipe::tasks::c::vision::pose_landmarker { + +namespace { + +using ::mediapipe::tasks::c::components::containers:: + CppClosePoseLandmarkerResult; +using ::mediapipe::tasks::c::components::containers:: + CppConvertToPoseLandmarkerResult; +using ::mediapipe::tasks::c::core::CppConvertToBaseOptions; +using ::mediapipe::tasks::vision::CreateImageFromBuffer; +using ::mediapipe::tasks::vision::core::RunningMode; +using ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarker; +typedef ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult + CppPoseLandmarkerResult; + +int CppProcessError(absl::Status status, char** error_msg) { + if (error_msg) { + *error_msg = strdup(status.ToString().c_str()); + } + return status.raw_code(); +} + +} // namespace + +void CppConvertToPoseLandmarkerOptions( + const PoseLandmarkerOptions& in, + mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerOptions* out) { + out->num_poses = in.num_poses; + out->min_pose_detection_confidence = in.min_pose_detection_confidence; + 
out->min_pose_presence_confidence = in.min_pose_presence_confidence; + out->min_tracking_confidence = in.min_tracking_confidence; + out->output_segmentation_masks = in.output_segmentation_masks; +} + +PoseLandmarker* CppPoseLandmarkerCreate(const PoseLandmarkerOptions& options, + char** error_msg) { + auto cpp_options = std::make_unique< + ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerOptions>(); + + CppConvertToBaseOptions(options.base_options, &cpp_options->base_options); + CppConvertToPoseLandmarkerOptions(options, cpp_options.get()); + cpp_options->running_mode = static_cast(options.running_mode); + + // Enable callback for processing live stream data when the running mode is + // set to RunningMode::LIVE_STREAM. + if (cpp_options->running_mode == RunningMode::LIVE_STREAM) { + if (options.result_callback == nullptr) { + const absl::Status status = absl::InvalidArgumentError( + "Provided null pointer to callback function."); + ABSL_LOG(ERROR) << "Failed to create PoseLandmarker: " << status; + CppProcessError(status, error_msg); + return nullptr; + } + + PoseLandmarkerOptions::result_callback_fn result_callback = + options.result_callback; + cpp_options->result_callback = + [result_callback](absl::StatusOr cpp_result, + const Image& image, int64_t timestamp) { + char* error_msg = nullptr; + + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + CppProcessError(cpp_result.status(), &error_msg); + result_callback(nullptr, MpImage(), timestamp, error_msg); + free(error_msg); + return; + } + + // Result is valid for the lifetime of the callback function. 
+ PoseLandmarkerResult result; + CppConvertToPoseLandmarkerResult(*cpp_result, &result); + + const auto& image_frame = image.GetImageFrameSharedPtr(); + const MpImage mp_image = { + .type = MpImage::IMAGE_FRAME, + .image_frame = { + .format = static_cast<::ImageFormat>(image_frame->Format()), + .image_buffer = image_frame->PixelData(), + .width = image_frame->Width(), + .height = image_frame->Height()}}; + + result_callback(&result, mp_image, timestamp, + /* error_msg= */ nullptr); + + CppClosePoseLandmarkerResult(&result); + }; + } + + auto landmarker = PoseLandmarker::Create(std::move(cpp_options)); + if (!landmarker.ok()) { + ABSL_LOG(ERROR) << "Failed to create PoseLandmarker: " + << landmarker.status(); + CppProcessError(landmarker.status(), error_msg); + return nullptr; + } + return landmarker->release(); +} + +int CppPoseLandmarkerDetect(void* landmarker, const MpImage& image, + PoseLandmarkerResult* result, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + const absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet."); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->Detect(*img); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToPoseLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppPoseLandmarkerDetectForVideo(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + PoseLandmarkerResult* result, + char** error_msg) { + 
if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectForVideo(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Detection failed: " << cpp_result.status(); + return CppProcessError(cpp_result.status(), error_msg); + } + CppConvertToPoseLandmarkerResult(*cpp_result, result); + return 0; +} + +int CppPoseLandmarkerDetectAsync(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + if (image.type == MpImage::GPU_BUFFER) { + absl::Status status = + absl::InvalidArgumentError("GPU Buffer not supported yet"); + + ABSL_LOG(ERROR) << "Detection failed: " << status.message(); + return CppProcessError(status, error_msg); + } + + const auto img = CreateImageFromBuffer( + static_cast(image.image_frame.format), + image.image_frame.image_buffer, image.image_frame.width, + image.image_frame.height); + + if (!img.ok()) { + ABSL_LOG(ERROR) << "Failed to create Image: " << img.status(); + return CppProcessError(img.status(), error_msg); + } + + auto cpp_landmarker = static_cast(landmarker); + auto cpp_result = cpp_landmarker->DetectAsync(*img, timestamp_ms); + if (!cpp_result.ok()) { + ABSL_LOG(ERROR) << "Data preparation for the landmark detection failed: " + << cpp_result; + return CppProcessError(cpp_result, error_msg); + } + return 0; +} + +void CppPoseLandmarkerCloseResult(PoseLandmarkerResult* result) { + CppClosePoseLandmarkerResult(result); +} 
+ +int CppPoseLandmarkerClose(void* landmarker, char** error_msg) { + auto cpp_landmarker = static_cast(landmarker); + auto result = cpp_landmarker->Close(); + if (!result.ok()) { + ABSL_LOG(ERROR) << "Failed to close PoseLandmarker: " << result; + return CppProcessError(result, error_msg); + } + delete cpp_landmarker; + return 0; +} + +} // namespace mediapipe::tasks::c::vision::pose_landmarker + +extern "C" { + +void* pose_landmarker_create(struct PoseLandmarkerOptions* options, + char** error_msg) { + return mediapipe::tasks::c::vision::pose_landmarker::CppPoseLandmarkerCreate( + *options, error_msg); +} + +int pose_landmarker_detect_image(void* landmarker, const MpImage& image, + PoseLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::pose_landmarker::CppPoseLandmarkerDetect( + landmarker, image, result, error_msg); +} + +int pose_landmarker_detect_for_video(void* landmarker, const MpImage& image, + int64_t timestamp_ms, + PoseLandmarkerResult* result, + char** error_msg) { + return mediapipe::tasks::c::vision::pose_landmarker:: + CppPoseLandmarkerDetectForVideo(landmarker, image, timestamp_ms, result, + error_msg); +} + +int pose_landmarker_detect_async(void* landmarker, const MpImage& image, + int64_t timestamp_ms, char** error_msg) { + return mediapipe::tasks::c::vision::pose_landmarker:: + CppPoseLandmarkerDetectAsync(landmarker, image, timestamp_ms, error_msg); +} + +void pose_landmarker_close_result(PoseLandmarkerResult* result) { + mediapipe::tasks::c::vision::pose_landmarker::CppPoseLandmarkerCloseResult( + result); +} + +int pose_landmarker_close(void* landmarker, char** error_msg) { + return mediapipe::tasks::c::vision::pose_landmarker::CppPoseLandmarkerClose( + landmarker, error_msg); +} + +} // extern "C" diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.h b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.h new file mode 100644 index 0000000000..8f29c8fcaa --- /dev/null +++ 
b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.h @@ -0,0 +1,149 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_POSE_LANDMARKER_H_ +#define MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_POSE_LANDMARKER_H_ + +#include "mediapipe/tasks/c/core/base_options.h" +#include "mediapipe/tasks/c/vision/core/common.h" +#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The options for configuring a MediaPipe pose landmarker task. +struct PoseLandmarkerOptions { + // Base options for configuring MediaPipe Tasks, such as specifying the model + // file with metadata, accelerator options, op resolver, etc. + struct BaseOptions base_options; + + // The running mode of the task. Defaults to the image mode. + // PoseLandmarker has three running modes: + // 1) The image mode for recognizing pose landmarks on single image inputs. + // 2) The video mode for recognizing pose landmarks on the decoded frames of a + // video. + // 3) The live stream mode for recognizing pose landmarks on the live stream + // of input data, such as from camera. In this mode, the "result_callback" + // below must be specified to receive the detection results asynchronously. 
+ RunningMode running_mode; + + // The maximum number of poses that can be detected by the PoseLandmarker. + int num_poses = 1; + + // The minimum confidence score for the pose detection to be considered + // successful. + float min_pose_detection_confidence = 0.5; + + // The minimum confidence score of pose presence in the pose landmark + // detection. + float min_pose_presence_confidence = 0.5; + + // The minimum confidence score for the pose tracking to be considered + // successful. + float min_tracking_confidence = 0.5; + + // Whether to output segmentation masks. + bool output_segmentation_masks = false; + + // The user-defined result callback for processing live stream data. + // The result callback should only be specified when the running mode is set + // to RunningMode::LIVE_STREAM. Arguments of the callback function include: + // the pointer to detection result, the image that result was obtained + // on, the timestamp relevant to detection results and pointer to error + // message in case of any failure. The validity of the passed arguments is + // true for the lifetime of the callback function. + // + // The result is closed by the library after the callback returns. + typedef void (*result_callback_fn)(const PoseLandmarkerResult* result, + const MpImage& image, int64_t timestamp_ms, + char* error_msg); + result_callback_fn result_callback; +}; + +// Creates a PoseLandmarker from the provided `options`. +// Returns a pointer to the pose landmarker on success. +// If an error occurs, returns `nullptr` and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT void* pose_landmarker_create(struct PoseLandmarkerOptions* options, + char** error_msg); + +// Performs pose landmark detection on the input `image`. Returns `0` on +// success. 
If an error occurs, returns an error code and sets the error +// parameter to an error message (if `error_msg` is not `nullptr`). You must +// free the memory allocated for the error message. +MP_EXPORT int pose_landmarker_detect_image(void* landmarker, + const MpImage& image, + PoseLandmarkerResult* result, + char** error_msg); + +// Performs pose landmark detection on the provided video frame. +// Only use this method when the PoseLandmarker is created with the video +// running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide the video frame's timestamp (in milliseconds). The input timestamps +// must be monotonically increasing. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int pose_landmarker_detect_for_video(void* landmarker, + const MpImage& image, + int64_t timestamp_ms, + PoseLandmarkerResult* result, + char** error_msg); + +// Sends live image data to pose landmark detection, and the results will be +// available via the `result_callback` provided in the PoseLandmarkerOptions. +// Only use this method when the PoseLandmarker is created with the live +// stream running mode. +// The image can be of any size with format RGB or RGBA. It's required to +// provide a timestamp (in milliseconds) to indicate when the input image is +// sent to the pose landmarker. The input timestamps must be monotonically +// increasing. +// The `result_callback` provides: +// - The detection results as a PoseLandmarkerResult object. +// - The const reference to the corresponding input image that the pose +// landmarker runs on. Note that the const reference to the image will no +// longer be valid when the callback returns. To access the image data +// outside of the callback, callers need to make a copy of the image. +// - The input timestamp in milliseconds. 
+// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int pose_landmarker_detect_async(void* landmarker, + const MpImage& image, + int64_t timestamp_ms, + char** error_msg); + +// Frees the memory allocated inside a PoseLandmarkerResult result. +// Does not free the result pointer itself. +MP_EXPORT void pose_landmarker_close_result(PoseLandmarkerResult* result); + +// Frees the pose landmarker. +// If an error occurs, returns an error code and sets the error parameter to an +// error message (if `error_msg` is not `nullptr`). You must free the memory +// allocated for the error message. +MP_EXPORT int pose_landmarker_close(void* landmarker, char** error_msg); + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_POSE_LANDMARKER_H_ diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h new file mode 100644 index 0000000000..055ff805ab --- /dev/null +++ b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h @@ -0,0 +1,58 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_RESULT_POSE_LANDMARKER_RESULT_H_ +#define MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_RESULT_POSE_LANDMARKER_RESULT_H_ + +#include + +#include "mediapipe/tasks/c/components/containers/landmark.h" +#include "mediapipe/tasks/c/vision/core/common.h" + +#ifndef MP_EXPORT +#define MP_EXPORT __attribute__((visibility("default"))) +#endif // MP_EXPORT + +#ifdef __cplusplus +extern "C" { +#endif + +// The pose landmarker result from PoseLandmarker, where each vector +// element represents a single pose detected in the image. +struct PoseLandmarkerResult { + // Segmentation masks for pose. + struct MpImage* segmentation_masks; + + // The number of elements in the segmentation_masks array. + uint32_t segmentation_masks_count; + + // Detected pose landmarks in normalized image coordinates. + struct NormalizedLandmarks* pose_landmarks; + + // The number of elements in the pose_landmarks array. + uint32_t pose_landmarks_count; + + // Detected pose landmarks in world coordinates. + struct Landmarks* pose_world_landmarks; + + // The number of elements in the pose_world_landmarks array. + uint32_t pose_world_landmarks_count; +}; + +#ifdef __cplusplus +} // extern C +#endif + +#endif // MEDIAPIPE_TASKS_C_VISION_POSE_LANDMARKER_RESULT_POSE_LANDMARKER_RESULT_H_ diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.cc b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.cc new file mode 100644 index 0000000000..18ef923422 --- /dev/null +++ b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.cc @@ -0,0 +1,105 @@ +/* Copyright 2023 The MediaPipe Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "mediapipe/tasks/c/components/containers/landmark.h"
+#include "mediapipe/tasks/c/components/containers/landmark_converter.h"
+#include "mediapipe/tasks/c/vision/core/common.h"
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h"
+#include "mediapipe/tasks/cc/components/containers/landmark.h"
+#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarker_result.h"
+
+namespace mediapipe::tasks::c::components::containers {
+
+using CppLandmark = ::mediapipe::tasks::components::containers::Landmark;
+using CppNormalizedLandmark =
+    ::mediapipe::tasks::components::containers::NormalizedLandmark;
+
+void CppConvertToPoseLandmarkerResult(
+    const mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult& in,
+    PoseLandmarkerResult* out) {
+  if (in.segmentation_masks.has_value()) {
+    out->segmentation_masks_count = in.segmentation_masks.value().size();
+    out->segmentation_masks = new MpImage[out->segmentation_masks_count];
+    for (uint32_t i = 0; i < out->segmentation_masks_count; ++i) {
+      const auto& image_frame =
+          in.segmentation_masks.value()[i].GetImageFrameSharedPtr();
+      MpImage mp_image = {
+          .type = MpImage::IMAGE_FRAME,
+          .image_frame = {
+              .format = static_cast<::ImageFormat>(image_frame->Format()),
+              .image_buffer = image_frame->PixelData(),
+              .width = image_frame->Width(),
+              .height = image_frame->Height()}};
+      out->segmentation_masks[i] = mp_image;
+    }
+  } else {
+    out->segmentation_masks_count = 0;
+    out->segmentation_masks = nullptr;
+  }
+
+  out->pose_landmarks_count = in.pose_landmarks.size();
+  out->pose_landmarks = new NormalizedLandmarks[out->pose_landmarks_count];
+  for (uint32_t i = 0; i < out->pose_landmarks_count; ++i) {
+    std::vector<CppNormalizedLandmark> cpp_normalized_landmarks;
+    for (uint32_t j = 0; j < in.pose_landmarks[i].landmarks.size(); ++j) {
+      const auto& cpp_landmark = in.pose_landmarks[i].landmarks[j];
+      cpp_normalized_landmarks.push_back(cpp_landmark);
+    }
+    CppConvertToNormalizedLandmarks(cpp_normalized_landmarks,
+                                    &out->pose_landmarks[i]);
+  }
+
+  out->pose_world_landmarks_count = in.pose_world_landmarks.size();
+  out->pose_world_landmarks = new Landmarks[out->pose_world_landmarks_count];
+  for (uint32_t i = 0; i < out->pose_world_landmarks_count; ++i) {
+    std::vector<CppLandmark> cpp_landmarks;
+    for (uint32_t j = 0; j < in.pose_world_landmarks[i].landmarks.size(); ++j) {
+      const auto& cpp_landmark = in.pose_world_landmarks[i].landmarks[j];
+      cpp_landmarks.push_back(cpp_landmark);
+    }
+    CppConvertToLandmarks(cpp_landmarks, &out->pose_world_landmarks[i]);
+  }
+}
+
+void CppClosePoseLandmarkerResult(PoseLandmarkerResult* result) {
+  if (result->segmentation_masks) {
+    delete[] result->segmentation_masks;
+    result->segmentation_masks = nullptr;
+    result->segmentation_masks_count = 0;
+  }
+
+  for (uint32_t i = 0; i < result->pose_landmarks_count; ++i) {
+    CppCloseNormalizedLandmarks(&result->pose_landmarks[i]);
+  }
+  delete[] result->pose_landmarks;
+
+  for (uint32_t i = 0; i < result->pose_world_landmarks_count; ++i) {
+    CppCloseLandmarks(&result->pose_world_landmarks[i]);
+  }
+  delete[] result->pose_world_landmarks;
+
+  result->pose_landmarks = nullptr;
+  result->pose_world_landmarks = nullptr;
+
+  result->pose_landmarks_count = 0;
+  result->pose_world_landmarks_count = 0;
+}
+
+}  // namespace mediapipe::tasks::c::components::containers
diff --git
a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h
new file mode 100644
index 0000000000..1291914d40
--- /dev/null
+++ b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h
@@ -0,0 +1,41 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_POSE_LANDMARKER_RESULT_CONVERTER_H_
+#define MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_POSE_LANDMARKER_RESULT_CONVERTER_H_
+
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h"
+#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarker_result.h"
+
+namespace mediapipe::tasks::c::components::containers {
+
+// Fills the C struct `out` from the C++ result `in`. The pose landmark arrays
+// (and the segmentation mask array, when `in` carries masks) are allocated
+// with new[] and must be released with CppClosePoseLandmarkerResult. Each mask
+// entry stores the pixel pointer of its source image frame, not a copy.
+// NOTE(review): that pointer presumably stays valid only while the masks held
+// by `in` are alive -- confirm against ImageFrame::PixelData().
+void CppConvertToPoseLandmarkerResult(
+    const mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult& in,
+    PoseLandmarkerResult* out);
+
+// Releases the arrays allocated by CppConvertToPoseLandmarkerResult, then
+// resets the array pointers to nullptr and the landmark counts to 0. Does not
+// free `result` itself.
+void CppClosePoseLandmarkerResult(PoseLandmarkerResult* result);
+
+}  // namespace mediapipe::tasks::c::components::containers
+
+#endif  // MEDIAPIPE_TASKS_C_COMPONENTS_CONTAINERS_POSE_LANDMARKER_RESULT_CONVERTER_H_
diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter_test.cc b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter_test.cc
new file mode 100644
index 0000000000..ed85c9f5ad
--- /dev/null
+++
b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter_test.cc
@@ -0,0 +1,136 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result_converter.h"
+
+#include <cstdint>
+
+#include "absl/flags/flag.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/framework/port/status_matchers.h"
+#include "mediapipe/tasks/c/components/containers/landmark.h"
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h"
+#include "mediapipe/tasks/cc/components/containers/landmark.h"
+#include "mediapipe/tasks/cc/vision/pose_landmarker/pose_landmarker_result.h"
+#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
+
+namespace mediapipe::tasks::c::components::containers {
+
+using ::mediapipe::file::JoinPath;
+using ::mediapipe::tasks::vision::DecodeImageFromFile;
+
+constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
+constexpr char kMaskImage[] = "segmentation_input_rotation0.jpg";
+
+void InitPoseLandmarkerResult(
+    ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult*
+        cpp_result) {
+  // Initialize pose_landmarks
+  mediapipe::tasks::components::containers::NormalizedLandmark
+      cpp_normalized_landmark = {/* x= */ 0.1f,
+                                 /* y= */ 0.2f,
+                                 /* z= */ 0.3f};
+  mediapipe::tasks::components::containers::NormalizedLandmarks
+      cpp_normalized_landmarks;
+  cpp_normalized_landmarks.landmarks.push_back(cpp_normalized_landmark);
+  cpp_result->pose_landmarks.push_back(cpp_normalized_landmarks);
+
+  // Initialize pose_world_landmarks
+  mediapipe::tasks::components::containers::Landmark cpp_landmark = {
+      /* x= */ 1.0f,
+      /* y= */ 1.1f,
+      /* z= */ 1.2f};
+  mediapipe::tasks::components::containers::Landmarks cpp_landmarks;
+  cpp_landmarks.landmarks.push_back(cpp_landmark);
+  cpp_result->pose_world_landmarks.push_back(cpp_landmarks);
+
+  // Initialize segmentation_masks
+  MP_ASSERT_OK_AND_ASSIGN(
+      Image mask_image,
+      DecodeImageFromFile(JoinPath("./", kTestDataDirectory, kMaskImage)));
+
+  // Ensure segmentation_masks is instantiated and add the mask_image to it.
+  if (!cpp_result->segmentation_masks) {
+    cpp_result->segmentation_masks.emplace();
+  }
+  cpp_result->segmentation_masks->push_back(mask_image);
+}
+
+TEST(PoseLandmarkerResultConverterTest, ConvertsCustomResult) {
+  ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult cpp_result;
+  InitPoseLandmarkerResult(&cpp_result);
+
+  PoseLandmarkerResult c_result;
+  CppConvertToPoseLandmarkerResult(cpp_result, &c_result);
+
+  // Verify conversion of pose_landmarks
+  EXPECT_NE(c_result.pose_landmarks, nullptr);
+  EXPECT_EQ(c_result.pose_landmarks_count, cpp_result.pose_landmarks.size());
+
+  for (uint32_t i = 0; i < c_result.pose_landmarks_count; ++i) {
+    EXPECT_EQ(c_result.pose_landmarks[i].landmarks_count,
+              cpp_result.pose_landmarks[i].landmarks.size());
+    for (uint32_t j = 0; j < c_result.pose_landmarks[i].landmarks_count; ++j) {
+      const auto& landmark = cpp_result.pose_landmarks[i].landmarks[j];
+      EXPECT_FLOAT_EQ(c_result.pose_landmarks[i].landmarks[j].x, landmark.x);
+      EXPECT_FLOAT_EQ(c_result.pose_landmarks[i].landmarks[j].y, landmark.y);
+      EXPECT_FLOAT_EQ(c_result.pose_landmarks[i].landmarks[j].z, landmark.z);
+    }
+  }
+
+  // Verify conversion of pose_world_landmarks
+  EXPECT_NE(c_result.pose_world_landmarks, nullptr);
+  EXPECT_EQ(c_result.pose_world_landmarks_count,
+            cpp_result.pose_world_landmarks.size());
+
+  for (uint32_t i = 0; i < c_result.pose_world_landmarks_count; ++i) {
+    EXPECT_EQ(c_result.pose_world_landmarks[i].landmarks_count,
+              cpp_result.pose_world_landmarks[i].landmarks.size());
+    for (uint32_t j = 0; j < c_result.pose_world_landmarks[i].landmarks_count;
+         ++j) {
+      const auto& landmark = cpp_result.pose_world_landmarks[i].landmarks[j];
+      EXPECT_FLOAT_EQ(c_result.pose_world_landmarks[i].landmarks[j].x,
+                      landmark.x);
+      EXPECT_FLOAT_EQ(c_result.pose_world_landmarks[i].landmarks[j].y,
+                      landmark.y);
+      EXPECT_FLOAT_EQ(c_result.pose_world_landmarks[i].landmarks[j].z,
+                      landmark.z);
+    }
+  }
+
+  CppClosePoseLandmarkerResult(&c_result);
+}
+
+TEST(PoseLandmarkerResultConverterTest, FreesMemory) {
+  ::mediapipe::tasks::vision::pose_landmarker::PoseLandmarkerResult cpp_result;
+  InitPoseLandmarkerResult(&cpp_result);
+
+  PoseLandmarkerResult c_result;
+  CppConvertToPoseLandmarkerResult(cpp_result, &c_result);
+
+  EXPECT_NE(c_result.pose_landmarks, nullptr);
+  EXPECT_NE(c_result.pose_world_landmarks, nullptr);
+  EXPECT_NE(c_result.segmentation_masks, nullptr);
+
+  CppClosePoseLandmarkerResult(&c_result);
+
+  EXPECT_EQ(c_result.pose_landmarks, nullptr);
+  EXPECT_EQ(c_result.pose_world_landmarks, nullptr);
+  EXPECT_EQ(c_result.segmentation_masks, nullptr);
+}
+
+}  // namespace mediapipe::tasks::c::components::containers
diff --git a/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_test.cc b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_test.cc
new file mode 100644
index 0000000000..7a0919ae16
--- /dev/null
+++ b/mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_test.cc
@@ -0,0 +1,262 @@
+/* Copyright 2023 The MediaPipe Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+
+#include "absl/flags/flag.h"
+#include "absl/strings/string_view.h"
+#include "mediapipe/framework/deps/file_path.h"
+#include "mediapipe/framework/formats/image.h"
+#include "mediapipe/framework/port/gmock.h"
+#include "mediapipe/framework/port/gtest.h"
+#include "mediapipe/tasks/c/components/containers/landmark.h"
+#include "mediapipe/tasks/c/vision/core/common.h"
+#include "mediapipe/tasks/c/vision/pose_landmarker/pose_landmarker_result.h"
+#include "mediapipe/tasks/cc/vision/utils/image_utils.h"
+
+namespace {
+
+using ::mediapipe::file::JoinPath;
+using ::mediapipe::tasks::vision::DecodeImageFromFile;
+using testing::HasSubstr;
+
+constexpr char kTestDataDirectory[] = "/mediapipe/tasks/testdata/vision/";
+constexpr char kModelName[] = "pose_landmarker.task";
+constexpr char kImageFile[] = "pose.jpg";
+constexpr float kLandmarkPrecision = 1e-1;
+constexpr int kIterations = 5;
+
+std::string GetFullPath(absl::string_view file_name) {
+  return JoinPath("./", kTestDataDirectory, file_name);
+}
+
+void MatchesPoseLandmarkerResult(const PoseLandmarkerResult* result,
+                                 const float landmark_precision) {
+  // Expects to have the same number of poses detected.
+  EXPECT_EQ(result->pose_landmarks_count, 1);
+
+  // Expects to have the same number of segmentation_masks detected.
+  EXPECT_EQ(result->segmentation_masks_count, 1);
+  EXPECT_EQ(result->segmentation_masks->image_frame.width, 1000);
+  EXPECT_EQ(result->segmentation_masks->image_frame.height, 667);
+
+  // Actual landmarks match expected landmarks.
+  EXPECT_NEAR(result->pose_landmarks[0].landmarks[0].x, 0.4649f,
+              landmark_precision);
+  EXPECT_NEAR(result->pose_landmarks[0].landmarks[0].y, 0.4228f,
+              landmark_precision);
+  EXPECT_NEAR(result->pose_landmarks[0].landmarks[0].z, -0.1500f,
+              landmark_precision);
+  EXPECT_NEAR(result->pose_world_landmarks[0].landmarks[0].x, -0.0852f,
+              landmark_precision);
+  EXPECT_NEAR(result->pose_world_landmarks[0].landmarks[0].y, -0.6153f,
+              landmark_precision);
+  EXPECT_NEAR(result->pose_world_landmarks[0].landmarks[0].z, -0.1469f,
+              landmark_precision);
+}
+
+TEST(PoseLandmarkerTest, ImageModeTest) {
+  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
+  ASSERT_TRUE(image.ok());
+
+  const std::string model_path = GetFullPath(kModelName);
+  PoseLandmarkerOptions options = {
+      /* base_options= */ {/* model_asset_buffer= */ nullptr,
+                           /* model_asset_buffer_count= */ 0,
+                           /* model_asset_path= */ model_path.c_str()},
+      /* running_mode= */ RunningMode::IMAGE,
+      /* num_poses= */ 1,
+      /* min_pose_detection_confidence= */ 0.5,
+      /* min_pose_presence_confidence= */ 0.5,
+      /* min_tracking_confidence= */ 0.5,
+      /* output_segmentation_masks= */ true,
+  };
+
+  void* landmarker = pose_landmarker_create(&options, /* error_msg */ nullptr);
+  ASSERT_NE(landmarker, nullptr);
+
+  const auto& image_frame = image->GetImageFrameSharedPtr();
+  const MpImage mp_image = {
+      .type = MpImage::IMAGE_FRAME,
+      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
+                      .image_buffer = image_frame->PixelData(),
+                      .width = image_frame->Width(),
+                      .height = image_frame->Height()}};
+
+  PoseLandmarkerResult result;
+  pose_landmarker_detect_image(landmarker, mp_image, &result,
+                               /* error_msg */ nullptr);
+  MatchesPoseLandmarkerResult(&result, kLandmarkPrecision);
+  pose_landmarker_close_result(&result);
+  pose_landmarker_close(landmarker, /* error_msg */ nullptr);
+}
+
+TEST(PoseLandmarkerTest, VideoModeTest) {
+  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
+  ASSERT_TRUE(image.ok());
+
+  const std::string model_path = GetFullPath(kModelName);
+  PoseLandmarkerOptions options = {
+      /* base_options= */ {/* model_asset_buffer= */ nullptr,
+                           /* model_asset_buffer_count= */ 0,
+                           /* model_asset_path= */ model_path.c_str()},
+      /* running_mode= */ RunningMode::VIDEO,
+      /* num_poses= */ 1,
+      /* min_pose_detection_confidence= */ 0.5,
+      /* min_pose_presence_confidence= */ 0.5,
+      /* min_tracking_confidence= */ 0.5,
+      /* output_segmentation_masks= */ true,
+  };
+
+  void* landmarker = pose_landmarker_create(&options, /* error_msg */ nullptr);
+  ASSERT_NE(landmarker, nullptr);
+
+  const auto& image_frame = image->GetImageFrameSharedPtr();
+  const MpImage mp_image = {
+      .type = MpImage::IMAGE_FRAME,
+      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
+                      .image_buffer = image_frame->PixelData(),
+                      .width = image_frame->Width(),
+                      .height = image_frame->Height()}};
+
+  for (int i = 0; i < kIterations; ++i) {
+    PoseLandmarkerResult result;
+    pose_landmarker_detect_for_video(landmarker, mp_image, i, &result,
+                                     /* error_msg */ nullptr);
+
+    MatchesPoseLandmarkerResult(&result, kLandmarkPrecision);
+    pose_landmarker_close_result(&result);
+  }
+  pose_landmarker_close(landmarker, /* error_msg */ nullptr);
+}
+
+// A structure to support LiveStreamModeTest below. This structure holds a
+// static method `Fn` for a callback function of C API. A `static` qualifier
+// allows taking the address of the method to follow API style. Another static
+// struct member is `last_timestamp` that is used to verify that current
+// timestamp is greater than the previous one.
+struct LiveStreamModeCallback {
+  static int64_t last_timestamp;
+  static void Fn(const PoseLandmarkerResult* landmarker_result,
+                 const MpImage& image, int64_t timestamp, char* error_msg) {
+    ASSERT_NE(landmarker_result, nullptr);
+    ASSERT_EQ(error_msg, nullptr);
+    MatchesPoseLandmarkerResult(landmarker_result, kLandmarkPrecision);
+    EXPECT_GT(image.image_frame.width, 0);
+    EXPECT_GT(image.image_frame.height, 0);
+    EXPECT_GT(timestamp, last_timestamp);
+    ++last_timestamp;
+  }
+};
+int64_t LiveStreamModeCallback::last_timestamp = -1;
+
+TEST(PoseLandmarkerTest, LiveStreamModeTest) {
+  const auto image = DecodeImageFromFile(GetFullPath(kImageFile));
+  ASSERT_TRUE(image.ok());
+
+  const std::string model_path = GetFullPath(kModelName);
+
+  PoseLandmarkerOptions options = {
+      /* base_options= */ {/* model_asset_buffer= */ nullptr,
+                           /* model_asset_buffer_count= */ 0,
+                           /* model_asset_path= */ model_path.c_str()},
+      /* running_mode= */ RunningMode::LIVE_STREAM,
+      /* num_poses= */ 1,
+      /* min_pose_detection_confidence= */ 0.5,
+      /* min_pose_presence_confidence= */ 0.5,
+      /* min_tracking_confidence= */ 0.5,
+      /* output_segmentation_masks= */ true,
+      /* result_callback= */ LiveStreamModeCallback::Fn,
+  };
+
+  void* landmarker = pose_landmarker_create(&options, /* error_msg */ nullptr);
+  ASSERT_NE(landmarker, nullptr);
+
+  const auto& image_frame = image->GetImageFrameSharedPtr();
+  const MpImage mp_image = {
+      .type = MpImage::IMAGE_FRAME,
+      .image_frame = {.format = static_cast<ImageFormat>(image_frame->Format()),
+                      .image_buffer = image_frame->PixelData(),
+                      .width = image_frame->Width(),
+                      .height = image_frame->Height()}};
+
+  for (int i = 0; i < kIterations; ++i) {
+    EXPECT_GE(pose_landmarker_detect_async(landmarker, mp_image, i,
+                                           /* error_msg */ nullptr),
+              0);
+  }
+  pose_landmarker_close(landmarker, /* error_msg */ nullptr);
+
+  // Due to the flow limiter, the total of outputs might be smaller than the
+  // number of iterations.
+  EXPECT_LE(LiveStreamModeCallback::last_timestamp, kIterations);
+  EXPECT_GT(LiveStreamModeCallback::last_timestamp, 0);
+}
+
+TEST(PoseLandmarkerTest, InvalidArgumentHandling) {
+  // It is an error to set neither the asset buffer nor the path.
+  PoseLandmarkerOptions options = {
+      /* base_options= */ {/* model_asset_buffer= */ nullptr,
+                           /* model_asset_buffer_count= */ 0,
+                           /* model_asset_path= */ nullptr},
+      /* running_mode= */ RunningMode::IMAGE,
+      /* num_poses= */ 1,
+      /* min_pose_detection_confidence= */ 0.5,
+      /* min_pose_presence_confidence= */ 0.5,
+      /* min_tracking_confidence= */ 0.5,
+      /* output_segmentation_masks= */ true,
+  };
+
+  char* error_msg = nullptr;
+  void* landmarker = pose_landmarker_create(&options, &error_msg);
+  EXPECT_EQ(landmarker, nullptr);
+
+  EXPECT_THAT(error_msg, HasSubstr("ExternalFile must specify"));
+
+  free(error_msg);
+}
+
+TEST(PoseLandmarkerTest, FailedRecognitionHandling) {
+  const std::string model_path = GetFullPath(kModelName);
+  PoseLandmarkerOptions options = {
+      /* base_options= */ {/* model_asset_buffer= */ nullptr,
+                           /* model_asset_buffer_count= */ 0,
+                           /* model_asset_path= */ model_path.c_str()},
+      /* running_mode= */ RunningMode::IMAGE,
+      /* num_poses= */ 1,
+      /* min_pose_detection_confidence= */ 0.5,
+      /* min_pose_presence_confidence= */ 0.5,
+      /* min_tracking_confidence= */ 0.5,
+      /* output_segmentation_masks= */ true,
+  };
+
+  void* landmarker = pose_landmarker_create(&options, /* error_msg */
+                                            nullptr);
+  ASSERT_NE(landmarker, nullptr);
+
+  const MpImage mp_image = {.type = MpImage::GPU_BUFFER, .gpu_buffer = {}};
+  PoseLandmarkerResult result;
+  char* error_msg = nullptr;
+  pose_landmarker_detect_image(landmarker, mp_image, &result, &error_msg);
+  EXPECT_THAT(error_msg, HasSubstr("GPU Buffer not supported yet"));
+  free(error_msg);
+  pose_landmarker_close(landmarker, /* error_msg */ nullptr);
+}
+
+}  // namespace