From a456fd50031ee46bd92a56a82f8ae8fe78355c13 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Wed, 6 Dec 2023 21:33:24 +0530 Subject: [PATCH 1/7] Added iOS Image Embedder Result --- .../tasks/ios/vision/image_embedder/BUILD | 28 +++++++++++ .../sources/MPPImageEmbedderResult.h | 48 +++++++++++++++++++ .../sources/MPPImageEmbedderResult.m | 28 +++++++++++ 3 files changed, 104 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_embedder/BUILD create mode 100644 mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h create mode 100644 mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.m diff --git a/mediapipe/tasks/ios/vision/image_embedder/BUILD b/mediapipe/tasks/ios/vision/image_embedder/BUILD new file mode 100644 index 0000000000..5df080dce5 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/BUILD @@ -0,0 +1,28 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPImageEmbedderResult", + srcs = ["sources/MPPImageEmbedderResult.m"], + hdrs = ["sources/MPPImageEmbedderResult.h"], + deps = [ + "//mediapipe/tasks/ios/components/containers:MPPEmbeddingResult", + "//mediapipe/tasks/ios/core:MPPTaskResult", + ], +) + diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h new file mode 100644 index 0000000000..35452f2f0e --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h @@ -0,0 +1,48 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import <Foundation/Foundation.h> +#import "mediapipe/tasks/ios/components/containers/sources/MPPEmbeddingResult.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Represents the embedding results generated by `ImageEmbedder`. **/ +NS_SWIFT_NAME(ImageEmbedderResult) +@interface MPPImageEmbedderResult : MPPTaskResult + +/** The `MPPEmbeddingResult` instance containing one embedding per embedder head. **/ +@property(nonatomic, readonly) MPPEmbeddingResult *embeddingResult; + +/** + * Initializes a new `ImageEmbedderResult` with the given `MPPEmbeddingResult` and + * timestamp (in milliseconds).
+ * + * @param embeddingResult The `EmbeddingResult` instance containing one set of results per + * embedder head. + * @param timestampInMilliseconds The timestamp (in milliseconds) for this result. + * + * @return An instance of `ImageEmbedderResult` initialized with the given + * `MPPEmbeddingResult` and timestamp (in milliseconds). + */ +- (instancetype)initWithEmbeddingResult:(MPPEmbeddingResult *)embeddingResult + timestampInMilliseconds:(NSInteger)timestampInMilliseconds; + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.m b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.m new file mode 100644 index 0000000000..a3270d07dd --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.m @@ -0,0 +1,28 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h" + +@implementation MPPImageEmbedderResult + +- (instancetype)initWithEmbeddingResult:(MPPEmbeddingResult *)embeddingResult + timestampInMilliseconds:(NSInteger)timestampInMilliseconds { + self = [super initWithTimestampInMilliseconds:timestampInMilliseconds]; + if (self) { + _embeddingResult = embeddingResult; + } + return self; +} + +@end From 763b6ee63e61a0500a3c9793d7b06ebcffe79d2d Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Wed, 6 Dec 2023 21:34:10 +0530 Subject: [PATCH 2/7] Added iOS Image Embedder Options --- .../tasks/ios/vision/image_embedder/BUILD | 11 ++ .../sources/MPPImageEmbedderOptions.h | 104 ++++++++++++++++++ .../sources/MPPImageEmbedderOptions.m | 29 +++++ 3 files changed, 144 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h create mode 100644 mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.m diff --git a/mediapipe/tasks/ios/vision/image_embedder/BUILD b/mediapipe/tasks/ios/vision/image_embedder/BUILD index 5df080dce5..438c82f4b3 100644 --- a/mediapipe/tasks/ios/vision/image_embedder/BUILD +++ b/mediapipe/tasks/ios/vision/image_embedder/BUILD @@ -26,3 +26,14 @@ objc_library( ], ) +objc_library( + name = "MPPImageEmbedderOptions", + srcs = ["sources/MPPImageEmbedderOptions.m"], + hdrs = ["sources/MPPImageEmbedderOptions.h"], + deps = [ + ":MPPImageEmbedderResult", + "//mediapipe/tasks/ios/core:MPPTaskOptions", + "//mediapipe/tasks/ios/vision/core:MPPRunningMode", + ], +) + diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h new file mode 100644 index 0000000000..ab749e8ebe --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h @@ -0,0 +1,104 @@ +// Copyright 2023 The MediaPipe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import <Foundation/Foundation.h> + +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptions.h" +#import "mediapipe/tasks/ios/vision/core/sources/MPPRunningMode.h" +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h" + +NS_ASSUME_NONNULL_BEGIN + +@class MPPImageEmbedder; + +/** + * This protocol defines an interface for the delegates of `ImageEmbedder` object to receive + * results of asynchronous embedding extraction on images (i.e, when `runningMode` = `.liveStream`). + * + * The delegate of `ImageEmbedder` must adopt `ImageEmbedderLiveStreamDelegate` protocol. + * The methods in this protocol are optional. + */ +NS_SWIFT_NAME(ImageEmbedderLiveStreamDelegate) +@protocol MPPImageEmbedderLiveStreamDelegate <NSObject> + +@optional +/** + * This method notifies a delegate that the results of asynchronous embedding extraction on + * an image submitted to the `ImageEmbedder` are available. + * + * This method is called on a private serial queue created by the `ImageEmbedder` + * for performing the asynchronous delegate calls. + * + * @param imageEmbedder The image embedder which performed the embedding extraction. + * This is useful to test equality when there are multiple instances of `ImageEmbedder`. + * @param result An `ImageEmbedderResult` object that contains a list of generated image embeddings.
+ * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input + * image was sent to the image embedder. + * @param error An optional error parameter populated when there is an error in performing embedding + * extraction on the input live stream image data. + */ +- (void)imageEmbedder:(MPPImageEmbedder *)imageEmbedder + didFinishEmbeddingWithResult:(nullable MPPImageEmbedderResult *)result + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(nullable NSError *)error + NS_SWIFT_NAME(imageEmbedder(_:didFinishEmbeddingWithResult:timestampInMilliseconds:error:)); +@end + +/** + * Options for setting up an `ImageEmbedder`. + */ +NS_SWIFT_NAME(ImageEmbedderOptions) +@interface MPPImageEmbedderOptions : MPPTaskOptions + +/** + * Running mode of the image embedder task. Defaults to `.image`. + * `ImageEmbedder` can be created with one of the following running modes: + * 1. `.image`: The mode for performing embedding extraction on single image inputs. + * 2. `.video`: The mode for performing embedding extraction on the decoded frames of a + * video. + * 3. `.liveStream`: The mode for performing embedding extraction on a live stream of input + * data, such as from the camera. + */ +@property(nonatomic) MPPRunningMode runningMode; + +/** + * An object that conforms to `ImageEmbedderLiveStreamDelegate` protocol. This object must + * implement `imageEmbedder(_:didFinishEmbeddingWithResult:timestampInMilliseconds:error:)` to + * receive the results of asynchronous embedding extraction on images (i.e, when `runningMode = + * .liveStream`). + */ +@property(nonatomic, weak, nullable) id<MPPImageEmbedderLiveStreamDelegate> + imageEmbedderLiveStreamDelegate; + +/** + * @brief Sets whether L2 normalization should be performed on the returned embeddings. + * Use this option only if the model does not already contain a native L2_NORMALIZATION TF Lite Op. + * In most cases, this is already the case and L2 norm is thus achieved through TF Lite inference.
+ * + * `NO` by default. + */ +@property(nonatomic) BOOL l2Normalize; + +/** + * @brief Sets whether the returned embedding should be quantized to bytes via scalar quantization. + * Embeddings are implicitly assumed to be unit-norm and therefore any dimension is guaranteed to + * have a value in [-1.0, 1.0]. Use the `l2Normalize` property if this is not the case. + * + * `NO` by default. + */ +@property(nonatomic) BOOL quantize; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.m b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.m new file mode 100644 index 0000000000..11a42fd732 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.m @@ -0,0 +1,29 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h" + +@implementation MPPImageEmbedderOptions + +- (id)copyWithZone:(NSZone *)zone { + MPPImageEmbedderOptions *imageEmbedderOptions = [super copyWithZone:zone]; + + imageEmbedderOptions.runningMode = self.runningMode; + imageEmbedderOptions.l2Normalize = self.l2Normalize; + imageEmbedderOptions.quantize = self.quantize; + imageEmbedderOptions.imageEmbedderLiveStreamDelegate = self.imageEmbedderLiveStreamDelegate; + return imageEmbedderOptions; +} + +@end From 2ff26a51150ef6446638e3017409a0aa8ec77f66 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Wed, 6 Dec 2023 21:34:22 +0530 Subject: [PATCH 3/7] Added iOS Image Embedder Header --- .../tasks/ios/vision/image_embedder/BUILD | 15 + .../image_embedder/sources/MPPImageEmbedder.h | 264 ++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h diff --git a/mediapipe/tasks/ios/vision/image_embedder/BUILD b/mediapipe/tasks/ios/vision/image_embedder/BUILD index 438c82f4b3..2498e8b3e0 100644 --- a/mediapipe/tasks/ios/vision/image_embedder/BUILD +++ b/mediapipe/tasks/ios/vision/image_embedder/BUILD @@ -37,3 +37,18 @@ objc_library( ], ) +objc_library( + name = "MPPImageEmbedder", + hdrs = ["sources/MPPImageEmbedder.h"], + copts = [ + "-ObjC++", + "-std=c++17", + "-x objective-c++", + ], + module_name = "MPPImageEmbedder", + deps = [ + ":MPPImageEmbedderOptions", + ":MPPImageEmbedderResult", + "//mediapipe/tasks/ios/vision/core:MPPImage", + ], +) diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h new file mode 100644 index 0000000000..ed0d820ff0 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h @@ -0,0 +1,264 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import <Foundation/Foundation.h> + +#import "mediapipe/tasks/ios/vision/core/sources/MPPImage.h" +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h" +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * @brief Performs embedding extraction on images. + * + * The API expects a TFLite model with optional, but strongly recommended, + * [TFLite Model Metadata](https://www.tensorflow.org/lite/convert/metadata). + * + * The API supports models with one image input tensor and one or more output tensors. To be more + * specific, here are the requirements. + * + * Input image tensor + * (kTfLiteUInt8/kTfLiteFloat32) + * - image input of size `[batch x height x width x channels]`. + * - batch inference is not supported (`batch` is required to be 1). + * - only RGB inputs are supported (`channels` is required to be 3). + * - if type is kTfLiteFloat32, NormalizationOptions are required to be attached to the metadata + * for input normalization. + * + * At least one output tensor (kTfLiteUInt8/kTfLiteFloat32) with shape `[1 x N]` where N + * is the number of dimensions in the produced embeddings. + */ +NS_SWIFT_NAME(ImageEmbedder) +@interface MPPImageEmbedder : NSObject + +/** + * Creates a new instance of `ImageEmbedder` from an absolute path to a TensorFlow Lite model file + * stored locally on the device and the default `ImageEmbedderOptions`. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device.
+ * + * @return A new instance of `ImageEmbedder` with the given model path. `nil` if there is an + * error in initializing the image embedder. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; + +/** + * Creates a new instance of `ImageEmbedder` from the given `ImageEmbedderOptions`. + * + * @param options The options of type `ImageEmbedderOptions` to use for configuring the + * `ImageEmbedder`. + * + * @return A new instance of `ImageEmbedder` with the given options. `nil` if there is an error in + * initializing the image embedder. + */ +- (nullable instancetype)initWithOptions:(MPPImageEmbedderOptions *)options + error:(NSError **)error NS_DESIGNATED_INITIALIZER; + +/** + * Performs embedding extraction on the provided `MPImage` using the whole image as region of + * interest. Rotation will be applied according to the `orientation` property of the provided + * `MPImage`. Only use this method when the `ImageEmbedder` is created with running mode, `.image`. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which embedding extraction is to be performed. + * + * @return An `ImageEmbedderResult` object that contains a list of generated image embeddings. + */ +- (nullable MPPImageEmbedderResult *)embedImage:(MPPImage *)image + error:(NSError **)error NS_SWIFT_NAME(embed(image:)); + +/** + * Performs embedding extraction on the provided `MPImage` cropped to the specified region of + * interest. Rotation will be applied on the cropped image according to the `orientation` property + * of the provided `MPImage`.
Only use this method when the `ImageEmbedder` is created with running + * mode, `.image`. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which embedding extraction is to be performed. + * @param roi A `CGRect` specifying the region of interest within the given `MPImage`, on which + * embedding extraction should be performed. + * + * @return An `ImageEmbedderResult` object that contains a list of generated image embeddings. + */ +- (nullable MPPImageEmbedderResult *)embedImage:(MPPImage *)image + regionOfInterest:(CGRect)roi + error:(NSError **)error + NS_SWIFT_NAME(embed(image:regionOfInterest:)); + +/** + * Performs embedding extraction on the provided video frame of type `MPImage` using the whole image + * as region of interest. Rotation will be applied according to the `orientation` property of the + * provided `MPImage`. Only use this method when the `ImageEmbedder` is created with running mode + * `.video`. + * + * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must + * be monotonically increasing. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The `MPImage` on which embedding extraction is to be performed. 
+ * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input + * timestamps must be monotonically increasing. + * + * @return An `ImageEmbedderResult` object that contains a list of generated image embeddings. + */ +- (nullable MPPImageEmbedderResult *)embedVideoFrame:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error + NS_SWIFT_NAME(embed(videoFrame:timestampInMilliseconds:)); + +/** + * Performs embedding extraction on the provided video frame of type `MPImage` cropped to the + * specified region of interest. Rotation will be applied according to the `orientation` property of + * the provided `MPImage`. Only use this method when the `ImageEmbedder` is created with `.video`. + * + * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must + * be monotonically increasing. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha + * channel. + * + * @param image The video frame of type `MPImage` on which embedding extraction is to be + * performed. + * @param timestampInMilliseconds The video frame's timestamp (in milliseconds). The input + * timestamps must be monotonically increasing. + * @param roi A `CGRect` specifying the region of interest within the video frame of type + * `MPImage`, on which embedding extraction should be performed. + * + * @return An `ImageEmbedderResult` object that contains a list of generated image embeddings.
+ */ +- (nullable MPPImageEmbedderResult *)embedVideoFrame:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + regionOfInterest:(CGRect)roi + error:(NSError **)error + NS_SWIFT_NAME(embed(videoFrame:timestampInMilliseconds:regionOfInterest:)); + +/** + * Sends live stream image data of type `MPImage` to perform embedding extraction using the whole + * image as region of interest. Rotation will be applied according to the `orientation` property of + * the provided `MPImage`. Only use this method when the `ImageEmbedder` is created with running + * mode `.liveStream`. + * + * The object which needs to be continuously notified of the available results of image + * embedding extraction must conform to `ImageEmbedderLiveStreamDelegate` protocol and implement the + * `imageEmbedder(_:didFinishEmbeddingWithResult:timestampInMilliseconds:error:)` delegate + * method. + * + * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent + * to the image embedder. The input timestamps must be monotonically increasing. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an + * Alpha channel. + * + * If this method is used for embedding live camera frames using `AVFoundation`, ensure that you + * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA` using its + * `videoSettings` property. + * + * @param image A live stream image data of type `MPImage` on which embedding extraction is to be + * performed. + * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input + * image is sent to the image embedder.
The input timestamps must be monotonically increasing. + * + * @return `true` if the image was sent to the task successfully, otherwise `false`. + */ +- (BOOL)embedAsyncImage:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError **)error + NS_SWIFT_NAME(embedAsync(image:timestampInMilliseconds:)); + +/** + * Sends live stream image data of type `MPImage` to perform embedding extraction, cropped to the + * specified region of interest. Rotation will be applied according to the `orientation` property + * of the provided `MPImage`. Only use this method when the `ImageEmbedder` is created with + * `.liveStream`. + * + * The object which needs to be continuously notified of the available results of image embedding + * extraction must conform to `ImageEmbedderLiveStreamDelegate` protocol and implement the + * `imageEmbedder(_:didFinishEmbeddingWithResult:timestampInMilliseconds:error:)` delegate + * method. + * + * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent + * to the image embedder. The input timestamps must be monotonically increasing. + * + * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of + * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following + * pixel format types: + * 1. kCVPixelFormatType_32BGRA + * 2. kCVPixelFormatType_32RGBA + * + * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an + * Alpha channel. + * + * If this method is used for embedding live camera frames using `AVFoundation`, ensure that you + * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA` using its + * `videoSettings` property. + * + * @param image A live stream image data of type `MPImage` on which embedding extraction is to be + * performed.
+ * @param timestampInMilliseconds The timestamp (in milliseconds) which indicates when the input + * image is sent to the image embedder. The input timestamps must be monotonically increasing. + * @param roi A `CGRect` specifying the region of interest within the given live stream image data + * of type `MPImage`, on which embedding extraction should be performed. + * + * @return `true` if the image was sent to the task successfully, otherwise `false`. + */ +- (BOOL)embedAsyncImage:(MPPImage *)image + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + regionOfInterest:(CGRect)roi + error:(NSError **)error + NS_SWIFT_NAME(embedAsync(image:timestampInMilliseconds:regionOfInterest:)); + +- (instancetype)init NS_UNAVAILABLE; + ++ (instancetype)new NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END From af5a797b6bc99cf53996feeec89a320cdb66af21 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Wed, 6 Dec 2023 21:34:58 +0530 Subject: [PATCH 4/7] Added iOS Image Embedder Options Helpers --- .../ios/vision/image_embedder/utils/BUILD | 33 ++++++++++++++ .../sources/MPPImageEmbedderOptions+Helpers.h | 27 +++++++++++ .../MPPImageEmbedderOptions+Helpers.mm | 45 +++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_embedder/utils/BUILD create mode 100644 mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.h create mode 100644 mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.mm diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD b/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD new file mode 100644 index 0000000000..32594b2b2e --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD @@ -0,0 +1,33 @@ +# Copyright 2023 The MediaPipe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package(default_visibility = ["//mediapipe/tasks:internal"]) + +licenses(["notice"]) + +objc_library( + name = "MPPImageEmbedderOptionsHelpers", + srcs = ["sources/MPPImageEmbedderOptions+Helpers.mm"], + hdrs = ["sources/MPPImageEmbedderOptions+Helpers.h"], + deps = [ + "//mediapipe/framework:calculator_options_cc_proto", + "//mediapipe/tasks/cc/components/processors/proto:embedder_options_cc_proto", + "//mediapipe/tasks/cc/vision/image_embedder/proto:image_embedder_graph_options_cc_proto", + "//mediapipe/tasks/ios/common/utils:NSStringHelpers", + "//mediapipe/tasks/ios/core:MPPTaskOptionsProtocol", + "//mediapipe/tasks/ios/core/utils:MPPBaseOptionsHelpers", + "//mediapipe/tasks/ios/vision/image_embedder:MPPImageEmbedderOptions", + ], +) + diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.h b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.h new file mode 100644 index 0000000000..69232b56ad --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.h @@ -0,0 +1,27 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mediapipe/framework/calculator_options.pb.h" +#import "mediapipe/tasks/ios/core/sources/MPPTaskOptionsProtocol.h" +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPImageEmbedderOptions (Helpers) + +- (void)copyToProto:(::mediapipe::CalculatorOptions *)optionsProto; + +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.mm b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.mm new file mode 100644 index 0000000000..9dbe0817ab --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.mm @@ -0,0 +1,45 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderOptions+Helpers.h" + +#import "mediapipe/tasks/ios/common/utils/sources/NSString+Helpers.h" +#import "mediapipe/tasks/ios/core/utils/sources/MPPBaseOptions+Helpers.h" + +#include "mediapipe/tasks/cc/components/processors/proto/embedder_options.pb.h" +#include "mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options.pb.h" + +namespace { +using CalculatorOptionsProto = ::mediapipe::CalculatorOptions; +using ImageEmbedderGraphOptionsProto = + ::mediapipe::tasks::vision::image_embedder::proto::ImageEmbedderGraphOptions; +using EmbedderOptionsProto = ::mediapipe::tasks::components::processors::proto::EmbedderOptions; +} // namespace + +@implementation MPPImageEmbedderOptions (Helpers) + +- (void)copyToProto:(CalculatorOptionsProto *)optionsProto { + ImageEmbedderGraphOptionsProto *graphOptions = + optionsProto->MutableExtension(ImageEmbedderGraphOptionsProto::ext); + [self.baseOptions copyToProto:graphOptions->mutable_base_options() + withUseStreamMode:self.runningMode != MPPRunningModeImage]; + + EmbedderOptionsProto *embedderOptionsProto = graphOptions->mutable_embedder_options(); + embedderOptionsProto->Clear(); + + embedderOptionsProto->set_l2_normalize(self.l2Normalize ? true : false); + embedderOptionsProto->set_quantize(self.quantize ? 
true : false); +} + +@end From efbd33f058122fb8bb30aff3e3cda3319c44d498 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Wed, 6 Dec 2023 21:35:10 +0530 Subject: [PATCH 5/7] Added iOS Image Embedder Result Helpers --- .../ios/vision/image_embedder/utils/BUILD | 11 +++++ .../sources/MPPImageEmbedderResult+Helpers.h | 27 ++++++++++++ .../sources/MPPImageEmbedderResult+Helpers.mm | 41 +++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.h create mode 100644 mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.mm diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD b/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD index 32594b2b2e..8e26a5e85f 100644 --- a/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/BUILD @@ -31,3 +31,14 @@ objc_library( ], ) +objc_library( + name = "MPPImageEmbedderResultHelpers", + srcs = ["sources/MPPImageEmbedderResult+Helpers.mm"], + hdrs = ["sources/MPPImageEmbedderResult+Helpers.h"], + deps = [ + "//mediapipe/framework:packet", + "//mediapipe/tasks/cc/components/containers/proto:embeddings_cc_proto", + "//mediapipe/tasks/ios/components/containers/utils:MPPEmbeddingResultHelpers", + "//mediapipe/tasks/ios/vision/image_embedder:MPPImageEmbedderResult", + ], +) diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.h b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.h new file mode 100644 index 0000000000..e52b330706 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.h @@ -0,0 +1,27 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedderResult.h" + +#include "mediapipe/framework/packet.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface MPPImageEmbedderResult (Helpers) + +/** Creates a result from the `EmbeddingResult` proto and timestamp of `packet`. **/ ++ (MPPImageEmbedderResult *)imageEmbedderResultWithOutputPacket:(const mediapipe::Packet &)packet; +@end + +NS_ASSUME_NONNULL_END diff --git a/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.mm b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.mm new file mode 100644 index 0000000000..9f8fa3ff05 --- /dev/null +++ b/mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.mm @@ -0,0 +1,41 @@ +// Copyright 2023 The MediaPipe Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import "mediapipe/tasks/ios/components/containers/utils/sources/MPPEmbeddingResult+Helpers.h" +#import "mediapipe/tasks/ios/vision/image_embedder/utils/sources/MPPImageEmbedderResult+Helpers.h" + +#include "mediapipe/tasks/cc/components/containers/proto/embeddings.pb.h" + +// Packet timestamps are in microseconds; the result takes milliseconds. +static const int kMicrosecondsPerMillisecond = 1000; + +namespace { +using EmbeddingResultProto = ::mediapipe::tasks::components::containers::proto::EmbeddingResult; +using ::mediapipe::Packet; +} // namespace + +@implementation MPPImageEmbedderResult (Helpers) + +// Wraps the `EmbeddingResult` proto held by `packet`, together with its timestamp, in a result. ++ (MPPImageEmbedderResult *)imageEmbedderResultWithOutputPacket:(const Packet &)packet { + MPPEmbeddingResult *embeddingResult = + [MPPEmbeddingResult embeddingResultWithProto:packet.Get<EmbeddingResultProto>()]; + + return [[MPPImageEmbedderResult alloc] + initWithEmbeddingResult:embeddingResult + timestampInMilliseconds:(NSInteger)(packet.Timestamp().Value() / + kMicrosecondsPerMillisecond)]; +} + +@end From 8b7564b4fd0d55701a909b05e5fde3be4e2df8cd Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Fri, 22 Dec 2023 21:44:38 +0530 Subject: [PATCH 6/7] Updated documentation of MPPImageEmbedder --- .../image_embedder/sources/MPPImageEmbedder.h | 48 +++++++------------ 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h index ed0d820ff0..b3cb795bf2 100644 --- a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h @@ -71,11 +71,9 @@ NS_SWIFT_NAME(ImageEmbedder) * interest. Rotation will be applied according to the `orientation` property of the provided * `MPImage`. Only use this method when the `ImageEmbedder` is created with running mode, `.image`. * - * This method supports embedding extraction on RGBA images. 
If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -93,11 +91,9 @@ NS_SWIFT_NAME(ImageEmbedder) * of the provided `MPImage`. Only use this method when the `ImageEmbedder` is created with running * mode, `.image`. * - * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -122,11 +118,9 @@ NS_SWIFT_NAME(ImageEmbedder) * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * be monotonically increasing. * - * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. 
If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -150,11 +144,9 @@ NS_SWIFT_NAME(ImageEmbedder) * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * be monotonically increasing. * - * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -188,11 +180,9 @@ NS_SWIFT_NAME(ImageEmbedder) * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * to the image embedder. The input timestamps must be monotonically increasing. * - * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of - * .pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. 
@@ -227,11 +217,9 @@ NS_SWIFT_NAME(ImageEmbedder) * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * to the image embedder. The input timestamps must be monotonically increasing. * - * This method supports embedding extraction on RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports embedding extraction on RGBA images. If your `MPImage` has a + * source type of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. From f3006929295aefc263b30483116d92c6342a756e Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Fri, 22 Dec 2023 21:44:54 +0530 Subject: [PATCH 7/7] Added cosine similarity method to iOS MPPImagEmbedder --- .../image_embedder/sources/MPPImageEmbedder.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h index b3cb795bf2..df518210a7 100644 --- a/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h +++ b/mediapipe/tasks/ios/vision/image_embedder/sources/MPPImageEmbedder.h @@ -245,6 +245,24 @@ NS_SWIFT_NAME(ImageEmbedder) - (instancetype)init NS_UNAVAILABLE; +/** + * Utility function to compute [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) + * between two `MPPEmbedding` objects. + * + * @param embedding1 One of the two `MPPEmbedding`s between which cosine similarity is to be + * computed. + * @param embedding2 One of the two `MPPEmbedding`s between which cosine similarity is to be + * computed. 
+ * @param error An optional error parameter populated when there is an error in calculating cosine + * similarity between two embeddings. + * + * @return An `NSNumber` which holds the cosine similarity of type `double`. + */ ++ (nullable NSNumber *)cosineSimilarityBetweenEmbedding1:(MPPEmbedding *)embedding1 + andEmbedding2:(MPPEmbedding *)embedding2 + error:(NSError **)error + NS_SWIFT_NAME(cosineSimilarity(embedding1:embedding2:)); + + (instancetype)new NS_UNAVAILABLE; @end