diff --git a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm index e80d912538..f89a44b061 100644 --- a/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm +++ b/mediapipe/tasks/ios/vision/core/utils/sources/MPPImage+Utils.mm @@ -30,6 +30,20 @@ using ::mediapipe::ImageFormat; using ::mediapipe::ImageFrame; +vImage_Buffer CreateEmptyVImageBufferFromImageFrame(ImageFrame &imageFrame, bool shouldAllocate) { + UInt8 *data = shouldAllocate ? new UInt8[imageFrame.Height() * imageFrame.WidthStep()] : nullptr; + return {.data = data, + .height = static_cast(imageFrame.Height()), + .width = static_cast(imageFrame.Width()), + .rowBytes = static_cast(imageFrame.WidthStep())}; +} + +vImage_Buffer CreateVImageBufferFromImageFrame(ImageFrame &imageFrame) { + vImage_Buffer imageBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, false); + imageBuffer.data = imageFrame.MutablePixelData(); + return imageBuffer; +} + vImage_Buffer allocatedVImageBuffer(vImagePixelCount width, vImagePixelCount height, size_t rowBytes) { UInt8 *data = new UInt8[height * rowBytes]; @@ -40,6 +54,8 @@ static void FreeDataProviderReleaseCallback(void *buffer, const void *data, size delete[] (vImage_Buffer *)buffer; } +static void FreeRefConReleaseCallback(void *refCon, const void *baseAddress) { delete[] (UInt8 *)refCon; } // `refCon` is the pixel copy allocated with `new UInt8[]`, so it must be released with `delete[]`, not `free`. + } // namespace @interface MPPPixelDataUtils : NSObject @@ -51,6 +67,10 @@ @interface MPPPixelDataUtils : NSObject pixelBufferFormat:(OSType)pixelBufferFormatType error:(NSError **)error; ++ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame + shouldCopy:(BOOL)shouldCopy + error:(NSError **)error; + @end @interface MPPCVPixelBufferUtils : NSObject @@ -58,6 +78,24 @@ @interface MPPCVPixelBufferUtils : NSObject + (std::unique_ptr)imageFrameFromCVPixelBuffer:(CVPixelBufferRef)pixelBuffer error:(NSError **)error; +// This method is used to create CVPixelBuffer from output images of tasks like 
`FaceStylizer` only +// when the input `MPImage` source type is `pixelBuffer`. +// Always copies the pixel data of the image frame to the created `CVPixelBuffer`. +// +// The only possible 32 RGBA pixel format of input `CVPixelBuffer` is `kCVPixelFormatType_32BGRA`. +// But Mediapipe does not support inference on images of format `BGRA`. Hence the channels of the +// underlying pixel data of `CVPixelBuffer` are permuted to the supported RGBA format before passing +// them to the task for inference. The pixel format of the output images of any MediaPipe task will +// be the same as the pixel format of the input image. (RGBA in this case). +// +// Since creation of `CVPixelBuffer` from the output image pixels with a format of +// `kCVPixelFormatType_32RGBA` is not possible, the channels of the output C++ image `RGBA` have to +// be permuted to the format `BGRA`. When the pixels are copied to create `CVPixelBuffer` this does +// not pose a challenge. +// +// TODO: Investigate if permuting channels of output `mediapipe::Image` in place is possible for +// creating `CVPixelBuffer`s without copying the underlying pixels. ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error; @end @interface MPPCGImageUtils : NSObject @@ -99,6 +137,9 @@ @implementation MPPPixelDataUtils : NSObject // Convert the raw pixel data to RGBA format and un-premultiply the alpha from the R, G, B values // since MediaPipe C++ APIs only accept un pre-multiplied channels. + // + // This method is commonly used for `MPImage`s of all source types. Hence supporting BGRA and RGBA + // formats. Only `pixelBuffer` source type is restricted to `BGRA` format. 
switch (pixelBufferFormatType) { case kCVPixelFormatType_32RGBA: { destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, @@ -107,6 +148,8 @@ @implementation MPPPixelDataUtils : NSObject break; } case kCVPixelFormatType_32BGRA: { + // Permute channels to `RGBA` since MediaPipe tasks don't support inference on images of + // format `BGRA`. const uint8_t permute_map[4] = {2, 1, 0, 3}; destBuffer = allocatedVImageBuffer((vImagePixelCount)width, (vImagePixelCount)height, destinationBytesPerRow); @@ -120,8 +163,7 @@ @implementation MPPPixelDataUtils : NSObject default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"Invalid source pixel buffer format. Expecting one of " - @"kCVPixelFormatType_32RGBA, kCVPixelFormatType_32BGRA"]; + description:@"Some internal error occured."]; return nullptr; } } @@ -136,7 +178,47 @@ @implementation MPPPixelDataUtils : NSObject // Uses default deleter return std::make_unique(imageFormat, width, height, destinationBytesPerRow, - static_cast(destBuffer.data)); + static_cast(destBuffer.data)); +} + ++ (UInt8 *)pixelDataFromImageFrame:(ImageFrame &)imageFrame + shouldCopy:(BOOL)shouldCopy + error:(NSError **)error { + vImage_Buffer sourceBuffer = CreateVImageBufferFromImageFrame(imageFrame); + + // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure + // that pixels are displayed correctly irrespective of their alpha values. + vImage_Error premultiplyError; + vImage_Buffer destinationBuffer; + + switch (imageFrame.Format()) { + case ImageFormat::SRGBA: { + destinationBuffer = + shouldCopy ? 
CreateEmptyVImageBufferFromImageFrame(imageFrame, true) : sourceBuffer; + premultiplyError = + vImagePremultiplyData_RGBA8888(&sourceBuffer, &destinationBuffer, kvImageNoFlags); + break; + } + default: { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An error occured while processing the output image " + @"pixels of the vision task."]; + return nullptr; + } + } + + if (premultiplyError != kvImageNoError) { + [MPPCommonUtils + createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description: + @"An error occured while processing the output image pixels of the vision task."]; + if (shouldCopy) delete[] (UInt8 *)destinationBuffer.data; // The copy allocated above is not returned on failure, so free it here. + return nullptr; + } + + return (UInt8 *)destinationBuffer.data; } @end @@ -149,7 +231,8 @@ @implementation MPPCVPixelBufferUtils std::unique_ptr imageFrame = nullptr; switch (pixelBufferFormat) { - case kCVPixelFormatType_32RGBA: + // Core Video only supports pixel data of order BGRA for 32 bit RGBA images. + // Thus other formats like `kCVPixelFormatType_32RGBA` don't need to be accounted for. case kCVPixelFormatType_32BGRA: { CVPixelBufferLockBaseAddress(pixelBuffer, 0); imageFrame = [MPPPixelDataUtils @@ -165,15 +248,58 @@ @implementation MPPCVPixelBufferUtils default: { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInvalidArgumentError - description:@"Unsupported pixel format for CVPixelBuffer. Supported " - @"pixel format types are kCVPixelFormatType_32BGRA and " - @"kCVPixelFormatType_32RGBA"]; + description:@"Unsupported pixel format for CVPixelBuffer. 
Expected " + @"kCVPixelFormatType_32BGRA"]; } } return imageFrame; } ++ (CVPixelBufferRef)cvPixelBufferFromImageFrame:(ImageFrame &)imageFrame error:(NSError **)error { + if (imageFrame.Format() != ImageFormat::SRGBA) { + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An error occured while creating a CVPixelBuffer from the " + @"output image of the vision task."]; + return nullptr; + } + + UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:imageFrame + shouldCopy:YES + error:error]; + + if (!pixelData) { + return nullptr; + } + + const uint8_t permute_map[4] = {2, 1, 0, 3}; + vImage_Buffer sourceBuffer = CreateEmptyVImageBufferFromImageFrame(imageFrame, NO); + sourceBuffer.data = pixelData; + + if (vImagePermuteChannels_ARGB8888(&sourceBuffer, &sourceBuffer, permute_map, kvImageNoFlags) == + kvImageNoError) { + CVPixelBufferRef outputBuffer; + + OSType pixelBufferFormatType = kCVPixelFormatType_32BGRA; + + // Since data is copied, pass in a release callback that will be invoked when the pixel buffer + // is destroyed. 
+ if (CVPixelBufferCreateWithBytes(kCFAllocatorDefault, imageFrame.Width(), imageFrame.Height(), + pixelBufferFormatType, pixelData, imageFrame.WidthStep(), + FreeRefConReleaseCallback, pixelData, nullptr, + &outputBuffer) == kCVReturnSuccess) { + return outputBuffer; + } + } + + [MPPCommonUtils createCustomError:error + withCode:MPPTasksErrorCodeInternalError + description:@"An error occured while creating a CVPixelBuffer from the " + @"output image of the vision task."]; + return nullptr; +} + @end @implementation MPPCGImageUtils @@ -232,7 +358,14 @@ + (CGImageRef)cgImageFromImageFrame:(std::shared_ptr)imageFrame CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault; ImageFrame *internalImageFrame = imageFrame.get(); - size_t channelCount = 4; + + UInt8 *pixelData = [MPPPixelDataUtils pixelDataFromImageFrame:*internalImageFrame + shouldCopy:shouldCopyPixelData + error:error]; + + if (!pixelData) { + return nullptr; + } switch (internalImageFrame->Format()) { case ImageFormat::SRGBA: { @@ -242,56 +375,41 @@ + (CGImageRef)cgImageFromImageFrame:(std::shared_ptr)imageFrame default: [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; + description:@"An error occured while creating a CGImage from the " + @"output image of the vision task."]; return nullptr; } - size_t bitsPerComponent = 8; + CGDataProviderReleaseDataCallback callback = shouldCopyPixelData ? FreeDataProviderReleaseCallback : nullptr; // A copied buffer is owned by the data provider and must be freed with it; uncopied pixels stay owned by the image frame. - vImage_Buffer sourceBuffer = { - .data = (void *)internalImageFrame->MutablePixelData(), - .height = static_cast(internalImageFrame->Height()), - .width = static_cast(internalImageFrame->Width()), - .rowBytes = static_cast(internalImageFrame->WidthStep())}; + CGDataProviderRef provider = CGDataProviderCreateWithData( + pixelData, pixelData, internalImageFrame->WidthStep() * internalImageFrame->Height(), + callback); - vImage_Buffer destBuffer; + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); - 
CGDataProviderReleaseDataCallback callback = nullptr; + CGImageRef cgImageRef = nullptr; - if (shouldCopyPixelData) { - destBuffer = allocatedVImageBuffer(static_cast(internalImageFrame->Width()), - static_cast(internalImageFrame->Height()), - static_cast(internalImageFrame->WidthStep())); - callback = FreeDataProviderReleaseCallback; - } else { - destBuffer = sourceBuffer; + if (provider && colorSpace) { + size_t bitsPerComponent = 8; + size_t channelCount = 4; + cgImageRef = + CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent, + bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace, + bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault); } - // Pre-multiply the raw pixels from a `mediapipe::Image` before creating a `CGImage` to ensure - // that pixels are displayed correctly irrespective of their alpha values. - vImage_Error premultiplyError = - vImagePremultiplyData_RGBA8888(&sourceBuffer, &destBuffer, kvImageNoFlags); + // Can safely pass `NULL` to these functions according to iOS docs. 
+ CGDataProviderRelease(provider); + CGColorSpaceRelease(colorSpace); - if (premultiplyError != kvImageNoError) { + if (!cgImageRef) { [MPPCommonUtils createCustomError:error withCode:MPPTasksErrorCodeInternalError - description:@"An internal error occured."]; - - return nullptr; + description:@"An error occured while converting the output image of the " + @"vision task to a CGImage."]; } - CGDataProviderRef provider = CGDataProviderCreateWithData( - destBuffer.data, destBuffer.data, - internalImageFrame->WidthStep() * internalImageFrame->Height(), callback); - CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); - CGImageRef cgImageRef = - CGImageCreate(internalImageFrame->Width(), internalImageFrame->Height(), bitsPerComponent, - bitsPerComponent * channelCount, internalImageFrame->WidthStep(), colorSpace, - bitmapInfo, provider, nullptr, YES, kCGRenderingIntentDefault); - - CGDataProviderRelease(provider); - CGColorSpaceRelease(colorSpace); - return cgImageRef; } @@ -347,8 +465,30 @@ - (nullable instancetype)initWithCppImage:(mediapipe::Image &)image return [self initWithUIImage:image orientation:sourceImage.orientation error:nil]; } + case MPPImageSourceTypePixelBuffer: { + if (!shouldCopyPixelData) { + // TODO: Investigate possibility of permuting channels of `mediapipe::Image` returned by + // vision tasks in place to ensure that we can support creating `CVPixelBuffer`s without + // copying the pixel data. 
+ [MPPCommonUtils + createCustomError:error + withCode:MPPTasksErrorCodeInvalidArgumentError + description: + @"When the source type is pixel buffer, you cannot request uncopied data"]; + return nil; + } + + CVPixelBufferRef pixelBuffer = + [MPPCVPixelBufferUtils cvPixelBufferFromImageFrame:*(image.GetImageFrameSharedPtr()) + error:error]; + MPPImage *image = [self initWithPixelBuffer:pixelBuffer + orientation:sourceImage.orientation + error:nil]; + CVPixelBufferRelease(pixelBuffer); + return image; + } default: - // TODO Implement Other Source Types. + // TODO Implement CMSampleBuffer. return nil; } } diff --git a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h index 5b9b24fb6f..6e7ae57bfa 100644 --- a/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h +++ b/mediapipe/tasks/ios/vision/image_classifier/sources/MPPImageClassifier.h @@ -82,10 +82,8 @@ NS_SWIFT_NAME(ImageClassifier) * `.image`. * * This method supports classification of RGBA images. If your `MPImage` has a source type - * ofm`.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -104,11 +102,9 @@ NS_SWIFT_NAME(ImageClassifier) * of the provided `MPImage`. Only use this method when the `ImageClassifier` is created with * running mode, `.image`. * - * This method supports classification of RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. 
kCVPixelFormatType_32RGBA + * This method supports classification of RGBA images. If your `MPImage` has a source type + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -133,11 +129,9 @@ NS_SWIFT_NAME(ImageClassifier) * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * be monotonically increasing. * - * This method supports classification of RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports classification of RGBA images. If your `MPImage` has a source type + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. @@ -161,11 +155,9 @@ NS_SWIFT_NAME(ImageClassifier) * It's required to provide the video frame's timestamp (in milliseconds). The input timestamps must * be monotonically increasing. * - * This method supports classification of RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports classification of RGBA images. If your `MPImage` has a source type + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If your `MPImage` has a source type of `.image` ensure that the color space is RGB with an Alpha * channel. 
@@ -199,11 +191,9 @@ NS_SWIFT_NAME(ImageClassifier) * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * to the image classifier. The input timestamps must be monotonically increasing. * - * This method supports classification of RGBA images. If your `MPImage` has a source type of - * .pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports classification of RGBA images. If your `MPImage` has a source type + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. @@ -238,17 +228,15 @@ NS_SWIFT_NAME(ImageClassifier) * It's required to provide a timestamp (in milliseconds) to indicate when the input image is sent * to the image classifier. The input timestamps must be monotonically increasing. * - * This method supports classification of RGBA images. If your `MPImage` has a source type of - * `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must have one of the following - * pixel format types: - * 1. kCVPixelFormatType_32BGRA - * 2. kCVPixelFormatType_32RGBA + * This method supports classification of RGBA images. If your `MPImage` has a source type + * of `.pixelBuffer` or `.sampleBuffer`, the underlying pixel buffer must use + * `kCVPixelFormatType_32BGRA` as its pixel format. * * If the input `MPImage` has a source type of `.image` ensure that the color space is RGB with an * Alpha channel. 
* * If this method is used for classifying live camera frames using `AVFoundation`, ensure that you - * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32RGBA` using its + * request `AVCaptureVideoDataOutput` to output frames in `kCMPixelFormat_32BGRA` using its * `videoSettings` property. * * @param image A live stream image data of type `MPImage` on which image classification is to be