From cc8a8829517ebb11dfd07e9a0919b8418cacf82a Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Mon, 23 Sep 2024 20:57:36 +0530
Subject: [PATCH 1/8] Fixed index error in AVAudioFile+TestUtils

---
 .../ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m b/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
index 8b3fb57f17..4b2f75af04 100644
--- a/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
+++ b/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
@@ -65,7 +65,7 @@ @implementation AVAudioFile (TestUtils)
                            sampleCount:lengthToBeLoaded / audioDataFormat.channelCount];
 
   MPPFloatBuffer *floatBuffer =
-      [[MPPFloatBuffer alloc] initWithData:audioPCMBuffer.floatChannelData + currentPosition
+      [[MPPFloatBuffer alloc] initWithData:audioPCMBuffer.floatChannelData[0] + currentPosition
                                     length:lengthToBeLoaded];
 
   [audioData loadBuffer:floatBuffer offset:0 length:floatBuffer.length error:nil];
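Note: the fix above matters because `AVAudioPCMBuffer.floatChannelData` is not a flat
sample buffer: its type is `float * const *`, an array of per-channel sample pointers.
Pointer arithmetic therefore has to happen on one channel's pointer, which is what the
`[0]` index restores. A minimal sketch of the corrected access pattern, reusing the
variable names from the diff above:

    // floatChannelData[0] is the sample pointer for the first (and possibly only)
    // channel; offsetting it by currentPosition skips already-consumed samples.
    float *channelSamples = audioPCMBuffer.floatChannelData[0];
    MPPFloatBuffer *floatBuffer =
        [[MPPFloatBuffer alloc] initWithData:channelSamples + currentPosition
                                      length:lengthToBeLoaded];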
From 29e74d5de882a6ea085f432425bf05ce1a73abb7 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Mon, 23 Sep 2024 21:07:20 +0530
Subject: [PATCH 2/8] Fixed audio classifier result processing in stream mode

---
 .../sources/MPPAudioClassifierResult+Helpers.mm    | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm
index 01615c95ee..78b35f18eb 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm
+++ b/mediapipe/tasks/ios/audio/audio_classifier/utils/sources/MPPAudioClassifierResult+Helpers.mm
@@ -33,14 +33,21 @@ + (nullable MPPAudioClassifierResult *)audioClassifierResultWithClassificationsP
   NSInteger timestampInMilliseconds =
       (NSInteger)(packet.Timestamp().Value() / kMicrosecondsPerMillisecond);
 
-  if (!packet.ValidateAsType<std::vector<ClassificationResultProto>>().ok()) {
+  std::vector<ClassificationResultProto> cppClassificationResults;
+  if (packet.ValidateAsType<ClassificationResultProto>().ok()) {
+    // If `runningMode = .audioStream`, only a single `ClassificationResult` will be returned in the
+    // result packet.
+    cppClassificationResults.emplace_back(packet.Get<ClassificationResultProto>());
+  } else if (packet.ValidateAsType<std::vector<ClassificationResultProto>>().ok()) {
+    // If `runningMode = .audioClips`, a vector of timestamped `ClassificationResult`s will be
+    // returned in the result packet.
+    cppClassificationResults = packet.Get<std::vector<ClassificationResultProto>>();
+  } else {
+    // The packet does not contain a protobuf of a type expected by the audio classifier.
     return [[MPPAudioClassifierResult alloc] initWithClassificationResults:@[]
                                                    timestampInMilliseconds:timestampInMilliseconds];
   }
 
-  std::vector<ClassificationResultProto> cppClassificationResults =
-      packet.Get<std::vector<ClassificationResultProto>>();
-
   NSMutableArray *classificationResults =
       [NSMutableArray arrayWithCapacity:cppClassificationResults.size()];
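Note: a sketch of why the helper above needs both branches. In `.audioStream` mode the
task emits one classification proto per result packet, while in `.audioClips` mode a
single packet carries the whole vector of timestamped results. The alias and proto header
below are assumptions chosen to match the naming in the helper:

    #include <vector>
    #include "mediapipe/framework/packet.h"
    #include "mediapipe/tasks/cc/components/containers/proto/classifications.pb.h"

    using ClassificationResultProto =
        ::mediapipe::tasks::components::containers::proto::ClassificationResult;

    // Stream mode: the payload is a single proto (one result packet per callback).
    mediapipe::Packet streamPacket = mediapipe::MakePacket<ClassificationResultProto>();
    // Clips mode: one packet carries every timestamped result for the clip.
    mediapipe::Packet clipsPacket =
        mediapipe::MakePacket<std::vector<ClassificationResultProto>>();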
From b8571f1d60ede10672f40e00ad697c6397cff0b4 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Mon, 23 Sep 2024 21:07:40 +0530
Subject: [PATCH 3/8] Added close method to iOS audio classifier

---
 .../sources/MPPAudioClassifier.h              | 19 ++++++++++++++++++-
 .../sources/MPPAudioClassifier.mm             |  4 ++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
index a33e75644f..18d3348029 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
+++ b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
@@ -146,6 +146,22 @@ NS_SWIFT_NAME(AudioClassifier)
                     error:(NSError **)error
     NS_SWIFT_NAME(classifyAsync(audioBlock:timestampInMilliseconds:));
 
+/**
+ * Closes and cleans up the MediaPipe audio task.
+ *
+ * For tasks initialized with `.audioStream` mode, ensure that this method is called after all audio
+ * blocks in an audio stream are sent for inference using
+ * `classifyAsync(audioBlock:timestampInMilliseconds:)`. Otherwise, the task will not
+ * process the last audio block (of type `AudioData`) in the stream if its `bufferLength` is shorter
+ * than the model's input length. Once a task is closed, you cannot send any inference requests
+ * to the task. You must create a new instance of the task to send any pending requests. Ensure that
+ * you are ready to dispose off the task before this method is invoked.
+ *
+ * @return Returns successfully if the task was closed. Fails otherwise. Otherwise, throws an error
+ * indicating the reason for failure.
+ */
+- (BOOL)closeWithError:(NSError **)error;
+
 - (instancetype)init NS_UNAVAILABLE;
 
 /**
@@ -169,7 +185,8 @@ NS_SWIFT_NAME(AudioClassifier)
 + (MPPAudioRecord *)createAudioRecordWithChannelCount:(NSUInteger)channelCount
                                            sampleRate:(double)sampleRate
                                          bufferLength:(NSUInteger)bufferLength
-                                                error:(NSError **)error;
+                                                error:(NSError **)error
+    NS_SWIFT_NAME(createAudioRecord(channelCount:sampleRate:bufferLength:));
 
 + (instancetype)new NS_UNAVAILABLE;
 
diff --git a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.mm b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.mm
index 7977f03820..430f844bca 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.mm
+++ b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.mm
@@ -157,6 +157,10 @@ + (MPPAudioRecord *)createAudioRecordWithChannelCount:(NSUInteger)channelCount
                                                   error:error];
 }
 
+- (BOOL)closeWithError:(NSError **)error {
+  return [_audioTaskRunner closeWithError:error];
+}
+
 #pragma mark - Private
 
 - (void)processAudioStreamResult:(absl::StatusOr<PacketMap>)audioStreamResult {
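Note: a hedged usage sketch for the new `closeWithError:` API in stream mode. The model
path is illustrative, `initWithOptions:error:` follows the usual MediaPipe task
initializer pattern, and `self` is assumed to conform to the stream delegate protocol:

    MPPAudioClassifierOptions *options = [[MPPAudioClassifierOptions alloc] init];
    options.baseOptions.modelAssetPath = @"yamnet.tflite";  // hypothetical model path
    options.runningMode = MPPAudioRunningModeAudioStream;
    options.audioClassifierStreamDelegate = self;

    NSError *error;
    MPPAudioClassifier *classifier = [[MPPAudioClassifier alloc] initWithOptions:options
                                                                           error:&error];
    // ... send audio blocks via classifyAsyncAudioBlock:timestampInMilliseconds:error: ...

    // Flushes the last (possibly shorter-than-model-input) audio block, then shuts the
    // task down; the classifier cannot be reused afterwards.
    BOOL closed = [classifier closeWithError:&error];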
From ec67ba1523f47563b16419d5cea61d2cf34990c5 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Mon, 23 Sep 2024 21:08:55 +0530
Subject: [PATCH 4/8] Added iOS MPPAudioData test utils

---
 .../tasks/ios/test/audio/core/utils/BUILD     |  9 +++++
 .../utils/sources/MPPAudioData+TestUtils.h    | 38 +++++++++++++++++++
 .../utils/sources/MPPAudioData+TestUtils.m    | 31 +++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
 create mode 100644 mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.m

diff --git a/mediapipe/tasks/ios/test/audio/core/utils/BUILD b/mediapipe/tasks/ios/test/audio/core/utils/BUILD
index bf3622a8b0..cf40fee687 100644
--- a/mediapipe/tasks/ios/test/audio/core/utils/BUILD
+++ b/mediapipe/tasks/ios/test/audio/core/utils/BUILD
@@ -42,3 +42,12 @@ objc_library(
     ],
 )
 
+objc_library(
+    name = "MPPAudioDataTestUtils",
+    srcs = ["sources/MPPAudioData+TestUtils.m"],
+    hdrs = ["sources/MPPAudioData+TestUtils.h"],
+    deps = [
+        "//mediapipe/tasks/ios/audio/core:MPPAudioData",
+    ],
+)
+
diff --git a/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
new file mode 100644
index 0000000000..a8e214eaf3
--- /dev/null
+++ b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
@@ -0,0 +1,38 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import <Foundation/Foundation.h>
+#import "mediapipe/tasks/ios/audio/core/sources/MPPAudioData.h"
+
+NS_ASSUME_NONNULL_BEGIN
+
+/** Helper utility for initializing `MPPAudioData` for MediaPipe iOS audio library tests. */
+@interface MPPAudioData (TestUtils)
+
+/**
+ * Initializes an `MPPAudioData` from channel count, sample rate and sample count.
+ *
+ * @param channelCount Number of channels.
+ * @param channelCount Sample rate.
+ * @param channelCount Sample count.
+ *
+ * @return The `MPPAudioData` object with the specified channel count, sample rate and sample count.
+ */
+- (instancetype)initWithChannelCount:(NSUInteger)channelCount
+                          sampleRate:(double)sampleRate
+                         sampleCount:(NSUInteger)sampleCount;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.m b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.m
new file mode 100644
index 0000000000..dc36043c8f
--- /dev/null
+++ b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.m
@@ -0,0 +1,31 @@
+// Copyright 2024 The MediaPipe Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#import "mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h"
+
+@implementation MPPAudioData (TestUtils)
+
+- (instancetype)initWithChannelCount:(NSUInteger)channelCount
+                          sampleRate:(double)sampleRate
+                         sampleCount:(NSUInteger)sampleCount {
+  MPPAudioDataFormat *audioDataFormat =
+      [[MPPAudioDataFormat alloc] initWithChannelCount:channelCount sampleRate:sampleRate];
+
+  MPPAudioData *audioData = [[MPPAudioData alloc] initWithFormat:audioDataFormat
+                                                     sampleCount:sampleCount];
+
+  return audioData;
+}
+
+@end
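Note: the new category turns the two-step construction used by the tests into a
one-liner. A quick sketch of the call and the code it replaces (values are the Yamnet
constants used later in the series):

    // With the test util (the format object is created internally):
    MPPAudioData *audioData = [[MPPAudioData alloc] initWithChannelCount:1
                                                              sampleRate:16000.0
                                                             sampleCount:15600];

    // Without it, the format has to be built explicitly first:
    MPPAudioDataFormat *format = [[MPPAudioDataFormat alloc] initWithChannelCount:1
                                                                       sampleRate:16000.0];
    MPPAudioData *sameAudioData = [[MPPAudioData alloc] initWithFormat:format
                                                           sampleCount:15600];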
"mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.h" #import "mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioPCMBuffer+TestUtils.h" +#import "mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h" #import "mediapipe/tasks/ios/test/utils/sources/MPPFileInfo.h" static MPPFileInfo *const kYamnetModelFileInfo = @@ -35,6 +37,11 @@ static const NSInteger kYamnetCategoriesCount = 521; static const NSInteger kYamnetClassificationResultsCount = 5; +static const NSInteger kYamnetSampleCount = 15600; +static const double kYamnetSampleRate = 16000.0; +static const NSInteger kMillisecondsPerSeconds = 1000; +static const NSInteger kYamnetIntervalSizeInMilliseconds = + (NSInteger)((float)kYamnetSampleCount / kYamnetSampleRate * kMillisecondsPerSeconds); static NSString *const kYamnetModelHeadName = @"scores"; static NSString *const kTwoHeadsModelYamnetHeadName = @"yamnet_classification"; static NSString *const kTwoHeadsModelBirdClassificationHeadName = @"bird_classification"; @@ -49,6 +56,8 @@ }; static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks"; +static NSString *const kAudioStreamTestsDictClassifierKey = @"audioClassifier"; +static NSString *const kAudioStreamTestsDictExpectationKey = @"expectation"; #define AssertEqualErrors(error, expectedError) \ XCTAssertNotNil(error); \ @@ -64,7 +73,11 @@ XCTAssertEqualObjects(category.displayName, expectedCategory.displayName, @"index i = %d", \ categoryIndex); -@interface MPPAudioClassifierTests : XCTestCase +@interface MPPAudioClassifierTests : XCTestCase { + NSDictionary *_16kHZAudioStreamSucceedsTestDict; + NSDictionary *_48kHZAudioStreamSucceedsTestDict; + NSDictionary *_outOfOrderTimestampTestDict; +} @end @implementation MPPAudioClassifierTests @@ -267,9 +280,9 @@ - (void)testClassifyWithInsufficientDataSucceeds { const NSInteger channelCount = 1; const NSInteger expectedClassificationResultsCount = 1; - MPPAudioDataFormat *format = [[MPPAudioDataFormat alloc] initWithChannelCount:channelCount - sampleRate:sampleRate]; - MPPAudioData *audioData = [[MPPAudioData alloc] initWithFormat:format sampleCount:sampleCount]; + MPPAudioData *audioData = [[MPPAudioData alloc] initWithChannelCount:channelCount + sampleRate:sampleRate + sampleCount:sampleCount]; MPPAudioClassifierResult *result = [audioClassifier classifyAudioClip:audioData error:nil]; XCTAssertNotNil(result); @@ -281,6 +294,44 @@ - (void)testClassifyWithInsufficientDataSucceeds { expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo]; } +- (void)testClassifyAfterCloseFailsInAudioClipsMode { + MPPAudioClassifier *audioClassifier = + [[MPPAudioClassifier alloc] initWithModelPath:kYamnetModelFileInfo.path error:nil]; + XCTAssertNotNil(audioClassifier); + + // Classify 48KHz speech file. 
@@ -49,6 +56,8 @@
 };
 
 static NSString *const kExpectedErrorDomain = @"com.google.mediapipe.tasks";
+static NSString *const kAudioStreamTestsDictClassifierKey = @"audioClassifier";
+static NSString *const kAudioStreamTestsDictExpectationKey = @"expectation";
 
 #define AssertEqualErrors(error, expectedError) \
   XCTAssertNotNil(error);                       \
@@ -64,7 +73,11 @@
   XCTAssertEqualObjects(category.displayName, expectedCategory.displayName, @"index i = %d", \
                         categoryIndex);
 
-@interface MPPAudioClassifierTests : XCTestCase
+@interface MPPAudioClassifierTests : XCTestCase {
+  NSDictionary *_16kHZAudioStreamSucceedsTestDict;
+  NSDictionary *_48kHZAudioStreamSucceedsTestDict;
+  NSDictionary *_outOfOrderTimestampTestDict;
+}
 @end
 
 @implementation MPPAudioClassifierTests
@@ -267,9 +280,9 @@ - (void)testClassifyWithInsufficientDataSucceeds {
   const NSInteger channelCount = 1;
   const NSInteger expectedClassificationResultsCount = 1;
 
-  MPPAudioDataFormat *format = [[MPPAudioDataFormat alloc] initWithChannelCount:channelCount
-                                                                      sampleRate:sampleRate];
-  MPPAudioData *audioData = [[MPPAudioData alloc] initWithFormat:format sampleCount:sampleCount];
+  MPPAudioData *audioData = [[MPPAudioData alloc] initWithChannelCount:channelCount
+                                                            sampleRate:sampleRate
+                                                           sampleCount:sampleCount];
 
   MPPAudioClassifierResult *result = [audioClassifier classifyAudioClip:audioData error:nil];
   XCTAssertNotNil(result);
@@ -281,6 +294,44 @@ - (void)testClassifyWithInsufficientDataSucceeds {
                 expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo];
 }
 
+- (void)testClassifyAfterCloseFailsInAudioClipsMode {
+  MPPAudioClassifier *audioClassifier =
+      [[MPPAudioClassifier alloc] initWithModelPath:kYamnetModelFileInfo.path error:nil];
+  XCTAssertNotNil(audioClassifier);
+
+  // Classify 48KHz speech file.
+  [MPPAudioClassifierTests
+      assertResultsOfClassifyAudioClipWithFileInfo:kSpeech48KHzMonoFileInfo
+                              usingAudioClassifier:audioClassifier
+  approximatelyEqualsExpectedAudioClassifierResult:[MPPAudioClassifierTests
+                                                       expectedPartialYamnetResult]
+                expectedClassificationResultsCount:kYamnetClassificationResultsCount
+      expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo];
+
+  NSError *closeError;
+  XCTAssertTrue([audioClassifier closeWithError:&closeError]);
+  XCTAssertNil(closeError);
+
+  const NSInteger channelCount = 1;
+  MPPAudioData *audioData = [[MPPAudioData alloc] initWithChannelCount:channelCount
+                                                            sampleRate:kYamnetSampleRate
+                                                           sampleCount:kYamnetSampleCount];
+
+  NSError *classifyError;
+
+  [audioClassifier classifyAudioClip:audioData error:&classifyError];
+
+  NSError *expectedClassifyError = [NSError
+      errorWithDomain:kExpectedErrorDomain
+                 code:MPPTasksErrorCodeInvalidArgumentError
+             userInfo:@{
+               NSLocalizedDescriptionKey : [NSString
+                   stringWithFormat:@"INVALID_ARGUMENT: Task runner is currently not running."]
+             }];
+
+  AssertEqualErrors(classifyError, expectedClassifyError);
+}
+
 - (void)testCreateAudioClassifierFailsWithDelegateInAudioClipsMode {
   MPPAudioClassifierOptions *options =
       [MPPAudioClassifierTests audioClassifierOptionsWithModelFileInfo:kYamnetModelFileInfo];
@@ -353,6 +404,121 @@ - (void)testClassifyFailsWithCallingWrongApiInAudioStreamMode {
   AssertEqualErrors(error, expectedError);
 }
 
+- (void)testClassifyWithAudioStreamModeAndOutOfOrderTimestampsFails {
+  MPPAudioClassifier *audioClassifier =
+      [self audioClassifierInStreamModeWithModelFileInfo:kYamnetModelFileInfo];
+  NSArray *streamedAudioDataList =
+      [MPPAudioClassifierTests streamedAudioDataListforYamnet];
+
+  XCTestExpectation *expectation = [[XCTestExpectation alloc]
+      initWithDescription:@"classifyWithOutOfOrderTimestampsAndLiveStream"];
+  expectation.expectedFulfillmentCount = 1;
+
+  _outOfOrderTimestampTestDict = @{
+    kAudioStreamTestsDictClassifierKey : audioClassifier,
+    kAudioStreamTestsDictExpectationKey : expectation
+  };
+
+  // Can safely access indices 1 and 0 since `streamedAudioDataList` count is already asserted.
+  XCTAssertTrue([audioClassifier
+      classifyAsyncAudioBlock:streamedAudioDataList[1].audioData
+      timestampInMilliseconds:streamedAudioDataList[1].timestampInMilliseconds
+                        error:nil]);
+
+  NSError *error;
+  XCTAssertFalse([audioClassifier
+      classifyAsyncAudioBlock:streamedAudioDataList[0].audioData
+      timestampInMilliseconds:streamedAudioDataList[0].timestampInMilliseconds
+                        error:&error]);
+
+  NSError *expectedError =
+      [NSError errorWithDomain:kExpectedErrorDomain
+                          code:MPPTasksErrorCodeInvalidArgumentError
+                      userInfo:@{
+                        NSLocalizedDescriptionKey :
+                            @"INVALID_ARGUMENT: Input timestamp must be monotonically increasing."
+                      }];
+  AssertEqualErrors(error, expectedError);
+
+  [audioClassifier closeWithError:nil];
+
+  NSTimeInterval timeout = 1.0f;
+  [self waitForExpectations:@[ expectation ] timeout:timeout];
+}
+
+- (void)testClassifyWithAudioStreamModeSucceeds {
+  [self testClassifyUsingYamnetAsyncAudioFileWithInfo:kSpeech16KHzMonoFileInfo
+                                                 info:&_16kHZAudioStreamSucceedsTestDict];
+  [self testClassifyUsingYamnetAsyncAudioFileWithInfo:kSpeech48KHzMonoFileInfo
+                                                 info:&_48kHZAudioStreamSucceedsTestDict];
+}
+
+// info is strong here since the address of instance variables will be passed to this function. By default
+// `NSDictionary **` will be `NSDictionary * __autoreleasing *`.
+- (void)testClassifyUsingYamnetAsyncAudioFileWithInfo:(MPPFileInfo *)audioFileInfo + info:(NSDictionary *__strong *) + info { + MPPAudioClassifier *audioClassifier = + [self audioClassifierInStreamModeWithModelFileInfo:kYamnetModelFileInfo]; + + NSArray *streamedAudioDataList = + [MPPAudioClassifierTests streamedAudioDataListforYamnet]; + + XCTestExpectation *expectation = [[XCTestExpectation alloc] + initWithDescription:[NSString + stringWithFormat:@"classifyWithStreamMode_%@", audioFileInfo.name]]; + expectation.expectedFulfillmentCount = streamedAudioDataList.count; + + *info = @{ + kAudioStreamTestsDictClassifierKey : audioClassifier, + kAudioStreamTestsDictExpectationKey : expectation + }; + + for (MPPTimestampedAudioData *timestampedAudioData in streamedAudioDataList) { + XCTAssertTrue([audioClassifier + classifyAsyncAudioBlock:timestampedAudioData.audioData + timestampInMilliseconds:timestampedAudioData.timestampInMilliseconds + error:nil]); + } + + [audioClassifier closeWithError:nil]; + + NSTimeInterval timeout = 1.0f; + [self waitForExpectations:@[ expectation ] timeout:timeout]; +} + +- (void)audioClassifier:(MPPAudioClassifier *)audioClassifier + didFinishClassificationWithResult:(MPPAudioClassifierResult *)result + timestampInMilliseconds:(NSInteger)timestampInMilliseconds + error:(NSError *)error { + // Can safely test for yamnet results before `audioClassifier` object tests since only yamnet with + // 16khz and 48khz speech files are used for async tests. + + // Returns a `nil` `expectedResult` for the last timestamp to prevent the result from being + // tested. + MPPAudioClassifierResult *expectedResult = [MPPAudioClassifierTests + expectedPartialYamnetResultWithTimestampInMilliseconds:timestampInMilliseconds + isStreamMode:YES]; + + // `expectedResult` will be `nil` for last timestamp since we are not testing for it. + if (expectedResult) { + [MPPAudioClassifierTests assertAudioClassifierResult:result + approximatelyEqualToExpectedAudioClassifierResult:expectedResult + expectedClassificationResultsCount:1 + expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo]; + } + + if (audioClassifier == _outOfOrderTimestampTestDict[kAudioStreamTestsDictClassifierKey]) { + [_outOfOrderTimestampTestDict[kAudioStreamTestsDictExpectationKey] fulfill]; + } else if (audioClassifier == + _16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) { + [_16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill]; + } else if (audioClassifier == + _48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) { + [_48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill]; + } +} + #pragma mark Audio Data Initializers + (MPPAudioData *)audioDataFromAudioFileWithInfo:(MPPFileInfo *)fileInfo { @@ -366,21 +532,52 @@ + (MPPAudioData *)audioDataFromAudioFileWithInfo:(MPPFileInfo *)fileInfo { MPPFloatBuffer *bufferData = [[MPPFloatBuffer alloc] initWithData:buffer.floatChannelData[0] length:buffer.frameLength]; - // Create the audio data with the same format as the `AVAudioPCMBuffer`. + MPPAudioData *audioData = [[MPPAudioData alloc] initWithChannelCount:buffer.format.channelCount + sampleRate:buffer.format.sampleRate + sampleCount:buffer.frameLength]; + + // Load all the samples in the audio file to the newly created audio data. 
+  [audioData loadBuffer:bufferData offset:0 length:bufferData.length error:nil];
+  return audioData;
+}
+
++ (MPPAudioData *)audioDataWithChannelCount:(NSUInteger)channelCount
+                                 sampleRate:(double)sampleRate
+                                sampleCount:(NSUInteger)sampleCount {
   MPPAudioDataFormat *audioDataFormat =
-      [[MPPAudioDataFormat alloc] initWithChannelCount:buffer.format.channelCount
-                                             sampleRate:buffer.format.sampleRate];
+      [[MPPAudioDataFormat alloc] initWithChannelCount:channelCount sampleRate:sampleRate];
 
   MPPAudioData *audioData = [[MPPAudioData alloc] initWithFormat:audioDataFormat
-                                                     sampleCount:buffer.frameLength];
+                                                     sampleCount:sampleCount];
 
-  // Load all the samples in the audio file to the newly created audio data.
-  [audioData loadBuffer:bufferData offset:0 length:bufferData.length error:nil];
   return audioData;
 }
 
++ (NSArray *)streamedAudioDataListforYamnet {
+  NSArray *streamedAudioDataList =
+      [AVAudioFile streamedAudioBlocksFromAudioFileWithInfo:kSpeech16KHzMonoFileInfo
+                                           modelSampleCount:kYamnetSampleCount
+                                            modelSampleRate:kYamnetSampleRate];
+
+  XCTAssertEqual(streamedAudioDataList.count, 5);
+
+  return streamedAudioDataList;
+}
+
 #pragma mark Audio Classifier Initializers
 
+- (MPPAudioClassifier *)audioClassifierInStreamModeWithModelFileInfo:(MPPFileInfo *)fileInfo {
+  MPPAudioClassifierOptions *options =
+      [MPPAudioClassifierTests audioClassifierOptionsWithModelFileInfo:kYamnetModelFileInfo];
+  options.runningMode = MPPAudioRunningModeAudioStream;
+  options.audioClassifierStreamDelegate = self;
+
+  MPPAudioClassifier *audioClassifier =
+      [MPPAudioClassifierTests audioClassifierWithOptions:options];
+
+  return audioClassifier;
+}
+
 + (MPPAudioClassifierOptions *)audioClassifierOptionsWithModelFileInfo:
     (MPPFileInfo *)modelFileInfo {
   MPPAudioClassifierOptions *options = [[MPPAudioClassifierOptions alloc] init];
@@ -523,15 +720,34 @@ + (void)assertClassificationHead:(MPPClassifications *)classifications
 }
 
 + (MPPAudioClassifierResult *)expectedPartialYamnetResult {
-  return [MPPAudioClassifierTests expectedPartialYamnetResultWithTimestampInMilliseconds:0];
+  return [MPPAudioClassifierTests expectedPartialYamnetResultWithTimestampInMilliseconds:0
+                                                                            isStreamMode:NO];
 }
 
 // Returns only one top category for each classification head.
 // Last classification result (timestamped result) is omitted because it varies between test
 // runs due to the low confidence score. Ensure that the subset of classification results in the
 // predicted audio classifier result is compared with the expected result returned from this method.
-+ (MPPAudioClassifierResult *)expectedPartialYamnetResultWithTimestampInMilliseconds:
-    (NSInteger)timestampInMilliseconds {
+// If `isStreamMode` is set, the returned result will only have the `classificationResult` for the
+// given `timestampInMilliseconds`.
++ (MPPAudioClassifierResult *)
+    expectedPartialYamnetResultWithTimestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                              isStreamMode:(BOOL)isStreamMode {
+  const NSInteger maxTimestampToCompare = 2925;
+  const NSInteger minTimestampToCompare = 0;
+
+  // Last timestamp and any other illegal values of timestamp are not allowed to pass through.
+  if (timestampInMilliseconds > maxTimestampToCompare ||
+      timestampInMilliseconds < minTimestampToCompare ||
+      timestampInMilliseconds % kYamnetIntervalSizeInMilliseconds != 0) {
+    return nil;
+  }
+
+  // Only one of the classification results corresponding to the given timestamp is to be returned
+  // as the expected result for stream mode. Calculate the index of the `classificationResult` to
+  // be returned based on the timestamp and the input size of the Yamnet model in milliseconds.
+  NSInteger index = timestampInMilliseconds / kYamnetIntervalSizeInMilliseconds;
+
   NSArray *classificationResults = @[
     [[MPPClassificationResult alloc] initWithClassifications:@[
       [[MPPClassifications alloc] initWithHeadIndex:0

 ];

-  return [[MPPAudioClassifierResult alloc] initWithClassificationResults:classificationResults
-                                                  timestampInMilliseconds:timestampInMilliseconds];
+  // In stream mode, only the one classification result corresponding to the requested timestamp is
+  // returned. In clips mode, the full array of classification results is returned.
+  return [[MPPAudioClassifierResult alloc]
+      initWithClassificationResults:isStreamMode ? @[ classificationResults[index] ]
+                                  : classificationResults
+            timestampInMilliseconds:timestampInMilliseconds];
 }
 
 + (MPPAudioClassifierResult *)expectedPartial44kHzTwoHeadsResult {
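Note: the timestamp bookkeeping in this patch follows directly from the Yamnet constants
it introduces. One input block is kYamnetSampleCount / kYamnetSampleRate = 15600 / 16000
= 0.975 s, so kYamnetIntervalSizeInMilliseconds evaluates to 975. The five streamed
blocks asserted by `streamedAudioDataListforYamnet` therefore carry timestamps 0, 975,
1950, 2925 and 3900 ms; `maxTimestampToCompare` (2925 = 3 * 975) excludes the final,
low-confidence block, and the expected-result lookup reduces to:

    // Maps timestamps 0, 975, 1950 and 2925 to classification result indices 0 to 3.
    NSInteger index = timestampInMilliseconds / kYamnetIntervalSizeInMilliseconds;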
From 75880c00346fdfdc0730bcffd20f940159bbe485 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Mon, 23 Sep 2024 21:15:27 +0530
Subject: [PATCH 6/8] Updated comments in AVAudioFile+TestUtils

---
 .../ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m b/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
index 4b2f75af04..6992a7cb5c 100644
--- a/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
+++ b/mediapipe/tasks/ios/test/audio/core/utils/sources/AVAudioFile+TestUtils.m
@@ -63,7 +63,8 @@ @implementation AVAudioFile (TestUtils)
 
   MPPAudioData *audioData = [[MPPAudioData alloc] initWithFormat:audioDataFormat
                                    sampleCount:lengthToBeLoaded / audioDataFormat.channelCount];
-
+  // Can safely access `floatChannelData[0]` since the input file is expected to have at least 1
+  // channel.
   MPPFloatBuffer *floatBuffer =
       [[MPPFloatBuffer alloc] initWithData:audioPCMBuffer.floatChannelData[0] + currentPosition
                                     length:lengthToBeLoaded];
From 4d9cc0f94a5058ba0ecc9578a57fd0758221447d Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Tue, 24 Sep 2024 02:10:38 +0530
Subject: [PATCH 7/8] Fixed typos in iOS audio classifier

---
 .../sources/MPPAudioClassifier.h                 | 16 ++++++++--------
 .../core/utils/sources/MPPAudioData+TestUtils.h  |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
index 18d3348029..21e7b92b70 100644
--- a/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
+++ b/mediapipe/tasks/ios/audio/audio_classifier/sources/MPPAudioClassifier.h
@@ -147,17 +147,17 @@ NS_SWIFT_NAME(AudioClassifier)
     NS_SWIFT_NAME(classifyAsync(audioBlock:timestampInMilliseconds:));
 
 /**
- * Closes and cleans up the MediaPipe audio task.
+ * Closes and cleans up the MediaPipe audio classifier.
  *
- * For tasks initialized with `.audioStream` mode, ensure that this method is called after all audio
- * blocks in an audio stream are sent for inference using
- * `classifyAsync(audioBlock:timestampInMilliseconds:)`. Otherwise, the task will not
+ * For audio classifiers initialized with `.audioStream` mode, ensure that this method is called
+ * after all audio blocks in an audio stream are sent for inference using
+ * `classifyAsync(audioBlock:timestampInMilliseconds:)`. Otherwise, the audio classifier will not
  * process the last audio block (of type `AudioData`) in the stream if its `bufferLength` is shorter
- * than the model's input length. Once a task is closed, you cannot send any inference requests
- * to the task. You must create a new instance of the task to send any pending requests. Ensure that
- * you are ready to dispose off the task before this method is invoked.
+ * than the model's input length. Once an audio classifier is closed, you cannot send any inference
+ * requests to it. You must create a new instance of `AudioClassifier` to send any pending requests.
+ * Ensure that you are ready to dispose of the audio classifier before this method is invoked.
  *
- * @return Returns successfully if the task was closed. Fails otherwise. Otherwise, throws an error
+ * @return Returns successfully if the task was closed. Otherwise, throws an error
 * indicating the reason for failure.
 */
- (BOOL)closeWithError:(NSError **)error;
diff --git a/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
index a8e214eaf3..a646438454 100644
--- a/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
+++ b/mediapipe/tasks/ios/test/audio/core/utils/sources/MPPAudioData+TestUtils.h
@@ -24,8 +24,8 @@ NS_ASSUME_NONNULL_BEGIN
  * Initializes an `MPPAudioData` from channel count, sample rate and sample count.
  *
  * @param channelCount Number of channels.
- * @param channelCount Sample rate.
- * @param channelCount Sample count.
+ * @param sampleRate Sample rate.
+ * @param sampleCount Sample count.
  *
  * @return The `MPPAudioData` object with the specified channel count, sample rate and sample count.
  */
From 63bb10d7ecb502670d245579fdfc492a4aa79c55 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat
Date: Tue, 24 Sep 2024 19:21:30 +0530
Subject: [PATCH 8/8] Changed the name of iOS audio classifier async test helper

---
 .../MPPAudioClassifierTests.mm                | 68 ++++++++++---------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/mediapipe/tasks/ios/test/audio/audio_classifier/MPPAudioClassifierTests.mm b/mediapipe/tasks/ios/test/audio/audio_classifier/MPPAudioClassifierTests.mm
index 8d8129f9c0..e4048bd2f2 100644
--- a/mediapipe/tasks/ios/test/audio/audio_classifier/MPPAudioClassifierTests.mm
+++ b/mediapipe/tasks/ios/test/audio/audio_classifier/MPPAudioClassifierTests.mm
@@ -447,9 +447,43 @@ - (void)testClassifyWithAudioStreamModeSucceeds {
                                                  info:&_48kHZAudioStreamSucceedsTestDict];
 }
 
+- (void)audioClassifier:(MPPAudioClassifier *)audioClassifier
+    didFinishClassificationWithResult:(MPPAudioClassifierResult *)result
+              timestampInMilliseconds:(NSInteger)timestampInMilliseconds
+                                error:(NSError *)error {
+  // Can safely test for yamnet results before `audioClassifier` object tests since only yamnet with
+  // 16khz and 48khz speech files are used for async tests.
+
+  // Returns a `nil` `expectedResult` for the last timestamp to prevent the result from being
+  // tested.
+  MPPAudioClassifierResult *expectedResult = [MPPAudioClassifierTests
+      expectedPartialYamnetResultWithTimestampInMilliseconds:timestampInMilliseconds
+                                                isStreamMode:YES];
+
+  // `expectedResult` will be `nil` for last timestamp since we are not testing for it.
+  if (expectedResult) {
+    [MPPAudioClassifierTests assertAudioClassifierResult:result
+        approximatelyEqualToExpectedAudioClassifierResult:expectedResult
+                       expectedClassificationResultsCount:1
+             expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo];
+  }
+
+  if (audioClassifier == _outOfOrderTimestampTestDict[kAudioStreamTestsDictClassifierKey]) {
+    [_outOfOrderTimestampTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
+  } else if (audioClassifier ==
+             _16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) {
+    [_16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
+  } else if (audioClassifier ==
+             _48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) {
+    [_48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
+  }
+}
+
+#pragma mark Audio Stream Mode Test Helpers
+
 // info is strong here since the address of instance variables will be passed to this function. By default
 // `NSDictionary **` will be `NSDictionary * __autoreleasing *`.
-- (void)testClassifyUsingYamnetAsyncAudioFileWithInfo:(MPPFileInfo *)audioFileInfo
+- (void)classifyUsingYamnetAsyncAudioFileWithInfo:(MPPFileInfo *)audioFileInfo
                                              info:(NSDictionary *__strong *)
                                                       info {
   MPPAudioClassifier *audioClassifier =
       [self audioClassifierInStreamModeWithModelFileInfo:kYamnetModelFileInfo];
 
   NSArray *streamedAudioDataList =
       [MPPAudioClassifierTests streamedAudioDataListforYamnet];
 
   XCTestExpectation *expectation = [[XCTestExpectation alloc]
       initWithDescription:[NSString
                               stringWithFormat:@"classifyWithStreamMode_%@", audioFileInfo.name]];
   expectation.expectedFulfillmentCount = streamedAudioDataList.count;
 
   *info = @{
     kAudioStreamTestsDictClassifierKey : audioClassifier,
     kAudioStreamTestsDictExpectationKey : expectation
   };
 
   for (MPPTimestampedAudioData *timestampedAudioData in streamedAudioDataList) {
     XCTAssertTrue([audioClassifier
         classifyAsyncAudioBlock:timestampedAudioData.audioData
         timestampInMilliseconds:timestampedAudioData.timestampInMilliseconds
                           error:nil]);
   }
 
   [audioClassifier closeWithError:nil];
 
   NSTimeInterval timeout = 1.0f;
   [self waitForExpectations:@[ expectation ] timeout:timeout];
 }
 
-- (void)audioClassifier:(MPPAudioClassifier *)audioClassifier
-    didFinishClassificationWithResult:(MPPAudioClassifierResult *)result
-              timestampInMilliseconds:(NSInteger)timestampInMilliseconds
-                                error:(NSError *)error {
-  // Can safely test for yamnet results before `audioClassifier` object tests since only yamnet with
-  // 16khz and 48khz speech files are used for async tests.
-
-  // Returns a `nil` `expectedResult` for the last timestamp to prevent the result from being
-  // tested.
-  MPPAudioClassifierResult *expectedResult = [MPPAudioClassifierTests
-      expectedPartialYamnetResultWithTimestampInMilliseconds:timestampInMilliseconds
-                                                isStreamMode:YES];
-
-  // `expectedResult` will be `nil` for last timestamp since we are not testing for it.
-  if (expectedResult) {
-    [MPPAudioClassifierTests assertAudioClassifierResult:result
-        approximatelyEqualToExpectedAudioClassifierResult:expectedResult
-                       expectedClassificationResultsCount:1
-             expectedClassificationHeadsCategoryCountInfo:kYamnetModelHeadsInfo];
-  }
-
-  if (audioClassifier == _outOfOrderTimestampTestDict[kAudioStreamTestsDictClassifierKey]) {
-    [_outOfOrderTimestampTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
-  } else if (audioClassifier ==
-             _16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) {
-    [_16kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
-  } else if (audioClassifier ==
-             _48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictClassifierKey]) {
-    [_48kHZAudioStreamSucceedsTestDict[kAudioStreamTestsDictExpectationKey] fulfill];
-  }
-}
-
 #pragma mark Audio Data Initializers
 
 + (MPPAudioData *)audioDataFromAudioFileWithInfo:(MPPFileInfo *)fileInfo {
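Note on the final rename: XCTest auto-discovers and runs only zero-argument instance
methods whose names start with `test`, so the parameterized helper was never executed as
a test case on its own, but the old name made it read like one. Dropping the prefix keeps
the convention unambiguous; a sketch of the resulting naming split:

    - (void)testClassifyWithAudioStreamModeSucceeds;  // discovered and run by XCTest
    - (void)classifyUsingYamnetAsyncAudioFileWithInfo:(MPPFileInfo *)audioFileInfo
                                                 info:(NSDictionary *__strong *)info;  // helper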