From 3a603b028aeccd39a66f21df57812538337945ff Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Sep 2024 11:58:07 +0530 Subject: [PATCH 1/4] Fixed leading spaces trimming in iOS Mediapipe LlmInference --- .../ios/genai/inference/sources/LlmInference+Session.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift index 678fc846ee..32b7caec6d 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift @@ -291,7 +291,8 @@ extension String { .replacingOccurrences(of: String.newLine, with: "\n") humanReadableString = stripLeadingWhitespaces - ? humanReadableString.trimmingCharacters(in: .whitespaces) : humanReadableString + ? String(humanReadableString.drop(while: { $0.isWhitespace || $0.isNewline })) + : humanReadableString return humanReadableString.components(separatedBy: String.eod).first } } From d1e23182a8717eeeca0b8ee4420697adec409643 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Sep 2024 11:58:32 +0530 Subject: [PATCH 2/4] Added new options to iOS LlmInference --- .../tasks/ios/genai/inference/sources/LlmInference.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index 55ac6ddfcf..62cfd03e0f 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -66,7 +66,7 @@ import MediaPipeTasksGenAIC supported_lora_ranks: supportedLoraRanks.baseAddress, max_top_k: options.maxTopk, llm_activation_data_type: options.activationDataType.activationDataTypeC, - num_draft_tokens: 0) + num_draft_tokens: options.draftTokenCount) return try LlmTaskRunner(modelSettings: modelSetting) } } @@ -224,6 +224,10 @@ extension LlmInference { /// The activation data type for the model. @objc public var activationDataType: ActivationDataType = .default + /// Number of draft tokens to generate when using speculative decoding. Setting to 0 will + /// disable speculative decoding. + @objc public var draftTokenCount: Int = 0 + /// Creates a new instance of `Options` with the given `modelPath` and default values of /// `maxTokens`, `maxTopk`, `supportedLoraRanks` and `activationDataType`. /// This function is only intended to be used from Objective C. From 1b088caabefd0ca5b6bfd6dd625dd064014678c8 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Sep 2024 14:27:17 +0530 Subject: [PATCH 3/4] Fixed leading white space stripping in iOS LlmInference+Session --- .../inference/sources/LlmInference+Session.swift | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift index 32b7caec6d..b922eaca64 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift @@ -171,8 +171,14 @@ extension LlmInference { return } - /// Reset state after first response is processed. - receivedFirstToken = false + /// Reset state after first non empty response is processed. Ensures that leading + /// whitespaces are stripped from the first non empty response. + /// Some models generate series of empty responses for a few times in the beginning before + /// generating a valid response. Ensures that leading white spaces are stripped from the + /// first non empty response. + if !humanReadableLlmResponse.isEmpty { + receivedFirstToken = false + } progress(humanReadableLlmResponse, nil) }, @@ -291,7 +297,7 @@ extension String { .replacingOccurrences(of: String.newLine, with: "\n") humanReadableString = stripLeadingWhitespaces - ? String(humanReadableString.drop(while: { $0.isWhitespace || $0.isNewline })) + ? String(humanReadableString.drop(while: {$0.isWhitespace})) : humanReadableString return humanReadableString.components(separatedBy: String.eod).first } From f50f2913dae50e0198d0de4b729699341d8e1fc2 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Thu, 19 Sep 2024 15:01:26 +0530 Subject: [PATCH 4/4] Made condition check for leading whitespaces synonymous with variable name in iOS LlmInference --- .../ios/genai/inference/sources/LlmInference+Session.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift index b922eaca64..01d0af256e 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift @@ -159,13 +159,13 @@ extension LlmInference { try llmInference.shouldContinueWithResponseGeneration() /// Used to make a decision about whitespace stripping. - var receivedFirstToken = true + var receivedFirstNonEmptyToken = false llmSessionRunner.predictAsync( progress: { partialResponseStrings, error in guard let responseStrings = partialResponseStrings, let humanReadableLlmResponse = Session.humanReadableString( - llmResponses: responseStrings, stripLeadingWhitespaces: receivedFirstToken) + llmResponses: responseStrings, stripLeadingWhitespaces: !receivedFirstNonEmptyToken) else { progress(nil, GenAiInferenceError.invalidResponse) return @@ -177,7 +177,7 @@ extension LlmInference { /// generating a valid response. Ensures that leading white spaces are stripped from the /// first non empty response. if !humanReadableLlmResponse.isEmpty { - receivedFirstToken = false + receivedFirstNonEmptyToken = true } progress(humanReadableLlmResponse, nil)