From 3a603b028aeccd39a66f21df57812538337945ff Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat <prianka.kariat@codeandtheory.com>
Date: Thu, 19 Sep 2024 11:58:07 +0530
Subject: [PATCH 1/4] Fixed leading spaces trimming in iOS Mediapipe
 LlmInference

---
 .../ios/genai/inference/sources/LlmInference+Session.swift     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
index 678fc846ee..32b7caec6d 100644
--- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
+++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
@@ -291,7 +291,8 @@ extension String {
       .replacingOccurrences(of: String.newLine, with: "\n")
     humanReadableString =
       stripLeadingWhitespaces
-      ? humanReadableString.trimmingCharacters(in: .whitespaces) : humanReadableString
+      ? String(humanReadableString.drop(while: { $0.isWhitespace || $0.isNewline }))
+      : humanReadableString
     return humanReadableString.components(separatedBy: String.eod).first
   }
 }

From d1e23182a8717eeeca0b8ee4420697adec409643 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat <prianka.kariat@codeandtheory.com>
Date: Thu, 19 Sep 2024 11:58:32 +0530
Subject: [PATCH 2/4] Added new options to iOS LlmInference

---
 .../tasks/ios/genai/inference/sources/LlmInference.swift    | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift
index 55ac6ddfcf..62cfd03e0f 100644
--- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift
+++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift
@@ -66,7 +66,7 @@ import MediaPipeTasksGenAIC
             supported_lora_ranks: supportedLoraRanks.baseAddress,
             max_top_k: options.maxTopk,
             llm_activation_data_type: options.activationDataType.activationDataTypeC,
-            num_draft_tokens: 0)
+            num_draft_tokens: options.draftTokenCount)
           return try LlmTaskRunner(modelSettings: modelSetting)
         }
       }
@@ -224,6 +224,10 @@ extension LlmInference {
     /// The activation data type for the model.
     @objc public var activationDataType: ActivationDataType = .default
 
+    /// Number of draft tokens to generate when using speculative decoding. Setting this to 0
+    /// disables speculative decoding.
+    @objc public var draftTokenCount: Int = 0
+
     /// Creates a new instance of `Options` with the given `modelPath` and default values of
     /// `maxTokens`, `maxTopk`, `supportedLoraRanks` and `activationDataType`.
     /// This function is only intended to be used from Objective C.

From 1b088caabefd0ca5b6bfd6dd625dd064014678c8 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat <prianka.kariat@codeandtheory.com>
Date: Thu, 19 Sep 2024 14:27:17 +0530
Subject: [PATCH 3/4] Fixed leading white space stripping in iOS
 LlmInference+Session

---
 .../inference/sources/LlmInference+Session.swift     | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
index 32b7caec6d..b922eaca64 100644
--- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
+++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
@@ -171,8 +171,14 @@ extension LlmInference {
             return
           }
 
-          /// Reset state after first response is processed.
-          receivedFirstToken = false
+          /// Reset state after the first non-empty response is processed, so that leading
+          /// whitespace is no longer stripped from subsequent responses.
+          /// Some models generate a series of empty responses at the beginning before
+          /// generating a valid response. Ensures that leading white spaces are stripped from the 
+          /// first non empty response.
+          if !humanReadableLlmResponse.isEmpty {
+            receivedFirstToken = false
+          }
 
           progress(humanReadableLlmResponse, nil)
         },
@@ -291,7 +297,7 @@ extension String {
       .replacingOccurrences(of: String.newLine, with: "\n")
     humanReadableString =
       stripLeadingWhitespaces
-      ? String(humanReadableString.drop(while: { $0.isWhitespace || $0.isNewline }))
+      ? String(humanReadableString.drop(while: {$0.isWhitespace}))
       : humanReadableString
     return humanReadableString.components(separatedBy: String.eod).first
   }

From f50f2913dae50e0198d0de4b729699341d8e1fc2 Mon Sep 17 00:00:00 2001
From: Prianka Liz Kariat <prianka.kariat@codeandtheory.com>
Date: Thu, 19 Sep 2024 15:01:26 +0530
Subject: [PATCH 4/4] Made condition check for leading whitespaces synonymous
 with variable name in iOS LlmInference

---
 .../ios/genai/inference/sources/LlmInference+Session.swift  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
index b922eaca64..01d0af256e 100644
--- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
+++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference+Session.swift
@@ -159,13 +159,13 @@ extension LlmInference {
       try llmInference.shouldContinueWithResponseGeneration()
 
       /// Used to make a decision about whitespace stripping.
-      var receivedFirstToken = true
+      var receivedFirstNonEmptyToken = false
 
       llmSessionRunner.predictAsync(
         progress: { partialResponseStrings, error in
           guard let responseStrings = partialResponseStrings,
             let humanReadableLlmResponse = Session.humanReadableString(
-              llmResponses: responseStrings, stripLeadingWhitespaces: receivedFirstToken)
+              llmResponses: responseStrings, stripLeadingWhitespaces: !receivedFirstNonEmptyToken)
           else {
             progress(nil, GenAiInferenceError.invalidResponse)
             return
@@ -177,7 +177,7 @@ extension LlmInference {
           /// generating a valid response. Ensures that leading white spaces are stripped from the 
           /// first non empty response.
           if !humanReadableLlmResponse.isEmpty {
-            receivedFirstToken = false
+            receivedFirstNonEmptyToken = true
           }
 
           progress(humanReadableLlmResponse, nil)