feat: add queueStrategy option (#139)

* Implement support for the QUEUE_ADD strategy on Android
* Implement the queue strategy on iOS
* Update the documentation
* Apply review: update definitions.ts
* Fix the inversion between Add and Flush in the documentation
* Fix linting

---------

Co-authored-by: Enzo MENEGALDO <[email protected]>
capacitor-community · Nov 13, 2024 · cb3e92c · cb3e92c
1 parent 8ad38a5
commit cb3e92c
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 35 deletions.
diff --git a/README.md b/README.md
@@ -51,6 +51,7 @@ const speak = async () => {
     pitch: 1.0,
     volume: 1.0,
     category: 'ambient',
+    queueStrategy: 1 // QueueStrategy.Add: wait for earlier requests to finish
   });
 };
 
@@ -83,6 +84,7 @@ const isLanguageSupported = async (lang: string) => {
 * [`openInstall()`](#openinstall)
 * [`addListener('onRangeStart', ...)`](#addlisteneronrangestart)
 * [Interfaces](#interfaces)
+* [Enums](#enums)
 
 </docgen-index>
 
@@ -201,6 +203,7 @@ addListener(eventName: 'onRangeStart', listenerFunc: (info: { start: number; end
 | **`volume`**   | <code>number</code> | The volume that the utterance will be spoken at.                                                                                                                               | <code>1.0</code>       |
 | **`voice`**    | <code>number</code> | The index of the selected voice that will be used to speak the utterance. Possible voices can be queried using `getSupportedVoices`.                                           |                        |
 | **`category`** | <code>string</code> | Select the iOS Audio session category. Possible values: `ambient` and `playback`. Use `playback` to play audio even when the app is in the background. Only available for iOS. | <code>"ambient"</code> |
+| **`queueStrategy`** | <code><a href="#queuestrategy">QueueStrategy</a></code> | Select the strategy to adopt when several requests to speak overlap. Available since 5.1.0. | <code>QueueStrategy.Flush</code> |
 
 
 #### SpeechSynthesisVoice
@@ -222,6 +225,17 @@ The <a href="#speechsynthesisvoice">SpeechSynthesisVoice</a> interface represent
 | ------------ | ----------------------------------------- |
 | **`remove`** | <code>() =&gt; Promise&lt;void&gt;</code> |
 
+
+### Enums
+
+
+#### QueueStrategy
+
+| Members     | Value          | Description                                                                                                            |
+| ----------- | -------------- | ---------------------------------------------------------------------------------------------------------------------- |
+| **`Flush`** | <code>0</code> | Use `Flush` to stop the current request when a new request is sent.                                                  |
+| **`Add`**   | <code>1</code> | Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed. |
+
 </docgen-api>
 
 ## Changelog

diff --git a/android/src/main/java/com/getcapacitor/community/tts/SpeakResultCallback.java b/android/src/main/java/com/getcapacitor/community/tts/SpeakResultCallback.java
@@ -3,5 +3,5 @@
 public interface SpeakResultCallback {
     void onDone();
     void onError();
-    void onRangeStart(int start, int end, String spokenWord);
+    void onRangeStart(int start, int end);
 }
diff --git a/android/src/main/java/com/getcapacitor/community/tts/TextToSpeech.java b/android/src/main/java/com/getcapacitor/community/tts/TextToSpeech.java
@@ -15,6 +15,7 @@
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Locale;
+import java.util.Map;
 import java.util.Set;
 
 public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListener {
@@ -25,11 +26,44 @@ public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListe
     private android.speech.tts.TextToSpeech tts = null;
     private int initializationStatus;
     private JSObject[] supportedVoices = null;
+    private Map<String, SpeakResultCallback> requests = new HashMap();
 
     TextToSpeech(Context context) {
         this.context = context;
         try {
             tts = new android.speech.tts.TextToSpeech(context, this);
+            tts.setOnUtteranceProgressListener(
+                new UtteranceProgressListener() {
+                    @Override
+                    public void onStart(String utteranceId) {}
+
+                    @Override
+                    public void onDone(String utteranceId) {
+                        SpeakResultCallback callback = requests.get(utteranceId);
+                        if (callback != null) {
+                            callback.onDone();
+                            requests.remove(utteranceId);
+                        }
+                    }
+
+                    @Override
+                    public void onError(String utteranceId) {
+                        SpeakResultCallback callback = requests.get(utteranceId);
+                        if (callback != null) {
+                            callback.onError();
+                            requests.remove(utteranceId);
+                        }
+                    }
+
+                    @Override
+                    public void onRangeStart(String utteranceId, int start, int end, int frame) {
+                        SpeakResultCallback callback = requests.get(utteranceId);
+                        if (callback != null) {
+                            callback.onRangeStart(start, end);
+                        }
+                    }
+                }
+            );
         } catch (Exception ex) {
             Log.d(LOG_TAG, ex.getLocalizedMessage());
         }
@@ -50,29 +84,24 @@ public void speak(
         String callbackId,
         SpeakResultCallback resultCallback
     ) {
-        tts.stop();
-        tts.setOnUtteranceProgressListener(
-            new UtteranceProgressListener() {
-                @Override
-                public void onStart(String utteranceId) {}
-
-                @Override
-                public void onDone(String utteranceId) {
-                    resultCallback.onDone();
-                }
-
-                @Override
-                public void onError(String utteranceId) {
-                    resultCallback.onError();
-                }
+        speak(text, lang, rate, pitch, volume, voice, callbackId, resultCallback, android.speech.tts.TextToSpeech.QUEUE_FLUSH);
+    }
 
-                @Override
-                public void onRangeStart(String utteranceId, int start, int end, int frame) {
-                    String spokenWord = text.substring(start, end);
-                    resultCallback.onRangeStart(start, end, spokenWord);
-                }
-            }
-        );
+    public void speak(
+        String text,
+        String lang,
+        float rate,
+        float pitch,
+        float volume,
+        int voice,
+        String callbackId,
+        SpeakResultCallback resultCallback,
+        int queueStrategy
+    ) {
+        if (queueStrategy != android.speech.tts.TextToSpeech.QUEUE_ADD) {
+            stop();
+        }
+        requests.put(callbackId, resultCallback);
 
         Locale locale = Locale.forLanguageTag(lang);
 
@@ -92,8 +121,7 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
                     int resultCode = tts.setVoice(newVoice);
                 }
             }
-
-            tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams, callbackId);
+            tts.speak(text, queueStrategy, ttsParams, callbackId);
         } else {
             HashMap<String, String> ttsParams = new HashMap<>();
             ttsParams.put(android.speech.tts.TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, callbackId);
@@ -102,12 +130,13 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
             tts.setLanguage(locale);
             tts.setSpeechRate(rate);
             tts.setPitch(pitch);
-            tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams);
+            tts.speak(text, queueStrategy, ttsParams);
         }
     }
 
     public void stop() {
         tts.stop();
+        requests.clear();
     }
 
     public JSArray getSupportedLanguages() {

diff --git a/android/src/main/java/com/getcapacitor/community/tts/TextToSpeechPlugin.java b/android/src/main/java/com/getcapacitor/community/tts/TextToSpeechPlugin.java
@@ -1,6 +1,7 @@
 package com.getcapacitor.community.tts;
 
 import android.util.Base64;
+import android.util.Log;
 import com.getcapacitor.JSArray;
 import com.getcapacitor.JSObject;
 import com.getcapacitor.Plugin;
@@ -37,6 +38,7 @@ public void speak(PluginCall call) {
         float pitch = call.getFloat("pitch", 1.0f);
         float volume = call.getFloat("volume", 1.0f);
         int voice = call.getInt("voice", -1);
+        int queueStrategy = call.getInt("queueStrategy", 0);
 
         boolean isLanguageSupported = implementation.isLanguageSupported(lang);
         if (!isLanguageSupported) {
@@ -56,17 +58,18 @@ public void onError() {
             }
 
             @Override
-            public void onRangeStart(int start, int end, String spokenWord) {
+            public void onRangeStart(int start, int end) {
                 JSObject ret = new JSObject();
                 ret.put("start", start);
                 ret.put("end", end);
+                String spokenWord = text.substring(start, end);
                 ret.put("spokenWord", spokenWord);
                 notifyListeners("onRangeStart", ret);
             }
         };
 
         try {
-            implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback);
+            implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback, queueStrategy);
         } catch (Exception ex) {
             call.reject(ex.getLocalizedMessage());
         }

diff --git a/ios/Plugin/TextToSpeech.swift b/ios/Plugin/TextToSpeech.swift
@@ -1,6 +1,10 @@
 import AVFoundation
 import Capacitor
 
+enum QUEUE_STRATEGY: Int {
+    case QUEUE_ADD = 1, QUEUE_FLUSH = 0
+}
+
 @objc public class TextToSpeech: NSObject, AVSpeechSynthesizerDelegate {
     let synthesizer = AVSpeechSynthesizer()
     var calls: [CAPPluginCall] = []
@@ -29,8 +33,10 @@ import Capacitor
         self.resolveCurrentCall()
     }
 
-    @objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ call: CAPPluginCall) throws {
-        self.synthesizer.stopSpeaking(at: .immediate)
+    @objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ queueStrategy: Int, _ call: CAPPluginCall) throws {
+        if queueStrategy == QUEUE_STRATEGY.QUEUE_FLUSH.rawValue {
+            self.synthesizer.stopSpeaking(at: .immediate)
+        }
         self.calls.append(call)
 
         let utterance = AVSpeechUtterance(string: text)
@@ -68,10 +74,10 @@ import Capacitor
 
     // Adjust rate for a closer match to other platform.
     @objc private func adjustRate(_ rate: Float) -> Float {
-         let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
-         if (rate >= 1.0 ) {
-             return (0.1 * rate) + (baseRate - 0.1)
-         }
+        let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
+        if rate >= 1.0 {
+            return (0.1 * rate) + (baseRate - 0.1)
+        }
         return rate * baseRate
     }
 

diff --git a/ios/Plugin/TextToSpeechPlugin.swift b/ios/Plugin/TextToSpeechPlugin.swift
@@ -20,6 +20,7 @@ public class TextToSpeechPlugin: CAPPlugin {
         let volume = call.getFloat("volume") ?? 1.0
         let voice = call.getInt("voice") ?? -1
         let category = call.getString("category") ?? "ambient"
+        let queueStrategy = call.getInt("queueStrategy") ?? 0
 
         let isLanguageSupported = implementation.isLanguageSupported(lang)
         guard isLanguageSupported else {
@@ -28,7 +29,7 @@ public class TextToSpeechPlugin: CAPPlugin {
         }
 
         do {
-            try implementation.speak(text, lang, rate, pitch, category, volume, voice, call)
+            try implementation.speak(text, lang, rate, pitch, category, volume, voice, queueStrategy, call)
         } catch {
             call.reject(error.localizedDescription)
         }

diff --git a/src/definitions.ts b/src/definitions.ts
@@ -40,6 +40,17 @@ export interface TextToSpeechPlugin {
   ): Promise<PluginListenerHandle>;
 }
 
+export enum QueueStrategy {
+  /**
+   * Use `Flush` to stop the current request when a new request is sent.
+   */
+  Flush = 0,
+  /**
+   * Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed.
+   */
+  Add = 1,
+}
+
 export interface TTSOptions {
   /**
    * The text that will be synthesised when the utterance is spoken.
@@ -87,6 +98,13 @@ export interface TTSOptions {
    * @default "ambient"
    */
   category?: string;
+  /**
+   * Select the strategy to adopt when several requests to speak overlap.
+   *
+   * @since 5.1.0
+   * @default QueueStrategy.Flush
+   */
+  queueStrategy?: QueueStrategy;
 }
 
 /**