Skip to content

Commit

Permalink
feat: add queueStrategy option (#139)
Browse files Browse the repository at this point in the history
* Implement support of QUEUE_ADD strategy in android

* Implement queue strategy in ios

* Update doc

* Apply review: Update definitions.ts

* Fix the inversion between Add and Flush in the documentation

* Fix linting

---------

Co-authored-by: Enzo MENEGALDO <[email protected]>
  • Loading branch information
EnzoMenegaldo and Enzo MENEGALDO authored Nov 13, 2024
1 parent 8ad38a5 commit cb3e92c
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 35 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const speak = async () => {
pitch: 1.0,
volume: 1.0,
category: 'ambient',
queueStrategy: 1
});
};

Expand Down Expand Up @@ -83,6 +84,7 @@ const isLanguageSupported = async (lang: string) => {
* [`openInstall()`](#openinstall)
* [`addListener('onRangeStart', ...)`](#addlisteneronrangestart)
* [Interfaces](#interfaces)
* [Enums](#enums)

</docgen-index>

Expand Down Expand Up @@ -201,6 +203,7 @@ addListener(eventName: 'onRangeStart', listenerFunc: (info: { start: number; end
| **`volume`** | <code>number</code> | The volume that the utterance will be spoken at. | <code>1.0</code> |
| **`voice`** | <code>number</code> | The index of the selected voice that will be used to speak the utterance. Possible voices can be queried using `getSupportedVoices`. | |
| **`category`** | <code>string</code> | Select the iOS Audio session category. Possible values: `ambient` and `playback`. Use `playback` to play audio even when the app is in the background. Only available for iOS. | <code>"ambient"</code> |
| **`queueStrategy`** | <code><a href="#queuestrategy">QueueStrategy</a></code> | Select the strategy to adopt when several requests to speak overlap. | <code>QueueStrategy.Flush</code> | 5.1.0 |


#### SpeechSynthesisVoice
Expand All @@ -222,6 +225,17 @@ The <a href="#speechsynthesisvoice">SpeechSynthesisVoice</a> interface represent
| ------------ | ----------------------------------------- |
| **`remove`** | <code>() =&gt; Promise&lt;void&gt;</code> |


### Enums


#### QueueStrategy

| Members | Value | Description |
| ----------- | -------------- | ---------------------------------------------------------------------------------------------------------------------- |
| **`Flush`** | <code>0</code> | Use `Flush` to stop the current request when a new request is sent. |
| **`Add`** | <code>1</code> | Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed. |

</docgen-api>

## Changelog
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
public interface SpeakResultCallback {
void onDone();
void onError();
void onRangeStart(int start, int end, String spokenWord);
void onRangeStart(int start, int end);
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListener {
Expand All @@ -25,11 +26,44 @@ public class TextToSpeech implements android.speech.tts.TextToSpeech.OnInitListe
private android.speech.tts.TextToSpeech tts = null;
private int initializationStatus;
private JSObject[] supportedVoices = null;
// Pending speak requests, keyed by utterance ID (the callback ID handed to speak()).
// The progress listener looks callbacks up here when the engine reports progress.
// Wrapped in a synchronized map because the listener may be invoked on a different
// thread than the one calling speak()/stop(), and a plain HashMap is not safe for that.
private Map<String, SpeakResultCallback> requests = Collections.synchronizedMap(new HashMap<>());

TextToSpeech(Context context) {
this.context = context;
try {
tts = new android.speech.tts.TextToSpeech(context, this);
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {
    // Intentionally empty: nothing is reported to the caller when an utterance starts.
}

/**
 * Reports completion of an utterance to the callback registered under its ID.
 * Utterances with no registered callback are ignored.
 */
@Override
public void onDone(String utteranceId) {
    // remove() looks up and unregisters in a single step, so the entry is dropped
    // even if the callback throws, and the same callback cannot be fetched twice.
    SpeakResultCallback callback = requests.remove(utteranceId);
    if (callback != null) {
        callback.onDone();
    }
}

/**
 * Reports a failed utterance to the callback registered under its ID.
 * Utterances with no registered callback are ignored.
 */
@Override
public void onError(String utteranceId) {
    // remove() looks up and unregisters in a single step, so the entry is dropped
    // even if the callback throws, and the same callback cannot be fetched twice.
    SpeakResultCallback callback = requests.remove(utteranceId);
    if (callback != null) {
        callback.onError();
    }
}

/**
 * Forwards the character range about to be spoken to the callback registered
 * for this utterance. The request stays registered, since further ranges and
 * the final done/error notification may still follow.
 */
@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
    SpeakResultCallback pending = requests.get(utteranceId);
    if (pending == null) {
        // No request is registered under this ID; nothing to notify.
        return;
    }
    pending.onRangeStart(start, end);
}
}
);
} catch (Exception ex) {
Log.d(LOG_TAG, ex.getLocalizedMessage());
}
Expand All @@ -50,29 +84,24 @@ public void speak(
String callbackId,
SpeakResultCallback resultCallback
) {
tts.stop();
tts.setOnUtteranceProgressListener(
new UtteranceProgressListener() {
@Override
public void onStart(String utteranceId) {}

@Override
public void onDone(String utteranceId) {
resultCallback.onDone();
}

@Override
public void onError(String utteranceId) {
resultCallback.onError();
}
speak(text, lang, rate, pitch, volume, voice, callbackId, resultCallback, android.speech.tts.TextToSpeech.QUEUE_FLUSH);
}

@Override
public void onRangeStart(String utteranceId, int start, int end, int frame) {
String spokenWord = text.substring(start, end);
resultCallback.onRangeStart(start, end, spokenWord);
}
}
);
public void speak(
String text,
String lang,
float rate,
float pitch,
float volume,
int voice,
String callbackId,
SpeakResultCallback resultCallback,
int queueStrategy
) {
if (queueStrategy != android.speech.tts.TextToSpeech.QUEUE_ADD) {
stop();
}
requests.put(callbackId, resultCallback);

Locale locale = Locale.forLanguageTag(lang);

Expand All @@ -92,8 +121,7 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
int resultCode = tts.setVoice(newVoice);
}
}

tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams, callbackId);
tts.speak(text, queueStrategy, ttsParams, callbackId);
} else {
HashMap<String, String> ttsParams = new HashMap<>();
ttsParams.put(android.speech.tts.TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID, callbackId);
Expand All @@ -102,12 +130,13 @@ public void onRangeStart(String utteranceId, int start, int end, int frame) {
tts.setLanguage(locale);
tts.setSpeechRate(rate);
tts.setPitch(pitch);
tts.speak(text, android.speech.tts.TextToSpeech.QUEUE_FLUSH, ttsParams);
tts.speak(text, queueStrategy, ttsParams);
}
}

/**
 * Calls stop() on the underlying engine and forgets every pending speak request.
 *
 * Once the request map is cleared, the progress listener no longer finds a callback
 * for any earlier utterance ID, so those callbacks are not invoked.
 */
public void stop() {
// Stop the engine before dropping the registered callbacks.
tts.stop();
requests.clear();
}

public JSArray getSupportedLanguages() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.getcapacitor.community.tts;

import android.util.Base64;
import android.util.Log;
import com.getcapacitor.JSArray;
import com.getcapacitor.JSObject;
import com.getcapacitor.Plugin;
Expand Down Expand Up @@ -37,6 +38,7 @@ public void speak(PluginCall call) {
float pitch = call.getFloat("pitch", 1.0f);
float volume = call.getFloat("volume", 1.0f);
int voice = call.getInt("voice", -1);
int queueStrategy = call.getInt("queueStrategy", 0);

boolean isLanguageSupported = implementation.isLanguageSupported(lang);
if (!isLanguageSupported) {
Expand All @@ -56,17 +58,18 @@ public void onError() {
}

@Override
public void onRangeStart(int start, int end, String spokenWord) {
public void onRangeStart(int start, int end) {
JSObject ret = new JSObject();
ret.put("start", start);
ret.put("end", end);
String spokenWord = text.substring(start, end);
ret.put("spokenWord", spokenWord);
notifyListeners("onRangeStart", ret);
}
};

try {
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback);
implementation.speak(text, lang, rate, pitch, volume, voice, call.getCallbackId(), resultCallback, queueStrategy);
} catch (Exception ex) {
call.reject(ex.getLocalizedMessage());
}
Expand Down
18 changes: 12 additions & 6 deletions ios/Plugin/TextToSpeech.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import AVFoundation
import Capacitor

/// Queue strategies accepted by `speak`, identified by the integer received from the plugin call.
enum QUEUE_STRATEGY: Int {
    /// Stop whatever is currently being spoken before starting the new request.
    case QUEUE_FLUSH = 0
    /// Leave the current speech untouched when the new request is submitted.
    case QUEUE_ADD = 1
}

@objc public class TextToSpeech: NSObject, AVSpeechSynthesizerDelegate {
let synthesizer = AVSpeechSynthesizer()
var calls: [CAPPluginCall] = []
Expand Down Expand Up @@ -29,8 +33,10 @@ import Capacitor
self.resolveCurrentCall()
}

@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ call: CAPPluginCall) throws {
self.synthesizer.stopSpeaking(at: .immediate)
@objc public func speak(_ text: String, _ lang: String, _ rate: Float, _ pitch: Float, _ category: String, _ volume: Float, _ voice: Int, _ queueStrategy: Int, _ call: CAPPluginCall) throws {
if queueStrategy == QUEUE_STRATEGY.QUEUE_FLUSH.rawValue {
self.synthesizer.stopSpeaking(at: .immediate)
}
self.calls.append(call)

let utterance = AVSpeechUtterance(string: text)
Expand Down Expand Up @@ -68,10 +74,10 @@ import Capacitor

// Adjust rate for a closer match to other platform.
@objc private func adjustRate(_ rate: Float) -> Float {
let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
if (rate >= 1.0 ) {
return (0.1 * rate) + (baseRate - 0.1)
}
let baseRate: Float = AVSpeechUtteranceDefaultSpeechRate
if rate >= 1.0 {
return (0.1 * rate) + (baseRate - 0.1)
}
return rate * baseRate
}

Expand Down
3 changes: 2 additions & 1 deletion ios/Plugin/TextToSpeechPlugin.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class TextToSpeechPlugin: CAPPlugin {
let volume = call.getFloat("volume") ?? 1.0
let voice = call.getInt("voice") ?? -1
let category = call.getString("category") ?? "ambient"
let queueStrategy = call.getInt("queueStrategy") ?? 0

let isLanguageSupported = implementation.isLanguageSupported(lang)
guard isLanguageSupported else {
Expand All @@ -28,7 +29,7 @@ public class TextToSpeechPlugin: CAPPlugin {
}

do {
try implementation.speak(text, lang, rate, pitch, category, volume, voice, call)
try implementation.speak(text, lang, rate, pitch, category, volume, voice, queueStrategy, call)
} catch {
call.reject(error.localizedDescription)
}
Expand Down
18 changes: 18 additions & 0 deletions src/definitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ export interface TextToSpeechPlugin {
): Promise<PluginListenerHandle>;
}

/**
 * Strategy to adopt when several requests to speak overlap.
 *
 * The numeric values are forwarded unchanged to the native implementations,
 * so they must stay in sync with the constants used there.
 */
export enum QueueStrategy {
/**
 * Use `Flush` to stop the current request when a new request is sent.
 */
Flush = 0,
/**
 * Use `Add` to buffer the speech request. The request will be executed when all previous requests have been completed.
 */
Add = 1,
}

export interface TTSOptions {
/**
* The text that will be synthesised when the utterance is spoken.
Expand Down Expand Up @@ -87,6 +98,13 @@ export interface TTSOptions {
* @default "ambient"
*/
category?: string;
/**
* Select the strategy to adopt when several requests to speak overlap.
*
* @since 5.1.0
* @default QueueStrategy.Flush
*/
queueStrategy?: QueueStrategy;
}

/**
Expand Down

0 comments on commit cb3e92c

Please sign in to comment.