From e1a838c802a0f55702aa8d68e6d845bd3364776f Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Fri, 22 Nov 2024 16:12:51 +1100 Subject: [PATCH 1/2] speech service fix --- packages/client-discord/src/voice.ts | 5 - packages/plugin-node/package.json | 1 + packages/plugin-node/src/services/speech.ts | 341 +- packages/plugin-node/src/vendor/vits.ts | 8404 ------------------- pnpm-lock.yaml | 1718 +++- 5 files changed, 1938 insertions(+), 8531 deletions(-) delete mode 100644 packages/plugin-node/src/vendor/vits.ts diff --git a/packages/client-discord/src/voice.ts b/packages/client-discord/src/voice.ts index 8c7fcd8d62..61d8224ae0 100644 --- a/packages/client-discord/src/voice.ts +++ b/packages/client-discord/src/voice.ts @@ -416,11 +416,6 @@ export class VoiceManager extends EventEmitter { ServiceType.TRANSCRIPTION ); - console.log( - "transcriptionService: ", - transcriptionService - ); - if (!transcriptionService) { throw new Error( "Transcription generation service not found" diff --git a/packages/plugin-node/package.json b/packages/plugin-node/package.json index f7c2d43ee9..da00f746c7 100644 --- a/packages/plugin-node/package.json +++ b/packages/plugin-node/package.json @@ -20,6 +20,7 @@ "cldr-segmentation": "2.2.1", "command-exists": "1.2.9", "csv-writer": "1.6.0", + "echogarden": "^2.0.5", "espeak-ng": "1.0.2", "ffmpeg-static": "5.2.0", "fluent-ffmpeg": "2.1.3", diff --git a/packages/plugin-node/src/services/speech.ts b/packages/plugin-node/src/services/speech.ts index e338f6f1bf..8170e9fc7d 100644 --- a/packages/plugin-node/src/services/speech.ts +++ b/packages/plugin-node/src/services/speech.ts @@ -1,14 +1,9 @@ import { PassThrough, Readable } from "stream"; -import { - IAgentRuntime, - ISpeechService, - ITranscriptionService, - ServiceType, -} from "@ai16z/eliza"; +import { IAgentRuntime, ISpeechService, ServiceType } from "@ai16z/eliza"; import { getWavHeader } from "./audioUtils.ts"; -import { synthesize } from "../vendor/vits.ts"; import { Service } from "@ai16z/eliza"; import { validateNodeConfig } from "../enviroment.ts"; +import * as Echogarden from "echogarden"; function prependWavHeader( readable: Readable, @@ -40,77 +35,142 @@ function prependWavHeader( async function textToSpeech(runtime: IAgentRuntime, text: string) { await validateNodeConfig(runtime); - const body = { - model_id: runtime.getSetting("ELEVENLABS_MODEL_ID"), - text: text, - voice_settings: { - similarity_boost: runtime.getSetting( - "ELEVENLABS_VOICE_SIMILARITY_BOOST" - ), - stability: runtime.getSetting("ELEVENLABS_VOICE_STABILITY"), - style: runtime.getSetting("ELEVENLABS_VOICE_STYLE"), - use_speaker_boost: runtime.getSetting( - "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" - ), - }, - }; - const options = { - method: "POST", - headers: { - "Content-Type": "application/json", - "xi-api-key": runtime.getSetting("ELEVENLABS_XI_API_KEY"), - }, - body: JSON.stringify(body), - }; - - const response = await fetch( - `https://api.elevenlabs.io/v1/text-to-speech/${runtime.getSetting("ELEVENLABS_VOICE_ID")}/stream?optimize_streaming_latency=${runtime.getSetting("ELEVENLABS_OPTIMIZE_STREAMING_LATENCY")}&output_format=${runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT")}`, - options - ); - const status = response.status; - if (status != 200) { - console.log(`Received status ${status} from Eleven Labs API`); - const errorBodyString = await response.text(); - throw new Error( - `Received status ${status} from Eleven Labs API: ${errorBodyString}` + try { + const body = { + model_id: runtime.getSetting("ELEVENLABS_MODEL_ID"), + text: text, + voice_settings: { + similarity_boost: runtime.getSetting( + "ELEVENLABS_VOICE_SIMILARITY_BOOST" + ), + stability: runtime.getSetting("ELEVENLABS_VOICE_STABILITY"), + style: runtime.getSetting("ELEVENLABS_VOICE_STYLE"), + use_speaker_boost: runtime.getSetting( + "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" + ), + }, + }; + const options = { + method: "POST", + headers: { + "Content-Type": "application/json", + "xi-api-key": runtime.getSetting("ELEVENLABS_XI_API_KEY"), + }, + body: JSON.stringify(body), + }; + + const response = await fetch( + `https://api.elevenlabs.io/v1/text-to-speech/${runtime.getSetting("ELEVENLABS_VOICE_ID")}/stream?optimize_streaming_latency=${runtime.getSetting("ELEVENLABS_OPTIMIZE_STREAMING_LATENCY")}&output_format=${runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT")}`, + options ); - } - if (response) { - const reader = response.body?.getReader(); - const readable = new Readable({ - read() { - reader && - reader.read().then(({ done, value }) => { - if (done) { - this.push(null); - } else { - this.push(value); - } - }); - }, - }); + const status = response.status; + if (status != 200) { + const errorBodyString = await response.text(); + const errorBody = JSON.parse(errorBodyString); - if (runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT").startsWith("pcm_")) { - const sampleRate = parseInt( - runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT").substring(4) - ); - const withHeader = prependWavHeader( - readable, - 1024 * 1024 * 100, - sampleRate, - 1, - 16 + // Check for quota exceeded error + if ( + status === 401 && + errorBody.detail?.status === "quota_exceeded" + ) { + console.log("ElevenLabs quota exceeded, falling back to VITS"); + throw new Error("QUOTA_EXCEEDED"); + } + + throw new Error( + `Received status ${status} from Eleven Labs API: ${errorBodyString}` ); - return withHeader; + } + + if (response) { + const reader = response.body?.getReader(); + const readable = new Readable({ + read() { + reader && + reader.read().then(({ done, value }) => { + if (done) { + this.push(null); + } else { + this.push(value); + } + }); + }, + }); + + if ( + runtime + .getSetting("ELEVENLABS_OUTPUT_FORMAT") + .startsWith("pcm_") + ) { + const sampleRate = parseInt( + runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT").substring(4) + ); + const withHeader = prependWavHeader( + readable, + 1024 * 1024 * 100, + sampleRate, + 1, + 16 + ); + return withHeader; + } else { + return readable; + } } else { - return readable; + return new Readable({ + read() {}, + }); + } + } catch (error) { + if (error.message === "QUOTA_EXCEEDED") { + // Fall back to VITS + const { audio } = await Echogarden.synthesize(text, { + engine: "vits", + voice: "en_US-hfc_female-medium", + }); + + let wavStream: Readable; + if (audio instanceof Buffer) { + console.log("audio is a buffer"); + wavStream = Readable.from(audio); + } else if ("audioChannels" in audio && "sampleRate" in audio) { + console.log("audio is a RawAudio"); + const floatBuffer = Buffer.from(audio.audioChannels[0].buffer); + console.log("buffer length: ", floatBuffer.length); + + // Get the sample rate from the RawAudio object + const sampleRate = audio.sampleRate; + + // Create a Float32Array view of the floatBuffer + const floatArray = new Float32Array(floatBuffer.buffer); + + // Convert 32-bit float audio to 16-bit PCM + const pcmBuffer = new Int16Array(floatArray.length); + for (let i = 0; i < floatArray.length; i++) { + pcmBuffer[i] = Math.round(floatArray[i] * 32767); + } + + // Prepend WAV header to the buffer + const wavHeaderBuffer = getWavHeader( + pcmBuffer.length * 2, + sampleRate, + 1, + 16 + ); + const wavBuffer = Buffer.concat([ + wavHeaderBuffer, + Buffer.from(pcmBuffer.buffer), + ]); + + wavStream = Readable.from(wavBuffer); + } else { + throw new Error("Unsupported audio format"); + } + return wavStream; } - } else { - return new Readable({ - read() {}, - }); + throw error; // Re-throw other errors } } @@ -124,53 +184,104 @@ export class SpeechService extends Service implements ISpeechService { } async generate(runtime: IAgentRuntime, text: string): Promise { - // check for elevenlabs API key - if (runtime.getSetting("ELEVENLABS_XI_API_KEY")) { - return textToSpeech(runtime, text); - } - const { audio } = await synthesize(text, { - engine: "vits", - voice: "en_US-hfc_female-medium", - }); - - let wavStream: Readable; - if (audio instanceof Buffer) { - console.log("audio is a buffer"); - wavStream = Readable.from(audio); - } else if ("audioChannels" in audio && "sampleRate" in audio) { - console.log("audio is a RawAudio"); - const floatBuffer = Buffer.from(audio.audioChannels[0].buffer); - console.log("buffer length: ", floatBuffer.length); - - // Get the sample rate from the RawAudio object - const sampleRate = audio.sampleRate; - - // Create a Float32Array view of the floatBuffer - const floatArray = new Float32Array(floatBuffer.buffer); - - // Convert 32-bit float audio to 16-bit PCM - const pcmBuffer = new Int16Array(floatArray.length); - for (let i = 0; i < floatArray.length; i++) { - pcmBuffer[i] = Math.round(floatArray[i] * 32767); + try { + // check for elevenlabs API key + if (runtime.getSetting("ELEVENLABS_XI_API_KEY")) { + return await textToSpeech(runtime, text); } - // Prepend WAV header to the buffer - const wavHeaderBuffer = getWavHeader( - pcmBuffer.length * 2, - sampleRate, - 1, - 16 - ); - const wavBuffer = Buffer.concat([ - wavHeaderBuffer, - Buffer.from(pcmBuffer.buffer), - ]); + // Default to VITS if no ElevenLabs API key + const { audio } = await Echogarden.synthesize(text, { + engine: "vits", + voice: "en_US-hfc_female-medium", + }); - wavStream = Readable.from(wavBuffer); - } else { - throw new Error("Unsupported audio format"); - } + let wavStream: Readable; + if (audio instanceof Buffer) { + console.log("audio is a buffer"); + wavStream = Readable.from(audio); + } else if ("audioChannels" in audio && "sampleRate" in audio) { + console.log("audio is a RawAudio"); + const floatBuffer = Buffer.from(audio.audioChannels[0].buffer); + console.log("buffer length: ", floatBuffer.length); + + // Get the sample rate from the RawAudio object + const sampleRate = audio.sampleRate; + + // Create a Float32Array view of the floatBuffer + const floatArray = new Float32Array(floatBuffer.buffer); + + // Convert 32-bit float audio to 16-bit PCM + const pcmBuffer = new Int16Array(floatArray.length); + for (let i = 0; i < floatArray.length; i++) { + pcmBuffer[i] = Math.round(floatArray[i] * 32767); + } + + // Prepend WAV header to the buffer + const wavHeaderBuffer = getWavHeader( + pcmBuffer.length * 2, + sampleRate, + 1, + 16 + ); + const wavBuffer = Buffer.concat([ + wavHeaderBuffer, + Buffer.from(pcmBuffer.buffer), + ]); + + wavStream = Readable.from(wavBuffer); + } else { + throw new Error("Unsupported audio format"); + } + + return wavStream; + } catch (error) { + console.error("Speech generation error:", error); + // If ElevenLabs fails for any reason, fall back to VITS + const { audio } = await Echogarden.synthesize(text, { + engine: "vits", + voice: "en_US-hfc_female-medium", + }); + + let wavStream: Readable; + if (audio instanceof Buffer) { + console.log("audio is a buffer"); + wavStream = Readable.from(audio); + } else if ("audioChannels" in audio && "sampleRate" in audio) { + console.log("audio is a RawAudio"); + const floatBuffer = Buffer.from(audio.audioChannels[0].buffer); + console.log("buffer length: ", floatBuffer.length); + + // Get the sample rate from the RawAudio object + const sampleRate = audio.sampleRate; + + // Create a Float32Array view of the floatBuffer + const floatArray = new Float32Array(floatBuffer.buffer); - return wavStream; + // Convert 32-bit float audio to 16-bit PCM + const pcmBuffer = new Int16Array(floatArray.length); + for (let i = 0; i < floatArray.length; i++) { + pcmBuffer[i] = Math.round(floatArray[i] * 32767); + } + + // Prepend WAV header to the buffer + const wavHeaderBuffer = getWavHeader( + pcmBuffer.length * 2, + sampleRate, + 1, + 16 + ); + const wavBuffer = Buffer.concat([ + wavHeaderBuffer, + Buffer.from(pcmBuffer.buffer), + ]); + + wavStream = Readable.from(wavBuffer); + } else { + throw new Error("Unsupported audio format"); + } + + return wavStream; + } } } diff --git a/packages/plugin-node/src/vendor/vits.ts b/packages/plugin-node/src/vendor/vits.ts deleted file mode 100644 index 37360c3e1b..0000000000 --- a/packages/plugin-node/src/vendor/vits.ts +++ /dev/null @@ -1,8404 +0,0 @@ -// VITS implementation, extracted from echogarden: https://github.com/echogarden-project/echogarden -// We had some compatibility issues with the package, so we brought this code in directly -// This code is licensed under the GPL license - -import * as AlawMulaw from "alawmulaw"; -import { spawn } from "child_process"; -import * as CldrSegmentation from "cldr-segmentation"; -import commandExists from "command-exists"; -import { randomBytes } from "crypto"; -import * as fsExtra from "fs-extra"; -import { GaxiosOptions, request } from "gaxios"; -import gracefulFS from "graceful-fs"; -import { convert as convertHtmlToText } from "html-to-text"; -import * as Onnx from "onnxruntime-node"; -import os from "os"; -import path from "path"; -import { detectAll } from "tinyld"; -import { fileURLToPath } from "url"; -import { inspect, promisify } from "util"; -import { vitsVoiceList } from "./vitsVoiceList.ts"; - -export async function synthesize( - input: string | string[], - options: SynthesisOptions, - onSegment?: SynthesisSegmentEvent, - onSentence?: SynthesisSegmentEvent -): Promise { - options = extendDeep(defaultSynthesisOptions, options); - - let segments: string[]; - - if (Array.isArray(input)) { - segments = input; - } else { - const plainTextOptions = options.plainText!; - - segments = splitToParagraphs( - input, - plainTextOptions.paragraphBreaks!, - plainTextOptions.whitespace! - ); - } - - return await synthesizeSegments(segments, options, onSegment, onSentence); -} - -const createWriteStream = gracefulFS.createWriteStream; - -function getCostMatrixMemorySizeMB( - sequence1Length: number, - sequence2Length: number, - windowLength: number -) { - const costMatrixMemorySizeMB = - (sequence1Length * Math.min(sequence2Length, windowLength) * 4) / - 1000000; - - return costMatrixMemorySizeMB; -} - -function argIndexOfMinimumOf3(x1: number, x2: number, x3: number) { - if (x1 <= x2 && x1 <= x3) { - return 1; - } else if (x2 <= x3) { - return 2; - } else { - return 3; - } -} - -function minimumOf3(x1: number, x2: number, x3: number) { - if (x1 <= x2 && x1 <= x3) { - return x1; - } else if (x2 <= x3) { - return x2; - } else { - return x3; - } -} - -function computeAccumulatedCostMatrixTransposed( - sequence1: T[], - sequence2: U[], - costFunction: (a: T, b: U) => number, - windowMaxLength: number, - centerIndexes?: number[] -) { - const halfWindowMaxLength = Math.floor(windowMaxLength / 2); - - const columnCount = sequence1.length; - const rowCount = Math.min(windowMaxLength, sequence2.length); - - const accumulatedCostMatrixTransposed: Float32Array[] = - new Array(columnCount); - - // Initialize an array to store window start offsets - const windowStartOffsets = new Int32Array(columnCount); - - // Compute accumulated cost matrix column by column - for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) { - // Create new column and add it to the matrix - const currentColumn = new Float32Array(rowCount); - accumulatedCostMatrixTransposed[columnIndex] = currentColumn; - - // Compute window center, or use given one - let windowCenter: number; - - if (centerIndexes) { - windowCenter = centerIndexes[columnIndex]; - } else { - windowCenter = Math.floor( - (columnIndex / columnCount) * sequence2.length - ); - } - - // Compute window start and end offsets - let windowStartOffset = Math.max(windowCenter - halfWindowMaxLength, 0); - let windowEndOffset = windowStartOffset + rowCount; - - if (windowEndOffset > sequence2.length) { - windowEndOffset = sequence2.length; - windowStartOffset = windowEndOffset - rowCount; - } - - // Store the start offset for this column - windowStartOffsets[columnIndex] = windowStartOffset; - - // Get target sequence1 value - const targetSequence1Value = sequence1[columnIndex]; - - // If this is the first column, fill it only using the 'up' neighbors - if (columnIndex == 0) { - for (let rowIndex = 1; rowIndex < rowCount; rowIndex++) { - const cost = costFunction( - targetSequence1Value, - sequence2[windowStartOffset + rowIndex] - ); - const upCost = currentColumn[rowIndex - 1]; - - currentColumn[rowIndex] = cost + upCost; - } - - continue; - } - - // If not first column - - // Store the column to the left - const leftColumn = accumulatedCostMatrixTransposed[columnIndex - 1]; - - // Compute the delta between the current window start offset - // and left column's window offset - const windowOffsetDelta = - windowStartOffset - windowStartOffsets[columnIndex - 1]; - - // Compute the accumulated cost for all rows in the window - for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) { - // Compute the cost for current cell - const cost = costFunction( - targetSequence1Value, - sequence2[windowStartOffset + rowIndex] - ); - - // Retrieve the cost for the 'up' (insertion) neighbor - let upCost = Infinity; - if (rowIndex > 0) { - upCost = currentColumn[rowIndex - 1]; - } - - // Retrieve the cost for the 'left' (deletion) neighbor - let leftCost = Infinity; - const leftRowIndex = rowIndex + windowOffsetDelta; - - if (leftRowIndex < rowCount) { - leftCost = leftColumn[leftRowIndex]; - } - - // Retrieve the cost for the 'up and left' (match) neighbor - let upAndLeftCost = Infinity; - const upAndLeftRowIndex = leftRowIndex - 1; - - if (upAndLeftRowIndex >= 0 && upAndLeftRowIndex < rowCount) { - upAndLeftCost = leftColumn[upAndLeftRowIndex]; - } - - // Find the minimum of all neighbors - let minimumNeighborCost = minimumOf3( - upCost, - leftCost, - upAndLeftCost - ); - - // If all neighbors are infinity, then it means there is a "jump" between the window - // of the current column and the left column, and they don't have overlapping rows. - // In this case, only the cost of the current cell will be used - if (minimumNeighborCost === Infinity) { - minimumNeighborCost = 0; - } - - // Write cost + minimum neighbor cost to the current column - currentColumn[rowIndex] = cost + minimumNeighborCost; - } - } - - return { - accumulatedCostMatrixTransposed, - windowStartOffsets, - }; -} - -function computeBestPathTransposed( - accumulatedCostMatrixTransposed: Float32Array[], - windowStartOffsets: Int32Array -) { - const columnCount = accumulatedCostMatrixTransposed.length; - const rowCount = accumulatedCostMatrixTransposed[0].length; - - const bestPath: AlignmentPath = []; - - // Start at the bottom right corner and find the best path - // towards the top left - let columnIndex = columnCount - 1; - let rowIndex = rowCount - 1; - - while (columnIndex > 0 || rowIndex > 0) { - const windowStartIndex = windowStartOffsets[columnIndex]; - const windowStartDelta = - columnIndex > 0 - ? windowStartIndex - windowStartOffsets[columnIndex - 1] - : 0; - - // Add the current cell to the best path - bestPath.push({ - source: columnIndex, - dest: windowStartIndex + rowIndex, - }); - - // Retrieve the cost for the 'up' (insertion) neighbor - const upRowIndex = rowIndex - 1; - let upCost = Infinity; - - if (upRowIndex >= 0) { - upCost = accumulatedCostMatrixTransposed[columnIndex][upRowIndex]; - } - - // Retrieve the cost for the 'left' (deletion) neighbor - const leftRowIndex = rowIndex + windowStartDelta; - const leftColumnIndex = columnIndex - 1; - let leftCost = Infinity; - - if (leftColumnIndex >= 0 && leftRowIndex < rowCount) { - leftCost = - accumulatedCostMatrixTransposed[leftColumnIndex][leftRowIndex]; - } - - // Retrieve the cost for the 'up and left' (match) neighbor - const upAndLeftRowIndex = rowIndex - 1 + windowStartDelta; - const upAndLeftColumnIndex = columnIndex - 1; - let upAndLeftCost = Infinity; - - if ( - upAndLeftColumnIndex >= 0 && - upAndLeftRowIndex >= 0 && - upAndLeftRowIndex < rowCount - ) { - upAndLeftCost = - accumulatedCostMatrixTransposed[upAndLeftColumnIndex][ - upAndLeftRowIndex - ]; - } - - // If all neighbors have a cost of infinity, it means - // there is a "jump" between the window for the current and previous column - if ( - upCost == Infinity && - leftCost == Infinity && - upAndLeftCost == Infinity - ) { - // In that case: - // - // If there are rows above - if (upRowIndex >= 0) { - // Move upward - rowIndex = upRowIndex; - } else if (leftColumnIndex >= 0) { - // Otherwise, move to the left - columnIndex = leftColumnIndex; - } else { - // Since we know that either columnIndex > 0 or rowIndex > 0, - // one of these directions must be available. - // This error should never happen - - throw new Error( - `Unexpected state: columnIndex: ${columnIndex}, rowIndex: ${rowIndex}` - ); - } - } else { - // Choose the direction with the smallest cost - const smallestCostDirection = argIndexOfMinimumOf3( - upCost, - leftCost, - upAndLeftCost - ); - - if (smallestCostDirection == 1) { - // Move upward - rowIndex = upRowIndex; - // The upper column index stays the same - } else if (smallestCostDirection == 2) { - // Move to the left - rowIndex = leftRowIndex; - columnIndex = leftColumnIndex; - } else { - // Move upward and to the left - rowIndex = upAndLeftRowIndex; - columnIndex = upAndLeftColumnIndex; - } - } - } - - bestPath.push({ - source: 0, - dest: 0, - }); - - return bestPath.reverse() as AlignmentPath; -} - -function alignDTWWindowed( - sequence1: T[], - sequence2: U[], - costFunction: (a: T, b: U) => number, - windowMaxLength: number, - centerIndexes?: number[] -) { - if (windowMaxLength < 2) { - throw new Error("Window length must be greater or equal to 2"); - } - - if (sequence1.length == 0 || sequence2.length == 0) { - return { - path: [] as AlignmentPath, - pathCost: 0, - }; - } - - // Compute accumulated cost matrix (transposed) - const { accumulatedCostMatrixTransposed, windowStartOffsets } = - computeAccumulatedCostMatrixTransposed( - sequence1, - sequence2, - costFunction, - windowMaxLength, - centerIndexes - ); - - // Find best path for the computed matrix - const path = computeBestPathTransposed( - accumulatedCostMatrixTransposed, - windowStartOffsets - ); - - // Best path cost is the bottom right element of the matrix - const columnCount = accumulatedCostMatrixTransposed.length; - const rowCount = accumulatedCostMatrixTransposed[0].length; - - const pathCost = - accumulatedCostMatrixTransposed[columnCount - 1][rowCount - 1]; - - // Return - return { path, pathCost }; -} - -async function alignMFCC_DTW( - mfccFrames1: number[][], - mfccFrames2: number[][], - windowLength: number, - distanceFunctionKind: "euclidian" | "cosine" = "euclidian", - centerIndexes?: number[] -) { - if (distanceFunctionKind == "euclidian") { - let distanceFunction = euclidianDistance; - - if (mfccFrames1.length > 0 && mfccFrames1[0].length === 13) { - distanceFunction = euclidianDistance13Dim; - } - - const { path } = alignDTWWindowed( - mfccFrames1, - mfccFrames2, - distanceFunction, - windowLength, - centerIndexes - ); - - return path; - } else if (distanceFunctionKind == "cosine") { - const indexes1 = createVectorForIntegerRange(0, mfccFrames1.length); - const indexes2 = createVectorForIntegerRange(0, mfccFrames2.length); - - const magnitudes1 = mfccFrames1.map(magnitude); - const magnitudes2 = mfccFrames2.map(magnitude); - - const { path } = alignDTWWindowed( - indexes1, - indexes2, - (i, j) => - cosineDistancePrecomputedMagnitudes( - mfccFrames1[i], - mfccFrames2[j], - magnitudes1[i], - magnitudes2[j] - ), - windowLength, - centerIndexes - ); - - return path; - } else { - throw new Error("Invalid distance function"); - } -} - -let kissFFTInstance: any; - -type WindowType = "hann" | "hamming" | "povey"; - -function getBinFrequencies(binCount: number, maxFrequency: number) { - const binFrequencies = new Float32Array(binCount); - const frequencyStep = maxFrequency / (binCount - 1); - - for ( - let i = 0, frequency = 0; - i < binFrequencies.length; - i++, frequency += frequencyStep - ) { - binFrequencies[i] = frequency; - } - - return binFrequencies; -} - -function fftFrameToPowerSpectrum(fftFrame: Float32Array) { - const powerSpectrum = new Float32Array(fftFrame.length / 2); - - for (let i = 0; i < powerSpectrum.length; i++) { - const binOffset = i * 2; - const fftCoefficientRealPart = fftFrame[binOffset]; - const fftCoefficientImaginaryPart = fftFrame[binOffset + 1]; - const binPower = - fftCoefficientRealPart ** 2 + fftCoefficientImaginaryPart ** 2; - - powerSpectrum[i] = binPower; - } - - return powerSpectrum; -} - -async function getKissFFTInstance() { - if (!kissFFTInstance) { - const { default: initializer } = await import( - "@echogarden/kissfft-wasm" - ); - - kissFFTInstance = await initializer(); - } - - return kissFFTInstance; -} - -function getWindowWeights(windowType: WindowType, windowSize: number) { - const weights = new Float32Array(windowSize); - - const innerFactor = (2 * Math.PI) / (windowSize - 1); - - if (windowType == "hann") { - for (let i = 0; i < windowSize; i++) { - //weights[i] = 0.5 * (1 - Math.cos(2 * Math.PI * (i / (windowSize - 1)))) - weights[i] = 0.5 * (1 - Math.cos(innerFactor * i)); - } - } else if (windowType == "hamming") { - for (let i = 0; i < windowSize; i++) { - //weights[i] = 0.54 - (0.46 * Math.cos(2 * Math.PI * (i / (windowSize - 1)))) - weights[i] = 0.54 - 0.46 * Math.cos(innerFactor * i); - } - } else if (windowType == "povey") { - const hannWeights = getWindowWeights("hann", windowSize); - - for (let i = 0; i < windowSize; i++) { - weights[i] = hannWeights[i] ** 0.85; - } - } else { - throw new Error(`Unsupported window function type: ${windowType}`); - } - - return weights; -} - -async function stftr( - samples: Float32Array, - fftOrder: number, - windowSize: number, - hopSize: number, - windowType: WindowType -) { - if (fftOrder % 2 != 0 || windowSize % 2 != 0) { - throw new Error("FFT order and window size must multiples of 2"); - } - - if (windowSize > fftOrder) { - throw new Error("Window size must be lesser or equal to the FFT size"); - } - - if (hopSize > windowSize) { - throw new Error("Hop size must be lesser or equal to the window size"); - } - - const halfWindowSize = windowSize / 2; - - const padding = new Float32Array(halfWindowSize); - samples = concatFloat32Arrays([padding, samples, padding]); - - const windowWeights = getWindowWeights(windowType, windowSize); - - const m = await getKissFFTInstance(); - const wasmMemory = new WasmMemoryManager(m); - - const statePtr = m._kiss_fftr_alloc(fftOrder, 0, 0, 0); - wasmMemory.wrapPointer(statePtr); - - const sampleCount = samples.length; - const frameBufferRef = wasmMemory.allocFloat32Array(fftOrder); - const binsBufferRef = wasmMemory.allocFloat32Array(fftOrder * 2); - - const frames: Float32Array[] = []; - - for (let offset = 0; offset < sampleCount; offset += hopSize) { - const windowSamples = samples.subarray(offset, offset + windowSize); - frameBufferRef.clear(); - - const frameBufferView = frameBufferRef.view; - - for (let i = 0; i < windowSamples.length; i++) { - frameBufferView[i] = windowSamples[i] * windowWeights[i]; - } - - binsBufferRef.clear(); - - m._kiss_fftr(statePtr, frameBufferRef.address, binsBufferRef.address); - - const bins = binsBufferRef.view.slice(0, fftOrder + 2); - frames.push(bins); - } - - wasmMemory.freeAll(); - - return frames; -} - -async function computeMelSpectogramUsingFilterbanks( - rawAudio: RawAudio, - fftOrder: number, - windowSize: number, - hopLength: number, - filterbanks: Filterbank[], - windowType: WindowType = "hann" -) { - const logger = new Logger(); - - logger.start("Compute short-time FFTs"); - const audioSamples = rawAudio.audioChannels[0]; - const fftFrames = await stftr( - audioSamples, - fftOrder, - windowSize, - hopLength, - windowType - ); - - logger.start("Convert FFT frames to a mel spectogram"); - const melSpectogram = fftFramesToMelSpectogram(fftFrames, filterbanks); - - logger.end(); - - return { melSpectogram, fftFrames }; -} - -function fftFramesToMelSpectogram( - fftFrames: Float32Array[], - melFilterbanks: Filterbank[] -) { - return fftFrames.map((fftFrame) => { - const powerSpectrum = fftFrameToPowerSpectrum(fftFrame); - return powerSpectrumToMelSpectrum(powerSpectrum, melFilterbanks); - }); -} - -function powerSpectrumToMelSpectrum( - powerSpectrum: Float32Array, - filterbanks: Filterbank[] -) { - const filterbankCount = filterbanks.length; - const melSpectrum = new Float32Array(filterbankCount); - - for (let melBandIndex = 0; melBandIndex < filterbankCount; melBandIndex++) { - const filterbank = filterbanks[melBandIndex]; - const filterbankStartIndex = filterbank.startIndex; - const filterbankWeights = filterbank.weights; - - if (filterbankStartIndex === -1) { - continue; - } - - let melBandValue = 0; - - for (let i = 0; i < filterbankWeights.length; i++) { - const powerSpectrumIndex = filterbankStartIndex + i; - - if (powerSpectrumIndex >= powerSpectrum.length) { - break; - } - - const weight = filterbankWeights[i]; - const powerSpectrumValue = powerSpectrum[powerSpectrumIndex]; - - melBandValue += weight * powerSpectrumValue; - } - - melSpectrum[melBandIndex] = melBandValue; - } - - return melSpectrum; -} - -function getMelFilterbanks( - powerSpectrumFrequenciesHz: Float32Array, - centerFrequenciesMel: Float32Array, - lowerFrequencyMel: number, - upperFrequencyMel: number -) { - const filterbankCount = centerFrequenciesMel.length; - const powerSpectrumFrequenciesMel = powerSpectrumFrequenciesHz.map( - (frequencyHz) => hertzToMel(frequencyHz) - ); - - const filterbanks: Filterbank[] = []; - - for ( - let filterbankIndex = 0; - filterbankIndex < filterbankCount; - filterbankIndex++ - ) { - const centerFrequency = centerFrequenciesMel[filterbankIndex]; - - const leftFrequency = - filterbankIndex > 0 - ? centerFrequenciesMel[filterbankIndex - 1] - : lowerFrequencyMel; - const rightFrequency = - filterbankIndex < filterbankCount - 1 - ? centerFrequenciesMel[filterbankIndex + 1] - : upperFrequencyMel; - - const width = rightFrequency - leftFrequency; - const halfWidth = width / 2; - - let startIndex = -1; - let weights: number[] = []; - - let weightSum = 0; - - for ( - let powerSpectrumBandIndex = 0; - powerSpectrumBandIndex < powerSpectrumFrequenciesMel.length; - powerSpectrumBandIndex++ - ) { - const powerSpectrumBandFrequencyMel = - powerSpectrumFrequenciesMel[powerSpectrumBandIndex]; - - let weight = 0; - - if ( - powerSpectrumBandFrequencyMel >= leftFrequency && - powerSpectrumBandFrequencyMel <= centerFrequency - ) { - weight = - (powerSpectrumBandFrequencyMel - leftFrequency) / halfWidth; - } else if ( - powerSpectrumBandFrequencyMel > centerFrequency && - powerSpectrumBandFrequencyMel <= rightFrequency - ) { - weight = - (rightFrequency - powerSpectrumBandFrequencyMel) / - halfWidth; - } - - if (weight > 0) { - if (startIndex == -1) { - startIndex = powerSpectrumBandIndex; - } - - weights.push(weight); - weightSum += weight; - } else if (startIndex != -1) { - break; - } - } - - weights = weights.map((weight) => weight / weightSum); - - filterbanks.push({ startIndex, weights }); - } - - return filterbanks; -} - -function getMelFilterbanksCenterFrequencies( - melBandCount: number, - lowerFrequencyMel: number, - upperFrequencyMel: number -) { - const stepSizeMel = - (upperFrequencyMel - lowerFrequencyMel) / (melBandCount + 1); - - const centerFrequencies = new Float32Array(melBandCount); - - for (let i = 0; i < melBandCount; i++) { - centerFrequencies[i] = lowerFrequencyMel + (i + 1) * stepSizeMel; - } - - return centerFrequencies; -} - -function hertzToMel(frequency: number) { - return 2595.0 * Math.log10(1.0 + frequency / 700.0); -} - -type Filterbank = { - startIndex: number; - weights: number[]; -}; - -async function computeMelSpectogram( - rawAudio: RawAudio, - fftOrder: number, - windowSize: number, - hopLength: number, - filterbankCount: number, - lowerFrequencyHz: number, - upperFrequencyHz: number, - windowType: WindowType = "hann" -) { - const logger = new Logger(); - - logger.start("Compute mel filterbank"); - const binCount = fftOrder / 2 + 2; - const nyquistFrequency = rawAudio.sampleRate / 2; - const binFrequencies = getBinFrequencies(binCount, nyquistFrequency); - - const lowerFrequencyMel = hertzToMel(lowerFrequencyHz); - const upperFrequencyMel = hertzToMel(upperFrequencyHz); - - const filterbanksCenterFrequencies = getMelFilterbanksCenterFrequencies( - filterbankCount, - lowerFrequencyMel, - upperFrequencyMel - ); - const melFilterbanks = getMelFilterbanks( - binFrequencies, - filterbanksCenterFrequencies, - lowerFrequencyMel, - upperFrequencyMel - ); - - logger.end(); - - return computeMelSpectogramUsingFilterbanks( - rawAudio, - fftOrder, - windowSize, - hopLength, - melFilterbanks, - windowType - ); -} - -function powerToDecibels(power: number) { - return power <= 0.0000000001 ? -100 : 10.0 * Math.log10(power); -} - -function melSpectrumToMFCC( - melSpectrum: Float32Array, - mfccFeatureCount: number, - dctMatrix: Float32Array[], - normalization: "none" | "orthonormal" = "orthonormal" -) { - const melBandCount = melSpectrum.length; - - let firstFeatureNormalizationFactor: number; - let nonfirstFeatureNormalizationFactor: number; - - if (normalization == "orthonormal") { - firstFeatureNormalizationFactor = Math.sqrt(1 / (4 * mfccFeatureCount)); - nonfirstFeatureNormalizationFactor = Math.sqrt( - 1 / (2 * mfccFeatureCount) - ); - } else { - firstFeatureNormalizationFactor = 1; - nonfirstFeatureNormalizationFactor = 1; - } - - const mfcc = new Float32Array(mfccFeatureCount); - - for ( - let mfccFeatureIndex = 0; - mfccFeatureIndex < mfccFeatureCount; - mfccFeatureIndex++ - ) { - const dctMatrixRow = dctMatrix[mfccFeatureIndex]; - - let sum = 0; - - for (let j = 0; j < melBandCount; j++) { - const dctCoefficient = dctMatrixRow[j]; - const logMel = powerToDecibels(melSpectrum[j]); - //const logMel = Math.log(1e-40 + melSpectrum[j]) - - sum += dctCoefficient * logMel; - } - - const normalizationFactor = - mfccFeatureIndex == 0 - ? firstFeatureNormalizationFactor - : nonfirstFeatureNormalizationFactor; - - //mfcc[mfccFeatureIndex] = normalizationFactor * sum - mfcc[mfccFeatureIndex] = normalizationFactor * 2 * sum; // Sum multiplied by 2 to match with librosa - } - - return mfcc; -} - -function createDCTType2CoefficientMatrix( - mfccFeatureCount: number, - melBandCount: number -) { - const dctMatrix = new Array(mfccFeatureCount); - - for ( - let mfccFeatureIndex = 0; - mfccFeatureIndex < mfccFeatureCount; - mfccFeatureIndex++ - ) { - const row = new Float32Array(melBandCount); - - const innerMultiplier = (Math.PI * mfccFeatureIndex) / melBandCount; - - for ( - let melBandIndex = 0; - melBandIndex < melBandCount; - melBandIndex++ - ) { - row[melBandIndex] = Math.cos( - innerMultiplier * (melBandIndex + 0.5) - ); - } - - dctMatrix[mfccFeatureIndex] = row; - } - - return dctMatrix; -} - -function melSpectogramToMFCCs( - melSpectogram: Float32Array[], - mfccFeatureCount: number -) { - const melBandCount = melSpectogram[0].length; - const dctMatrix = createDCTType2CoefficientMatrix( - mfccFeatureCount, - melBandCount - ); - - const mfccs = melSpectogram.map((frame) => - melSpectrumToMFCC(frame, mfccFeatureCount, dctMatrix) - ); - - return mfccs; -} - -function applyEmphasis( - samples: Float32Array, - emphasisFactor = 0.97, - initialState = 0 -) { - const processedSamples = new Float32Array(samples.length); - - processedSamples[0] = samples[0] - emphasisFactor * initialState; - - for (let i = 1; i < processedSamples.length; i++) { - processedSamples[i] = samples[i] - emphasisFactor * samples[i - 1]; - } - - return processedSamples; -} - -function cloneRawAudio(rawAudio: RawAudio): RawAudio { - return { - audioChannels: rawAudio.audioChannels.map((channel) => channel.slice()), - sampleRate: rawAudio.sampleRate, - }; -} - -let speexResamplerInstance: any; - -async function resampleAudioSpeex( - rawAudio: RawAudio, - outSampleRate: number, - quality = 0 -): Promise { - const channelCount = rawAudio.audioChannels.length; - const inSampleRate = rawAudio.sampleRate; - - const totalSampleCount = rawAudio.audioChannels[0].length; - const sampleRateRatio = outSampleRate / inSampleRate; - - if (inSampleRate === outSampleRate) { - return cloneRawAudio(rawAudio); - } - - if (totalSampleCount === 0) { - return { - ...cloneRawAudio(rawAudio), - sampleRate: outSampleRate, - } as RawAudio; - } - - const m = await getSpeexResamplerInstance(); - const wasmMemory = new WasmMemoryManager(m); - - function speexResultCodeToString(resultCode: number) { - const errorStrPtr = m._speex_resampler_strerror(resultCode); - const errorStrRef = wasmMemory.wrapUint8Array(errorStrPtr, 1024); - const message = errorStrRef.readAsNullTerminatedUtf8String(); - - return message; - } - - const initErrRef = wasmMemory.allocInt32(); - const resamplerStateAddress = m._speex_resampler_init( - channelCount, - inSampleRate, - outSampleRate, - quality, - initErrRef.address - ); - let resultCode = initErrRef.value; - - if (resultCode != 0) { - throw new Error( - `Speex resampler failed while initializing with code ${resultCode}: ${speexResultCodeToString(resultCode)}` - ); - } - - const inputLatency = m._speex_resampler_get_input_latency( - resamplerStateAddress - ); - const outputLatency = m._speex_resampler_get_output_latency( - resamplerStateAddress - ); - - const maxChunkSize = 2 ** 20; - - const inputChunkSampleCountRef = wasmMemory.allocInt32(); - const outputChunkSampleCountRef = wasmMemory.allocInt32(); - - const inputChunkSamplesRef = wasmMemory.allocFloat32Array(maxChunkSize * 2); - const outputChunkSamplesRef = wasmMemory.allocFloat32Array( - Math.floor(maxChunkSize * sampleRateRatio) * 2 - ); - - const resampledAudioChunksForChannels: Float32Array[][] = []; - - for (let channelIndex = 0; channelIndex < channelCount; channelIndex++) { - resampledAudioChunksForChannels.push([]); - } - - for (let channelIndex = 0; channelIndex < channelCount; channelIndex++) { - for (let readOffset = 0; readOffset < totalSampleCount; ) { - const isLastChunk = readOffset + maxChunkSize >= totalSampleCount; - - const inputPaddingSize = isLastChunk ? inputLatency : 0; - const maxSamplesToRead = - Math.min(maxChunkSize, totalSampleCount - readOffset) + - inputPaddingSize; - - const maxSamplesToWrite = outputChunkSamplesRef.length; - - const inputChunkSamplesForChannel = rawAudio.audioChannels[ - channelIndex - ].slice(readOffset, readOffset + maxSamplesToRead); - - inputChunkSampleCountRef.value = maxSamplesToRead; - outputChunkSampleCountRef.value = maxSamplesToWrite; - - inputChunkSamplesRef.view.set(inputChunkSamplesForChannel); - resultCode = m._speex_resampler_process_float( - resamplerStateAddress, - channelIndex, - inputChunkSamplesRef.address, - inputChunkSampleCountRef.address, - outputChunkSamplesRef.address, - outputChunkSampleCountRef.address - ); - - if (resultCode != 0) { - throw new Error( - `Speex resampler failed while resampling with code ${resultCode}: ${speexResultCodeToString(resultCode)}` - ); - } - - const samplesReadCount = inputChunkSampleCountRef.value; - const samplesWrittenCount = outputChunkSampleCountRef.value; - - const resampledChannelAudio = outputChunkSamplesRef.view.slice( - 0, - samplesWrittenCount - ); - - resampledAudioChunksForChannels[channelIndex].push( - resampledChannelAudio - ); - - readOffset += samplesReadCount; - } - } - - m._speex_resampler_destroy(resamplerStateAddress); - wasmMemory.freeAll(); - - const resampledAudio: RawAudio = { - audioChannels: [], - sampleRate: outSampleRate, - }; - - for (let i = 0; i < channelCount; i++) { - resampledAudioChunksForChannels[i][0] = - resampledAudioChunksForChannels[i][0].slice(outputLatency); - - resampledAudio.audioChannels.push( - concatFloat32Arrays(resampledAudioChunksForChannels[i]) - ); - } - - return resampledAudio; -} - -async function getSpeexResamplerInstance() { - if (!speexResamplerInstance) { - const { default: SpeexResamplerInitializer } = await import( - "@echogarden/speex-resampler-wasm" - ); - - speexResamplerInstance = await SpeexResamplerInitializer(); - } - - return speexResamplerInstance; -} - -function clip(num: number, min: number, max: number) { - return Math.max(min, Math.min(max, num)); -} -function normalizeVectors( - vectors: number[][], - kind: "population" | "sample" = "population" -) { - const vectorCount = vectors.length; - - if (vectorCount == 0) { - return { normalizedVectors: [], mean: [], stdDeviation: [] }; - } - - const featureCount = vectors[0].length; - - const mean = meanOfVectors(vectors); - const stdDeviation = stdDeviationOfVectors(vectors, kind, mean); - - const normalizedVectors: number[][] = []; - - for (const vector of vectors) { - const normalizedVector = createVector(featureCount); - - for ( - let featureIndex = 0; - featureIndex < featureCount; - featureIndex++ - ) { - normalizedVector[featureIndex] = - (vector[featureIndex] - mean[featureIndex]) / - stdDeviation[featureIndex]; - - normalizedVector[featureIndex] = zeroIfNaN( - normalizedVector[featureIndex] - ); - } - - normalizedVectors.push(normalizedVector); - } - - return { normalizedVectors, mean, stdDeviation }; -} - -function meanOfVectors(vectors: number[][]) { - const vectorCount = vectors.length; - - if (vectorCount == 0) { - return []; - } - - const featureCount = vectors[0].length; - - const result = createVector(featureCount); - - for (const vector of vectors) { - for ( - let featureIndex = 0; - featureIndex < featureCount; - featureIndex++ - ) { - result[featureIndex] += vector[featureIndex]; - } - } - - for (let featureIndex = 0; featureIndex < featureCount; featureIndex++) { - result[featureIndex] /= vectorCount; - } - - return result; -} - -function stdDeviationOfVectors( - vectors: number[][], - kind: "population" | "sample" = "population", - mean?: number[] -) { - return varianceOfVectors(vectors, kind, mean).map((v) => Math.sqrt(v)); -} - -function varianceOfVectors( - vectors: number[][], - kind: "population" | "sample" = "population", - mean?: number[] -) { - const vectorCount = vectors.length; - - if (vectorCount == 0) { - return []; - } - - const sampleSizeMetric = - kind == "population" || vectorCount == 1 - ? vectorCount - : vectorCount - 1; - const featureCount = vectors[0].length; - - if (!mean) { - mean = meanOfVectors(vectors); - } - - const result = createVector(featureCount); - - for (const vector of vectors) { - for (let i = 0; i < featureCount; i++) { - result[i] += (vector[i] - mean[i]) ** 2; - } - } - - for (let i = 0; i < featureCount; i++) { - result[i] /= sampleSizeMetric; - } - - return result; -} - -function euclidianDistance( - vector1: ArrayLike, - vector2: ArrayLike -) { - return Math.sqrt(squaredEuclidianDistance(vector1, vector2)); -} - -function squaredEuclidianDistance( - vector1: ArrayLike, - vector2: ArrayLike -) { - if (vector1.length !== vector2.length) { - throw new Error("Vectors are not the same length"); - } - - const elementCount = vector1.length; - - if (elementCount === 0) { - return 0; - } - - let sum = 0.0; - - for (let i = 0; i < elementCount; i++) { - sum += (vector1[i] - vector2[i]) ** 2; - } - - return sum; -} - -function euclidianDistance13Dim( - vector1: ArrayLike, - vector2: ArrayLike -) { - return Math.sqrt(squaredEuclidianDistance13Dim(vector1, vector2)); -} - -function squaredEuclidianDistance13Dim( - vector1: ArrayLike, - vector2: ArrayLike -) { - // Assumes the input has 13 dimensions (optimized for 13-dimensional MFCC vectors) - - const result = - (vector1[0] - vector2[0]) ** 2 + - (vector1[1] - vector2[1]) ** 2 + - (vector1[2] - vector2[2]) ** 2 + - (vector1[3] - vector2[3]) ** 2 + - (vector1[4] - vector2[4]) ** 2 + - (vector1[5] - vector2[5]) ** 2 + - (vector1[6] - vector2[6]) ** 2 + - (vector1[7] - vector2[7]) ** 2 + - (vector1[8] - vector2[8]) ** 2 + - (vector1[9] - vector2[9]) ** 2 + - (vector1[10] - vector2[10]) ** 2 + - (vector1[11] - vector2[11]) ** 2 + - (vector1[12] - vector2[12]) ** 2; - - return result; -} - -function cosineDistancePrecomputedMagnitudes( - vector1: ArrayLike, - vector2: ArrayLike, - magnitude1: number, - magnitude2: number -) { - return ( - 1 - - cosineSimilarityPrecomputedMagnitudes( - vector1, - vector2, - magnitude1, - magnitude2 - ) - ); -} - -function cosineSimilarityPrecomputedMagnitudes( - vector1: ArrayLike, - vector2: ArrayLike, - magnitude1: number, - magnitude2: number -) { - if (vector1.length != vector2.length) { - throw new Error("Vectors are not the same length"); - } - - if (vector1.length == 0) { - return 0; - } - - const featureCount = vector1.length; - - let dotProduct = 0.0; - - for (let i = 0; i < featureCount; i++) { - dotProduct += vector1[i] * vector2[i]; - } - - let result = dotProduct / (magnitude1 * magnitude2 + 1e-40); - - result = zeroIfNaN(result); - result = clip(result, -1.0, 1.0); - - return result; -} - -function magnitude(vector: ArrayLike) { - const featureCount = vector.length; - - let squaredMagnitude = 0.0; - - for (let i = 0; i < featureCount; i++) { - squaredMagnitude += vector[i] ** 2; - } - - return Math.sqrt(squaredMagnitude); -} - -function createVector(elementCount: number, initialValue = 0.0) { - const result: number[] = new Array(elementCount); - - for (let i = 0; i < elementCount; i++) { - result[i] = initialValue; - } - - return result; -} - -function createVectorForIntegerRange(start: number, end: number) { - const newVector: number[] = []; - - for (let i = start; i < end; i++) { - newVector.push(i); - } - - return newVector; -} - -function zeroIfNaN(val: number) { - if (isNaN(val)) { - return 0; - } else { - return val; - } -} - -type MfccOptions = { - filterbankCount?: number; - featureCount?: number; - fftOrder?: number; - lowerFreq?: number; - upperFreq?: number; - windowDuration?: number; - hopDuration?: number; - emphasisFactor?: number; - analysisSampleRate?: number; - lifteringFactor?: number; - normalize?: boolean; - zeroFirstCoefficient?: boolean; -}; - -const defaultMfccOptions: MfccOptions = { - filterbankCount: 40, - featureCount: 13, - fftOrder: 512, - lowerFreq: 133.3333, - upperFreq: 6855.4976, - windowDuration: 0.025, - hopDuration: 0.01, - emphasisFactor: 0.97, - analysisSampleRate: 16000, - lifteringFactor: 0, - normalize: false, - zeroFirstCoefficient: false, -}; - -function extendDefaultMfccOptions(options: MfccOptions) { - return extendDeep(defaultMfccOptions, options); -} - -function applyLiftering(mfccs: number[][], lifteringFactor: number) { - const featureCount = mfccs[0].length; - - const lifterMultipliers = new Float32Array(featureCount); - - for (let i = 0; i < featureCount; i++) { - lifterMultipliers[i] = - 1 + - (lifteringFactor / 2) * - Math.sin((Math.PI * (i + 1)) / lifteringFactor); - } - - const lifteredMfccs: number[][] = []; - - for (const mfcc of mfccs) { - const lifteredMfcc = new Array(featureCount); - - for (let i = 0; i < featureCount; i++) { - lifteredMfcc[i] = mfcc[i] * lifterMultipliers[i]; - } - - lifteredMfccs.push(lifteredMfcc); - } - - return lifteredMfccs; -} - -function compactPath(path: AlignmentPath) { - const compactedPath: CompactedPath = []; - - for (let i = 0; i < path.length; i++) { - const pathEntry = path[i]; - - if (compactedPath.length <= pathEntry.source) { - compactedPath.push({ first: pathEntry.dest, last: pathEntry.dest }); - } else { - compactedPath[compactedPath.length - 1].last = pathEntry.dest; - } - } - - return compactedPath; -} - -async function computeMFCCs(monoAudio: RawAudio, options: MfccOptions = {}) { - const logger = new Logger(); - logger.start("Initialize options"); - - if (monoAudio.audioChannels.length != 1) { - throw new Error("Audio must be mono"); - } - - options = extendDefaultMfccOptions(options); - - const analysisSampleRate = options.analysisSampleRate!; - const featureCount = options.featureCount!; - - const fftOrder = options.fftOrder!; - - const windowDuration = options.windowDuration!; - const windowSize = windowDuration * analysisSampleRate; - const hopDuration = options.hopDuration!; - const hopLength = hopDuration * analysisSampleRate; - - const filterbankCount = options.filterbankCount!; - const lowerFrequencyHz = options.lowerFreq!; - const upperFrequencyHz = options.upperFreq!; - - const emphasisFactor = options.emphasisFactor!; - const lifteringFactor = options.lifteringFactor!; - const zeroFirstCoefficient = options.zeroFirstCoefficient!; - - logger.start( - `Resample audio to analysis sample rate (${analysisSampleRate}Hz)` - ); - const resampledAudio = await resampleAudioSpeex( - monoAudio, - analysisSampleRate - ); - - let mfccs: number[][]; - - if (emphasisFactor > 0) { - logger.start("Apply emphasis"); - resampledAudio.audioChannels[0] = applyEmphasis( - resampledAudio.audioChannels[0], - emphasisFactor - ); - } - - logger.start("Compute Mel spectogram"); - const { melSpectogram } = await computeMelSpectogram( - resampledAudio, - fftOrder, - windowSize, - hopLength, - filterbankCount, - lowerFrequencyHz, - upperFrequencyHz - ); - - logger.start("Extract MFCCs from Mel spectogram"); - const mfccsFloat32 = melSpectogramToMFCCs(melSpectogram, featureCount); - - mfccs = mfccsFloat32.map((mfcc) => Array.from(mfcc)); - - if (options.normalize!) { - logger.start("Normalize MFCCs"); - - const { normalizedVectors, mean, stdDeviation } = - normalizeVectors(mfccs); - mfccs = normalizedVectors; - //mfccs = mfccs.map(mfcc => subtractVectors(mfcc, mean)) - } - - if (lifteringFactor > 0) { - logger.start("Apply liftering to MFCCs"); - mfccs = applyLiftering(mfccs, lifteringFactor); - } - - if (zeroFirstCoefficient) { - for (const mfcc of mfccs) { - mfcc[0] = 0; - } - } - - logger.end(); - - return mfccs; -} - -type DtwGranularity = "xx-low" | "x-low" | "low" | "medium" | "high" | "x-high"; - -type AlignmentPath = AlignmentPathEntry[]; - -type AlignmentPathEntry = { - source: number; - dest: number; -}; - -type CompactedPath = CompactedPathEntry[]; - -type CompactedPathEntry = { - first: number; - last: number; -}; - -function getStartingSilentSampleCount( - audioSamples: Float32Array, - amplitudeThresholdDecibels = defaultSilenceThresholdDecibels -) { - const minSampleAmplitude = decibelsToGainFactor(amplitudeThresholdDecibels); - - let silentSampleCount = 0; - - for (let i = 0; i < audioSamples.length - 1; i++) { - if (Math.abs(audioSamples[i]) > minSampleAmplitude) { - break; - } - - silentSampleCount += 1; - } - - return silentSampleCount; -} - -function decibelsToGainFactor(decibels: number) { - return decibels <= -100.0 ? 0 : Math.pow(10, 0.05 * decibels); -} - -const defaultSilenceThresholdDecibels = -40; - -function getMappedFrameIndexForPath( - referenceFrameIndex: number, - compactedPath: CompactedPath, - mappingKind: "first" | "last" = "first" -) { - if (compactedPath.length == 0) { - return 0; - } - - referenceFrameIndex = clip( - referenceFrameIndex, - 0, - compactedPath.length - 1 - ); - - const compactedPathEntry = compactedPath[referenceFrameIndex]; - - let mappedFrameIndex: number; - - if (mappingKind == "first") { - mappedFrameIndex = compactedPathEntry.first; - } else { - mappedFrameIndex = compactedPathEntry.last; - } - - return mappedFrameIndex; -} - -function getEndingSilentSampleCount( - audioSamples: Float32Array, - amplitudeThresholdDecibels = defaultSilenceThresholdDecibels -) { - const minSampleAmplitude = decibelsToGainFactor(amplitudeThresholdDecibels); - - let silentSampleCount = 0; - - for (let i = audioSamples.length - 1; i >= 0; i--) { - if (Math.abs(audioSamples[i]) > minSampleAmplitude) { - break; - } - - silentSampleCount += 1; - } - - return silentSampleCount; -} - -function getMappedTimelineEntry( - timelineEntry: TimelineEntry, - sourceRawAudio: RawAudio, - framesPerSecond: number, - compactedPath: CompactedPath, - recurse = true -): TimelineEntry { - const referenceStartFrameIndex = Math.floor( - timelineEntry.startTime * framesPerSecond - ); - const referenceEndFrameIndex = Math.floor( - timelineEntry.endTime * framesPerSecond - ); - - if (referenceStartFrameIndex < 0 || referenceEndFrameIndex < 0) { - throw new Error( - "Unexpected: encountered a negative timestamp in timeline" - ); - } - - const mappedStartFrameIndex = getMappedFrameIndexForPath( - referenceStartFrameIndex, - compactedPath, - "first" - ); - const mappedEndFrameIndex = getMappedFrameIndexForPath( - referenceEndFrameIndex, - compactedPath, - "first" - ); - - let innerTimeline: Timeline | undefined; - - if (recurse && timelineEntry.timeline != null) { - innerTimeline = timelineEntry.timeline.map((entry) => - getMappedTimelineEntry( - entry, - sourceRawAudio, - framesPerSecond, - compactedPath, - recurse - ) - ); - } - - // Trim silent samples from start and end of mapped entry range - const sourceSamplesPerFrame = Math.floor( - sourceRawAudio.sampleRate / framesPerSecond - ); - - let startSampleIndex = mappedStartFrameIndex * sourceSamplesPerFrame; - let endSampleIndex = mappedEndFrameIndex * sourceSamplesPerFrame; - - const frameSamples = sourceRawAudio.audioChannels[0].subarray( - startSampleIndex, - endSampleIndex - ); - - const silenceThresholdDecibels = -40; - - startSampleIndex += getStartingSilentSampleCount( - frameSamples, - silenceThresholdDecibels - ); - endSampleIndex -= getEndingSilentSampleCount( - frameSamples, - silenceThresholdDecibels - ); - - endSampleIndex = Math.max(endSampleIndex, startSampleIndex); - - // Build mapped timeline entry - const startTime = startSampleIndex / sourceRawAudio.sampleRate; - const endTime = endSampleIndex / sourceRawAudio.sampleRate; - - return { - type: timelineEntry.type, - text: timelineEntry.text, - - startTime, - endTime, - - timeline: innerTimeline, - }; -} - -function getMfccOptionsForGranularity(granularity: DtwGranularity) { - let mfccOptions: MfccOptions; - - if (granularity == "xx-low") { - mfccOptions = { - windowDuration: 0.4, - hopDuration: 0.16, - fftOrder: 8192, - }; - } else if (granularity == "x-low") { - mfccOptions = { - windowDuration: 0.2, - hopDuration: 0.08, - fftOrder: 4096, - }; - } else if (granularity == "low") { - mfccOptions = { - windowDuration: 0.1, - hopDuration: 0.04, - fftOrder: 2048, - }; - } else if (granularity == "medium") { - mfccOptions = { - windowDuration: 0.05, - hopDuration: 0.02, - fftOrder: 1024, - }; - } else if (granularity == "high") { - mfccOptions = { - windowDuration: 0.025, - hopDuration: 0.01, - fftOrder: 512, - }; - } else if (granularity == "x-high") { - mfccOptions = { - windowDuration: 0.02, - hopDuration: 0.005, - fftOrder: 512, - }; - } else { - throw new Error(`Invalid granularity setting: '${granularity}'`); - } - - return mfccOptions; -} - -async function alignUsingDtw( - sourceRawAudio: RawAudio, - referenceRawAudio: RawAudio, - referenceTimeline: Timeline, - granularities: DtwGranularity[], - windowDurations: number[] -) { - const logger = new Logger(); - - if (windowDurations.length == 0) { - throw new Error(`Window durations array has length 0.`); - } - - if (windowDurations.length != granularities.length) { - throw new Error( - `Window durations and granularities are not the same length.` - ); - } - - const rawAudioDuration = getRawAudioDuration(sourceRawAudio); - - let framesPerSecond: number; - let compactedPath: CompactedPath; - let relativeCenters: number[] | undefined; - - for (let passIndex = 0; passIndex < windowDurations.length; passIndex++) { - const granularity = granularities[passIndex]; - const windowDuration = windowDurations[passIndex]; - - logger.logTitledMessage( - `\nStarting alignment pass ${passIndex + 1}/${windowDurations.length}`, - `granularity: ${granularity}, max window duration: ${windowDuration}s` - ); - - const mfccOptions = extendDefaultMfccOptions({ - ...getMfccOptionsForGranularity(granularity), - zeroFirstCoefficient: true, - }) as MfccOptions; - - framesPerSecond = 1 / mfccOptions.hopDuration!; - - // Compute reference MFCCs - logger.start("Compute reference MFCC features"); - const referenceMfccs = await computeMFCCs( - referenceRawAudio, - mfccOptions - ); - - // Compute source MFCCs - logger.start("Compute source MFCC features"); - const sourceMfccs = await computeMFCCs(sourceRawAudio, mfccOptions); - logger.end(); - - // Compute path - logger.logTitledMessage( - `DTW cost matrix memory size`, - `${getCostMatrixMemorySizeMB(referenceMfccs.length, sourceMfccs.length, windowDuration * framesPerSecond).toFixed(1)}MB` - ); - - if (passIndex == 0) { - const minRecommendedWindowDuration = 0.2 * rawAudioDuration; - - if (windowDuration < minRecommendedWindowDuration) { - logger.logTitledMessage( - "Warning", - `Maximum DTW window duration is set to ${windowDuration.toFixed(1)}s, which is smaller than 20% of the source audio duration of ${rawAudioDuration.toFixed(1)}s. This may lead to suboptimal results in some cases. Consider increasing window duration if needed.`, - "warning" - ); - } - } - - logger.start("Align reference and source MFCC features using DTW"); - const dtwWindowLength = Math.floor(windowDuration * framesPerSecond); - - let centerIndexes: number[] | undefined; - - if (relativeCenters) { - centerIndexes = []; - - for (let i = 0; i < referenceMfccs.length; i++) { - const relativeReferencePosition = i / referenceMfccs.length; - - const relativeCenterIndex = Math.floor( - relativeReferencePosition * relativeCenters!.length - ); - const relativeCenter = relativeCenters[relativeCenterIndex]; - const centerIndex = Math.floor( - relativeCenter * sourceMfccs.length - ); - - centerIndexes.push(centerIndex); - } - } - - const rawPath = await alignMFCC_DTW( - referenceMfccs, - sourceMfccs, - dtwWindowLength, - undefined, - centerIndexes - ); - - compactedPath = compactPath(rawPath); - - relativeCenters = compactedPath.map( - (entry) => (entry.first + entry.last) / 2 / sourceMfccs.length - ); - - logger.end(); - } - - logger.start("\nConvert path to timeline"); - - const mappedTimeline = referenceTimeline.map((entry) => - getMappedTimelineEntry( - entry, - sourceRawAudio, - framesPerSecond, - compactedPath - ) - ); - - logger.end(); - - return mappedTimeline; -} - -class OpenPromise { - promise: Promise; - resolve: (value: T) => void = () => { - throw new Error("Open promise resolved before initialization"); - }; - reject: (reason?: any) => void = () => { - throw new Error("Open promise rejected before initialization"); - }; - - constructor() { - this.promise = new Promise((resolve, reject) => { - this.resolve = resolve; - this.reject = reject; - }); - } -} - -let kuromojiTokenizer: any; - -async function getKuromojiTokenizer() { - if (kuromojiTokenizer) { - return kuromojiTokenizer; - } - - const { default: kuromoji } = await import("kuromoji"); - - const resultOpenPromise = new OpenPromise(); - - const kuromojiScriptPath = await resolveModuleScriptPath("kuromoji"); - const dictionaryPath = path.join( - path.dirname(kuromojiScriptPath), - "..", - "/dict" - ); - - kuromoji.builder({ dicPath: dictionaryPath }).build(function ( - error: any, - tokenizer: any - ) { - if (error) { - resultOpenPromise.reject(error); - return; - } - - kuromojiTokenizer = tokenizer; - - resultOpenPromise.resolve(kuromojiTokenizer); - }); - - return resultOpenPromise.promise; -} - -async function splitJapaneseTextToWords_Kuromoji(text: string) { - const tokenizer = await getKuromojiTokenizer(); - - const results: any[] = tokenizer.tokenize(text); - const words = results.map((entry) => entry.surface_form); - - return words; -} - -let JiebaWasmInstance: typeof import("jieba-wasm"); -async function getWasmInstance() { - if (!JiebaWasmInstance) { - const { default: JibeaWasm } = await import("jieba-wasm"); - // @ts-ignore - JiebaWasmInstance = JibeaWasm; - } - - return JiebaWasmInstance; -} - -async function splitChineseTextToWords_Jieba( - text: string, - fineGrained = false, - useHMM = true -) { - const jieba = await getWasmInstance(); - - if (!fineGrained) { - return jieba.cut(text, useHMM); - } else { - const results = jieba.tokenize(text, "search", useHMM); - - const startOffsetsSet = new Set(); - const endOffsetsSet = new Set(); - - for (const result of results) { - startOffsetsSet.add(result.start); - endOffsetsSet.add(result.end); - } - - const startOffsets = Array.from(startOffsetsSet); - startOffsets.sort((a, b) => a - b); - - const endOffsets = Array.from(endOffsetsSet); - endOffsets.sort((a, b) => a - b); - - const words: string[] = []; - - for (let i = 0; i < startOffsets.length; i++) { - const wordStartOffset = startOffsets[i]; - - function getWordEndOffset() { - if (i < startOffsets.length - 1) { - const nextWordStartOffset = startOffsets[i + 1]; - - for (let j = 0; j < endOffsets.length - 1; j++) { - const currentEndOffset = endOffsets[j]; - const nextEndOffset = endOffsets[j + 1]; - - if (currentEndOffset >= nextWordStartOffset) { - return nextWordStartOffset; - } else if ( - currentEndOffset > wordStartOffset && - currentEndOffset < nextWordStartOffset && - nextEndOffset > nextWordStartOffset - ) { - return currentEndOffset; - } - } - } - - return endOffsets[endOffsets.length - 1]; - } - - const wordEndOffset = getWordEndOffset(); - - words.push(text.substring(wordStartOffset, wordEndOffset)); - } - - return words; - } -} - -function getShortLanguageCode(langCode: string) { - const dashIndex = langCode.indexOf("-"); - - if (dashIndex == -1) { - return langCode; - } - - return langCode.substring(0, dashIndex).toLowerCase(); -} - -abstract class ValueRef { - protected ptr: number; - private readonly manager: WasmMemoryManager; - - protected get module() { - return this.manager.wasmModule; - } - - constructor(ptr: number, manager: WasmMemoryManager) { - this.ptr = ptr; - this.manager = manager; - } - - get value(): T { - this.assertNotFreed(); - return this.getValue(); - } - - set value(newValue: T) { - this.assertNotFreed(); - this.setValue(newValue); - } - - abstract getValue(): T; - abstract setValue(newValue: T): void; - - get address() { - this.assertNotFreed(); - return this.ptr; - } - - clear() { - this.assertNotFreed(); - - if (typeof this.value == "number") { - this.value = 0 as any; - } else if (typeof this.value == "string") { - throw new Error("Unimplemented"); - } - - return this; - } - - free() { - this.manager.free(this as any); - } - - clearAddress() { - this.ptr = 0; - } - - get isFreed() { - return this.ptr == 0; - } - - protected assertNotFreed() { - if (this.isFreed) { - throw new Error("Attempt to read a freed WASM value reference."); - } - } -} - -class Int8Ref extends ValueRef { - getValue() { - return this.module.HEAP8[this.ptr >>> 0] as number; - } - - setValue(newValue: number) { - this.module.HEAP8[this.ptr >>> 0] = newValue; - } -} - -class Uint8Ref extends ValueRef { - getValue() { - return this.module.HEAPU8[this.ptr >>> 0] as number; - } - - setValue(newValue: number) { - this.module.HEAPU8[this.ptr >>> 0] = newValue; - } -} - -class Int16Ref extends ValueRef { - getValue() { - return this.module.HEAP16[this.ptr >>> 1] as number; - } - - setValue(newValue: number) { - this.module.HEAP16[this.ptr >>> 1] = newValue; - } -} - -class Uint16Ref extends ValueRef { - getValue() { - return this.module.HEAPU16[this.ptr >>> 1] as number; - } - - setValue(newValue: number) { - this.module.HEAPU16[this.ptr >>> 1] = newValue; - } -} - -class Int32Ref extends ValueRef { - getValue() { - return this.module.HEAP32[this.ptr >>> 2] as number; - } - - setValue(newValue: number) { - this.module.HEAP32[this.ptr >>> 2] = newValue; - } -} - -class Uint32Ref extends ValueRef { - getValue() { - return this.module.HEAPU32[this.ptr >>> 2] as number; - } - - setValue(newValue: number) { - this.module.HEAPU32[this.ptr >>> 2] = newValue; - } -} - -class PointerRef extends Uint32Ref {} - -class Float32Ref extends ValueRef { - getValue() { - return this.module.HEAPF32[this.ptr >>> 2] as number; - } - - setValue(newValue: number) { - this.module.HEAPF32[this.ptr >>> 2] = newValue; - } -} - -class Float64Ref extends ValueRef { - getValue() { - return this.module.HEAPF64[this.ptr >>> 3] as number; - } - - setValue(newValue: number) { - this.module.HEAPF64[this.ptr >>> 3] = newValue; - } -} - -class NullTerminatedUtf8StringRef extends ValueRef { - getValue() { - const ptr = this.ptr >>> 0; - - const heapU8 = this.module.HEAPU8; - - const endByteOffset = heapU8.subarray(ptr).indexOf(0); - - const strBytes = heapU8.subarray(ptr, ptr + endByteOffset); - - const str = Buffer.from(strBytes).toString("utf8"); - - return str; - } - - setValue(newValue: string) { - throw new Error("Unimplemented"); - } -} - -abstract class TypedArrayRef { - protected ptr: number; - readonly length: number; - private readonly manager: WasmMemoryManager; - - get module() { - return this.manager.wasmModule; - } - - constructor(ptr: number, length: number, manager: WasmMemoryManager) { - this.ptr = ptr; - this.length = length; - this.manager = manager; - } - - get view() { - this.assertNotFreed(); - return this.getView(); - } - - protected abstract getView(): T; - - slice(start?: number, end?: number) { - return this.view.slice(start, end); - } - - get address() { - this.assertNotFreed(); - return this.ptr; - } - - clear() { - this.view.fill(0); - return this; - } - - free() { - this.manager.free(this); - } - - clearAddress() { - this.ptr = 0; - } - - get isFreed() { - return this.ptr == 0; - } - - protected assertNotFreed() { - if (this.isFreed) { - throw new Error( - "Attempt to read a freed WASM typed array reference." - ); - } - } -} - -class Int8ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 0; - return this.module.HEAP8.subarray( - startIndex, - startIndex + this.length - ) as Int8Array; - } -} - -class Uint8ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 0; - return this.module.HEAPU8.subarray( - startIndex, - startIndex + this.length - ) as Uint8Array; - } - - readAsNullTerminatedUtf8String(): string { - let strBytes = this.view; - - const indexOfFirstZero = strBytes.indexOf(0); - - if (indexOfFirstZero >= 0) { - strBytes = strBytes.subarray(0, indexOfFirstZero); - } - - const str = Buffer.from(strBytes).toString("utf8"); - - return str; - } -} - -class Int16ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 1; - return this.module.HEAP16.subarray( - startIndex, - startIndex + this.length - ) as Int16Array; - } -} - -class Uint16ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 1; - return this.module.HEAPU16.subarray( - startIndex, - startIndex + this.length - ) as Uint16Array; - } -} - -class Int32ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 2; - return this.module.HEAP32.subarray( - startIndex, - startIndex + this.length - ) as Int32Array; - } -} - -class Uint32ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 2; - return this.module.HEAPU32.subarray( - startIndex, - startIndex + this.length - ) as Uint32Array; - } -} - -class Float32ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 2; - return this.module.HEAPF32.subarray( - startIndex, - startIndex + this.length - ) as Float32Array; - } -} - -class Float64ArrayRef extends TypedArrayRef { - getView() { - const startIndex = this.ptr >>> 3; - return this.module.HEAPF64.subarray( - startIndex, - startIndex + this.length - ) as Float64Array; - } -} - -type TypedArray = - | Int8Array - | Uint8Array - | Uint8ClampedArray - | Int16Array - | Uint16Array - | Int32Array - | Uint32Array - | Float32Array - | Float64Array; -type WasmRef = ValueRef | ValueRef | TypedArrayRef; - -class WasmMemoryManager { - wasmModule: any; - - private allocatedReferences = new Set(); - - constructor(wasmModule: any) { - this.wasmModule = wasmModule; - } - - allocInt8() { - const address = this.alloc(1); - return this.wrapInt8(address).clear(); - } - - wrapInt8(address: number) { - const ref = new Int8Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint8() { - const address = this.alloc(1); - return this.wrapUint8(address).clear(); - } - - wrapUint8(address: number) { - const ref = new Uint8Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocInt16() { - const address = this.alloc(2); - return this.wrapInt16(address).clear(); - } - - wrapInt16(address: number) { - const ref = new Int16Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint16() { - const address = this.alloc(2); - return this.wrapUint16(address).clear(); - } - - wrapUint16(address: number) { - const ref = new Uint16Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocInt32() { - const address = this.alloc(4); - return this.wrapInt32(address).clear(); - } - - wrapInt32(address: number) { - const ref = new Int32Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint32() { - const address = this.alloc(4); - return this.wrapUint32(address).clear(); - } - - wrapUint32(address: number) { - const ref = new Uint32Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocPointer() { - const address = this.alloc(4); - return this.wrapPointer(address).clear(); - } - - wrapPointer(address: number) { - const ref = new PointerRef(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocFloat32() { - const address = this.alloc(4); - return this.wrapFloat64(address).clear(); - } - - wrapFloat32(address: number) { - const ref = new Float32Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocFloat64() { - const address = this.alloc(8); - return this.wrapFloat64(address).clear(); - } - - wrapFloat64(address: number) { - const ref = new Float64Ref(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - // Allocate or wrap arrays - allocInt8Array(length: number) { - const address = this.alloc(length << 0); - return this.wrapInt8Array(address, length).clear(); - } - - wrapInt8Array(address: number, length: number) { - const ref = new Int8ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint8Array(length: number) { - const address = this.alloc(length << 0); - return this.wrapUint8Array(address, length).clear(); - } - - wrapUint8Array(address: number, length: number) { - const ref = new Uint8ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocInt16Array(length: number) { - const address = this.alloc(length << 1); - return this.wrapInt16Array(address, length).clear(); - } - - wrapInt16Array(address: number, length: number) { - const ref = new Int16ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint16Array(length: number) { - const address = this.alloc(length << 1); - return this.wrapUint16Array(address, length).clear(); - } - - wrapUint16Array(address: number, length: number) { - const ref = new Uint16ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocInt32Array(length: number) { - const address = this.alloc(length << 2); - return this.wrapInt32Array(address, length).clear(); - } - - wrapInt32Array(address: number, length: number) { - const ref = new Int32ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocUint32Array(length: number) { - const address = this.alloc(length << 2); - return this.wrapUint32Array(address, length).clear(); - } - - wrapUint32Array(address: number, length: number) { - const ref = new Uint32ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocFloat32Array(length: number) { - const address = this.alloc(length << 2); - return this.wrapFloat32Array(address, length).clear(); - } - - wrapFloat32Array(address: number, length: number) { - const ref = new Float32ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocFloat64Array(length: number) { - const address = this.alloc(length << 3); - return this.wrapFloat64Array(address, length).clear(); - } - - wrapFloat64Array(address: number, length: number) { - const ref = new Float64ArrayRef(address, length, this); - this.allocatedReferences.add(ref); - return ref; - } - - allocNullTerminatedUtf8String(str: string) { - const strBuffer = Buffer.concat([ - Buffer.from(str, "utf8"), - Buffer.alloc(1), - ]); - const ref = this.allocUint8Array(strBuffer.length); - ref.view.set(strBuffer); - return ref; - } - - wrapNullTerminatedUtf8String(address: number) { - const ref = new NullTerminatedUtf8StringRef(address, this); - this.allocatedReferences.add(ref); - return ref; - } - - private alloc(size: number) { - const ptr = this.wasmModule._malloc(size); - return ptr as number; - } - - free(wasmReference: WasmRef) { - if (wasmReference.isFreed) { - return; - } - - this.wasmModule._free(wasmReference.address); - - this.allocatedReferences.delete(wasmReference); - wasmReference.clearAddress(); - } - - freeAll() { - for (const wasmReference of this.allocatedReferences) { - this.free(wasmReference); - } - } -} - -function concatFloat32Arrays(arrays: Float32Array[]) { - return concatTypedArrays(Float32Array, arrays); -} - -function simplifyPunctuationCharacters(text: string) { - return text - .replaceAll(`“`, `"`) - .replaceAll(`”`, `"`) - .replaceAll(`„`, `"`) - .replaceAll(`ߵ`, `"`) - .replaceAll(`ߴ`, `"`) - .replaceAll(`«`, `"`) - .replaceAll(`»`, `"`) - - .replaceAll(`’`, `'`) - .replaceAll(`ʼ`, `'`) - .replaceAll(`ʼ`, `'`) - .replaceAll(`'`, `'`) - .replaceAll(`,`, `,`) - .replaceAll(`、`, `,`) - .replaceAll(`:`, `:`) - .replaceAll(`;`, `;`) - .replaceAll(`。`, `.`) - - .replaceAll(`?`, `?`) - .replaceAll(`!`, `!`) - .replaceAll(`؟`, `?`); -} - -function normalizeFourDigitDecadeString(decadeString: string) { - const firstTwoDigitsValue = parseInt(decadeString.substring(0, 2)); - const secondTwoDigitsValue = parseInt(decadeString.substring(2, 4)); - - let normalizedString: string; - - const isBeforeSecondMillenium = firstTwoDigitsValue < 10; - const isMilleniumDecade = - firstTwoDigitsValue % 10 == 0 && secondTwoDigitsValue == 0; - - if (!isBeforeSecondMillenium && !isMilleniumDecade) { - if (secondTwoDigitsValue != 0) { - normalizedString = `${firstTwoDigitsValue} ${secondTwoDigitsValue}s`; - } else { - normalizedString = `${firstTwoDigitsValue} hundreds`; - } - } else { - normalizedString = decadeString; - } - - return normalizedString; -} - -function normalizeFourDigitYearString(yearString: string) { - const firstTwoDigitsValue = parseFloat(yearString.substring(0, 2)); - const secondTwoDigitsValue = parseFloat(yearString.substring(2, 4)); - - let normalizedString: string; - - if (firstTwoDigitsValue >= 10 && secondTwoDigitsValue >= 10) { - normalizedString = `${firstTwoDigitsValue} ${secondTwoDigitsValue}`; - } else if ( - firstTwoDigitsValue >= 10 && - firstTwoDigitsValue % 10 != 0 && - secondTwoDigitsValue < 10 - ) { - normalizedString = `${firstTwoDigitsValue} oh ${secondTwoDigitsValue}`; - } else { - normalizedString = yearString; - } - - return normalizedString; -} - -function getNormalizedFragmentsForSpeech(words: string[], language: string) { - language = getShortLanguageCode(language); - - if (language != "en") { - return { - normalizedFragments: [...words], - referenceFragments: [...words], - }; - } - - const numberPattern = /^[0-9][0-9\,\.]*$/; - - const fourDigitYearPattern = /^[0-9][0-9][0-9][0-9]$/; - const fourDigitDecadePattern = /^[0-9][0-9][0-9]0s$/; - - const fourDigitYearRangePattern = - /^[0-9][0-9][0-9][0-9][\-\–][0-9][0-9][0-9][0-9]$/; - - const wordsPrecedingAYear = [ - "in", - "the", - "a", - "to", - "of", - "since", - "from", - "between", - "by", - "until", - "around", - "before", - "after", - "his", - "her", - "year", - "years", - "during", - "copyright", - "©", - "early", - "mid", - "late", - "january", - "february", - "march", - "april", - "may", - "june", - "july", - "august", - "september", - "october", - "november", - "december", - "jan", - "feb", - "mar", - "apr", - "may", - "jun", - "jul", - "aug", - "sep", - "oct", - "nov", - "dec", - ]; - - const wordsPrecedingADecade = ["the", "in", "early", "mid", "late", "a"]; - - const symbolsPrecedingACurrency = ["$", "€", "£", "¥"]; - - const symbolsPrecedingACurrencyAsWords = [ - "dollars", - "euros", - "pounds", - "yen", - ]; - - const wordsSucceedingACurrency = ["million", "billion", "trillion"]; - - const normalizedFragments: string[] = []; - const referenceFragments: string[] = []; - - for (let wordIndex = 0; wordIndex < words.length; wordIndex++) { - const word = words[wordIndex]; - const lowerCaseWord = word.toLowerCase(); - - const nextWords = words.slice(wordIndex + 1); - const nextWord = nextWords[0]; - - if ( - // Normalize a four digit year pattern, e.g. 'in 1995'. - wordsPrecedingAYear.includes(lowerCaseWord) && - fourDigitYearPattern.test(nextWord) - ) { - const normalizedString = normalizeFourDigitYearString(nextWord); - - normalizedFragments.push(word); - referenceFragments.push(word); - - normalizedFragments.push(normalizedString); - referenceFragments.push(nextWord); - - wordIndex += 1; - } else if ( - // Normalize a four digit decade pattern, e.g. 'the 1980s'. - wordsPrecedingADecade.includes(lowerCaseWord) && - fourDigitDecadePattern.test(nextWord) - ) { - const normalizedString = normalizeFourDigitDecadeString(nextWord); - - normalizedFragments.push(word); - referenceFragments.push(word); - - normalizedFragments.push(normalizedString); - referenceFragments.push(nextWord); - - wordIndex += 1; - } else if ( - // Normalize a year range pattern, e.g. '1835-1896' - fourDigitYearRangePattern.test( - words.slice(wordIndex, wordIndex + 3).join("") - ) - ) { - normalizedFragments.push( - normalizeFourDigitYearString(words[wordIndex]) - ); - referenceFragments.push(words[wordIndex]); - - normalizedFragments.push("to"); - referenceFragments.push(words[wordIndex + 1]); - - normalizedFragments.push( - normalizeFourDigitYearString(words[wordIndex + 2]) - ); - referenceFragments.push(words[wordIndex + 2]); - - wordIndex += 2; - } else if ( - // Normalize a currency pattern, e.g. '$53.1 million', '€3.53' - symbolsPrecedingACurrency.includes(lowerCaseWord) && - numberPattern.test(nextWord) - ) { - const currencyWord = - symbolsPrecedingACurrencyAsWords[ - symbolsPrecedingACurrency.indexOf(lowerCaseWord) - ]; - - if (wordsSucceedingACurrency.includes(nextWords[1].toLowerCase())) { - const normalizedString = `${nextWord} ${nextWords[1]} ${currencyWord}`; - - normalizedFragments.push(normalizedString); - - const referenceString = `${word}${nextWord} ${nextWords[1]}`; - referenceFragments.push(referenceString); - - wordIndex += 2; - } else { - const normalizedString = `${nextWord} ${currencyWord}`; - - normalizedFragments.push(normalizedString); - - const referenceString = `${word}${nextWord}`; - referenceFragments.push(referenceString); - - wordIndex += 1; - } - } else { - normalizedFragments.push(word); - referenceFragments.push(word); - } - } - - return { normalizedFragments, referenceFragments }; -} - -const wordCharacterPattern = /[\p{Letter}\p{Number}]/u; - -async function splitToWords(text: string, langCode: string): Promise { - const shortLangCode = getShortLanguageCode(langCode || ""); - - if (shortLangCode == "zh" || shortLangCode == "cmn") { - return splitChineseTextToWords_Jieba(text, undefined, true); - } else if (shortLangCode == "ja") { - return splitJapaneseTextToWords_Kuromoji(text); - } else { - return CldrSegmentation.wordSplit( - text, - CldrSegmentation.suppressions[shortLangCode] - ); - } -} - -const ipaToKirshenbaum: { [p: string]: string | undefined } = { - "1": "1", - "2": "2", - "4": "4", - "5": "5", - "6": "6", - "7": "7", - "9": "9", - " ": " ", - "!": "!", - "'": "'", - ʰ: "#", - $: "$", - "%": "%", - //'æ': '&', - æ: "a", - ˈ: "'", - "(": "(", - ")": ")", - ɾ: "*", - "+": "+", - ˌ: ",", - "-": "-", - ".": ".", - "/": "/", - ɒ: "0", - ɜ: "3", - ɵ: "8", - ː: ":", - ʲ: ";", - "<": "<", - "=": "=", - ">": ">", - ʔ: "?", - ə: "@", - ɑ: "A", - β: "B", - ç: "C", - ð: "D", - ɛ: "E", - F: "F", - ɢ: "G", - ħ: "H", - ɪ: "I", - ɟ: "J", - K: "K", - ɫ: "L", - ɱ: "M", - ŋ: "N", - ɔ: "O", - Φ: "P", - ɣ: "Q", - ʀ: "R", - ʃ: "S", - θ: "T", - ʊ: "U", - ʌ: "V", - œ: "W", - χ: "X", - ø: "Y", - ʒ: "Z", - "̪": "[", - "\\": "\\", - "]": "]", - "^": "^", - _: "_", - "`": "`", - a: "a", - b: "b", - c: "c", - d: "d", - e: "e", - f: "f", - ɡ: "g", - h: "h", - i: "i", - j: "j", - k: "k", - l: "l", - m: "m", - n: "n", - o: "o", - p: "p", - q: "q", - r: "r", - s: "s", - t: "t", - u: "u", - v: "v", - w: "w", - x: "x", - y: "y", - z: "z", - "{": "{", - "|": "|", - "}": "}", - "̃": "~", - "": "", - - // Extensions - ɚ: "3", - ɹ: "r", - ɐ: "a#", - ᵻ: "i", - "̩": ",", -}; - -function ipaPhoneToKirshenbaum(ipaPhone: string) { - let result = ""; - - for (const char of ipaPhone) { - const convertedChar = ipaToKirshenbaum[char]; - - if (convertedChar == undefined) { - throw new Error( - `Could not convert phone character '${char}' to Kirshenbaum encoding` - ); - } - - result += convertedChar || "_"; - } - - return result; -} - -function deepClone(val: T) { - return clone(val, true); -} - -function clone(val: T, deep = true, seenObjects: any[] = []): T { - if (val == null || typeof val !== "object") { - return val; - } - - const obj = val; - const prototypeIdentifier = toString.call(obj); - - switch (prototypeIdentifier) { - case "[object Array]": { - if (seenObjects.includes(obj)) { - throw new Error("deepClone: encountered a cyclic object"); - } - - seenObjects.push(obj); - - const clonedArray = new Array(obj.length); - - for (let i = 0; i < obj.length; i++) { - if (deep) { - clonedArray[i] = clone(obj[i], true, seenObjects); - } else { - clonedArray[i] = obj[i]; - } - } - - seenObjects.pop(); - - return clonedArray; - } - - case "[object ArrayBuffer]": { - const clonedArray = new Uint8Array(obj.byteLength); - clonedArray.set(new Uint8Array(obj)); - return clonedArray.buffer; - } - - case "[object Int8Array]": { - const clonedArray = new Int8Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Uint8Array]": { - const clonedArray = new Uint8Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Uint8ClampedArray]": { - const clonedArray = new Uint8ClampedArray(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Int16Array]": { - const clonedArray = new Int16Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Uint16Array]": { - const clonedArray = new Uint16Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Int32Array]": { - const clonedArray = new Int32Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Uint32Array]": { - const clonedArray = new Uint32Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Float32Array]": { - const clonedArray = new Float32Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Float64Array]": { - const clonedArray = new Float64Array(obj.length); - clonedArray.set(obj); - return clonedArray; - } - - case "[object Date]": { - return new Date(obj.valueOf()); - } - - case "[object RegExp]": { - return obj; - } - - case "[object Function]": { - return obj; - } - - case "[object Object]": { - if (seenObjects.includes(obj)) { - throw new Error("deepClone: encountered a cyclic object"); - } - - seenObjects.push(obj); - - const clonedObj: any = {}; - - for (const propName in obj) { - if (!obj.hasOwnProperty(propName)) { - continue; - } - - if (deep) { - clonedObj[propName] = clone( - obj[propName], - true, - seenObjects - ); - } else { - clonedObj[propName] = obj[propName]; - } - } - - seenObjects.pop(); - - return clonedObj; - } - - default: { - throw new Error( - `Cloning of type ${prototypeIdentifier} is not supported` - ); - } - } -} - -function isPlainObject(val: any) { - return ( - val != null && - typeof val === "object" && - toString.call(val) === "[object Object]" - ); -} - -function extendDeep(base: any, extension: any): any { - const baseClone = deepClone(base); - - if (isPlainObject(base) && extension === undefined) { - return baseClone; - } - - const extensionClone = deepClone(extension); - if (!isPlainObject(base) || !isPlainObject(extension)) { - return extensionClone; - } - - for (const propName in extensionClone) { - if (!extensionClone.hasOwnProperty(propName)) { - continue; - } - - baseClone[propName] = extendDeep( - baseClone[propName], - extensionClone[propName] - ); - } - - return baseClone; -} - -function tryGetFirstLexiconSubstitution( - sentenceWords: string[], - wordIndex: number, - lexicons: Lexicon[], - languageCode: string -) { - const reversedLexicons = [...lexicons].reverse(); // Give precedence to later lexicons - - for (const lexicon of reversedLexicons) { - const match = tryGetLexiconSubstitution( - sentenceWords, - wordIndex, - lexicon, - languageCode - ); - - if (match) { - return match; - } - } - - return undefined; -} - -function tryGetLexiconSubstitution( - sentenceWords: string[], - wordIndex: number, - lexicon: Lexicon, - languageCode: string -) { - const word = sentenceWords[wordIndex]; - - if (!word) { - return; - } - - const shortLanguageCode = getShortLanguageCode(languageCode); - const lexiconForLanguage = lexicon[shortLanguageCode]; - - if (!lexiconForLanguage) { - return; - } - - const lexiconEntry = lexiconForLanguage[word]; - - if (!lexiconEntry) { - return; - } - - for (let i = 0; i < lexiconEntry.length; i++) { - const substitutionEntry = lexiconEntry[i]; - - const substitutionPhonemesText = - substitutionEntry?.pronunciation?.espeak?.[languageCode]; - - if (!substitutionPhonemesText) { - continue; - } - - const precedingWord = sentenceWords[wordIndex - 1] || ""; - const succeedingWord = sentenceWords[wordIndex + 1] || ""; - - const precededBy = substitutionEntry?.precededBy || []; - const notPrecededBy = substitutionEntry?.notPrecededBy || []; - - const succeededBy = substitutionEntry?.succeededBy || []; - const notSucceededBy = substitutionEntry?.notSucceededBy || []; - - const hasNegativePattern = - notPrecededBy.includes(precedingWord) || - notSucceededBy.includes(succeedingWord); - const hasPositivePattern = - precededBy.includes(precedingWord) || - succeededBy.includes(succeedingWord); - - if ( - i == lexiconEntry.length - 1 || - (hasPositivePattern && !hasNegativePattern) - ) { - const substitutionPhonemes = substitutionPhonemesText.split(/ +/g); - - return substitutionPhonemes; - } - } - - return; -} - -function int16PcmToFloat32(input: Int16Array) { - const output = new Float32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = input[i]; - output[i] = sample < 0 ? sample / 32768 : sample / 32767; - } - - return output; -} - -let espeakInstance: any; -let espeakModule: any; - -type EspeakEventType = - | "sentence" - | "word" - | "phoneme" - | "end" - | "mark" - | "play" - | "msg_terminated" - | "list_terminated" - | "samplerate"; - -interface EspeakEvent { - audio_position: number; - type: EspeakEventType; - text_position: number; - word_length: number; - id?: string | number; -} - -async function setVoice(voiceId: string) { - const { instance } = await getEspeakInstance(); - - instance.set_voice(voiceId); -} - -async function getEspeakInstance() { - if (!espeakInstance) { - const { default: EspeakInitializer } = await import( - "@echogarden/espeak-ng-emscripten" - ); - - const m = await EspeakInitializer(); - espeakInstance = await new m.eSpeakNGWorker(); - espeakModule = m; - } - - return { instance: espeakInstance, module: espeakModule }; -} - -async function getSampleRate(): Promise<22050> { - return 22050; -} - -async function synthesizeFragments( - fragments: string[], - espeakOptions: EspeakOptions -) { - espeakOptions = extendDeep(defaultEspeakOptions, espeakOptions); - - const voice = espeakOptions.voice; - - const sampleRate = await getSampleRate(); - - if (fragments.length === 0) { - return { - rawAudio: getEmptyRawAudio(1, sampleRate), - timeline: [] as Timeline, - events: [] as EspeakEvent[], - }; - } - - const canInsertSeparators = ![ - "roa/an", - "art/eo", - "trk/ky", - "zlw/pl", - "zle/uk", - ].includes(voice); - - let textWithMarkers: string; - - if (canInsertSeparators) { - textWithMarkers = `() | `; - } else { - textWithMarkers = `() `; - } - - for (let i = 0; i < fragments.length; i++) { - let fragment = fragments[i]; - - fragment = simplifyPunctuationCharacters(fragment); - - fragment = fragment.replaceAll("<", "<").replaceAll(">", ">"); - - if (espeakOptions.insertSeparators && canInsertSeparators) { - const separator = ` | `; - - textWithMarkers += `${separator}${fragment}${separator}`; - } else { - if (fragment.endsWith(".")) { - fragment += " ()"; - } - - textWithMarkers += `${fragment} `; - } - } - - const { rawAudio, events } = await espeakSynthesize(textWithMarkers, { - ...espeakOptions, - ssml: true, - }); - - // Add first marker if missing - if (fragments.length > 0) { - const firstMarkerEvent = events.find((event) => event.type === "mark"); - - if (firstMarkerEvent && firstMarkerEvent.id === "e-0") { - events.unshift({ - type: "mark", - text_position: 0, - word_length: 0, - audio_position: 0, - id: "s-0", - }); - } - } - - // Build word timeline from events - const wordTimeline: Timeline = fragments.map((word) => ({ - type: "word", - text: word, - startTime: -1, - endTime: -1, - timeline: [ - { - type: "token", - text: "", - startTime: -1, - endTime: -1, - timeline: [], - }, - ], - })); - - let wordIndex = 0; - - const clauseEndIndexes: number[] = []; - - for (const event of events) { - const eventTime = event.audio_position / 1000; - - const currentWordEntry = wordTimeline[wordIndex]; - - const currentTokenTimeline = currentWordEntry.timeline!; - const currentTokenEntry = - currentTokenTimeline[currentTokenTimeline.length - 1]; - - const currentPhoneTimeline = currentTokenEntry.timeline!; - const lastPhoneEntry = - currentPhoneTimeline[currentPhoneTimeline.length - 1]; - - if (lastPhoneEntry && lastPhoneEntry.endTime === -1) { - lastPhoneEntry.endTime = eventTime; - } - - if (event.type === "word") { - if (!event.id || currentPhoneTimeline.length === 0) { - continue; - } - - if (currentTokenEntry.endTime === -1) { - currentTokenEntry.endTime = eventTime; - } - - currentTokenTimeline.push({ - type: "token", - text: "", - startTime: eventTime, - endTime: -1, - timeline: [], - }); - } else if (event.type === "phoneme") { - const phoneText = event.id as string; - - if (!phoneText || phoneText.startsWith("(")) { - continue; - } - - currentPhoneTimeline.push({ - type: "phone", - text: phoneText, - startTime: eventTime, - endTime: -1, - }); - - currentTokenEntry.text += phoneText; - currentTokenEntry.startTime = currentPhoneTimeline[0].startTime; - } else if (event.type === "mark") { - const markerName = event.id! as string; - - if (markerName.startsWith("s-")) { - const markerIndex = parseInt(markerName.substring(2)); - - if (markerIndex != wordIndex) { - throw new Error( - `Word start marker for index ${wordIndex} is not consistent with word index. The words were: ${objToString(fragments)}` - ); - } - - if (currentPhoneTimeline.length > 0) { - throw new Error( - `Word entry ${wordIndex} already has phones before its start marker was seen. The words were: ${objToString(fragments)}` - ); - } - - currentWordEntry.startTime = eventTime; - currentTokenEntry.startTime = eventTime; - } else if (markerName.startsWith("e-")) { - const markerIndex = parseInt(markerName.substring(2)); - - if (markerIndex != wordIndex) { - throw new Error( - `Word end marker for index ${wordIndex} is not consistent with word index. The words were: ${objToString(fragments)}` - ); - } - - currentWordEntry.startTime = currentTokenTimeline[0].startTime; - - currentWordEntry.endTime = eventTime; - currentTokenEntry.endTime = eventTime; - - wordIndex += 1; - - if (wordIndex === wordTimeline.length) { - break; - } - } else { - continue; - } - } else if (event.type === "end") { - clauseEndIndexes.push(wordIndex); - } - } - - clauseEndIndexes.push(wordTimeline.length); - - // Split compound tokens - for (const [index, wordEntry] of wordTimeline.entries()) { - const tokenTimeline = wordEntry.timeline; - - if (index === 0) { - continue; - } - - if (!tokenTimeline || tokenTimeline.length === 0) { - throw new Error( - "Unexpected: token timeline should exist and have at least one token" - ); - } - - if (tokenTimeline.length !== 1 && tokenTimeline[0].text != "") { - continue; - } - - const wordReferencePhonemes = ( - await textToPhonemes(wordEntry.text, espeakOptions.voice, true) - ).split("_"); - - const wordReferenceIPA = wordReferencePhonemes.join(" "); - - if (wordReferenceIPA.trim().length === 0) { - continue; - } - - const wordReferenceIPAWithoutStress = wordReferenceIPA - .replaceAll("ˈ", "") - .replaceAll("ˌ", ""); - - const previousWordEntry = wordTimeline[index - 1]; - - if (!previousWordEntry.timeline) { - continue; - } - - const previousWordTokenEntry = - previousWordEntry.timeline[previousWordEntry.timeline.length - 1]; - - if (!previousWordTokenEntry.timeline) { - continue; - } - - const previousWordTokenIPAWithoutStress = - previousWordTokenEntry.timeline - .map((phoneEntry) => - phoneEntry.text.replaceAll("ˈ", "").replaceAll("ˌ", "") - ) - .join(" "); - - if ( - previousWordEntry.timeline.length > 1 && - previousWordTokenIPAWithoutStress === wordReferenceIPAWithoutStress - ) { - tokenTimeline.pop(); - - const tokenEntryToInsert = previousWordEntry.timeline.pop()!; - tokenTimeline.push(tokenEntryToInsert); - - previousWordEntry.endTime = - previousWordEntry.timeline[ - previousWordEntry.timeline.length - 1 - ].endTime; - - wordEntry.startTime = tokenEntryToInsert.startTime; - wordEntry.endTime = tokenEntryToInsert.endTime; - - continue; - } - - if ( - previousWordTokenEntry.timeline.length <= - wordReferencePhonemes.length - ) { - continue; - } - - if ( - !previousWordTokenIPAWithoutStress.endsWith( - wordReferenceIPAWithoutStress - ) - ) { - continue; - } - - const tokenEntry = tokenTimeline[0]; - - tokenEntry.timeline = previousWordTokenEntry.timeline.splice( - previousWordTokenEntry.timeline.length - - wordReferencePhonemes.length - ); - tokenEntry.text = tokenEntry.timeline - .map((phoneEntry) => phoneEntry.text) - .join(""); - - tokenEntry.startTime = tokenEntry.timeline[0].startTime; - tokenEntry.endTime = - tokenEntry.timeline[tokenEntry.timeline.length - 1].endTime; - wordEntry.startTime = tokenEntry.startTime; - wordEntry.endTime = tokenEntry.endTime; - - previousWordTokenEntry.text = previousWordTokenEntry.timeline - .map((phoneEntry) => phoneEntry.text) - .join(""); - previousWordTokenEntry.endTime = - previousWordTokenEntry.timeline[ - previousWordTokenEntry.timeline.length - 1 - ].endTime; - previousWordEntry.endTime = previousWordTokenEntry.endTime; - } - - // Build clause timeline - const clauseTimeline: Timeline = []; - - let clauseStartIndex = 0; - - for (const clauseEndIndex of clauseEndIndexes) { - const newClause: TimelineEntry = { - type: "clause", - text: "", - startTime: -1, - endTime: -1, - timeline: [], - }; - - for ( - let entryIndex = clauseStartIndex; - entryIndex <= clauseEndIndex && entryIndex < wordTimeline.length; - entryIndex++ - ) { - const wordEntry = wordTimeline[entryIndex]; - if (newClause.startTime === -1) { - newClause.startTime = wordEntry.startTime; - } - - newClause.endTime = wordEntry.endTime; - - newClause.text += `${wordEntry.text} `; - - newClause.timeline!.push(wordEntry); - } - - if (newClause.timeline!.length > 0) { - clauseTimeline.push(newClause); - clauseStartIndex = clauseEndIndex + 1; - } - } - - return { rawAudio, timeline: clauseTimeline, events }; -} - -async function setRate(rate: number) { - const { instance } = await getEspeakInstance(); - - return instance.set_rate(rate); -} - -async function setPitch(pitch: number) { - const { instance } = await getEspeakInstance(); - - return instance.set_pitch(pitch); -} - -async function setPitchRange(pitchRange: number) { - const { instance } = await getEspeakInstance(); - - return instance.set_range(pitchRange); -} - -async function espeakSynthesize(text: string, espeakOptions: EspeakOptions) { - const logger = new Logger(); - - espeakOptions = extendDeep(defaultEspeakOptions, espeakOptions); - - logger.start("Get eSpeak Emscripten instance"); - - if (!espeakOptions.ssml) { - const { escape } = await import("html-escaper"); - - text = escape(text); - } - - const { instance } = await getEspeakInstance(); - - const sampleChunks: Float32Array[] = []; - const allEvents: EspeakEvent[] = []; - - logger.start("Synthesize with eSpeak"); - - if (espeakOptions.useKlatt) { - await setVoice(`${espeakOptions.voice}+klatt6`); - } else { - await setVoice(espeakOptions.voice); - } - - await setRate(espeakOptions.rate); - await setPitch(espeakOptions.pitch); - await setPitchRange(espeakOptions.pitchRange); - - instance.synthesize(text, (samples: Int16Array, events: EspeakEvent[]) => { - if (samples && samples.length > 0) { - sampleChunks.push(int16PcmToFloat32(samples)); - } - - for (const event of events) { - if (event.type === "word") { - const textPosition = event.text_position - 1; - (event as any)["text"] = text.substring( - textPosition, - textPosition + event.word_length - ); - } - } - - allEvents.push(...events); - }); - - const concatenatedSamples = concatFloat32Arrays(sampleChunks); - - const rawAudio: RawAudio = { - audioChannels: [concatenatedSamples], - sampleRate: 22050, - }; - - logger.end(); - - return { rawAudio, events: allEvents }; -} - -async function textToPhonemes(text: string, voice: string, useIPA = true) { - await setVoice(voice); - const { instance, module } = await getEspeakInstance(); - const textPtr = instance.convert_to_phonemes(text, useIPA); - - const wasmMemory = new WasmMemoryManager(module); - - const resultRef = wasmMemory.wrapNullTerminatedUtf8String(textPtr.ptr); - const result = resultRef.getValue(); - - wasmMemory.freeAll(); - - return result; -} - -async function preprocessAndSynthesize( - text: string, - language: string, - espeakOptions: EspeakOptions, - lexicons: Lexicon[] = [] -) { - const logger = new Logger(); - - espeakOptions = extendDeep(defaultEspeakOptions, espeakOptions); - - await logger.startAsync("Tokenize and analyze text"); - - let lowerCaseLanguageCode = language.toLowerCase(); - - if (lowerCaseLanguageCode === "en-gb") { - lowerCaseLanguageCode = "en-gb-x-rp"; - } - - let fragments: string[]; - let preprocessedFragments: string[]; - const phonemizedFragmentsSubstitutions = new Map(); - - fragments = []; - preprocessedFragments = []; - - let words = await splitToWords(text, language); - - // Merge repeating symbol words to a single word to work around eSpeak bug - const wordsWithMerges: string[] = []; - - for (let i = 0; i < words.length; i++) { - const currentWord = words[i]; - const previousWord = words[i - 1]; - - if ( - i > 0 && - currentWord === previousWord && - !wordCharacterPattern.test(currentWord) - ) { - wordsWithMerges[wordsWithMerges.length - 1] += currentWord; - } else { - wordsWithMerges.push(currentWord); - } - } - - words = wordsWithMerges; - - // Remove words containing only whitespace - words = words.filter((word) => word.trim() != ""); - - const { normalizedFragments, referenceFragments } = - getNormalizedFragmentsForSpeech(words, language); - - const simplifiedFragments = normalizedFragments.map((word) => - simplifyPunctuationCharacters(word).toLocaleLowerCase() - ); - - if ([`'`].includes(simplifiedFragments[0])) { - normalizedFragments[0] = `()`; - } - - for ( - let fragmentIndex = 0; - fragmentIndex < normalizedFragments.length; - fragmentIndex++ - ) { - const fragment = normalizedFragments[fragmentIndex]; - - const substitutionPhonemes = tryGetFirstLexiconSubstitution( - simplifiedFragments, - fragmentIndex, - lexicons, - lowerCaseLanguageCode - ); - - if (!substitutionPhonemes) { - continue; - } - - phonemizedFragmentsSubstitutions.set( - fragmentIndex, - substitutionPhonemes - ); - const referenceIPA = ( - await textToPhonemes(fragment, espeakOptions.voice, true) - ).replaceAll("_", " "); - const referenceKirshenbaum = ( - await textToPhonemes(fragment, espeakOptions.voice, false) - ).replaceAll("_", ""); - - const kirshenbaumPhonemes = substitutionPhonemes - .map((phone) => ipaPhoneToKirshenbaum(phone)) - .join(""); - - logger.logTitledMessage( - `\nLexicon substitution for '${fragment}'`, - `IPA: ${substitutionPhonemes.join(" ")} (original: ${referenceIPA}), Kirshenbaum: ${kirshenbaumPhonemes} (reference: ${referenceKirshenbaum})` - ); - - const substitutionPhonemesFragment = ` [[${kirshenbaumPhonemes}]] `; - - normalizedFragments[fragmentIndex] = substitutionPhonemesFragment; - } - - fragments = referenceFragments; - preprocessedFragments = normalizedFragments; - - logger.start("Synthesize preprocessed fragments with eSpeak"); - - const { rawAudio: referenceSynthesizedAudio, timeline: referenceTimeline } = - await synthesizeFragments(preprocessedFragments, espeakOptions); - - await logger.startAsync("Build phonemized tokens"); - - const phonemizedSentence: string[][][] = []; - - let wordIndex = 0; - for (const phraseEntry of referenceTimeline) { - const phrase: string[][] = []; - - for (const wordEntry of phraseEntry.timeline!) { - wordEntry.text = fragments[wordIndex]; - - if (phonemizedFragmentsSubstitutions.has(wordIndex)) { - phrase.push(phonemizedFragmentsSubstitutions.get(wordIndex)!); - } else { - for (const tokenEntry of wordEntry.timeline!) { - const tokenPhonemes: string[] = []; - - for (const phoneme of tokenEntry.timeline!) { - if (phoneme.text) { - tokenPhonemes.push(phoneme.text); - } - } - - if (tokenPhonemes.length > 0) { - phrase.push(tokenPhonemes); - } - } - } - - wordIndex += 1; - } - - if (phrase.length > 0) { - phonemizedSentence.push(phrase); - } - } - - logger.log( - phonemizedSentence - .map((phrase) => phrase.map((word) => word.join(" ")).join(" | ")) - .join(" || ") - ); - - logger.end(); - - return { - referenceSynthesizedAudio, - referenceTimeline, - fragments, - preprocessedFragments, - phonemizedFragmentsSubstitutions, - phonemizedSentence, - }; -} - -type Lexicon = { - [shortLanguageCode: string]: LexiconForLanguage; -}; - -type LexiconForLanguage = { - [word: string]: LexiconEntry[]; -}; - -type LexiconEntry = { - pos?: string[]; - case?: LexiconWordCase; - - pronunciation?: { - espeak?: LexiconPronunciationForLanguageCodes; - sapi?: LexiconPronunciationForLanguageCodes; - }; - - precededBy?: string[]; - notPrecededBy?: string[]; - - succeededBy?: string[]; - notSucceededBy?: string[]; - - example?: string; -}; - -type LexiconWordCase = - | "any" - | "capitalized" - | "uppercase" - | "lowercase" - | "titlecase" - | "camelcase" - | "pascalcase"; -type LexiconPronunciationForLanguageCodes = { [languageCode: string]: string }; - -function getRawAudioDuration(rawAudio: RawAudio) { - if (rawAudio.audioChannels.length == 0 || rawAudio.sampleRate == 0) { - return 0; - } - - return rawAudio.audioChannels[0].length / rawAudio.sampleRate; -} - -function getEmptyRawAudio(channelCount: number, sampleRate: number) { - const audioChannels = []; - - for (let c = 0; c < channelCount; c++) { - audioChannels.push(new Float32Array(0)); - } - - const result: RawAudio = { audioChannels, sampleRate }; - - return result; -} - -type RawAudio = { - audioChannels: Float32Array[]; - sampleRate: number; -}; - -function concatTypedArrays(ArrayConstructor: any, arrays: any[]) { - let totalLength = 0; - - for (const arr of arrays) { - totalLength += arr.length; - } - - const result = new ArrayConstructor(totalLength); - - let offset = 0; - - for (const arr of arrays) { - result.set(arr, offset); - offset += arr.length; - } - - return result; -} - -function writeToStderr(message: any) { - process.stderr.write(message); -} - -function printToStderr(message: any) { - if (typeof message == "string") { - writeToStderr(message); - } else { - writeToStderr(objToString(message)); - } -} - -function logToStderr(message: any) { - printToStderr(message); - writeToStderr("\n"); -} - -function objToString(obj: any) { - const formattedString = inspect(obj, { - showHidden: false, - depth: null, - colors: false, - maxArrayLength: null, - maxStringLength: null, - compact: 5, - }); - - return formattedString; -} - -function roundToDigits(val: number, digits = 3) { - const multiplier = 10 ** digits; - return Math.round(val * multiplier) / multiplier; -} - -function yieldToEventLoop() { - return new Promise((resolve) => { - setImmediate(resolve); - }); -} - -async function resolveModuleScriptPath(moduleName: string) { - const { resolve } = await import("import-meta-resolve"); - - const scriptPath = resolve(moduleName, import.meta.url); - - const { fileURLToPath } = await import("url"); - - return fileURLToPath(scriptPath); -} - -let currentActiveLogger: Logger | null = null; - -declare const chrome: any; -declare const process: any; - -class Timer { - startTime = 0; - - constructor() { - this.restart(); - } - - restart() { - this.startTime = Timer.currentTime; - } - - get elapsedTime(): number { - // Elapsed time (milliseconds) - return Timer.currentTime - this.startTime; - } - - get elapsedTimeSeconds(): number { - // Elapsed time (seconds) - return this.elapsedTime / 1000; - } - - getElapsedTimeAndRestart(): number { - const elapsedTime = this.elapsedTime; - this.restart(); - return elapsedTime; - } - - logAndRestart(title: string, timePrecision = 3): number { - const elapsedTime = this.elapsedTime; - - // - const message = `${title}: ${roundToDigits(elapsedTime, timePrecision)}ms`; - writeToStderr(message); - // - - this.restart(); - - return elapsedTime; - } - - static get currentTime(): number { - if (!this.timestampFunc) { - this.createGlobalTimestampFunction(); - } - - return this.timestampFunc(); - } - - static get microsecondTimestamp(): number { - return Math.floor(Timer.currentTime * 1000); - } - - private static createGlobalTimestampFunction() { - if ( - typeof process === "object" && - typeof process.hrtime === "function" - ) { - let baseTimestamp = 0; - - this.timestampFunc = () => { - const nodeTimeStamp = process.hrtime(); - const millisecondTime = - nodeTimeStamp[0] * 1000 + nodeTimeStamp[1] / 1000000; - - return baseTimestamp + millisecondTime; - }; - - baseTimestamp = Date.now() - this.timestampFunc(); - } else if (typeof chrome === "object" && chrome.Interval) { - const baseTimestamp = Date.now(); - - const chromeIntervalObject = new chrome.Interval(); - chromeIntervalObject.start(); - - this.timestampFunc = () => - baseTimestamp + chromeIntervalObject.microseconds() / 1000; - } else if (typeof performance === "object" && performance.now) { - const baseTimestamp = Date.now() - performance.now(); - - this.timestampFunc = () => baseTimestamp + performance.now(); - } else if (Date.now) { - this.timestampFunc = () => Date.now(); - } else { - this.timestampFunc = () => new Date().getTime(); - } - } - - private static timestampFunc: () => number; -} - -function logLevelToNumber(logLevel: LogLevel) { - return logLevels.indexOf(logLevel); -} - -function getLogLevel() { - return "info" as const; -} - -function logLevelGreaterOrEqualTo(referenceLevel: LogLevel) { - return !logLevelSmallerThan(referenceLevel); -} - -function logLevelSmallerThan(referenceLevel: LogLevel) { - return logLevelToNumber(getLogLevel()) < logLevelToNumber(referenceLevel); -} - -const logLevels = [ - "silent", - "output", - "error", - "warning", - "info", - "trace", -] as const; - -type LogLevel = (typeof logLevels)[number]; -class Logger { - private timer = new Timer(); - active = false; - - start(title: string) { - this.startAsync(title, false); - } - - async startAsync(title: string, yieldBeforeStart = true) { - if (currentActiveLogger != null && currentActiveLogger != this) { - return; - } - - this.end(); - - if (yieldBeforeStart) { - await yieldToEventLoop(); - } - - if (logLevelGreaterOrEqualTo("info")) { - writeToStderr(`${title}.. `); - } - - this.setAsActiveLogger(); - - this.timer.restart(); - } - - setAsActiveLogger() { - this.active = true; - currentActiveLogger = this; - } - - unsetAsActiveLogger() { - this.active = false; - currentActiveLogger = null; - } - - end() { - if (this.active && currentActiveLogger == this) { - const elapsedTime = this.timer.elapsedTime; - - if (logLevelGreaterOrEqualTo("info")) { - writeToStderr(`${elapsedTime.toFixed(1)}ms\n`); - } - - currentActiveLogger = null; - } - - this.active = false; - } - - logTitledMessage(title: string, content: any, logLevel: LogLevel = "info") { - this.log(`${title}: ${content}`, logLevel); - } - - log(message: any, logLevel: LogLevel = "info") { - if (logLevelSmallerThan(logLevel)) { - return; - } - - if (currentActiveLogger == this || currentActiveLogger == null) { - logToStderr(message); - } - } - - write(message: any, logLevel: LogLevel = "info") { - if (logLevelSmallerThan(logLevel)) { - return; - } - - if (currentActiveLogger == this || currentActiveLogger == null) { - writeToStderr(message); - } - } - - getTimestamp() { - return Timer.currentTime; - } -} - -type TimelineEntryType = - | "segment" - | "paragraph" - | "sentence" - | "clause" - | "phrase" - | "word" - | "token" - | "letter" - | "phone" - | "subphone"; - -type TimelineEntry = { - type: TimelineEntryType; - - text: string; - - startTime: number; - endTime: number; - - startOffsetUtf16?: number; - endOffsetUtf16?: number; - - startOffsetUtf32?: number; - endOffsetUtf32?: number; - - confidence?: number; - - id?: number; - - timeline?: Timeline; -}; - -type Timeline = TimelineEntry[]; - -const readFile = promisify(gracefulFS.readFile); -//const writeFile = promisify(gracefulFS.writeFile) -const readdir = promisify(gracefulFS.readdir); - -async function readAndParseJsonFile(jsonFilePath: string, useJson5 = false) { - const fileContent = await readFile(jsonFilePath, { encoding: "utf-8" }); - - if (useJson5) { - const { default: JSON5 } = await import("json5"); - - return JSON5.parse(fileContent); - } else { - return JSON.parse(fileContent); - } -} - -function getOnnxSessionOptions(options: OnnxSessionOptions) { - const onnxOptions: Onnx.InferenceSession.SessionOptions = { - executionProviders: ["cpu"], - logSeverityLevel: 3, - }; - - function dmlProviderAvailable() { - const platform = process.platform; - const arch = process.arch; - - return platform === "win32" && arch === "x64"; - } - - if (options) { - if (options.executionProviders != null) { - let executionProviders = options.executionProviders.filter( - (provider) => { - if (!provider) { - return false; - } - - if (provider === "dml" && !dmlProviderAvailable()) { - return false; - } - - return true; - } - ); - - if (!executionProviders.includes("cpu")) { - executionProviders.push("cpu"); - } - - executionProviders = Array.from(new Set(executionProviders)); - - onnxOptions.executionProviders = executionProviders as any; - } else if (options.enableGPU === true && dmlProviderAvailable()) { - onnxOptions.executionProviders = ["dml", "cpu"]; - } - - if (options.logSeverityLevel != null) { - onnxOptions.logSeverityLevel = options.logSeverityLevel; - } - } - - return onnxOptions; -} - -interface OnnxSessionOptions { - enableGPU?: boolean; - executionProviders?: OnnxExecutionProvider[]; - logSeverityLevel?: 0 | 1 | 2 | 3 | 4; -} - -type OnnxExecutionProvider = "cpu" | "dml" | "cuda"; - -interface EspeakOptions { - voice: string; - ssml: boolean; - rate: number; - pitch: number; - pitchRange: number; - useKlatt: boolean; - insertSeparators: boolean; -} - -const defaultEspeakOptions: EspeakOptions = { - voice: "en-us", - ssml: false, - rate: 1.0, - pitch: 1.0, - pitchRange: 1.0, - useKlatt: false, - insertSeparators: false, -}; - -const cachedInstanceLookup = new Map(); - -class VitsTTS { - session?: Onnx.InferenceSession; - metadata?: any; - phonemeMap?: Map; - - constructor( - public readonly voiceName: string, - public readonly modelPath: string, - public readonly executionProviders: OnnxExecutionProvider[] - ) {} - - static async synthesizeSentence( - text: string, - voiceName: string, - modelPath: string, - lengthScale: number, - speakerId: number, - lexicons: Lexicon[], - executionProviders: OnnxExecutionProvider[] - ) { - const cacheLookupKey = modelPath; - - let vitsTTS: VitsTTS | undefined = - cachedInstanceLookup.get(cacheLookupKey); - - if (!vitsTTS) { - vitsTTS = new VitsTTS(voiceName, modelPath, executionProviders); - - cachedInstanceLookup.clear(); - cachedInstanceLookup.set(cacheLookupKey, vitsTTS); - } - - const result = await vitsTTS._synthesizeSentence( - text, - lengthScale, - speakerId, - lexicons - ); - - return result; - } - - private async _synthesizeSentence( - sentence: string, - lengthScale: number, - speakerId = 0, - lexicons?: Lexicon[] - ) { - const logger = new Logger(); - - await this.initializeIfNeeded(); - - await logger.startAsync("Prepare for VITS synthesis"); - - const metadata = this.metadata; - const phonemeMap = this.phonemeMap!; - const espeakVoice = metadata.espeak.voice as string; - const languageCode = espeakVoice; - const outputSampleRate = metadata.audio.sample_rate; - const baseLengthScale = metadata.inference.length_scale || 1.0; - - lengthScale *= baseLengthScale; - - sentence = //simplifyPunctuationCharacters(sentence.trim()) - sentence - .replaceAll("(", ", ") - .replaceAll(")", ", ") - .replaceAll("—", ", "); - - logger.end(); - - const espeakOptions: EspeakOptions = { - ...defaultEspeakOptions, - voice: espeakVoice, - useKlatt: false, - }; - - const { - referenceSynthesizedAudio, - referenceTimeline, - fragments, - phonemizedFragmentsSubstitutions, - phonemizedSentence, - } = await preprocessAndSynthesize( - sentence, - languageCode, - espeakOptions, - lexicons - ); - - if (phonemizedSentence.length == 0) { - logger.end(); - - return { - rawAudio: getEmptyRawAudio(1, outputSampleRate), - timeline: [], - referenceSynthesizedAudio: getEmptyRawAudio( - 1, - outputSampleRate - ), - referenceTimeline: [] as Timeline, - }; - } - - await logger.startAsync("Encode phonemes to identifiers"); - - const clauseEndBreaker = ","; - let sentenceEndBreaker = "."; - - if (sentence.endsWith("?") || sentence.endsWith(`?"`)) { - sentenceEndBreaker = "?"; - } else if (sentence.endsWith("!") || sentence.endsWith(`!"`)) { - sentenceEndBreaker = "!"; - } - - const phonemeCharacterSeparatorId = phonemeMap.get("_")!; - const wordSeparatorId = phonemeMap.get(" ")!; - const startId = phonemeMap.get("^")!; - const endId = phonemeMap.get("$")!; - - const clauseEndBreakerId = phonemeMap.get(clauseEndBreaker)!; - const sentenceEndBreakerId = phonemeMap.get(sentenceEndBreaker)!; - - const ids: number[] = [...startId, ...phonemeCharacterSeparatorId]; - - for ( - let clauseIndex = 0; - clauseIndex < phonemizedSentence.length; - clauseIndex++ - ) { - const clause = phonemizedSentence[clauseIndex]; - - for (const word of clause) { - for (const phoneme of word) { - for (const phonemeCharacter of phoneme) { - const id = phonemeMap.get(phonemeCharacter); - - if (id == null) { - continue; - } - - ids.push(...id, ...phonemeCharacterSeparatorId); - } - } - - if (clauseIndex < phonemizedSentence.length - 1) { - ids.push( - ...wordSeparatorId, - ...phonemeCharacterSeparatorId - ); - } - } - - if (clauseIndex < phonemizedSentence.length - 1) { - ids.push(...clauseEndBreakerId, ...phonemeCharacterSeparatorId); - } - } - - ids.push( - ...sentenceEndBreakerId, - ...phonemeCharacterSeparatorId, - ...endId - ); - - const bigIntIds = new BigInt64Array(ids.map((id) => BigInt(id))); - const idLengths = new BigInt64Array([BigInt(bigIntIds.length)]); - - await logger.startAsync("Generate audio using synthesis model"); - - const inputTensor = new Onnx.Tensor("int64", bigIntIds, [ - 1, - bigIntIds.length, - ]); - const inputLengthsTensor = new Onnx.Tensor("int64", idLengths, [1]); - const scalesTensor = new Onnx.Tensor( - "float32", - [ - metadata.inference.noise_scale, - lengthScale, - metadata.inference.noise_w, - ], - [3] - ); - const speakerIdTensor = new Onnx.Tensor( - "int64", - new BigInt64Array([BigInt(speakerId)]), - [1] - ); - - const modelInputs = { - input: inputTensor, - input_lengths: inputLengthsTensor, - scales: scalesTensor, - sid: speakerIdTensor, - }; - - const modelResults = await this.session!.run(modelInputs); - const modelOutput = modelResults["output"]; - - const modelOutputAudioSamples = modelOutput["data"] as Float32Array; - - const synthesizedAudio: RawAudio = { - audioChannels: [modelOutputAudioSamples], - sampleRate: outputSampleRate, - }; - - await logger.startAsync("Align with reference synthesized audio"); - - const referenceWordTimeline = referenceTimeline.flatMap( - (clause) => clause.timeline! - ); - - const dtwWindowDuration = Math.max( - 5, - Math.ceil(0.2 * getRawAudioDuration(synthesizedAudio)) - ); - const mappedTimeline = await alignUsingDtw( - synthesizedAudio, - referenceSynthesizedAudio, - referenceWordTimeline, - ["high"], - [dtwWindowDuration] - ); - - logger.end(); - - return { - rawAudio: synthesizedAudio, - timeline: mappedTimeline, - referenceSynthesizedAudio, - referenceTimeline, - }; - } - - async initializeIfNeeded() { - if (this.session) { - return; - } - - const logger = new Logger(); - await logger.startAsync("Initialize VITS ONNX synthesis model"); - - const filesInModelPath = await readdir(this.modelPath); - const onnxModelFilename = filesInModelPath.find((filename) => - filename.endsWith(".onnx") - ); - - if (!onnxModelFilename) { - throw new Error( - `Couldn't file any ONNX model file in ${this.modelPath}` - ); - } - - const onnxModelFilepath = path.join(this.modelPath, onnxModelFilename); - - const onnxSessionOptions = getOnnxSessionOptions({ - executionProviders: this.executionProviders, - }); - - this.session = await Onnx.InferenceSession.create( - onnxModelFilepath, - onnxSessionOptions - ); - this.metadata = await readAndParseJsonFile(`${onnxModelFilepath}.json`); - - this.phonemeMap = new Map(); - - for (const key in this.metadata.phoneme_id_map) { - this.phonemeMap.set(key, this.metadata.phoneme_id_map[key]); - } - - logger.end(); - } -} - -function getSamplePeakAmplitude(audioChannels: Float32Array[]) { - let maxAmplitude = 0.00001; - - for (const channelSamples of audioChannels) { - for (const sample of channelSamples) { - maxAmplitude = Math.max(maxAmplitude, Math.abs(sample)); - } - } - - return maxAmplitude; -} - -function getSamplePeakDecibels(audioChannels: Float32Array[]) { - return gainFactorToDecibels(getSamplePeakAmplitude(audioChannels)); -} - -function gainFactorToDecibels(gainFactor: number) { - return gainFactor <= 0.00001 ? -100 : 20.0 * Math.log10(gainFactor); -} - -function splitToSentences(text: string, langCode: string): string[] { - const shortLangCode = getShortLanguageCode(langCode || ""); - - return CldrSegmentation.sentenceSplit( - text, - CldrSegmentation.suppressions[shortLangCode] - ); -} - -type LanguageDetectionResults = LanguageDetectionResultsEntry[]; - -interface LanguageDetectionResultsEntry { - language: string; - languageName: string; - probability: number; -} - -async function detectLanguage(text: string) { - const tinyldResults = detectAll(text); - - const results: LanguageDetectionResults = tinyldResults.map((result) => ({ - language: result.lang, - languageName: languageCodeToName(result.lang), - probability: result.accuracy, - })); - - return results; -} - -interface TextLanguageDetectionOptions { - defaultLanguage?: string; - fallbackThresholdProbability?: number; -} - -interface TextLanguageDetectionResult { - detectedLanguage: string; - detectedLanguageName: string; - detectedLanguageProbabilities: LanguageDetectionResults; -} - -const defaultTextLanguageDetectionOptions: TextLanguageDetectionOptions = { - defaultLanguage: "en", - fallbackThresholdProbability: 0.05, -}; - -async function detectTextLanguage( - input: string, - options: TextLanguageDetectionOptions -): Promise { - const logger = new Logger(); - - options = extendDeep(defaultTextLanguageDetectionOptions, options); - - const defaultLanguage = options.defaultLanguage!; - const fallbackThresholdProbability = options.fallbackThresholdProbability!; - - let detectedLanguageProbabilities: LanguageDetectionResults; - - logger.start(`Initialize language detection module`); - - detectedLanguageProbabilities = await detectLanguage(input); - - let detectedLanguage: string; - - if ( - detectedLanguageProbabilities.length == 0 || - detectedLanguageProbabilities[0].probability < - fallbackThresholdProbability - ) { - detectedLanguage = defaultLanguage; - } else { - detectedLanguage = detectedLanguageProbabilities[0].language; - } - - logger.end(); - - return { - detectedLanguage, - detectedLanguageName: languageCodeToName(detectedLanguage), - detectedLanguageProbabilities, - }; -} - -function clampFloatSample(floatSample: number) { - if (floatSample < -1.0) { - return -1.0; - } else if (floatSample > 1.0) { - return 1.0; - } else { - return floatSample; - } -} - -function float32ToInt16Pcm(input: Float32Array) { - const output = new Int16Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = clampFloatSample(input[i]); - output[i] = (sample < 0 ? sample * 32768 : sample * 32767) | 0; - } - - return output; -} - -// Typed arrays to Buffer (little endian) conversions -// -// The faster conversion methods (other than the methods for int8) would only work correctly -// on little-endian architectures, since they assume the byte order of the underlying architecture. -// -// Since Echogarden only supports little-endian architectures, this shouldn't matter. - -// int8 <-> bufferLE -function int8ToBuffer(int8s: Int8Array) { - return Buffer.copyBytesFrom(int8s); -} - -function int8ToBuffer_Slow(int8s: Int8Array) { - const buffer = Buffer.alloc(int8s.length); - - for (let i = 0; i < int8s.length; i++) { - buffer[i] = int8s[i] + 128; - } - - return buffer; -} - -function bufferToInt8(buffer: Buffer) { - return new Int8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength); -} - -function bufferToInt8_Slow(buffer: Buffer) { - const result = new Int8Array(buffer.length); - - for (let i = 0; i < result.length; i++) { - result[i] = buffer[i] - 128; - } - - return result; -} - -// int16 <-> bufferLE -function int16ToBufferLE(int16s: Int16Array) { - return Buffer.copyBytesFrom(int16s); -} - -function int16ToBufferLE_Slow(int16s: Int16Array) { - const buffer = Buffer.alloc(int16s.length * 2); - - for (let i = 0; i < int16s.length; i++) { - buffer.writeInt16LE(int16s[i], i * 2); - } - - return buffer; -} - -function bufferLEToInt16(buffer: Buffer) { - return new Int16Array( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength / 2 - ); -} - -function bufferLEToInt16_Slow(buffer: Buffer) { - const result = new Int16Array(buffer.length / 2); - - for (let i = 0; i < result.length; i++) { - result[i] = buffer.readInt16LE(i * 2); - } - - return result; -} - -// int24 <-> bufferLE (uses int32 for storage) -function int24ToBufferLE(int24s: Int32Array) { - const buffer = Buffer.alloc(int24s.length * 3); - - for (let i = 0; i < int24s.length; i++) { - const val = int24s[i]; - const encodedVal = val < 0 ? val + 0x1000000 : val; - - buffer[i * 3 + 0] = (encodedVal >> 0) & 0xff; - buffer[i * 3 + 1] = (encodedVal >> 8) & 0xff; - buffer[i * 3 + 2] = (encodedVal >> 16) & 0xff; - } - - return buffer; -} - -function bufferLEToInt24(buffer: Buffer) { - const result = new Int32Array(buffer.length / 3); - - for (let i = 0; i < result.length; i++) { - const b0 = buffer[i * 3 + 0]; - const b1 = buffer[i * 3 + 1]; - const b2 = buffer[i * 3 + 2]; - - const encodedVal = (b0 << 0) + (b1 << 8) + (b2 << 16); - result[i] = encodedVal > 0x800000 ? encodedVal - 0x1000000 : encodedVal; - } - - return result; -} - -// int32 <-> bufferLE -function int32ToBufferLE(int32s: Int32Array) { - return Buffer.copyBytesFrom(int32s); -} - -function int32ToBufferLE_Slow(int32s: Int32Array) { - const buffer = Buffer.alloc(int32s.length * 4); - - for (let i = 0; i < int32s.length; i++) { - buffer.writeInt32LE(int32s[i], i * 4); - } - - return buffer; -} - -function bufferLEToInt32(buffer: Buffer) { - return new Int32Array( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength / 4 - ); -} - -function bufferLEToInt32_Slow(buffer: Buffer) { - const result = new Int32Array(buffer.length / 4); - - for (let i = 0; i < result.length; i++) { - result[i] = buffer.readInt32LE(i * 4); - } - - return result; -} - -// float32 <-> bufferLE -function float32ToBufferLE(float32s: Float32Array) { - return Buffer.copyBytesFrom(float32s); -} - -function float32ToBufferLE_Slow(float32s: Float32Array) { - const buffer = Buffer.alloc(float32s.length * 4); - - for (let i = 0; i < float32s.length; i++) { - buffer.writeFloatLE(float32s[i], i * 4); - } - - return buffer; -} - -function bufferLEToFloat32(buffer: Buffer) { - return new Float32Array( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength / 4 - ); -} - -function bufferLEToFloat32_Slow(buffer: Buffer) { - const result = new Float32Array(buffer.length / 4); - - for (let i = 0; i < result.length; i++) { - result[i] = buffer.readFloatLE(i * 4); - } - - return result; -} - -// float64 <-> bufferLE -function float64ToBufferLE(float64s: Float64Array) { - return Buffer.copyBytesFrom(float64s); -} - -function float64ToBufferLE_Slow(float64s: Float64Array) { - const buffer = Buffer.alloc(float64s.length * 8); - - for (let i = 0; i < float64s.length; i++) { - buffer.writeDoubleLE(float64s[i], i * 8); - } - - return buffer; -} - -function bufferLEToFloat64(buffer: Buffer) { - return new Float64Array( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength / 8 - ); -} - -function bufferLEToFloat64_Slow(buffer: Buffer) { - const result = new Float64Array(buffer.length / 8); - - for (let i = 0; i < result.length; i++) { - result[i] = buffer.readDoubleLE(i * 8); - } - - return result; -} - -// float64 <-> float32 -function float64Tofloat32(float64s: Float64Array) { - return Float32Array.from(float64s); -} - -function float32Tofloat64(float32s: Float32Array) { - return Float64Array.from(float32s); -} - -function interleaveChannels(channels: Float32Array[]) { - const channelCount = channels.length; - - if (channelCount === 0) { - throw new Error("Empty channel array received"); - } - - if (channelCount === 1) { - return channels[0]; - } - - const sampleCount = channels[0].length; - const result = new Float32Array(sampleCount * channelCount); - - let writeIndex = 0; - - for (let i = 0; i < sampleCount; i++) { - for (let c = 0; c < channelCount; c++) { - result[writeIndex] = channels[c][i]; - writeIndex += 1; - } - } - - return result; -} - -function float32ToInt8Pcm(input: Float32Array) { - const output = new Int8Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = clampFloatSample(input[i]); - output[i] = (sample < 0 ? sample * 128 : sample * 127) | 0; - } - - return output; -} - -function float32ToInt24Pcm(input: Float32Array) { - const output = new Int32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = clampFloatSample(input[i]); - output[i] = (sample < 0 ? sample * 8388608 : sample * 8388607) | 0; - } - - return output; -} - -function float32ToInt32Pcm(input: Float32Array) { - const output = new Int32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = clampFloatSample(input[i]); - output[i] = - (sample < 0 ? sample * 2147483648 : sample * 2147483647) | 0; - } - - return output; -} - -function encodeToAudioBuffer( - audioChannels: Float32Array[], - targetBitDepth: BitDepth = 16, - targetSampleFormat: SampleFormat = SampleFormat.PCM -) { - const interleavedChannels = interleaveChannels(audioChannels); - - audioChannels = []; // Zero the array references to allow the GC to free up memory, if possible - - if (targetSampleFormat === SampleFormat.PCM) { - if (targetBitDepth === 8) { - return int8ToBuffer(float32ToInt8Pcm(interleavedChannels)); - } else if (targetBitDepth === 16) { - return int16ToBufferLE(float32ToInt16Pcm(interleavedChannels)); - } else if (targetBitDepth === 24) { - return int24ToBufferLE(float32ToInt24Pcm(interleavedChannels)); - } else if (targetBitDepth === 32) { - return int32ToBufferLE(float32ToInt32Pcm(interleavedChannels)); - } else { - throw new Error(`Unsupported PCM bit depth: ${targetBitDepth}`); - } - } else if (targetSampleFormat === SampleFormat.Float) { - if (targetBitDepth === 32) { - return float32ToBufferLE(interleavedChannels); - } else if (targetBitDepth === 64) { - return float64ToBufferLE(float32Tofloat64(interleavedChannels)); - } else { - throw new Error(`Unsupported float bit depth: ${targetBitDepth}`); - } - } else if (targetSampleFormat === SampleFormat.Alaw) { - if (targetBitDepth === 8) { - return Buffer.from( - AlawMulaw.alaw.encode(float32ToInt16Pcm(interleavedChannels)) - ); - } else { - throw new Error(`Unsupported alaw bit depth: ${targetBitDepth}`); - } - } else if (targetSampleFormat === SampleFormat.Mulaw) { - if (targetBitDepth === 8) { - return Buffer.from( - AlawMulaw.mulaw.encode(float32ToInt16Pcm(interleavedChannels)) - ); - } else { - throw new Error(`Unsupported mulaw bit depth: ${targetBitDepth}`); - } - } else { - throw new Error(`Unsupported audio format: ${targetSampleFormat}`); - } -} - -class WaveFormat { - // 24 bytes total for PCM, 26 for float - sampleFormat: SampleFormat; // 2 bytes LE - channelCount: number; // 2 bytes LE - sampleRate: number; // 4 bytes LE - get byteRate() { - return this.sampleRate * this.bytesPerSample * this.channelCount; - } // 4 bytes LE - get blockAlign() { - return this.bytesPerSample * this.channelCount; - } // 2 bytes LE - bitDepth: BitDepth; // 2 bytes LE - - speakerPositionMask: number; // 4 bytes LE - get guid() { - return sampleFormatToGuid[this.sampleFormat]; - } // 16 bytes BE - - // helpers: - get bytesPerSample() { - return this.bitDepth / 8; - } - - constructor( - channelCount: number, - sampleRate: number, - bitDepth: BitDepth, - sampleFormat: SampleFormat, - speakerPositionMask = 0 - ) { - this.sampleFormat = sampleFormat; - this.channelCount = channelCount; - this.sampleRate = sampleRate; - this.bitDepth = bitDepth; - - this.speakerPositionMask = speakerPositionMask; - } - - serialize(useExtensibleFormat: boolean) { - let sampleFormatId = this.sampleFormat; - - if (useExtensibleFormat) { - sampleFormatId = 65534 as number; - } - - const serializedSize = sampleFormatToSerializedSize[sampleFormatId]; - - const result = Buffer.alloc(serializedSize); - - result.write("fmt ", 0, "ascii"); // + 4 - result.writeUint32LE(serializedSize - 8, 4); // + 4 - - result.writeUint16LE(sampleFormatId, 8); // + 2 - result.writeUint16LE(this.channelCount, 10); // + 2 - result.writeUint32LE(this.sampleRate, 12); // + 4 - result.writeUint32LE(this.byteRate, 16); // + 4 - result.writeUint16LE(this.blockAlign, 20); // + 2 - result.writeUint16LE(this.bitDepth, 22); // + 2 - - if (useExtensibleFormat) { - result.writeUint16LE(serializedSize - 26, 24); // + 2 (extension size) - result.writeUint16LE(this.bitDepth, 26); // + 2 (valid bits per sample) - result.writeUint32LE(this.speakerPositionMask, 28); // + 2 (speaker position mask) - - if ( - this.sampleFormat == SampleFormat.PCM || - this.sampleFormat == SampleFormat.Float - ) { - result.set(Buffer.from(this.guid, "hex"), 32); - } else { - throw new Error( - `Extensible format is not supported for sample format ${this.sampleFormat}` - ); - } - } - - return result; - } - - static deserializeFrom(formatChunkBody: Buffer) { - // chunkBody should not include the first 8 bytes - let sampleFormat = formatChunkBody.readUint16LE(0); // + 2 - const channelCount = formatChunkBody.readUint16LE(2); // + 2 - const sampleRate = formatChunkBody.readUint32LE(4); // + 4 - const bitDepth = formatChunkBody.readUint16LE(14); - let speakerPositionMask = 0; - - if (sampleFormat == 65534) { - if (formatChunkBody.length < 40) { - throw new Error( - `Format subchunk specifies a format id of 65534 (extensible) but its body size is ${formatChunkBody.length} bytes, which is smaller than the minimum expected of 40 bytes` - ); - } - - speakerPositionMask = formatChunkBody.readUint16LE(20); - - const guid = formatChunkBody.subarray(24, 40).toString("hex"); - - if (guid == sampleFormatToGuid[SampleFormat.PCM]) { - sampleFormat = SampleFormat.PCM; - } else if (guid == sampleFormatToGuid[SampleFormat.Float]) { - sampleFormat = SampleFormat.Float; - } else { - throw new Error( - `Unsupported format GUID in extended format subchunk: ${guid}` - ); - } - } - - if (sampleFormat == SampleFormat.PCM) { - if ( - bitDepth != 8 && - bitDepth != 16 && - bitDepth != 24 && - bitDepth != 32 - ) { - throw new Error( - `PCM audio has a bit depth of ${bitDepth}, which is not supported` - ); - } - } else if (sampleFormat == SampleFormat.Float) { - if (bitDepth != 32 && bitDepth != 64) { - throw new Error( - `IEEE float audio has a bit depth of ${bitDepth}, which is not supported` - ); - } - } else if (sampleFormat == SampleFormat.Alaw) { - if (bitDepth != 8) { - throw new Error( - `Alaw audio has a bit depth of ${bitDepth}, which is not supported` - ); - } - } else if (sampleFormat == SampleFormat.Mulaw) { - if (bitDepth != 8) { - throw new Error( - `Mulaw audio has a bit depth of ${bitDepth}, which is not supported` - ); - } - } else { - throw new Error( - `Wave audio format id ${sampleFormat} is not supported` - ); - } - - return new WaveFormat( - channelCount, - sampleRate, - bitDepth, - sampleFormat, - speakerPositionMask - ); - } -} - -function encodeWave( - rawAudio: RawAudio, - bitDepth: BitDepth = 16, - sampleFormat: SampleFormat = SampleFormat.PCM, - speakerPositionMask = 0 -) { - const audioChannels = rawAudio.audioChannels; - const sampleRate = rawAudio.sampleRate; - - const audioBuffer = encodeToAudioBuffer( - audioChannels, - bitDepth, - sampleFormat - ); - const audioDataLength = audioBuffer.length; - - const shouldUseExtensibleFormat = bitDepth > 16 || audioChannels.length > 2; - - const formatSubChunk = new WaveFormat( - audioChannels.length, - sampleRate, - bitDepth, - sampleFormat, - speakerPositionMask - ); - const formatSubChunkBuffer = formatSubChunk.serialize( - shouldUseExtensibleFormat - ); - - const dataSubChunkBuffer = Buffer.alloc(4 + 4 + audioDataLength); - dataSubChunkBuffer.write("data", 0, "ascii"); - const dataChunkLength = Math.min(audioDataLength, 4294967295); // Ensure large data chunk length is clipped to max - dataSubChunkBuffer.writeUint32LE(dataChunkLength, 4); - dataSubChunkBuffer.set(audioBuffer, 8); - - const riffChunkHeaderBuffer = Buffer.alloc(12); - riffChunkHeaderBuffer.write("RIFF", 0, "ascii"); - const riffChunkLength = Math.min( - 4 + formatSubChunkBuffer.length + dataSubChunkBuffer.length, - 4294967295 - ); // Ensure large RIFF chunk length is clipped to max - riffChunkHeaderBuffer.writeUint32LE(riffChunkLength, 4); - riffChunkHeaderBuffer.write("WAVE", 8, "ascii"); - - return Buffer.concat([ - riffChunkHeaderBuffer, - formatSubChunkBuffer, - dataSubChunkBuffer, - ]); -} - -enum SampleFormat { - PCM = 1, - Float = 3, - Alaw = 6, - Mulaw = 7, -} - -type BitDepth = 8 | 16 | 24 | 32 | 64; - -const sampleFormatToSerializedSize = { - [SampleFormat.PCM]: 24, - [SampleFormat.Float]: 26, - [SampleFormat.Alaw]: 26, - [SampleFormat.Mulaw]: 26, - 65534: 48, -}; - -const sampleFormatToGuid = { - [SampleFormat.PCM]: "0100000000001000800000aa00389b71", - [SampleFormat.Float]: "0300000000001000800000aa00389b71", - [SampleFormat.Alaw]: "", - [SampleFormat.Mulaw]: "", -}; - -function encodeRawAudioToWave( - rawAudio: RawAudio, - bitDepth: BitDepth = 16, - sampleFormat: SampleFormat = SampleFormat.PCM, - speakerPositionMask = 0 -) { - return encodeWave(rawAudio, bitDepth, sampleFormat, speakerPositionMask); -} - -function languageCodeToName(languageCode: string) { - const languageNames = new Intl.DisplayNames(["en"], { type: "language" }); - - let translatedLanguageName: string | undefined; - - try { - translatedLanguageName = languageNames.of(languageCode); - } catch (e) {} - - return translatedLanguageName || "Unknown"; -} - -function formatLanguageCodeWithName(languageCode: string, styleId: 1 | 2 = 1) { - if (styleId == 1) { - return `${languageCodeToName(languageCode)} (${languageCode})`; - } else { - return `${languageCode}, ${languageCodeToName(languageCode)}`; - } -} - -const cancelCurrentTask = false; - -function shouldCancelCurrentTask() { - return cancelCurrentTask; -} - -function trimAudioStart( - audioSamples: Float32Array, - targetStartSilentSampleCount = 0, - amplitudeThresholdDecibels = defaultSilenceThresholdDecibels -) { - const silentSampleCount = getStartingSilentSampleCount( - audioSamples, - amplitudeThresholdDecibels - ); - - const trimmedAudio = audioSamples.subarray( - silentSampleCount, - audioSamples.length - ); - const restoredSilence = new Float32Array(targetStartSilentSampleCount); - - const trimmedAudioSamples = concatFloat32Arrays([ - restoredSilence, - trimmedAudio, - ]); - - return trimmedAudioSamples; -} - -function trimAudioEnd( - audioSamples: Float32Array, - targetEndSilentSampleCount = 0, - amplitudeThresholdDecibels = defaultSilenceThresholdDecibels -) { - if (audioSamples.length === 0) { - return new Float32Array(0); - } - - const silentSampleCount = getEndingSilentSampleCount( - audioSamples, - amplitudeThresholdDecibels - ); - - const trimmedAudio = audioSamples.subarray( - 0, - audioSamples.length - silentSampleCount - ); - const restoredSilence = new Float32Array(targetEndSilentSampleCount); - - const trimmedAudioSamples = concatFloat32Arrays([ - trimmedAudio, - restoredSilence, - ]); - - return trimmedAudioSamples; -} - -function addTimeOffsetToTimeline(targetTimeline: Timeline, timeOffset: number) { - if (!targetTimeline) { - return targetTimeline; - } - - const newTimeline = deepClone(targetTimeline); - - for (const segmentTimelineEntry of newTimeline) { - segmentTimelineEntry.startTime = Math.max( - segmentTimelineEntry.startTime + timeOffset, - 0 - ); - segmentTimelineEntry.endTime = Math.max( - segmentTimelineEntry.endTime + timeOffset, - 0 - ); - - if (segmentTimelineEntry.timeline) { - segmentTimelineEntry.timeline = addTimeOffsetToTimeline( - segmentTimelineEntry.timeline, - timeOffset - ); - } - } - - return newTimeline; -} - -async function synthesizeSegments( - segments: string[], - options: SynthesisOptions, - onSegment?: SynthesisSegmentEvent, - onSentence?: SynthesisSegmentEvent -): Promise { - const logger = new Logger(); - options = extendDeep(defaultSynthesisOptions, options); - - if (!options.language && !options.voice) { - logger.start("No language or voice specified. Detect language"); - - const segmentsPlainText = segments; - - const { detectedLanguage } = await detectTextLanguage( - segmentsPlainText.join("\n\n"), - options.languageDetection || {} - ); - - options.language = detectedLanguage; - - logger.end(); - } - - const { bestMatchingVoice } = await requestVoiceList(options); - - if (!bestMatchingVoice) { - throw new Error("No matching voice found 1"); - } - - options.voice = bestMatchingVoice.name; - - if (!options.language) { - options.language = bestMatchingVoice.languages[0]; - } - - logger.end(); - logger.logTitledMessage( - "Selected voice", - `'${options.voice}' (${formatLanguageCodeWithName(bestMatchingVoice.languages[0], 2)})` - ); - - const segmentsRawAudio: RawAudio[] = []; - const segmentsTimelines: Timeline[] = []; - - const timeline: Timeline = []; - - let peakDecibelsSoFar = -100; - - let timeOffset = 0; - - for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) { - const segmentText = segments[segmentIndex].trim(); - - logger.log( - `\n${`Synthesizing segment ${segmentIndex + 1}/${segments.length}`}: '${segmentText}'` - ); - - const segmentStartTime = timeOffset; - - const segmentEntry: TimelineEntry = { - type: "segment", - text: segmentText, - startTime: timeOffset, - endTime: -1, - timeline: [], - }; - - let sentences: string[]; - - if (!options.ssml) { - sentences = splitToSentences(segmentText, options.language!); - sentences = sentences.filter((sentence) => sentence.trim() != ""); - - if (sentences.length == 0) { - sentences = [""]; - } - } else { - sentences = [segmentText]; - } - - const sentencesRawAudio: RawAudio[] = []; - const sentencesTimelines: Timeline[] = []; - - for ( - let sentenceIndex = 0; - sentenceIndex < sentences.length; - sentenceIndex++ - ) { - await yieldToEventLoop(); - - if (shouldCancelCurrentTask()) { - //log('\n\n\n\n\nCANCELED\n\n\n\n') - throw new Error("Canceled"); - } - - const sentenceText = sentences[sentenceIndex].trim(); - - logger.log( - `\n${`Synthesizing sentence ${sentenceIndex + 1}/${sentences.length}`}: "${sentenceText}"` - ); - - const sentenceStartTime = timeOffset; - - let sentencetSynthesisOptions: SynthesisOptions = { - postProcessing: { normalizeAudio: false }, - }; - sentencetSynthesisOptions = extendDeep( - options, - sentencetSynthesisOptions - ); - - const { - synthesizedAudio: sentenceRawAudio, - timeline: sentenceTimeline, - } = await synthesizeSegment( - sentenceText, - sentencetSynthesisOptions - ); - - const endPause = - sentenceIndex == sentences.length - 1 - ? options.segmentEndPause! - : options.sentenceEndPause!; - sentenceRawAudio.audioChannels[0] = trimAudioEnd( - sentenceRawAudio.audioChannels[0], - endPause * sentenceRawAudio.sampleRate - ); - - sentencesRawAudio.push(sentenceRawAudio); - - if (sentenceTimeline.length > 0) { - sentencesTimelines.push(sentenceTimeline); - } - - const sentenceAudioLength = - sentenceRawAudio.audioChannels[0].length / - sentenceRawAudio.sampleRate; - - timeOffset += sentenceAudioLength; - - const sentenceTimelineWithOffset = addTimeOffsetToTimeline( - sentenceTimeline, - sentenceStartTime - ); - - const sentenceEndTime = timeOffset - endPause; - - segmentEntry.timeline!.push({ - type: "sentence", - text: sentenceText, - startTime: sentenceStartTime, - endTime: sentenceEndTime, - timeline: sentenceTimelineWithOffset, - }); - - peakDecibelsSoFar = Math.max( - peakDecibelsSoFar, - getSamplePeakDecibels(sentenceRawAudio.audioChannels) - ); - - const sentenceAudio = - await convertToTargetCodecIfNeeded(sentenceRawAudio); - - if (onSentence) { - await onSentence({ - index: sentenceIndex, - total: sentences.length, - audio: sentenceAudio, - timeline: sentenceTimeline, - transcript: sentenceText, - language: options.language!, - peakDecibelsSoFar, - }); - } - } - - segmentEntry.endTime = - segmentEntry.timeline?.[segmentEntry.timeline.length - 1] - ?.endTime || timeOffset; - - logger.end(); - - logger.start(`Merge and postprocess sentences`); - - let segmentRawAudio: RawAudio; - - if (sentencesRawAudio.length > 0) { - const joinedAudioBuffers = concatAudioSegments( - sentencesRawAudio.map((part) => part.audioChannels) - ); - segmentRawAudio = { - audioChannels: joinedAudioBuffers, - sampleRate: sentencesRawAudio[0].sampleRate, - }; - } else { - segmentRawAudio = getEmptyRawAudio(1, 24000); - } - - segmentsRawAudio.push(segmentRawAudio); - - timeline.push(segmentEntry); - const segmentTimelineWithoutOffset = addTimeOffsetToTimeline( - segmentEntry.timeline!, - -segmentStartTime - ); - segmentsTimelines.push(segmentTimelineWithoutOffset); - - const segmentAudio = - await convertToTargetCodecIfNeeded(segmentRawAudio); - - logger.end(); - - if (onSegment) { - await onSegment({ - index: segmentIndex, - total: segments.length, - audio: segmentAudio, - timeline: segmentTimelineWithoutOffset, - transcript: segmentText, - language: options.language!, - peakDecibelsSoFar, - }); - } - } - - logger.start(`\nMerge and postprocess segments`); - let resultRawAudio: RawAudio; - - if (segmentsRawAudio.length > 0) { - const joinedAudioBuffers = concatAudioSegments( - segmentsRawAudio.map((part) => part.audioChannels) - ); - resultRawAudio = { - audioChannels: joinedAudioBuffers, - sampleRate: segmentsRawAudio[0].sampleRate, - }; - - if (options.postProcessing!.normalizeAudio) { - resultRawAudio = normalizeAudioLevel( - resultRawAudio, - options.postProcessing!.targetPeak, - options.postProcessing!.maxGainIncrease - ); - } else { - resultRawAudio = attenuateIfClipping(resultRawAudio); - } - } else { - resultRawAudio = getEmptyRawAudio(1, 24000); - } - - async function convertToTargetCodecIfNeeded(rawAudio: RawAudio) { - const targetCodec = options.outputAudioFormat?.codec; - - let output: RawAudio | Buffer; - - if (targetCodec) { - logger.start(`Convert to ${targetCodec} codec`); - - if (targetCodec == "wav") { - output = encodeRawAudioToWave(rawAudio); - } else { - const ffmpegOptions = getDefaultFFMpegOptionsForSpeech( - targetCodec, - options.outputAudioFormat?.bitrate - ); - output = await encodeFromChannels(rawAudio, ffmpegOptions); - } - } else { - output = rawAudio; - } - - return output; - } - - const resultAudio = await convertToTargetCodecIfNeeded(resultRawAudio); - - logger.end(); - - return { - audio: resultAudio, - timeline, - language: options.language, - voice: options.voice, - }; -} - -async function encodeFromChannels( - rawAudio: RawAudio, - outputOptions: FFMpegOutputOptions -) { - return transcode(encodeRawAudioToWave(rawAudio), outputOptions); -} - -function getDefaultFFMpegOptionsForSpeech( - fileExtension: string, - customBitrate?: number -) { - let ffmpegOptions: FFMpegOutputOptions; - - if (fileExtension == "mp3") { - ffmpegOptions = { - format: "mp3", - codec: "libmp3lame", - bitrate: 64, - customOptions: [], - }; - } else if (fileExtension == "opus") { - ffmpegOptions = { - codec: "libopus", - bitrate: 48, - customOptions: [], - }; - } else if (fileExtension == "m4a") { - ffmpegOptions = { - format: "mp4", - codec: "aac", - bitrate: 48, - customOptions: [ - "-profile:a", - "aac_low", - "-movflags", - "frag_keyframe+empty_moov", - ], - }; - } else if (fileExtension == "ogg") { - ffmpegOptions = { - codec: "libvorbis", - bitrate: 48, - customOptions: [], - }; - } else if (fileExtension == "flac") { - ffmpegOptions = { - format: "flac", - customOptions: ["-compression_level", "6"], - }; - } else { - throw new Error(`Unsupported codec extension: '${fileExtension}'`); - } - - if (customBitrate != null) { - ffmpegOptions.bitrate = customBitrate; - } - - return ffmpegOptions; -} - -function concatAudioSegments(audioSegments: Float32Array[][]) { - if (audioSegments.length == 0) { - return []; - } - - const channelCount = audioSegments[0].length; - - const outAudioChannels: Float32Array[] = []; - - for (let i = 0; i < channelCount; i++) { - const audioSegmentsForChannel = audioSegments.map( - (segment) => segment[i] - ); - - outAudioChannels.push(concatFloat32Arrays(audioSegmentsForChannel)); - } - - return outAudioChannels; -} - -interface SynthesisResult { - audio: RawAudio | Buffer; - timeline: Timeline; - language: string; - voice: string; -} - -interface SynthesisOptions { - engine?: "vits" | "espeak"; - language?: string; - voice?: string; - voiceGender?: VoiceGender; - - speed?: number; - pitch?: number; - pitchVariation?: number; - - splitToSentences?: boolean; - - ssml?: boolean; - - segmentEndPause?: number; - sentenceEndPause?: number; - - customLexiconPaths?: string[]; - - plainText?: PlainTextOptions; - - alignment?: AlignmentOptions; - - postProcessing?: { - normalizeAudio?: boolean; - targetPeak?: number; - maxGainIncrease?: number; - - speed?: number; - pitch?: number; - }; - - outputAudioFormat?: { - codec?: "wav" | "mp3" | "opus" | "m4a" | "ogg" | "flac"; - bitrate?: number; - }; - - languageDetection?: TextLanguageDetectionOptions; - - vits?: { - speakerId?: number; - provider?: OnnxExecutionProvider; - }; - - espeak?: { - rate?: number; - pitch?: number; - pitchRange?: number; - - useKlatt?: boolean; - insertSeparators?: boolean; - }; -} - -async function synthesizeSegment(text: string, options: SynthesisOptions) { - const logger = new Logger(); - - const startTimestamp = logger.getTimestamp(); - - logger.start("Prepare text for synthesis"); - - const simplifiedText = simplifyPunctuationCharacters(text); - - const engine = options.engine; - - logger.start(`Get voice list for ${engine}`); - - const { bestMatchingVoice } = await requestVoiceList(options); - - if (!bestMatchingVoice) { - throw new Error("No matching voice found 2"); - } - - const selectedVoice = bestMatchingVoice; - - let voicePackagePath: string | undefined; - - if (selectedVoice.packageName) { - logger.end(); - - voicePackagePath = await loadPackage(selectedVoice.packageName); - } - - logger.start(`Initialize ${engine} module`); - - const voice = selectedVoice.name; - - let language: string; - - if (options.language) { - language = await normalizeIdentifierToLanguageCode(options.language); - } else { - language = selectedVoice.languages[0]; - } - - const voiceGender = selectedVoice.gender; - - const speed = clip(options.speed!, 0.1, 10.0); - const pitch = clip(options.pitch!, 0.1, 10.0); - - const inputIsSSML = options.ssml!; - - let synthesizedAudio: RawAudio; - - let timeline: Timeline | undefined; - - const shouldPostprocessSpeed = false; - let shouldPostprocessPitch = false; - - switch (engine) { - case "vits": { - if (inputIsSSML) { - throw new Error( - `The VITS engine doesn't currently support SSML inputs` - ); - } - - let vitsLanguage = language; - - if (vitsLanguage == "en") { - vitsLanguage = "en-us"; - } - - const lengthScale = 1 / speed; - - const vitsOptions = options.vits!; - - const speakerId = vitsOptions.speakerId; - - if (speakerId != undefined) { - if (selectedVoice.speakerCount == undefined) { - if (speakerId != 0) { - throw new Error( - "Selected VITS model has only one speaker. Speaker ID must be 0 if specified." - ); - } - } else if ( - speakerId < 0 || - speakerId >= selectedVoice.speakerCount - ) { - throw new Error( - `Selected VITS model has ${selectedVoice.speakerCount} speaker IDs. Speaker ID should be in the range ${0} to ${selectedVoice.speakerCount - 1}` - ); - } - } - - const lexicons = await loadLexiconsForLanguage( - language, - options.customLexiconPaths - ); - - const modelPath = voicePackagePath!; - - const onnxExecutionProviders: OnnxExecutionProvider[] = - vitsOptions.provider ? [vitsOptions.provider] : []; - - logger.end(); - - const { rawAudio, timeline: outTimeline } = - await VitsTTS.synthesizeSentence( - text, - voice, - modelPath, - lengthScale, - speakerId ?? 0, - lexicons, - onnxExecutionProviders - ); - - synthesizedAudio = rawAudio; - timeline = outTimeline; - - shouldPostprocessPitch = true; - - logger.end(); - - break; - } - - case "espeak": { - const engineOptions = options.espeak!; - - const espeakVoice = voice; - const espeakLanguage = selectedVoice.languages[0]; - const espeakRate = engineOptions.rate || speed * 150; - const espeakPitch = engineOptions.pitch || options.pitch! * 50; - const espeakPitchRange = - engineOptions.pitchRange || options.pitchVariation! * 50; - const espeakUseKlatt = engineOptions.useKlatt || false; - const espeakInsertSeparators = - engineOptions.insertSeparators || false; - - const espeakOptions: EspeakOptions = { - voice: espeakVoice, - ssml: inputIsSSML, - rate: espeakRate, - pitch: espeakPitch, - pitchRange: espeakPitchRange, - useKlatt: espeakUseKlatt, - insertSeparators: espeakInsertSeparators, - }; - - if (inputIsSSML) { - logger.end(); - - const { rawAudio } = await espeakSynthesize( - text, - espeakOptions - ); - - synthesizedAudio = rawAudio; - } else { - const lexicons = await loadLexiconsForLanguage( - language, - options.customLexiconPaths - ); - - logger.end(); - - const { referenceSynthesizedAudio, referenceTimeline } = - await preprocessAndSynthesize( - text, - espeakLanguage, - espeakOptions, - lexicons - ); - - synthesizedAudio = referenceSynthesizedAudio; - timeline = referenceTimeline.flatMap( - (clause) => clause.timeline! - ); - } - - break; - } - - default: { - throw new Error(`Engine '${options.engine}' is not supported`); - } - } - - logger.start("Postprocess synthesized audio"); - synthesizedAudio = downmixToMono(synthesizedAudio); - - if (options.postProcessing!.normalizeAudio) { - synthesizedAudio = normalizeAudioLevel( - synthesizedAudio, - options.postProcessing!.targetPeak!, - options.postProcessing!.maxGainIncrease! - ); - } else { - synthesizedAudio = attenuateIfClipping(synthesizedAudio); - } - - const preTrimSampleCount = synthesizedAudio.audioChannels[0].length; - synthesizedAudio.audioChannels[0] = trimAudioStart( - synthesizedAudio.audioChannels[0] - ); - - if (timeline) { - const oldDuration = preTrimSampleCount / synthesizedAudio.sampleRate; - const newDuration = - synthesizedAudio.audioChannels[0].length / - synthesizedAudio.sampleRate; - - timeline = addTimeOffsetToTimeline(timeline, newDuration - oldDuration); - } - - if (!timeline) { - logger.start("Align synthesized audio with text"); - - let plainText = text; - - if (inputIsSSML) { - plainText = await convertHtmlToText(text); - } - - const alignmentOptions = options.alignment!; - - alignmentOptions.language = language; - - if (!alignmentOptions.customLexiconPaths) { - alignmentOptions.customLexiconPaths = options.customLexiconPaths; - } - - if (alignmentOptions.dtw!.windowDuration == null) { - alignmentOptions.dtw!.windowDuration = Math.max( - 5, - Math.ceil(0.2 * getRawAudioDuration(synthesizedAudio)) - ); - } - - const { wordTimeline } = await align( - synthesizedAudio, - plainText, - alignmentOptions - ); - - timeline = wordTimeline; - - logger.end(); - } - - const postProcessingOptions = options.postProcessing!; - - let timeStretchFactor = postProcessingOptions.speed; - - if (shouldPostprocessSpeed && timeStretchFactor == undefined) { - timeStretchFactor = speed; - } - - let pitchShiftFactor = postProcessingOptions.pitch; - - if (shouldPostprocessPitch && pitchShiftFactor == undefined) { - pitchShiftFactor = pitch; - } - - if (timeline) { - timeline = timeline.filter((entry) => isWordOrSymbolWord(entry.text)); - } - - logger.end(); - - return { synthesizedAudio, timeline }; -} - -interface PlainTextOptions { - paragraphBreaks?: ParagraphBreakType; - whitespace?: WhitespaceProcessing; -} - -type AudioSourceParam = string | Buffer | Uint8Array | RawAudio; - -type PhoneAlignmentMethod = "interpolation" | "dtw"; - -interface AlignmentOptions { - language?: string; - - crop?: boolean; - - customLexiconPaths?: string[]; - - languageDetection?: TextLanguageDetectionOptions; - - plainText?: PlainTextOptions; - - dtw?: { - granularity?: DtwGranularity | DtwGranularity[]; - windowDuration?: number | number[]; - phoneAlignmentMethod?: PhoneAlignmentMethod; - }; -} - -interface AlignmentResult { - timeline: Timeline; - wordTimeline: Timeline; - - transcript: string; - language: string; - - inputRawAudio: RawAudio; - isolatedRawAudio?: RawAudio; - backgroundRawAudio?: RawAudio; -} - -const defaultAlignmentOptions: AlignmentOptions = { - language: undefined, - - crop: true, - - customLexiconPaths: undefined, - - languageDetection: {}, - - plainText: { - paragraphBreaks: "double", - whitespace: "collapse", - }, - - dtw: { - granularity: undefined, - windowDuration: undefined, - phoneAlignmentMethod: "dtw", - }, -}; - -async function ensureRawAudio( - input: AudioSourceParam, - outSampleRate?: number, - outChannelCount?: number -) { - let inputAsRawAudio: RawAudio = input as RawAudio; - - if ( - inputAsRawAudio.audioChannels?.length > 0 && - inputAsRawAudio.sampleRate - ) { - const inputAudioChannelCount = inputAsRawAudio.audioChannels.length; - - if (outChannelCount == 1 && inputAudioChannelCount > 1) { - inputAsRawAudio = downmixToMono(inputAsRawAudio); - } - - if (outChannelCount == 2 && inputAudioChannelCount == 1) { - inputAsRawAudio = cloneRawAudio(inputAsRawAudio); - inputAsRawAudio.audioChannels.push( - inputAsRawAudio.audioChannels[0].slice() - ); - } - - if ( - outChannelCount != null && - outChannelCount > 2 && - outChannelCount != inputAudioChannelCount - ) { - throw new Error( - `Can't convert ${inputAudioChannelCount} channels to ${outChannelCount} channels. Channel conversion of raw audio currently only supports mono and stereo inputs.` - ); - } - - if (outSampleRate && inputAsRawAudio.sampleRate != outSampleRate) { - inputAsRawAudio = await resampleAudioSpeex( - inputAsRawAudio, - outSampleRate - ); - } - } else if (typeof input == "string" || input instanceof Uint8Array) { - if (input instanceof Uint8Array && !Buffer.isBuffer(input)) { - input = Buffer.from(input); - } - - const inputAsStringOrBuffer = input as string | Buffer; - - inputAsRawAudio = await decodeToChannels( - inputAsStringOrBuffer, - outSampleRate, - outChannelCount - ); - } else { - throw new Error("Received an invalid input audio data type."); - } - - return inputAsRawAudio; -} - -type FFMpegOutputOptions = { - filename?: string; - codec?: string; - format?: string; - sampleRate?: number; - sampleFormat?: "u8" | "s16" | "s32" | "s64" | "flt" | "dbl"; - channelCount?: number; - bitrate?: number; - audioOnly?: boolean; - customOptions?: string[]; -}; - -function buildCommandLineArguments( - inputFilename: string, - outputOptions: FFMpegOutputOptions -) { - outputOptions = { ...outputOptions }; - - if (!outputOptions.filename) { - outputOptions.filename = "-"; - } - - const args: string[] = []; - - args.push(`-i`, `${inputFilename}`); - - if (outputOptions.audioOnly) { - args.push(`-map`, `a`); - } - - if (outputOptions.codec) { - args.push(`-c:a`, `${outputOptions.codec}`); - } - - if (outputOptions.format) { - args.push(`-f:a`, `${outputOptions.format}`); - } - - if (outputOptions.sampleRate) { - args.push(`-ar`, `${outputOptions.sampleRate}`); - } - - if (outputOptions.sampleFormat) { - args.push(`-sample_fmt`, `${outputOptions.sampleFormat}`); - } - - if (outputOptions.channelCount) { - args.push(`-ac`, `${outputOptions.channelCount}`); - } - - if (outputOptions.bitrate) { - args.push(`-ab`, `${outputOptions.bitrate}k`); - } - - args.push(`-y`); - - if (outputOptions.customOptions) { - args.push(...outputOptions.customOptions); - } - - args.push(outputOptions.filename); - - return args; -} - -async function transcode_CLI( - ffmpegCommand: string, - input: string | Buffer, - outputOptions: FFMpegOutputOptions -) { - return new Promise((resolve, reject) => { - const logger = new Logger(); - logger.start("Transcode with command-line ffmpeg"); - - const args = buildCommandLineArguments( - Buffer.isBuffer(input) ? "-" : input, - outputOptions - ); - - const process = spawn(ffmpegCommand, args); - - if (Buffer.isBuffer(input)) { - process.stdin.end(input); - } else if (typeof input === "string") { - if (!existsSync(input)) { - reject(`Audio file was not found: ${input}`); - return; - } - } - - const stdoutChunks: Buffer[] = []; - let stderrOutput = ""; - - process.stdout.on("data", (data) => { - stdoutChunks.push(data); - }); - - process.stderr.setEncoding("utf8"); - process.stderr.on("data", (data) => { - //log(data) - stderrOutput += data; - }); - - process.on("error", (e) => { - reject(e); - }); - - process.on("close", (exitCode) => { - if (exitCode == 0) { - const concatenatedChunks = Buffer.concat(stdoutChunks); - - resolve(concatenatedChunks); - } else { - reject(`ffmpeg exited with code ${exitCode}`); - console.log(stderrOutput); - } - - logger.end(); - }); - }); -} - -function resolveToVersionedPackageNameIfNeeded(packageName: string) { - const versionTag = getVersionTagFromPackageName(packageName); - - if (versionTag) { - return packageName; - } - - const resolvedVersionTag = - resolveVersionTagForUnversionedPackageName(packageName); - - return (packageName = `${packageName}-${resolvedVersionTag}`); -} - -function resolveVersionTagForUnversionedPackageName( - unversionedPackageName: string -) { - return ( - packageVersionTagResolutionLookup[unversionedPackageName] || - defaultVersionTag - ); -} - -const defaultVersionTag = "20230718"; - -const packageVersionTagResolutionLookup: { [packageName: string]: string } = { - "sox-14.4.2-linux-minimal": "20230802", - - "vits-de_DE-thorsten_emotional-medium": "20230808", - "vits-en_GB-semaine-medium": "20230808", - "vits-fr_FR-upmc-medium": "20230808", - "vits-lb_LU-marylux-medium": "20230808", - "vits-ro_RO-mihai-medium": "20230808", - "vits-sr_RS-serbski_institut-medium": "20230808", - "vits-tr_TR-dfki-medium": "20230808", - - "vits-cs_CZ-jirka-medium": "20230824", - "vits-de_DE-thorsten-high": "20230824", - "vits-hu_HU-anna-medium": "20230824", - "vits-pt_PT-tugao-medium": "20230824", - "vits-sk_SK-lili-medium": "20230824", - "vits-tr_TR-fahrettin-medium": "20230824", - - "vits-ar_JO-kareem-medium": "20231022", - "vits-cs_CZ-jirka-low": "20231022", - "vits-en_US-hfc_male-medium": "20231022", - "vits-en_US-libritts_r-medium": "20231022", - "vits-hu_HU-imre-medium": "20231022", - "vits-pl_PL-mc_speech-medium": "20231022", - - "whisper-tiny": "20231126", - "whisper-tiny.en": "20231126", - "whisper-base": "20231126", - "whisper-base.en": "20231126", - "whisper-small": "20231126", - "whisper-small.en": "20231126", - "whisper-medium": "20231126", - "whisper-medium.en": "20231126", - "whisper-large-v3": "20231126", - - "vits-ar_JO-kareem-low": "20231126", - "vits-en_US-hfc_female-medium": "20231126", - - "ffmpeg-6.0-win32-x64": "20240316", - "ffmpeg-6.0-win32-ia32": "20240316", - "ffmpeg-6.0-darwin-x64": "20240316", - "ffmpeg-6.0-darwin-arm64": "20240316", - "ffmpeg-6.0-linux-x64": "20240316", - "ffmpeg-6.0-linux-ia32": "20240316", - "ffmpeg-6.0-linux-arm64": "20240316", - "ffmpeg-6.0-linux-arm": "20240316", - "ffmpeg-6.0-freebsd-x64": "20240316", - - "vits-de_DE-mls-medium": "20240316", - "vits-en_GB-cori-high": "20240316", - "vits-en_US-kristin-medium": "20240316", - "vits-en_US-ljspeech-high": "20240316", - "vits-en_US-ljspeech-medium": "20240316", - "vits-es_MX-claude-high": "20240316", - "vits-fa_IR-amir-medium": "20240316", - "vits-fa_IR-gyro-medium": "20240316", - "vits-fr_FR-mls-medium": "20240316", - "vits-fr_FR-tom-medium": "20240316", - "vits-nl_NL-mls-medium": "20240316", - "vits-sl_SI-artur-medium": "20240316", - "vits-tr_TR-fettah-medium": "20240316", - - "mdxnet-UVR_MDXNET_1_9703": "20240330", - "mdxnet-UVR_MDXNET_2_9682": "20240330", - "mdxnet-UVR_MDXNET_3_9662": "20240330", - "mdxnet-UVR_MDXNET_KARA": "20240330", - - "whisper.cpp-tiny": "20240405", - "whisper.cpp-tiny-q5_1": "20240405", - "whisper.cpp-tiny.en": "20240405", - "whisper.cpp-tiny.en-q5_1": "20240405", - "whisper.cpp-tiny.en-q8_0": "20240405", - - "whisper.cpp-base": "20240405", - "whisper.cpp-base-q5_1": "20240405", - "whisper.cpp-base.en": "20240405", - "whisper.cpp-base.en-q5_1": "20240405", - - "whisper.cpp-small": "20240405", - "whisper.cpp-small-q5_1": "20240405", - "whisper.cpp-small.en": "20240405", - "whisper.cpp-small.en-q5_1": "20240405", - - "whisper.cpp-medium": "20240405", - "whisper.cpp-medium-q5_0": "20240405", - "whisper.cpp-medium.en": "20240405", - "whisper.cpp-medium.en-q5_0": "20240405", - - "whisper.cpp-large-v1": "20240405", - "whisper.cpp-large-v2": "20240405", - "whisper.cpp-large-v2-q5_0": "20240405", - "whisper.cpp-large-v3": "20240405", - "whisper.cpp-large-v3-q5_0": "20240405", - - "whisper.cpp-binaries-linux-x64-cpu-latest-patched": "20240405", - "whisper.cpp-binaries-windows-x64-cpu-latest-patched": "20240409", - - "whisper-tiktoken-data": "20240408", - - "whisper.cpp-binaries-windows-x64-cublas-12.4.0-latest-patched": "20240409", - "whisper.cpp-binaries-windows-x64-cublas-11.8.0-latest-patched": "20240409", - - "xenova-multilingual-e5-small-q8": "20240504", - "xenova-nllb-200-distilled-600M-q8": "20240505", - "xenova-multilingual-e5-small-fp16": "20240514", - "xenova-multilingual-e5-base-fp16": "20240514", - "xenova-multilingual-e5-base-q8": "20240514", - "xenova-multilingual-e5-large-q8": "20240514", - - "w2v-bert-2.0-int8": "20240517", - "w2v-bert-2.0-uint8": "20240517", -}; - -function getVersionTagFromPackageName(packageName: string) { - return packageName.match( - /.*\-([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9](_[0-9]+)?)$/ - )?.[1]; -} - -async function ensureAndGetPackagesDir() { - const dataPath = getAppDataDir("eliza"); - - const packagesPath = path.join(dataPath, "packages"); - - await ensureDir(packagesPath); - - return packagesPath; -} - -async function downloadFile( - options: GaxiosOptions, - targetFilePath: string, - prompt = "Downloading" -) { - const write = logLevelGreaterOrEqualTo("info") ? writeToStderr : () => {}; - - const downloadPromise = new OpenPromise(); - - const timer = new Timer(); - - options.responseType = "stream"; - - const response = await request(options); - - const ttyOutput = process.stderr.isTTY === true; - - write(`\n${prompt}.. `); - - const rateAveragingWindowSeconds = 5.0; - - let downloadStarted = false; - let downloadedBytes = 0; - let totalBytes: number | undefined = undefined; - - const statusUpdateInterval = 250; - - let lastString = prompt; - - const downloadStateHistory: { time: number; downloadedMBytes: number }[] = - []; - - function updateStatus() { - if (!downloadStarted) { - return; - } - - const totalMBytes = (totalBytes || 0) / 1000 / 1000; - const downloadedMBytes = downloadedBytes / 1000 / 1000; - - const elapsedTime = timer.elapsedTimeSeconds; - const cumulativeDownloadRate = downloadedMBytes / elapsedTime; - - const windowStartRecord = downloadStateHistory.find( - (r) => - r.time >= timer.elapsedTimeSeconds - rateAveragingWindowSeconds - ); - - let windowDownloadRate: number; - - if (windowStartRecord) { - windowDownloadRate = - (downloadedMBytes - windowStartRecord.downloadedMBytes) / - (elapsedTime - windowStartRecord.time); - } else { - windowDownloadRate = cumulativeDownloadRate; - } - - downloadStateHistory.push({ time: elapsedTime, downloadedMBytes }); - - const isLastUpdate = downloadedMBytes == totalMBytes; - - const downloadedMbytesStr = downloadedMBytes.toFixed(2); - const totalMbytesStr = totalMBytes.toFixed(2); - const downloadRateStr = windowDownloadRate.toFixed(2); - const cumulativeDownloadRateStr = cumulativeDownloadRate.toFixed(2); - - if (ttyOutput) { - let newString: string; - - if (totalBytes != undefined) { - const percentage = (downloadedMBytes / totalMBytes) * 100; - - newString = `${prompt}.. ${downloadedMbytesStr}MB/${totalMbytesStr}MB (${percentage.toFixed(1) + "%"}, ${timer.elapsedTimeSeconds.toFixed(1)}s, ${downloadRateStr}MB/s)`; - } else { - newString = `${prompt}.. ${downloadedMbytesStr}MB (${timer.elapsedTimeSeconds.toFixed(1)}s, ${downloadRateStr}MB/s)`; - } - - if (newString != lastString) { - write("\r"); - write(newString); - } - - lastString = newString; - } else { - if (totalBytes == undefined) { - return; - } - - const percent = downloadedBytes / totalBytes; - const percentDisplay = `${(Math.floor(percent * 10) * 10).toString()}%`; - - if (lastString == prompt) { - write(`(${totalMbytesStr}MB): `); - } - - if (percentDisplay != lastString) { - write(percentDisplay); - - if (percent == 1.0) { - write( - ` (${timer.elapsedTimeSeconds.toFixed(2)}s, ${cumulativeDownloadRateStr}MB/s)` - ); - } else { - write(` `); - } - - lastString = percentDisplay; - } - } - } - - const partialFilePath = `${targetFilePath}.${getRandomHexString(16)}.partial`; - const fileWriteStream = createWriteStream(partialFilePath, { - encoding: "binary", - autoClose: true, - }); - - const statusInterval = setInterval(() => { - updateStatus(); - }, statusUpdateInterval); - - response.data.on("data", (chunk: Uint8Array) => { - try { - const contentLengthString = response.headers["content-length"]; - - totalBytes = - contentLengthString != undefined - ? parseInt(contentLengthString) - : undefined; - - const chunkLength = chunk.length; - - fileWriteStream.write(chunk); - - downloadedBytes += chunkLength; - - if (downloadStarted == false) { - downloadStarted = true; - } - } catch (err) { - clearInterval(statusInterval); - - downloadPromise.reject(err); - } - }); - - response.data.on("end", async () => { - try { - clearInterval(statusInterval); - updateStatus(); - - fileWriteStream.end(); - - write("\n"); - - await move(partialFilePath, targetFilePath); - - downloadPromise.resolve(); - } catch (err) { - clearInterval(statusInterval); - downloadPromise.reject(err); - } - }); - - response.data.on("error", async (err: any) => { - try { - clearInterval(statusInterval); - - fileWriteStream.end(); - await remove(partialFilePath); - } finally { - downloadPromise.reject(err); - } - }); - - return downloadPromise.promise; -} - -async function extractTarball(filepath: string, outputDir: string) { - const { extract } = await import("tar"); - - await extract({ - file: filepath, - cwd: outputDir, - preserveOwner: false, - //noChmod: true - }); -} - -async function downloadAndExtractTarball( - options: GaxiosOptions, - targetDir: string, - baseTempPath: string, - displayName = "archive" -) { - const logger = new Logger(); - - const randomID = getRandomHexString(16).toLowerCase(); - const tempTarballPath = path.join(baseTempPath, `/${randomID}.tarball`); - const tempDirPath = path.join(baseTempPath, `/${randomID}`); - await ensureDir(tempDirPath); - - logger.end(); - - await downloadFile( - options, - tempTarballPath, - `${"Downloading"} ${displayName}` - ); - - logger.end(); - - logger.start(`Extracting ${displayName}`); - - await extractTarball(tempTarballPath, tempDirPath); - - await remove(tempTarballPath); - - for (const filename of await readdir(tempDirPath)) { - const sourceFilePath = path.join(tempDirPath, filename); - const targetFilePath = path.join(targetDir, filename); - - await move(sourceFilePath, targetFilePath); - } - - await remove(tempDirPath); - - logger.end(); -} - -async function loadPackage(packageName: string) { - packageName = resolveToVersionedPackageNameIfNeeded(packageName); - - const packagesPath = await ensureAndGetPackagesDir(); - - const packagePath = path.join(packagesPath, packageName); - - if (existsSync(packagePath)) { - return packagePath; - } - - const packageBaseURL = getGlobalOption("packageBaseURL"); - - const tempPath = getAppTempDir("eliza"); - - const headers = {}; - - const options = { - url: `${packageBaseURL}${packageName}.tar.gz`, - headers, - }; - - await downloadAndExtractTarball( - options, - packagesPath, - tempPath, - packageName - ); - - return packagePath; -} - -interface GlobalOptions { - ffmpegPath?: string; - soxPath?: string; - packageBaseURL?: string; - logLevel?: LogLevel; -} - -const globalOptions: GlobalOptions = { - ffmpegPath: undefined, - soxPath: undefined, - packageBaseURL: - "https://huggingface.co/echogarden/echogarden-packages/resolve/main/", - logLevel: "info", -}; - -function listGlobalOptions() { - return Object.keys(globalOptions); -} - -function getGlobalOption( - key: K -): GlobalOptions[K] { - if (!listGlobalOptions().includes(key)) { - throw new Error(`Unknown global option key '${key}'`); - } - - return globalOptions[key]; -} - -async function commandExists(command: string) { - try { - await commandExists(command); - return true; - } catch { - return false; - } -} - -async function getFFMpegExecutablePath() { - // If a global option set for the path, use it - if (getGlobalOption("ffmpegPath")) { - return getGlobalOption("ffmpegPath"); - } - - // If an 'ffmpeg' command exist in system path, use it - if (await commandExists("ffmpeg")) { - return "ffmpeg"; - } - - // Otherwise, download and use an internal ffmpeg package - const platform = process.platform; - const arch = process.arch; - - let packageName: string; - - if (platform === "win32" && arch === "x64") { - packageName = "ffmpeg-6.0-win32-x64"; - } else if (platform === "win32" && arch === "ia32") { - packageName = "ffmpeg-6.0-win32-ia32"; - } else if (platform === "darwin" && arch === "x64") { - packageName = "ffmpeg-6.0-darwin-x64"; - } else if (platform === "darwin" && arch === "arm64") { - packageName = "ffmpeg-6.0-darwin-arm64"; - } else if (platform === "linux" && arch === "x64") { - packageName = "ffmpeg-6.0-linux-x64"; - } else if (platform === "linux" && arch === "ia32") { - packageName = "ffmpeg-6.0-linux-ia32"; - } else if (platform === "linux" && arch === "arm64") { - packageName = "ffmpeg-6.0-linux-arm64"; - } else if (platform === "linux" && arch === "arm") { - packageName = "ffmpeg-6.0-linux-arm"; - } else if (platform === "freebsd" && arch === "x64") { - packageName = "ffmpeg-6.0-freebsd-x64"; - } else { - return undefined; - } - - const ffmpegPackagePath = await loadPackage(packageName); - - let filename = packageName; - - if (platform === "win32") { - filename += ".exe"; - } - - return path.join(ffmpegPackagePath, filename); -} - -async function transcode( - input: string | Buffer, - outputOptions: FFMpegOutputOptions -) { - const executablePath = await getFFMpegExecutablePath(); - - if (!executablePath) { - throw new Error( - `The ffmpeg utility wasn't found. Please ensure it is available on the system path.` - ); - } - - return transcode_CLI(executablePath, input, outputOptions); -} - -function decodeWave( - waveData: Buffer, - ignoreTruncatedChunks = true, - ignoreOverflowingDataChunks = true -) { - let readOffset = 0; - - const riffId = waveData - .subarray(readOffset, readOffset + 4) - .toString("ascii"); - - if (riffId != "RIFF") { - throw new Error("Not a valid wave file. No RIFF id found at offset 0."); - } - - readOffset += 4; - - let riffChunkSize = waveData.readUInt32LE(readOffset); - - readOffset += 4; - - const waveId = waveData - .subarray(readOffset, readOffset + 4) - .toString("ascii"); - - if (waveId != "WAVE") { - throw new Error("Not a valid wave file. No WAVE id found at offset 8."); - } - - if (ignoreOverflowingDataChunks && riffChunkSize === 4294967295) { - riffChunkSize = waveData.length - 8; - } - - if (riffChunkSize < waveData.length - 8) { - throw new Error( - `RIFF chunk length ${riffChunkSize} is smaller than the remaining size of the buffer (${waveData.length - 8})` - ); - } - - if (!ignoreTruncatedChunks && riffChunkSize > waveData.length - 8) { - throw new Error( - `RIFF chunk length (${riffChunkSize}) is greater than the remaining size of the buffer (${waveData.length - 8})` - ); - } - - readOffset += 4; - - let formatSubChunkBodyBuffer: Buffer | undefined; - const dataBuffers: Buffer[] = []; - - while (true) { - const subChunkIdentifier = waveData - .subarray(readOffset, readOffset + 4) - .toString("ascii"); - readOffset += 4; - - let subChunkSize = waveData.readUInt32LE(readOffset); - readOffset += 4; - - if ( - !ignoreTruncatedChunks && - subChunkSize > waveData.length - readOffset - ) { - throw new Error( - `Encountered a '${subChunkIdentifier}' subchunk with a size of ${subChunkSize} which is greater than the remaining size of the buffer (${waveData.length - readOffset})` - ); - } - - if (subChunkIdentifier == "fmt ") { - formatSubChunkBodyBuffer = waveData.subarray( - readOffset, - readOffset + subChunkSize - ); - } else if (subChunkIdentifier == "data") { - if (!formatSubChunkBodyBuffer) { - throw new Error( - "A data subchunk was encountered before a format subchunk" - ); - } - - // If the data chunk is truncated or extended beyond 4 GiB, - // the data would be read up to the end of the buffer - if (ignoreOverflowingDataChunks && subChunkSize === 4294967295) { - subChunkSize = waveData.length - readOffset; - } - - const subChunkData = waveData.subarray( - readOffset, - readOffset + subChunkSize - ); - - dataBuffers.push(subChunkData); - } - // All sub chunks other than 'data' (e.g. 'LIST', 'fact', 'plst', 'junk' etc.) are ignored - - // This addition operation may overflow if JavaScript integers were 32 bits, - // but since they are 52 bits, it is okay: - readOffset += subChunkSize; - - // Break if readOffset is equal to or is greater than the size of the buffer - if (readOffset >= waveData.length) { - break; - } - } - - if (!formatSubChunkBodyBuffer) { - throw new Error("No format subchunk was found in the wave file"); - } - - if (dataBuffers.length === 0) { - throw new Error("No data subchunks were found in the wave file"); - } - - const waveFormat = WaveFormat.deserializeFrom(formatSubChunkBodyBuffer); - - const sampleFormat = waveFormat.sampleFormat; - const channelCount = waveFormat.channelCount; - const sampleRate = waveFormat.sampleRate; - const bitDepth = waveFormat.bitDepth; - const speakerPositionMask = waveFormat.speakerPositionMask; - - const concatenatedDataBuffers = Buffer.concat(dataBuffers); - dataBuffers.length = 0; // Allow the garbage collector to free up memory held by the data buffers - - const audioChannels = decodeAudioBufferToChannels( - concatenatedDataBuffers, - channelCount, - bitDepth, - sampleFormat - ); - - return { - rawAudio: { audioChannels, sampleRate }, - - sourceSampleFormat: sampleFormat, - sourceBitDepth: bitDepth, - sourceSpeakerPositionMask: speakerPositionMask, - }; -} - -// Int8 PCM <-> Float32 conversion -function int8PcmToFloat32(input: Int8Array) { - const output = new Float32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = input[i]; - output[i] = sample < 0 ? sample / 128 : sample / 127; - } - - return output; -} - -// Int24 PCM <-> Float32 conversion (uses int32 for storage) -function int24PcmToFloat32(input: Int32Array) { - const output = new Float32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = input[i]; - output[i] = sample < 0 ? sample / 8388608 : sample / 8388607; - } - - return output; -} - -function int32PcmToFloat32(input: Int32Array) { - const output = new Float32Array(input.length); - - for (let i = 0; i < input.length; i++) { - const sample = input[i]; - output[i] = sample < 0 ? sample / 2147483648 : sample / 2147483647; - } - - return output; -} - -function decodeAudioBufferToChannels( - audioBuffer: Buffer, - channelCount: number, - sourceBitDepth: number, - sourceSampleFormat: SampleFormat -) { - let interleavedChannels: Float32Array; - - if (sourceSampleFormat === SampleFormat.PCM) { - if (sourceBitDepth === 8) { - interleavedChannels = int8PcmToFloat32(bufferToInt8(audioBuffer)); - } else if (sourceBitDepth === 16) { - interleavedChannels = int16PcmToFloat32( - bufferLEToInt16(audioBuffer) - ); - } else if (sourceBitDepth === 24) { - interleavedChannels = int24PcmToFloat32( - bufferLEToInt24(audioBuffer) - ); - } else if (sourceBitDepth === 32) { - interleavedChannels = int32PcmToFloat32( - bufferLEToInt32(audioBuffer) - ); - } else { - throw new Error(`Unsupported PCM bit depth: ${sourceBitDepth}`); - } - } else if (sourceSampleFormat === SampleFormat.Float) { - if (sourceBitDepth === 32) { - interleavedChannels = bufferLEToFloat32(audioBuffer); - } else if (sourceBitDepth === 64) { - interleavedChannels = float64Tofloat32( - bufferLEToFloat64(audioBuffer) - ); - } else { - throw new Error(`Unsupported float bit depth: ${sourceBitDepth}`); - } - } else if (sourceSampleFormat === SampleFormat.Alaw) { - if (sourceBitDepth === 8) { - interleavedChannels = int16PcmToFloat32( - AlawMulaw.alaw.decode(audioBuffer) - ); - } else { - throw new Error(`Unsupported alaw bit depth: ${sourceBitDepth}`); - } - } else if (sourceSampleFormat === SampleFormat.Mulaw) { - if (sourceBitDepth === 8) { - interleavedChannels = int16PcmToFloat32( - AlawMulaw.mulaw.decode(audioBuffer) - ); - } else { - throw new Error(`Unsupported mulaw bit depth: ${sourceBitDepth}`); - } - } else { - throw new Error(`Unsupported audio format: ${sourceSampleFormat}`); - } - - audioBuffer = Buffer.from([]); // Zero the buffer reference to allow the GC to free up memory, if possible - - return deInterleaveChannels(interleavedChannels, channelCount); -} - -function deInterleaveChannels( - interleavedChannels: Float32Array, - channelCount: number -) { - if (channelCount === 0) { - throw new Error("0 channel count received"); - } - - if (channelCount === 1) { - return [interleavedChannels]; - } - - if (interleavedChannels.length % channelCount != 0) { - throw new Error( - `Size of interleaved channels (${interleaveChannels.length}) is not a multiple of channel count (${channelCount})` - ); - } - - const sampleCount = interleavedChannels.length / channelCount; - const channels: Float32Array[] = []; - - for (let i = 0; i < channelCount; i++) { - channels.push(new Float32Array(sampleCount)); - } - - let readIndex = 0; - - for (let i = 0; i < sampleCount; i++) { - for (let c = 0; c < channelCount; c++) { - channels[c][i] = interleavedChannels[readIndex]; - readIndex += 1; - } - } - - return channels; -} - -function decodeWaveToRawAudio( - waveFileBuffer: Buffer, - ignoreTruncatedChunks = true, - ignoreOverflowingDataChunks = true -) { - return decodeWave( - waveFileBuffer, - ignoreTruncatedChunks, - ignoreOverflowingDataChunks - ); -} - -async function decodeToChannels( - input: string | Buffer, - outSampleRate?: number, - outChannelCount?: number -) { - const outputOptions: FFMpegOutputOptions = { - codec: "pcm_f32le", - format: "wav", - sampleRate: outSampleRate, - channelCount: outChannelCount, - audioOnly: true, - }; - - const waveAudio = await transcode(input, outputOptions); - - const logger = new Logger(); - - logger.start(`Convert wave buffer to raw audio`); - const { rawAudio } = decodeWaveToRawAudio(waveAudio); - logger.end(); - - return rawAudio; -} - -async function align( - input: AudioSourceParam, - transcript: string, - options: AlignmentOptions -): Promise { - const logger = new Logger(); - - const startTimestamp = logger.getTimestamp(); - - options = extendDeep(defaultAlignmentOptions, options); - - const inputRawAudio = await ensureRawAudio(input); - - let sourceRawAudio: RawAudio; - let isolatedRawAudio: RawAudio | undefined; - let backgroundRawAudio: RawAudio | undefined; - - logger.start(`Resample audio to 16kHz mono`); - sourceRawAudio = await ensureRawAudio(inputRawAudio, 16000, 1); - - let sourceUncropTimeline: Timeline | undefined; - - logger.start("Normalize and trim audio"); - - sourceRawAudio = normalizeAudioLevel(sourceRawAudio); - sourceRawAudio.audioChannels[0] = trimAudioEnd( - sourceRawAudio.audioChannels[0] - ); - - logger.end(); - - let language: string; - - if (options.language) { - const languageData = await parseLangIdentifier(options.language); - - language = languageData.Name; - - logger.logTitledMessage( - "Language specified", - formatLanguageCodeWithName(language) - ); - } else { - logger.start( - "No language specified. Detect language using reference text" - ); - const { detectedLanguage } = await detectTextLanguage( - transcript, - options.languageDetection || {} - ); - - language = detectedLanguage; - - logger.end(); - - logger.logTitledMessage( - "Language detected", - formatLanguageCodeWithName(language) - ); - } - - language = getDefaultDialectForLanguageCodeIfPossible(language); - - logger.start("Load alignment module"); - - function getDtwWindowGranularitiesAndDurations() { - const sourceAudioDuration = getRawAudioDuration(sourceRawAudio); - - const granularities: DtwGranularity[] = ["high"]; - let windowDurations: number[]; - - if (options.dtw!.windowDuration) { - if (typeof options.dtw!.windowDuration === "number") { - windowDurations = [options.dtw!.windowDuration]; - } else if (Array.isArray(options.dtw!.windowDuration)) { - windowDurations = options.dtw!.windowDuration; - } else { - throw new Error( - `'dtw.windowDuration' must be a number or an array of numbers.` - ); - } - } else { - if (granularities.length > 2) { - throw new Error( - `More than two passes requested, this requires window durations to be explicitly specified for each pass. For example 'dtw.windowDuration=[600,60,10]'.` - ); - } - - if (sourceAudioDuration < 5 * 60) { - // If up to 5 minutes, set window duration to one minute - windowDurations = [60]; - } else if (sourceAudioDuration < 2.5 * 60 * 60) { - // If less than 2.5 hours, set window duration to 20% of total duration - windowDurations = [Math.ceil(sourceAudioDuration * 0.2)]; - } else { - // Otherwise, set window duration to 30 minutes - windowDurations = [30 * 60]; - } - } - - if (granularities.length === 2 && windowDurations.length === 1) { - windowDurations = [windowDurations[0], 15]; - } - - if (granularities.length != windowDurations.length) { - throw new Error( - `The option 'dtw.granularity' has ${granularities.length} values, but 'dtw.windowDuration' has ${windowDurations.length} values. The lengths should be equal.` - ); - } - - return { windowDurations, granularities }; - } - - let mappedTimeline: Timeline; - - const { windowDurations, granularities } = - getDtwWindowGranularitiesAndDurations(); - - logger.end(); - - const { referenceRawAudio, referenceTimeline } = - await createAlignmentReferenceUsingEspeak( - transcript, - language, - options.plainText, - options.customLexiconPaths, - false, - false - ); - - logger.end(); - - mappedTimeline = await alignUsingDtw( - sourceRawAudio, - referenceRawAudio, - referenceTimeline, - granularities, - windowDurations - ); - - logger.start(`Postprocess timeline`); - - // If the audio was cropped before recognition, map the timestamps back to the original audio - if (sourceUncropTimeline && sourceUncropTimeline.length > 0) { - convertCroppedToUncroppedTimeline(mappedTimeline, sourceUncropTimeline); - } - - // Add text offsets - addWordTextOffsetsToTimeline(mappedTimeline, transcript); - - // Make segment timeline - const { segmentTimeline } = await wordTimelineToSegmentSentenceTimeline( - mappedTimeline, - transcript, - language, - options.plainText?.paragraphBreaks, - options.plainText?.whitespace - ); - - logger.end(); - - return { - timeline: segmentTimeline, - wordTimeline: mappedTimeline, - - transcript, - language, - - inputRawAudio, - isolatedRawAudio, - backgroundRawAudio, - }; -} - -function getDefaultDialectForLanguageCodeIfPossible(langCode: string) { - const defaultDialect = defaultDialectForLanguageCode[langCode]; - - return defaultDialect || langCode; -} - -async function wordTimelineToSegmentSentenceTimeline( - wordTimeline: Timeline, - transcript: string, - language: string, - paragraphBreaks: ParagraphBreakType = "double", - whitespace: WhitespaceProcessing = "collapse" -) { - const paragraphs = splitToParagraphs( - transcript, - paragraphBreaks, - whitespace - ); - - const segments = paragraphs.map((segment) => - splitToSentences(segment, language).map((sentence) => sentence.trim()) - ); - - let text = ""; - const charIndexToSentenceEntryMapping: TimelineEntry[] = []; - - const segmentTimeline: Timeline = []; - - for (const segment of segments) { - const sentencesInSegment: Timeline = []; - - const segmentEntry: TimelineEntry = { - type: "segment", - text: "", - startTime: -1, - endTime: -1, - timeline: sentencesInSegment, - }; - - for (const sentence of segment) { - const sentenceEntry: TimelineEntry = { - type: "sentence", - text: sentence, - startTime: -1, - endTime: -1, - timeline: [], - }; - - for (const char of sentence + " ") { - text += char; - charIndexToSentenceEntryMapping.push(sentenceEntry); - } - - sentencesInSegment.push(sentenceEntry); - } - - segmentTimeline.push(segmentEntry); - } - - let wordSearchStartOffset = 0; - - for (let wordIndex = 0; wordIndex < wordTimeline.length; wordIndex++) { - const wordEntry = wordTimeline[wordIndex]; - const wordText = wordEntry.text; - - if (!isWordOrSymbolWord(wordText)) { - continue; - } - - const indexOfWordInText = text.indexOf(wordText, wordSearchStartOffset); - - if (indexOfWordInText == -1) { - throw new Error( - `Couldn't find the word '${wordText}' in the text at start position ${wordSearchStartOffset}` - ); - } - - const targetSentenceEntry = - charIndexToSentenceEntryMapping[indexOfWordInText]; - targetSentenceEntry.timeline!.push(deepClone(wordEntry)); - - wordSearchStartOffset = indexOfWordInText + wordText.length; - } - - const newSegmentTimeline: Timeline = []; - - for (const segmentEntry of segmentTimeline) { - const oldSentenceTimeline = segmentEntry.timeline!; - - const newSentenceTimeline: Timeline = []; - - for (const sentenceEntry of oldSentenceTimeline) { - const wordTimeline = sentenceEntry.timeline; - - if (!wordTimeline || wordTimeline.length == 0) { - continue; - } - - sentenceEntry.startTime = wordTimeline[0].startTime; - sentenceEntry.endTime = - wordTimeline[wordTimeline.length - 1].endTime; - - newSentenceTimeline.push(sentenceEntry); - } - - if (newSentenceTimeline.length == 0) { - continue; - } - - segmentEntry.text = newSentenceTimeline - .map((sentenceEntry) => sentenceEntry.text) - .join(" "); - - segmentEntry.startTime = newSentenceTimeline[0].startTime; - segmentEntry.endTime = - newSentenceTimeline[newSentenceTimeline.length - 1].endTime; - - newSegmentTimeline.push(segmentEntry); - } - - return { segmentTimeline: newSegmentTimeline }; -} - -interface UncropTimelineMapResult { - mappedStartTime: number; - mappedEndTime: number; -} - -function mapUsingUncropTimeline( - startTimeInCroppedAudio: number, - endTimeInCroppedAudio: number, - uncropTimeline: Timeline -): UncropTimelineMapResult { - if (uncropTimeline.length === 0) { - return { - mappedStartTime: 0, - mappedEndTime: 0, - }; - } - - let offsetInCroppedAudio = 0; - - if (endTimeInCroppedAudio < startTimeInCroppedAudio) { - endTimeInCroppedAudio = startTimeInCroppedAudio; - } - - let bestOverlapDuration = -1; - let mappedStartTime = -1; - let mappedEndTime = -1; - - for (const uncropEntry of uncropTimeline) { - const uncropEntryDuration = uncropEntry.endTime - uncropEntry.startTime; - - const overlapStartTime = Math.max( - startTimeInCroppedAudio, - offsetInCroppedAudio - ); - const overlapEndTime = Math.min( - endTimeInCroppedAudio, - offsetInCroppedAudio + uncropEntryDuration - ); - - const overlapDuration = overlapEndTime - overlapStartTime; - - if (overlapDuration >= 0 && overlapDuration > bestOverlapDuration) { - bestOverlapDuration = overlapDuration; - - mappedStartTime = - uncropEntry.startTime + - (overlapStartTime - offsetInCroppedAudio); - mappedEndTime = - uncropEntry.startTime + (overlapEndTime - offsetInCroppedAudio); - } - - offsetInCroppedAudio += uncropEntryDuration; - } - - if (bestOverlapDuration === -1) { - if (startTimeInCroppedAudio >= offsetInCroppedAudio) { - const maxTimestamp = - uncropTimeline[uncropTimeline.length - 1].endTime; - - return { - mappedStartTime: maxTimestamp, - mappedEndTime: maxTimestamp, - }; - } else { - throw new Error( - `Given start time ${startTimeInCroppedAudio} was smaller than audio duration but no match was found in uncrop timeline (should not occur)` - ); - } - } - - return { - mappedStartTime, - mappedEndTime, - }; -} - -function convertCroppedToUncroppedTimeline( - timeline: Timeline, - uncropTimeline: Timeline -) { - if (timeline.length === 0) { - return; - } - - for (const entry of timeline) { - const { mappedStartTime, mappedEndTime } = mapUsingUncropTimeline( - entry.startTime, - entry.endTime, - uncropTimeline - ); - - const mapSubTimeline = (subTimeline: Timeline | undefined) => { - if (!subTimeline) { - return; - } - - for (const subEntry of subTimeline) { - subEntry.startTime = Math.min( - mappedStartTime + (subEntry.startTime - entry.startTime), - mappedEndTime - ); - subEntry.endTime = Math.min( - mappedStartTime + (subEntry.endTime - entry.startTime), - mappedEndTime - ); - - mapSubTimeline(subEntry.timeline); - } - }; - - mapSubTimeline(entry.timeline); - - entry.startTime = mappedStartTime; - entry.endTime = mappedEndTime; - } -} - -function getUTF32Chars(str: string) { - const utf32chars: string[] = []; - const mapping: number[] = []; - - let utf32Index = 0; - - for (const utf32char of str) { - utf32chars.push(utf32char); - - for (let i = 0; i < utf32char.length; i++) { - mapping.push(utf32Index); - } - - utf32Index += 1; - } - - mapping.push(utf32Index); - - return { utf32chars, mapping }; -} - -function addWordTextOffsetsToTimeline( - timeline: Timeline, - text: string, - currentOffset = 0 -) { - const { mapping } = getUTF32Chars(text); - - for (const entry of timeline) { - if (entry.type == "word") { - let word = entry.text; - - word = word.trim().replaceAll(/\s+/g, " "); - - const wordParts = word.split(" "); - - let startOffset: number | undefined; - let endOffset: number | undefined; - - for (let i = 0; i < wordParts.length; i++) { - const wordPart = wordParts[i]; - - const wordPartOffset = text.indexOf(wordPart, currentOffset); - - if (wordPartOffset == -1) { - continue; - } - - currentOffset = wordPartOffset + wordParts[i].length; - - if (i == 0) { - startOffset = wordPartOffset; - } - - endOffset = currentOffset; - } - - entry.startOffsetUtf16 = startOffset; - entry.endOffsetUtf16 = endOffset; - - entry.startOffsetUtf32 = - startOffset != undefined ? mapping[startOffset] : undefined; - entry.endOffsetUtf32 = - endOffset != undefined ? mapping[endOffset] : undefined; - } else if (entry.timeline) { - currentOffset = addWordTextOffsetsToTimeline( - entry.timeline, - text, - currentOffset - ); - } - } - - return currentOffset; -} - -async function createAlignmentReferenceUsingEspeak( - transcript: string, - language: string, - plaintextOptions?: PlainTextOptions, - customLexiconPaths?: string[], - insertSeparators?: boolean, - useKlatt?: boolean -) { - const logger = new Logger(); - - logger.start("Synthesize alignment reference with eSpeak"); - - const synthesisOptions: SynthesisOptions = { - engine: "espeak", - language, - - plainText: plaintextOptions, - customLexiconPaths: customLexiconPaths, - }; - - let { - audio: referenceRawAudio, - timeline: segmentTimeline, - voice: espeakVoice, - } = await synthesize(transcript, synthesisOptions); - - const sentenceTimeline = segmentTimeline.flatMap( - (entry) => entry.timeline! - ); - const wordTimeline = sentenceTimeline.flatMap((entry) => entry.timeline!); - - referenceRawAudio = await resampleAudioSpeex( - referenceRawAudio as RawAudio, - 16000 - ); - referenceRawAudio = downmixToMonoAndNormalize(referenceRawAudio); - - logger.end(); - - return { referenceRawAudio, referenceTimeline: wordTimeline, espeakVoice }; -} - -function downmixToMonoAndNormalize( - rawAudio: RawAudio, - targetPeakDecibels = -3 -) { - return normalizeAudioLevel(downmixToMono(rawAudio), targetPeakDecibels); -} - -async function loadLexiconFile(jsonFilePath: string): Promise { - const parsedLexicon: Lexicon = await readAndParseJsonFile(jsonFilePath); - - return parsedLexicon; -} - -async function loadLexiconsForLanguage( - language: string, - customLexiconPaths?: string[] -) { - const lexicons: Lexicon[] = []; - - if (getShortLanguageCode(language) == "en") { - const heteronymsLexicon = await loadLexiconFile( - resolveToModuleRootDir("data/lexicons/heteronyms.en.json") - ); - lexicons.push(heteronymsLexicon); - } - - if (customLexiconPaths && customLexiconPaths.length > 0) { - for (const customLexicon of customLexiconPaths) { - const customLexiconObject = await loadLexiconFile(customLexicon); - - lexicons.push(customLexiconObject); - } - } - - return lexicons; -} - -function downmixToMono(rawAudio: RawAudio): RawAudio { - const channelCount = rawAudio.audioChannels.length; - const sampleCount = rawAudio.audioChannels[0].length; - - if (channelCount === 1) { - return cloneRawAudio(rawAudio); - } - - const downmixedAudio = new Float32Array(sampleCount); - - for (const channelSamples of rawAudio.audioChannels) { - for (let i = 0; i < sampleCount; i++) { - downmixedAudio[i] += channelSamples[i]; - } - } - - if (channelCount > 1) { - for (let i = 0; i < sampleCount; i++) { - downmixedAudio[i] /= channelCount; - } - } - - return { - audioChannels: [downmixedAudio], - sampleRate: rawAudio.sampleRate, - } as RawAudio; -} - -function applyGainFactor(rawAudio: RawAudio, gainFactor: number): RawAudio { - const outputAudioChannels: Float32Array[] = []; - - for (const channelSamples of rawAudio.audioChannels) { - const sampleCount = channelSamples.length; - - const outputChannelSamples = new Float32Array(sampleCount); - - for (let i = 0; i < sampleCount; i++) { - outputChannelSamples[i] = channelSamples[i] * gainFactor; - } - - outputAudioChannels.push(outputChannelSamples); - } - - return { - audioChannels: outputAudioChannels, - sampleRate: rawAudio.sampleRate, - } as RawAudio; -} - -function normalizeAudioLevel( - rawAudio: RawAudio, - targetPeakDecibels = -3, - maxGainIncreaseDecibels = 30 -): RawAudio { - //rawAudio = correctDCBias(rawAudio) - - const targetPeakAmplitude = decibelsToGainFactor(targetPeakDecibels); - const maxGainFactor = decibelsToGainFactor(maxGainIncreaseDecibels); - - const peakAmplitude = getSamplePeakAmplitude(rawAudio.audioChannels); - - const gainFactor = Math.min( - targetPeakAmplitude / peakAmplitude, - maxGainFactor - ); - - return applyGainFactor(rawAudio, gainFactor); -} - -function attenuateIfClipping(rawAudio: RawAudio) { - return normalizeAudioLevel(rawAudio, -0.1, 0); -} - -const symbolWords = [ - "$", - "€", - "¢", - "£", - "¥", - "©", - "®", - "™", - "%", - "&", - "#", - "~", - "@", - "+", - "±", - "÷", - "/", - "*", - "=", - "¼", - "½", - "¾", -]; - -function isWord(str: string) { - return wordCharacterPattern.test(str.trim()); -} - -function multiplyTimelineByFactor(targetTimeline: Timeline, factor: number) { - const newTimeline = deepClone(targetTimeline); - - for (const segmentTimelineEntry of newTimeline) { - segmentTimelineEntry.startTime = - segmentTimelineEntry.startTime * factor; - segmentTimelineEntry.endTime = segmentTimelineEntry.endTime * factor; - - if (segmentTimelineEntry.timeline) { - segmentTimelineEntry.timeline = multiplyTimelineByFactor( - segmentTimelineEntry.timeline, - factor - ); - } - } - - return newTimeline; -} - -function isWordOrSymbolWord(str: string) { - return isWord(str) || symbolWords.includes(str); -} - -const defaultSynthesisOptions: SynthesisOptions = { - engine: "vits", - language: "en-us", - - voice: undefined, - voiceGender: undefined, - - speed: 1.0, - pitch: 1.0, - pitchVariation: 1.0, - - ssml: false, - - splitToSentences: true, - - segmentEndPause: 1.0, - sentenceEndPause: 0.75, - - customLexiconPaths: undefined, - - plainText: { - paragraphBreaks: "double", - whitespace: "collapse", - }, - - alignment: { - dtw: { - granularity: "high", - }, - }, - - postProcessing: { - normalizeAudio: true, - targetPeak: -3, - maxGainIncrease: 30, - - speed: undefined, - pitch: undefined, - }, - - outputAudioFormat: undefined, - - languageDetection: undefined, - - vits: { - speakerId: undefined, - provider: undefined, - }, - - espeak: { - rate: undefined, - pitch: undefined, - pitchRange: undefined, - useKlatt: false, - }, -}; - -const defaultDialectForLanguageCode: { [lang: string]: string } = { - en: "en-US", - zh: "zh-CN", - ar: "ar-EG", - fr: "fr-FR", - de: "de-DE", - pt: "pt-BR", - es: "es-ES", - nl: "nl-NL", -}; - -function getAppDataDir(appName: string) { - let dataDir: string; - - const platform = process.platform; - const homeDir = os.homedir(); - - if (platform == "win32") { - dataDir = path.join(homeDir, "AppData", "Local", appName); - } else if (platform == "darwin") { - dataDir = path.join(homeDir, "Library", "Application Support", appName); - } else if (platform == "linux") { - dataDir = path.join(homeDir, ".local", "share", appName); - } else { - throw new Error(`Unsupport platform ${platform}`); - } - - return dataDir; -} - -const existsSync = gracefulFS.existsSync; - -const stat = promisify(gracefulFS.stat); - -async function ensureDir(dirPath: string) { - dirPath = path.normalize(dirPath); - - if (existsSync(dirPath)) { - const dirStats = await stat(dirPath); - - if (!dirStats.isDirectory()) { - throw new Error( - `The path '${dirPath}' exists but is not a directory.` - ); - } - } else { - return fsExtra.ensureDir(dirPath); - } -} - -async function isFileIsUpToDate(filePath: string, timeRangeSeconds: number) { - const fileUpdateTime = (await stat(filePath)).mtime.valueOf(); - - const currentTime = new Date().valueOf(); - - const differenceInMilliseconds = currentTime - fileUpdateTime; - - const differenceInSeconds = differenceInMilliseconds / 1000; - - return differenceInSeconds <= timeRangeSeconds; -} - -function getAppTempDir(appName: string) { - let tempDir: string; - - const platform = process.platform; - const homeDir = os.homedir(); - - if (platform == "win32") { - tempDir = path.join(homeDir, "AppData", "Local", "Temp", appName); - } else if (platform == "darwin") { - tempDir = path.join(homeDir, "Library", "Caches", appName); - } else if (platform == "linux") { - tempDir = path.join(homeDir, ".cache", appName); - } else { - throw new Error(`Unsupport platform ${platform}`); - } - - return tempDir; -} - -async function writeFile( - filePath: string, - data: string | NodeJS.ArrayBufferView, - options?: fsExtra.WriteFileOptions -) { - return outputFile(filePath, data, options); -} - -const access = promisify(gracefulFS.access); - -const remove = fsExtra.remove; - -async function existsAndIsWritable(targetPath: string) { - try { - await access(targetPath, gracefulFS.constants.W_OK); - } catch { - return false; - } - - return true; -} - -async function testDirectoryIsWritable(dir: string) { - const testFileName = path.join(dir, getRandomHexString(16)); - - try { - await fsExtra.createFile(testFileName); - await remove(testFileName); - } catch (e) { - return false; - } - - return true; -} - -async function move(source: string, dest: string) { - source = path.normalize(source); - dest = path.normalize(dest); - - if (existsSync(dest)) { - const destPathExistsAndIsWritable = await existsAndIsWritable(dest); - - if (!destPathExistsAndIsWritable) { - throw new Error( - `The destination path '${dest}' exists but is not writable. There may be a permissions or locking issue.` - ); - } - } else { - const destDir = path.parse(dest).dir; - const destDirIsWritable = await testDirectoryIsWritable(destDir); - - if (!destDirIsWritable) { - throw new Error( - `The directory ${destDir} is not writable. There may be a permissions issue.` - ); - } - } - - return fsExtra.move(source, dest, { overwrite: true }); -} - -const outputFile = fsExtra.outputFile; - -async function writeFileSafe( - filePath: string, - data: string | NodeJS.ArrayBufferView, - options?: fsExtra.WriteFileOptions -) { - const tempDir = getAppTempDir("eliza"); - const tempFilePath = path.join( - tempDir, - `${getRandomHexString(16)}.partial` - ); - - await writeFile(tempFilePath, data, options); - - await move(tempFilePath, filePath); -} - -function getRandomHexString(charCount = 32, upperCase = false) { - if (charCount % 2 !== 0) { - throw new Error(`'charCount' must be an even number`); - } - - let hex = randomBytes(charCount / 2).toString("hex"); - - if (upperCase) { - hex = hex.toUpperCase(); - } - - return hex; -} - -function stringifyAndFormatJson(obj: any) { - return JSON.stringify(obj, undefined, 4); -} - -async function normalizeIdentifierToLanguageCode(langIdentifier: string) { - const result = await parseLangIdentifier(langIdentifier); - - return result.Name; -} - -const langInfoEntries: LangInfoEntry[] = []; - -interface LangInfoEntry { - LCID: number; - - Name: string; - NameLowerCase: string; - - TwoLetterISOLanguageName: string; - ThreeLetterISOLanguageName: string; - ThreeLetterWindowsLanguageName: string; - - EnglishName: string; - EnglishNameLowerCase: string; - - ANSICodePage: string; -} - -function getModuleRootDir() { - const currentScriptDir = path.dirname(fileURLToPath(import.meta.url)); - return path.resolve(currentScriptDir, "..", ".."); -} - -function resolveToModuleRootDir(relativePath: string) { - return path.resolve(getModuleRootDir(), relativePath); -} - -async function loadLangInfoEntriesIfNeeded() { - if (langInfoEntries.length > 0) { - return; - } - - const entries = (await readAndParseJsonFile( - resolveToModuleRootDir("data/tables/lcid-table.json") - )) as LangInfoEntry[]; - - for (const entry of entries) { - entry.NameLowerCase = entry.Name.toLowerCase(); - entry.EnglishNameLowerCase = entry.EnglishName.toLowerCase(); - - langInfoEntries.push(entry); - } -} - -async function parseLangIdentifier(langIdentifier: string) { - if (!langIdentifier) { - return emptyLangInfoEntry; - } - - await loadLangInfoEntriesIfNeeded(); - - langIdentifier = langIdentifier.trim().toLowerCase(); - - for (const entry of langInfoEntries) { - if ( - langIdentifier === entry.NameLowerCase || - langIdentifier === entry.ThreeLetterISOLanguageName || - langIdentifier === entry.EnglishNameLowerCase - ) { - return entry; - } - } - - throw new Error(`Couldn't parse language identifier '${langIdentifier}'.`); -} - -const emptyLangInfoEntry: LangInfoEntry = { - LCID: -1, - - Name: "", - NameLowerCase: "", - - TwoLetterISOLanguageName: "", - ThreeLetterISOLanguageName: "", - ThreeLetterWindowsLanguageName: "", - - EnglishName: "Empty", - EnglishNameLowerCase: "empty", - - ANSICodePage: "", -}; - -///////////////////////////////////////////////////////////////////////////////////////////// -// Voice list request -///////////////////////////////////////////////////////////////////////////////////////////// -async function requestVoiceList( - options: VoiceListRequestOptions -): Promise { - console.log("voice list requests", options); - options = extendDeep(defaultVoiceListRequestOptions, options); - - const cacheOptions = options.cache!; - - let cacheDir = cacheOptions?.path; - - if (!cacheDir) { - const appDataDir = getAppDataDir("eliza"); - cacheDir = path.join(appDataDir, "voice-list-cache"); - await ensureDir(cacheDir); - } - - const cacheFilePath = path.join(cacheDir, `${options.engine}.voices.json`); - console.log("cacheFilePath", cacheFilePath); - async function loadVoiceList() { - let voiceList: SynthesisVoice[] = []; - - switch (options.engine) { - case "vits": { - voiceList = vitsVoiceList.map((entry) => { - return { ...entry, packageName: `vits-${entry.name}` }; - }); - - break; - } - } - - if (cacheFilePath) { - await writeFileSafe( - cacheFilePath, - stringifyAndFormatJson(voiceList) - ); - } - - return voiceList; - } - - let voiceList: SynthesisVoice[]; - - if ( - cacheFilePath && - existsSync(cacheFilePath) && - (await isFileIsUpToDate(cacheFilePath, options.cache!.duration!)) - ) { - voiceList = await readAndParseJsonFile(cacheFilePath); - } else { - voiceList = await loadVoiceList(); - } - - console.log("voiceList"); - console.log(voiceList); - - const languageCode = await normalizeIdentifierToLanguageCode( - options.language || "" - ); - - if (languageCode) { - let filteredVoiceList = voiceList.filter((voice) => - voice.languages.includes(languageCode) - ); - - if (filteredVoiceList.length == 0 && languageCode.includes("-")) { - const shortLanguageCode = getShortLanguageCode(languageCode); - - filteredVoiceList = voiceList.filter((voice) => - voice.languages.includes(shortLanguageCode) - ); - } - - voiceList = filteredVoiceList; - } - - if (options.voiceGender) { - const genderLowercase = options.voiceGender.toLowerCase(); - voiceList = voiceList.filter( - (voice) => - voice.gender == genderLowercase || voice.gender == "unknown" - ); - } - - if (options.voice) { - const namePatternLowerCase = options.voice.toLocaleLowerCase(); - const namePatternParts = namePatternLowerCase.split(/\b/g); - - if (namePatternParts.length > 1) { - voiceList = voiceList.filter((voice) => - voice.name.toLocaleLowerCase().includes(namePatternLowerCase) - ); - } else { - voiceList = voiceList.filter((voice) => { - const name = voice.name.toLocaleLowerCase(); - const nameParts = name.split(/\b/g); - - for (const namePart of nameParts) { - if (namePart.startsWith(namePatternLowerCase)) { - return true; - } - } - - return false; - }); - } - } - - let bestMatchingVoice = voiceList[0]; - - if ( - bestMatchingVoice && - voiceList.length > 1 && - defaultDialectForLanguageCode[languageCode] - ) { - const expandedLanguageCode = - defaultDialectForLanguageCode[languageCode]; - - for (const voice of voiceList) { - if (voice.languages.includes(expandedLanguageCode)) { - bestMatchingVoice = voice; - break; - } - } - } - - return { voiceList, bestMatchingVoice }; -} - -interface RequestVoiceListResult { - voiceList: SynthesisVoice[]; - bestMatchingVoice: SynthesisVoice; -} - -function getAllLangCodesFromVoiceList(voiceList: SynthesisVoice[]) { - const languageCodes = new Set(); - const langList: string[] = []; - - for (const voice of voiceList) { - for (const langCode of voice.languages) { - if (languageCodes.has(langCode)) { - continue; - } - - langList.push(langCode); - languageCodes.add(langCode); - } - } - - return langList; -} - -interface VoiceListRequestOptions extends SynthesisOptions { - cache?: { - path?: string; - duration?: number; - }; -} - -const defaultVoiceListRequestOptions: VoiceListRequestOptions = { - ...defaultSynthesisOptions, - - cache: { - path: undefined, - duration: 60 * 1, - }, -}; - -interface SynthesisSegmentEventData { - index: number; - total: number; - audio: RawAudio | Buffer; - timeline: Timeline; - transcript: string; - language: string; - peakDecibelsSoFar: number; -} - -type SynthesisSegmentEvent = (data: SynthesisSegmentEventData) => Promise; - -export interface SynthesisVoice { - name: string; - languages: string[]; - gender: VoiceGender; - speakerCount?: number; - packageName?: string; -} - -type VoiceGender = "male" | "female" | "unknown"; - -type ParagraphBreakType = "single" | "double"; -type WhitespaceProcessing = "preserve" | "removeLineBreaks" | "collapse"; - -function splitToParagraphs( - text: string, - paragraphBreaks: ParagraphBreakType, - whitespace: WhitespaceProcessing -) { - let paragraphs: string[] = []; - - if (paragraphBreaks == "single") { - paragraphs = text.split(/(\r?\n)+/g); - } else if (paragraphBreaks == "double") { - paragraphs = text.split(/(\r?\n)(\r?\n)+/g); - } else { - throw new Error(`Invalid paragraph break type: ${paragraphBreaks}`); - } - - if (whitespace == "removeLineBreaks") { - paragraphs = paragraphs.map((p) => p.replaceAll(/(\r?\n)+/g, " ")); - } else if (whitespace == "collapse") { - paragraphs = paragraphs.map((p) => p.replaceAll(/\s+/g, " ")); - } - - paragraphs = paragraphs.map((p) => p.trim()); - paragraphs = paragraphs.filter((p) => p.length > 0); - - return paragraphs; -} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 77c34b9d5f..ee813316b3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -47,7 +47,7 @@ importers: version: 5.4.11(@types/node@22.8.4)(terser@5.36.0) vitest: specifier: ^2.1.5 - version: 2.1.5(@types/node@22.8.4)(terser@5.36.0) + version: 2.1.5(@types/node@22.8.4)(jsdom@25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10))(terser@5.36.0) agent: dependencies: @@ -777,6 +777,9 @@ importers: csv-writer: specifier: 1.6.0 version: 1.6.0 + echogarden: + specifier: ^2.0.5 + version: 2.0.5(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(encoding@0.1.13)(utf-8-validate@5.0.10)(zod@3.23.8) espeak-ng: specifier: 1.0.2 version: 1.0.2 @@ -960,7 +963,7 @@ importers: version: 8.3.5(@swc/core@1.9.2(@swc/helpers@0.5.15))(jiti@2.4.0)(postcss@8.4.49)(typescript@5.6.3)(yaml@2.6.1) vitest: specifier: ^2.1.4 - version: 2.1.5(@types/node@22.8.4)(terser@5.36.0) + version: 2.1.5(@types/node@22.8.4)(jsdom@25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10))(terser@5.36.0) whatwg-url: specifier: 7.1.0 version: 7.1.0 @@ -981,7 +984,7 @@ importers: version: 11.0.2 vitest: specifier: ^2.1.4 - version: 2.1.5(@types/node@22.8.4)(terser@5.36.0) + version: 2.1.5(@types/node@22.8.4)(jsdom@25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10))(terser@5.36.0) whatwg-url: specifier: 7.1.0 version: 7.1.0 @@ -1290,6 +1293,155 @@ packages: qs: ^6.12.0 starknet: ^6.6.0 + '@aws-crypto/crc32@5.2.0': + resolution: {integrity: sha512-nLbCWqQNgUiwwtFsen1AdzAtvuLRsQS8rYgMuxCrdKf9kOssamGLuPwyTY9wyYblNr9+1XM8v6zoDTPPSIeANg==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/sha256-browser@5.2.0': + resolution: {integrity: sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==} + + '@aws-crypto/sha256-js@5.2.0': + resolution: {integrity: sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==} + engines: {node: '>=16.0.0'} + + '@aws-crypto/supports-web-crypto@5.2.0': + resolution: {integrity: sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==} + + '@aws-crypto/util@5.2.0': + resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} + + '@aws-sdk/client-polly@3.696.0': + resolution: {integrity: sha512-2eZ/P8/Kz2b1AST4aBmRaBCjqOCLUDchzw51J3K7cWLKKqECRNPt38G+hTja6zWF2KWY4PyMseEjaYWspYAh1A==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sso-oidc@3.696.0': + resolution: {integrity: sha512-ikxQ3mo86d1mAq5zTaQAh8rLBERwL+I4MUYu/IVYW2hhl9J2SDsl0SgnKeXQG6S8zWuHcBO587zsZaRta1MQ/g==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.696.0 + + '@aws-sdk/client-sso@3.696.0': + resolution: {integrity: sha512-q5TTkd08JS0DOkHfUL853tuArf7NrPeqoS5UOvqJho8ibV9Ak/a/HO4kNvy9Nj3cib/toHYHsQIEtecUPSUUrQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-sts@3.696.0': + resolution: {integrity: sha512-eJOxR8/UyI7kGSRyE751Ea7MKEzllQs7eNveDJy9OP4t/jsN/P19HJ1YHeA1np40JRTUBfqa6WLAAiIXsk8rkg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/client-transcribe-streaming@3.696.0': + resolution: {integrity: sha512-Mw2PpKm86b80JgsOUbTAs+9m/kqq7LdgZ2ANxYvgkh+usbQ3xIuANJCcoeYIvL5tZnqAjpBtFrT3KZHunfHbvA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/core@3.696.0': + resolution: {integrity: sha512-3c9III1k03DgvRZWg8vhVmfIXPG6hAciN9MzQTzqGngzWAELZF/WONRTRQuDFixVtarQatmLHYVw/atGeA2Byw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-env@3.696.0': + resolution: {integrity: sha512-T9iMFnJL7YTlESLpVFT3fg1Lkb1lD+oiaIC8KMpepb01gDUBIpj9+Y+pA/cgRWW0yRxmkDXNazAE2qQTVFGJzA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-http@3.696.0': + resolution: {integrity: sha512-GV6EbvPi2eq1+WgY/o2RFA3P7HGmnkIzCNmhwtALFlqMroLYWKE7PSeHw66Uh1dFQeVESn0/+hiUNhu1mB0emA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-ini@3.696.0': + resolution: {integrity: sha512-9WsZZofjPjNAAZhIh7c7FOhLK8CR3RnGgUm1tdZzV6ZSM1BuS2m6rdwIilRxAh3fxxKDkmW/r/aYmmCYwA+AYA==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.696.0 + + '@aws-sdk/credential-provider-node@3.696.0': + resolution: {integrity: sha512-8F6y5FcfRuMJouC5s207Ko1mcVvOXReBOlJmhIwE4QH1CnO/CliIyepnAZrRQ659mo5wIuquz6gXnpYbitEVMg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-process@3.696.0': + resolution: {integrity: sha512-mL1RcFDe9sfmyU5K1nuFkO8UiJXXxLX4JO1gVaDIOvPqwStpUAwi3A1BoeZhWZZNQsiKI810RnYGo0E0WB/hUA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-sso@3.696.0': + resolution: {integrity: sha512-4SSZ9Nk08JSu4/rX1a+dEac/Ims1HCXfV7YLUe5LGdtRLSKRoQQUy+hkFaGYoSugP/p1UfUPl3BuTO9Vv8z1pA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/credential-provider-web-identity@3.696.0': + resolution: {integrity: sha512-XJ/CVlWChM0VCoc259vWguFUjJDn/QwDqHwbx+K9cg3v6yrqXfK5ai+p/6lx0nQpnk4JzPVeYYxWRpaTsGC9rg==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sts': ^3.696.0 + + '@aws-sdk/eventstream-handler-node@3.696.0': + resolution: {integrity: sha512-wK5o8Ziucz6s5jWIG6weHLsSE9qIAfeepoAdiuEvoJLhxNCJUkxF25NNidzhqxRfGDUzJIa+itSdD8vdP60qyA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-eventstream@3.696.0': + resolution: {integrity: sha512-ZbyKX1L+moB7Gid8332XaxA6uA2aMz9V5mmdEeOYRDEPXxf6VZYAOFZ6koSqThDuekxOuXunXw90BwiXz9/DEg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-host-header@3.696.0': + resolution: {integrity: sha512-zELJp9Ta2zkX7ELggMN9qMCgekqZhFC5V2rOr4hJDEb/Tte7gpfKSObAnw/3AYiVqt36sjHKfdkoTsuwGdEoDg==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-logger@3.696.0': + resolution: {integrity: sha512-KhkHt+8AjCxcR/5Zp3++YPJPpFQzxpr+jmONiT/Jw2yqnSngZ0Yspm5wGoRx2hS1HJbyZNuaOWEGuJoxLeBKfA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-recursion-detection@3.696.0': + resolution: {integrity: sha512-si/maV3Z0hH7qa99f9ru2xpS5HlfSVcasRlNUXKSDm611i7jFMWwGNLUOXFAOLhXotPX5G3Z6BLwL34oDeBMug==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-sdk-transcribe-streaming@3.696.0': + resolution: {integrity: sha512-WToGtHGaRWIQFkjqPaXShokTH1LZMFjoSX0CPT1I9OZhyy95FYyibJbnQLiSGY9zQN45jcUA8PtQZwbR/EfuTw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-signing@3.696.0': + resolution: {integrity: sha512-7ooWYsX+QgFEphNxOZrkZlWFLoDyLQgayf/JvFZ6qnO550K6H9Z2w7vEySoChRDoLjYs6omEHW6A8YLIK3r8rw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-user-agent@3.696.0': + resolution: {integrity: sha512-Lvyj8CTyxrHI6GHd2YVZKIRI5Fmnugt3cpJo0VrKKEgK5zMySwEZ1n4dqPK6czYRWKd5+WnYHYAuU+Wdk6Jsjw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/middleware-websocket@3.696.0': + resolution: {integrity: sha512-8CaCtg08JZx7KtOepIMOUde2KsBk2UJ85h3LKGdmXXnWnmT+Jv3Q5LYbs+VowW/04OXcaYmua7Q3XbnRPw6qgw==} + engines: {node: '>= 14.0.0'} + + '@aws-sdk/region-config-resolver@3.696.0': + resolution: {integrity: sha512-7EuH142lBXjI8yH6dVS/CZeiK/WZsmb/8zP6bQbVYpMrppSTgB3MzZZdxVZGzL5r8zPQOU10wLC4kIMy0qdBVQ==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/token-providers@3.696.0': + resolution: {integrity: sha512-fvTcMADrkwRdNwVmJXi2pSPf1iizmUqczrR1KusH4XehI/KybS4U6ViskRT0v07vpxwL7x+iaD/8fR0PUu5L/g==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@aws-sdk/client-sso-oidc': ^3.696.0 + + '@aws-sdk/types@3.696.0': + resolution: {integrity: sha512-9rTvUJIAj5d3//U5FDPWGJ1nFJLuWb30vugGOrWk7aNZ6y9tuA3PI7Cc9dP8WEXKVyK1vuuk8rSFP2iqXnlgrw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-endpoints@3.696.0': + resolution: {integrity: sha512-T5s0IlBVX+gkb9g/I6CLt4yAZVzMSiGnbUqWihWsHvQR1WOoIcndQy/Oz/IJXT9T2ipoy7a80gzV6a5mglrioA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-format-url@3.696.0': + resolution: {integrity: sha512-R6yK1LozUD1GdAZRPhNsIow6VNFJUTyyoIar1OCWaknlucBMcq7musF3DN3TlORBwfFMj5buHc2ET9OtMtzvuA==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-locate-window@3.693.0': + resolution: {integrity: sha512-ttrag6haJLWABhLqtg1Uf+4LgHWIMOVSYL+VYZmAp2v4PUGOwWmWQH0Zk8RM7YuQcLfH/EoR72/Yxz6A4FKcuw==} + engines: {node: '>=16.0.0'} + + '@aws-sdk/util-user-agent-browser@3.696.0': + resolution: {integrity: sha512-Z5rVNDdmPOe6ELoM5AhF/ja5tSjbe6ctSctDPb0JdDf4dT0v2MfwhJKzXju2RzX8Es/77Glh7MlaXLE0kCB9+Q==} + + '@aws-sdk/util-user-agent-node@3.696.0': + resolution: {integrity: sha512-KhKqcfyXIB0SCCt+qsu4eJjsfiOrNzK5dCV7RAW2YIpp+msxGUUX0NdRE9rkzjiv+3EMktgJm3eEIS+yxtlVdQ==} + engines: {node: '>=16.0.0'} + peerDependencies: + aws-crt: '>=1.0.0' + peerDependenciesMeta: + aws-crt: + optional: true + '@babel/code-frame@7.26.2': resolution: {integrity: sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==} engines: {node: '>=6.9.0'} @@ -2532,15 +2684,55 @@ packages: resolution: {integrity: sha512-oxnpUcFZGE3uPCDoXr8GJriB3VWM9sFjPedFidX3Fsz87l1NZNc1wtbKPfQ7GYFDMYo2IGlAv5+47Me9RkM6lg==} engines: {node: '>=18.0'} + '@echogarden/audio-io@0.2.3': + resolution: {integrity: sha512-3p6oGhuCvfwcEWE52hJ2pMAY05qz1UeHXuITp+ijG2b5z3qizJT4IsP6ZIfiXYg8pW8maUnbwPOLbazpJv2KYQ==} + engines: {node: '>=18'} + os: [win32, darwin, linux] + '@echogarden/espeak-ng-emscripten@0.3.0': resolution: {integrity: sha512-ukSH2lnIRJqthW07lHfEPUUx59OrFovso8bicwbIT4MV/0jvE15nKv7Uk73fwZQSpo9Y+8Lpyr24nP+AJ5AJOA==} + '@echogarden/espeak-ng-emscripten@0.3.3': + resolution: {integrity: sha512-TvSwLnB0vuqIUptvHZyr63Ywj2m7ureIK864O8aoyw9WqEqHE1x5weBzy/1/soZ4BkEkRvurlLF7ue+tEhyatw==} + + '@echogarden/fasttext-wasm@0.1.0': + resolution: {integrity: sha512-spZGRZMUpJsGMJri6+Ea86ECTeFXr2ZQei5xrviVfo8u57OU8Uo0JqW/rUOgn55tVbIxEqfYrHT5u0OUYOKLvQ==} + + '@echogarden/flite-wasi@0.1.1': + resolution: {integrity: sha512-/ayJRFWbq73EEL8N82z1WO2mbey87wFa+t1o+U+xyaD7Ub0qedQ9s0IDJlO5cVvyD2ZXQbFwzeiCD8eXqQ8HCQ==} + + '@echogarden/fvad-wasm@0.2.0': + resolution: {integrity: sha512-jPPzN6uV23dsOkKnGxajBDw81Xx3ICecw72sIzI+m4PzFWpSf/QOLvlgf7mySfqCngD54LRC1aDgD5haB45dbg==} + '@echogarden/kissfft-wasm@0.2.0': resolution: {integrity: sha512-bL+MXQY6zos26QPhmJR18VWzf/fc2zRDl+BPqdO9Pqejop6sz8qjQdyxhB1rFW5/fxCJlL+WzZzbeaC+aBPwDA==} + '@echogarden/pffft-wasm@0.4.2': + resolution: {integrity: sha512-x3rzhVGY01tEAFt+a+D9T/jP8wx5r/XS5hesMFCJz7ujMXg4LO2+94ip1NhzVKPrrsp/oT7UCJjthg5Nz2kYOQ==} + + '@echogarden/rnnoise-wasm@0.2.0': + resolution: {integrity: sha512-dND0FKFaLxyqa+rdgcMWc7A3Zh9pu7zzetYd60+2nbwnKL/8HtUXFGf7GAJ4krwTOgtSLETH9REF39gOa4T5UQ==} + + '@echogarden/rubberband-wasm@0.2.0': + resolution: {integrity: sha512-rcYq34+9HgdKjZb2EksQMW5m4SoyFGjUZCttQCVJz81hbY/qUzjsxsy3bN6iyehTx3mxIYt7ozB/M3B5M40BSQ==} + + '@echogarden/sonic-wasm@0.2.0': + resolution: {integrity: sha512-AjYOkrecn5k8huQ+59z6w2emSqhcDPZOUJwKCTNCQ7VYoLO2GDAQPsNL1o+Hs4mjmnqQcZKwepwMU1K3PhrEYg==} + '@echogarden/speex-resampler-wasm@0.2.1': resolution: {integrity: sha512-sCbMrWNSYWDuJ4igz487CL3/DFWW8SYsg4QGJh55gHRrvJf0IkV/6XcRQtobp/U40GYtBWi46Ct3fU2TGrIKRw==} + '@echogarden/speex-resampler-wasm@0.3.0': + resolution: {integrity: sha512-+J/Vgkseb0NjaKGMBBf9WjZpt4sReA1HQ9QBsuRngBgnzB17Pa1woM797nOqpu1aocotta2yJpQ8FcjfH/w4Bw==} + + '@echogarden/svoxpico-wasm@0.2.0': + resolution: {integrity: sha512-RQH5y5dvUlV4H8TTUX7QFDGpb5j1ge4veuIaPmntUvioKal3U5eNqvI/kCZO0SJ7YS9OWDsHpnKWySs6z9LmTA==} + + '@echogarden/transformers-nodejs-lite@2.17.1-lite.3': + resolution: {integrity: sha512-qD9kvrL1xmce0iiiNEyqq2GW1qoksqvdOpww3Gsgqx/O9tdU/M2R78fji9opY+QU9u8OKH9L+ZzsOQdF5FixZA==} + peerDependencies: + onnxruntime-node: ^1.20.0 + '@emnapi/core@1.3.1': resolution: {integrity: sha512-pVGjBIt1Y6gg3EJN8jTcfpP/+uuRksIo055oE/OBkDNcjZqVbfkWCksG1Jp4yZnj3iKWyWX8fdG/j6UDYPbFog==} @@ -3042,6 +3234,10 @@ packages: '@hapi/topo@5.1.0': resolution: {integrity: sha512-foQZKJig7Ob0BMAYBfcJk8d77QtOe7Wo4ox7ff1lQYoNNAb6jwcY1ncdoy2e9wQZzvNy7ODZCYJkK8kzmcAnAg==} + '@huggingface/jinja@0.2.2': + resolution: {integrity: sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==} + engines: {node: '>=18'} + '@huggingface/jinja@0.3.2': resolution: {integrity: sha512-F2FvuIc+w1blGsaqJI/OErRbWH6bVJDCBI8Rm5D86yZ2wlwrGERsfIaru7XUv9eYC3DMP3ixDRRtF0h6d8AZcQ==} engines: {node: '>=18'} @@ -3342,6 +3538,10 @@ packages: '@mermaid-js/parser@0.3.0': resolution: {integrity: sha512-HsvL6zgE5sUPGgkIDlmAWR1HTNHz2Iy11BAWPTa4Jjabkpguy4Ze2gzfLrg6pdRuBvFwgUYyxiaNqZwrEEXepA==} + '@mozilla/readability@0.5.0': + resolution: {integrity: sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==} + engines: {node: '>=14.0.0'} + '@napi-rs/wasm-runtime@0.2.4': resolution: {integrity: sha512-9zESzOO5aDByvhIAsOy9TbpZ0Ur2AJbUI7UT73kcUTS2mxAMHOBaa1st/jAymNoCtvrit99kkzT1FZuXVcgfIQ==} @@ -4329,6 +4529,189 @@ packages: '@slorber/remark-comment@1.0.0': resolution: {integrity: sha512-RCE24n7jsOj1M0UPvIQCHTe7fI0sFL4S2nwKVWwHyVr/wI/H8GosgsJGyhnsZoGFnD/P2hLf1mSbrrgSLN93NA==} + '@smithy/abort-controller@3.1.8': + resolution: {integrity: sha512-+3DOBcUn5/rVjlxGvUPKc416SExarAQ+Qe0bqk30YSUjbepwpS7QN0cyKUSifvLJhdMZ0WPzPP5ymut0oonrpQ==} + engines: {node: '>=16.0.0'} + + '@smithy/config-resolver@3.0.12': + resolution: {integrity: sha512-YAJP9UJFZRZ8N+UruTeq78zkdjUHmzsY62J4qKWZ4SXB4QXJ/+680EfXXgkYA2xj77ooMqtUY9m406zGNqwivQ==} + engines: {node: '>=16.0.0'} + + '@smithy/core@2.5.4': + resolution: {integrity: sha512-iFh2Ymn2sCziBRLPuOOxRPkuCx/2gBdXtBGuCUFLUe6bWYjKnhHyIPqGeNkLZ5Aco/5GjebRTBFiWID3sDbrKw==} + engines: {node: '>=16.0.0'} + + '@smithy/credential-provider-imds@3.2.7': + resolution: {integrity: sha512-cEfbau+rrWF8ylkmmVAObOmjbTIzKyUC5TkBL58SbLywD0RCBC4JAUKbmtSm2w5KUJNRPGgpGFMvE2FKnuNlWQ==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-codec@3.1.9': + resolution: {integrity: sha512-F574nX0hhlNOjBnP+noLtsPFqXnWh2L0+nZKCwcu7P7J8k+k+rdIDs+RMnrMwrzhUE4mwMgyN0cYnEn0G8yrnQ==} + + '@smithy/eventstream-serde-browser@3.0.13': + resolution: {integrity: sha512-Nee9m+97o9Qj6/XeLz2g2vANS2SZgAxV4rDBMKGHvFJHU/xz88x2RwCkwsvEwYjSX4BV1NG1JXmxEaDUzZTAtw==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-serde-config-resolver@3.0.10': + resolution: {integrity: sha512-K1M0x7P7qbBUKB0UWIL5KOcyi6zqV5mPJoL0/o01HPJr0CSq3A9FYuJC6e11EX6hR8QTIR++DBiGrYveOu6trw==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-serde-node@3.0.12': + resolution: {integrity: sha512-kiZymxXvZ4tnuYsPSMUHe+MMfc4FTeFWJIc0Q5wygJoUQM4rVHNghvd48y7ppuulNMbuYt95ah71pYc2+o4JOA==} + engines: {node: '>=16.0.0'} + + '@smithy/eventstream-serde-universal@3.0.12': + resolution: {integrity: sha512-1i8ifhLJrOZ+pEifTlF0EfZzMLUGQggYQ6WmZ4d5g77zEKf7oZ0kvh1yKWHPjofvOwqrkwRDVuxuYC8wVd662A==} + engines: {node: '>=16.0.0'} + + '@smithy/fetch-http-handler@4.1.1': + resolution: {integrity: sha512-bH7QW0+JdX0bPBadXt8GwMof/jz0H28I84hU1Uet9ISpzUqXqRQ3fEZJ+ANPOhzSEczYvANNl3uDQDYArSFDtA==} + + '@smithy/hash-node@3.0.10': + resolution: {integrity: sha512-3zWGWCHI+FlJ5WJwx73Mw2llYR8aflVyZN5JhoqLxbdPZi6UyKSdCeXAWJw9ja22m6S6Tzz1KZ+kAaSwvydi0g==} + engines: {node: '>=16.0.0'} + + '@smithy/invalid-dependency@3.0.10': + resolution: {integrity: sha512-Lp2L65vFi+cj0vFMu2obpPW69DU+6O5g3086lmI4XcnRCG8PxvpWC7XyaVwJCxsZFzueHjXnrOH/E0pl0zikfA==} + + '@smithy/is-array-buffer@2.2.0': + resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} + engines: {node: '>=14.0.0'} + + '@smithy/is-array-buffer@3.0.0': + resolution: {integrity: sha512-+Fsu6Q6C4RSJiy81Y8eApjEB5gVtM+oFKTffg+jSuwtvomJJrhUJBu2zS8wjXSgH/g1MKEWrzyChTBe6clb5FQ==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-content-length@3.0.12': + resolution: {integrity: sha512-1mDEXqzM20yywaMDuf5o9ue8OkJ373lSPbaSjyEvkWdqELhFMyNNgKGWL/rCSf4KME8B+HlHKuR8u9kRj8HzEQ==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-endpoint@3.2.4': + resolution: {integrity: sha512-TybiW2LA3kYVd3e+lWhINVu1o26KJbBwOpADnf0L4x/35vLVica77XVR5hvV9+kWeTGeSJ3IHTcYxbRxlbwhsg==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-retry@3.0.28': + resolution: {integrity: sha512-vK2eDfvIXG1U64FEUhYxoZ1JSj4XFbYWkK36iz02i3pFwWiDz1Q7jKhGTBCwx/7KqJNk4VS7d7cDLXFOvP7M+g==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-serde@3.0.10': + resolution: {integrity: sha512-MnAuhh+dD14F428ubSJuRnmRsfOpxSzvRhaGVTvd/lrUDE3kxzCCmH8lnVTvoNQnV2BbJ4c15QwZ3UdQBtFNZA==} + engines: {node: '>=16.0.0'} + + '@smithy/middleware-stack@3.0.10': + resolution: {integrity: sha512-grCHyoiARDBBGPyw2BeicpjgpsDFWZZxptbVKb3CRd/ZA15F/T6rZjCCuBUjJwdck1nwUuIxYtsS4H9DDpbP5w==} + engines: {node: '>=16.0.0'} + + '@smithy/node-config-provider@3.1.11': + resolution: {integrity: sha512-URq3gT3RpDikh/8MBJUB+QGZzfS7Bm6TQTqoh4CqE8NBuyPkWa5eUXj0XFcFfeZVgg3WMh1u19iaXn8FvvXxZw==} + engines: {node: '>=16.0.0'} + + '@smithy/node-http-handler@3.3.1': + resolution: {integrity: sha512-fr+UAOMGWh6bn4YSEezBCpJn9Ukp9oR4D32sCjCo7U81evE11YePOQ58ogzyfgmjIO79YeOdfXXqr0jyhPQeMg==} + engines: {node: '>=16.0.0'} + + '@smithy/property-provider@3.1.10': + resolution: {integrity: sha512-n1MJZGTorTH2DvyTVj+3wXnd4CzjJxyXeOgnTlgNVFxaaMeT4OteEp4QrzF8p9ee2yg42nvyVK6R/awLCakjeQ==} + engines: {node: '>=16.0.0'} + + '@smithy/protocol-http@4.1.7': + resolution: {integrity: sha512-FP2LepWD0eJeOTm0SjssPcgqAlDFzOmRXqXmGhfIM52G7Lrox/pcpQf6RP4F21k0+O12zaqQt5fCDOeBtqY6Cg==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-builder@3.0.10': + resolution: {integrity: sha512-nT9CQF3EIJtIUepXQuBFb8dxJi3WVZS3XfuDksxSCSn+/CzZowRLdhDn+2acbBv8R6eaJqPupoI/aRFIImNVPQ==} + engines: {node: '>=16.0.0'} + + '@smithy/querystring-parser@3.0.10': + resolution: {integrity: sha512-Oa0XDcpo9SmjhiDD9ua2UyM3uU01ZTuIrNdZvzwUTykW1PM8o2yJvMh1Do1rY5sUQg4NDV70dMi0JhDx4GyxuQ==} + engines: {node: '>=16.0.0'} + + '@smithy/service-error-classification@3.0.10': + resolution: {integrity: sha512-zHe642KCqDxXLuhs6xmHVgRwy078RfqxP2wRDpIyiF8EmsWXptMwnMwbVa50lw+WOGNrYm9zbaEg0oDe3PTtvQ==} + engines: {node: '>=16.0.0'} + + '@smithy/shared-ini-file-loader@3.1.11': + resolution: {integrity: sha512-AUdrIZHFtUgmfSN4Gq9nHu3IkHMa1YDcN+s061Nfm+6pQ0mJy85YQDB0tZBCmls0Vuj22pLwDPmL92+Hvfwwlg==} + engines: {node: '>=16.0.0'} + + '@smithy/signature-v4@4.2.3': + resolution: {integrity: sha512-pPSQQ2v2vu9vc8iew7sszLd0O09I5TRc5zhY71KA+Ao0xYazIG+uLeHbTJfIWGO3BGVLiXjUr3EEeCcEQLjpWQ==} + engines: {node: '>=16.0.0'} + + '@smithy/smithy-client@3.4.5': + resolution: {integrity: sha512-k0sybYT9zlP79sIKd1XGm4TmK0AS1nA2bzDHXx7m0nGi3RQ8dxxQUs4CPkSmQTKAo+KF9aINU3KzpGIpV7UoMw==} + engines: {node: '>=16.0.0'} + + '@smithy/types@3.7.1': + resolution: {integrity: sha512-XKLcLXZY7sUQgvvWyeaL/qwNPp6V3dWcUjqrQKjSb+tzYiCy340R/c64LV5j+Tnb2GhmunEX0eou+L+m2hJNYA==} + engines: {node: '>=16.0.0'} + + '@smithy/url-parser@3.0.10': + resolution: {integrity: sha512-j90NUalTSBR2NaZTuruEgavSdh8MLirf58LoGSk4AtQfyIymogIhgnGUU2Mga2bkMkpSoC9gxb74xBXL5afKAQ==} + + '@smithy/util-base64@3.0.0': + resolution: {integrity: sha512-Kxvoh5Qtt0CDsfajiZOCpJxgtPHXOKwmM+Zy4waD43UoEMA+qPxxa98aE/7ZhdnBFZFXMOiBR5xbcaMhLtznQQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-body-length-browser@3.0.0': + resolution: {integrity: sha512-cbjJs2A1mLYmqmyVl80uoLTJhAcfzMOyPgjwAYusWKMdLeNtzmMz9YxNl3/jRLoxSS3wkqkf0jwNdtXWtyEBaQ==} + + '@smithy/util-body-length-node@3.0.0': + resolution: {integrity: sha512-Tj7pZ4bUloNUP6PzwhN7K386tmSmEET9QtQg0TgdNOnxhZvCssHji+oZTUIuzxECRfG8rdm2PMw2WCFs6eIYkA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-buffer-from@2.2.0': + resolution: {integrity: sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==} + engines: {node: '>=14.0.0'} + + '@smithy/util-buffer-from@3.0.0': + resolution: {integrity: sha512-aEOHCgq5RWFbP+UDPvPot26EJHjOC+bRgse5A8V3FSShqd5E5UN4qc7zkwsvJPPAVsf73QwYcHN1/gt/rtLwQA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-config-provider@3.0.0': + resolution: {integrity: sha512-pbjk4s0fwq3Di/ANL+rCvJMKM5bzAQdE5S/6RL5NXgMExFAi6UgQMPOm5yPaIWPpr+EOXKXRonJ3FoxKf4mCJQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-defaults-mode-browser@3.0.28': + resolution: {integrity: sha512-6bzwAbZpHRFVJsOztmov5PGDmJYsbNSoIEfHSJJyFLzfBGCCChiO3od9k7E/TLgrCsIifdAbB9nqbVbyE7wRUw==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-defaults-mode-node@3.0.28': + resolution: {integrity: sha512-78ENJDorV1CjOQselGmm3+z7Yqjj5HWCbjzh0Ixuq736dh1oEnD9sAttSBNSLlpZsX8VQnmERqA2fEFlmqWn8w==} + engines: {node: '>= 10.0.0'} + + '@smithy/util-endpoints@2.1.6': + resolution: {integrity: sha512-mFV1t3ndBh0yZOJgWxO9J/4cHZVn5UG1D8DeCc6/echfNkeEJWu9LD7mgGH5fHrEdR7LDoWw7PQO6QiGpHXhgA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-hex-encoding@3.0.0': + resolution: {integrity: sha512-eFndh1WEK5YMUYvy3lPlVmYY/fZcQE1D8oSf41Id2vCeIkKJXPcYDCZD+4+xViI6b1XSd7tE+s5AmXzz5ilabQ==} + engines: {node: '>=16.0.0'} + + '@smithy/util-middleware@3.0.10': + resolution: {integrity: sha512-eJO+/+RsrG2RpmY68jZdwQtnfsxjmPxzMlQpnHKjFPwrYqvlcT+fHdT+ZVwcjlWSrByOhGr9Ff2GG17efc192A==} + engines: {node: '>=16.0.0'} + + '@smithy/util-retry@3.0.10': + resolution: {integrity: sha512-1l4qatFp4PiU6j7UsbasUHL2VU023NRB/gfaa1M0rDqVrRN4g3mCArLRyH3OuktApA4ye+yjWQHjdziunw2eWA==} + engines: {node: '>=16.0.0'} + + '@smithy/util-stream@3.3.1': + resolution: {integrity: sha512-Ff68R5lJh2zj+AUTvbAU/4yx+6QPRzg7+pI7M1FbtQHcRIp7xvguxVsQBKyB3fwiOwhAKu0lnNyYBaQfSW6TNw==} + engines: {node: '>=16.0.0'} + + '@smithy/util-uri-escape@3.0.0': + resolution: {integrity: sha512-LqR7qYLgZTD7nWLBecUi4aqolw8Mhza9ArpNEQ881MJJIU2sE5iHCK6TdyqqzcDLy0OPe10IY4T8ctVdtynubg==} + engines: {node: '>=16.0.0'} + + '@smithy/util-utf8@2.3.0': + resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} + engines: {node: '>=14.0.0'} + + '@smithy/util-utf8@3.0.0': + resolution: {integrity: sha512-rUeT12bxFnplYDe815GXbq/oixEGHfRFFtcTF3YdDi/JaENIM6aSYYLJydG83UNzLXeRI5K8abYd/8Sp/QM0kA==} + engines: {node: '>=16.0.0'} + '@solana/buffer-layout-utils@0.2.0': resolution: {integrity: sha512-szG4sxgJGktbuZYDg2FfNmkMi0DYQoVjN2h7ta1W1hPrwzarcFLBq9UpX1UjNXsNpT9dn+chgprtWGioUAr4/g==} engines: {node: '>= 10'} @@ -5028,6 +5411,9 @@ packages: '@types/wav-encoder@1.3.3': resolution: {integrity: sha512-2haw8zEMg4DspJRXmxUn2TElrQUs0bLPDh6x4N7/hDn+3tx2G05Lc+kC55uoHYsv8q+4deWhnDtHZT/ximg9aw==} + '@types/webrtc@0.0.37': + resolution: {integrity: sha512-JGAJC/ZZDhcrrmepU4sPLQLIOIAgs5oIK+Ieq90K8fdaNMhfdfqmYatJdgif1NDQtvrSlTOGJDUYHIDunuufOg==} + '@types/ws@7.4.7': resolution: {integrity: sha512-JQbbmxZTZehdc2iszGKs5oC3NFnjeay7mtAWrdt7qNtAVK0g19muApzAy4bm9byz79xa2ZnO/BOBC2R8RC5Lww==} @@ -5360,6 +5746,10 @@ packages: aes-js@4.0.0-beta.5: resolution: {integrity: sha512-G965FqalsNyrPqgEGON7nIx1e/OVENSgiEIzyC63haUMuvNnwIgIjMs52hlTCKhkBny7A2ORNlfY9Zu+jmGk1Q==} + agent-base@5.1.1: + resolution: {integrity: sha512-TMeqbNl2fMW0nMjTEPOwe3J/PRFP4vqeoNuQMG0HlMrtm5QxKqdvAkZ1pRBQ/ulIyDD5Yq0nJ7YbdD8ey0TO3g==} + engines: {node: '>= 6.0.0'} + agent-base@6.0.2: resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} engines: {node: '>= 6.0.0'} @@ -5740,6 +6130,9 @@ packages: before-after-hook@3.0.2: resolution: {integrity: sha512-Nik3Sc0ncrMK4UUdXQmAnRtzmNQTAAXmXIopizwZ1W1t8QmfJj+zL4OA2I7XPTPW5z5TDqv4hRo/JzouDJnX3A==} + bent@7.3.12: + resolution: {integrity: sha512-T3yrKnVGB63zRuoco/7Ybl7BwwGZR0lceoVG5XmQyMIH9s19SV5m+a8qam4if0zQuAmOQTyPTPmsQBdAorGK3w==} + better-sqlite3@11.5.0: resolution: {integrity: sha512-e/6eggfOutzoK0JWiU36jsisdWoHOfN9iWiW/SieKvb7SAa6aGNmBM/UKyp+/wWSXpLlWNN8tCPwoDNPhzUvuQ==} @@ -5807,6 +6200,9 @@ packages: bottleneck@2.19.5: resolution: {integrity: sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==} + bowser@2.11.0: + resolution: {integrity: sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA==} + boxen@6.2.1: resolution: {integrity: sha512-H4PEsJXfFI/Pt8sjDWbHlQPx4zL/bvSQjcilJmaulGt5mLDorHOHpmdXAJcBcmru7PhYSp/cDMWRko4ZUMFkSw==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} @@ -5897,6 +6293,9 @@ packages: resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==} engines: {node: '>= 0.8'} + bytesish@0.4.4: + resolution: {integrity: sha512-i4uu6M4zuMUiyfZN4RU2+i9+peJh//pXhd9x1oSe1LBkZ3LEbCoygu8W0bXTukU1Jme2txKuotpCZRaC3FLxcQ==} + c12@2.0.1: resolution: {integrity: sha512-Z4JgsKXHG37C6PYUtIxCfLJZvo6FyhHJoClwwb9ftUkLpPSkuYqn6Tr+vnaN8hymm0kIbcg6Ey3kv/Q71k5w/A==} peerDependencies: @@ -5940,6 +6339,10 @@ packages: resolution: {integrity: sha512-YrwaA0vEKazPBkn0ipTiMpSajYDSe+KjQfrjhcBMxJt/znbvlHd8Pw/Vamaz5EB4Wfhs3SUR3Z9mwRu/P3s3Yg==} engines: {node: '>=8'} + camelcase-keys@7.0.2: + resolution: {integrity: sha512-Rjs1H+A9R+Ig+4E/9oyB66UC5Mj9Xq3N//vcLf2WzgdTi/3gUu3Z9KoqmlrEG4VuuLK8wJHofxzdQXz/knhiYg==} + engines: {node: '>=12'} + camelcase@5.3.1: resolution: {integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==} engines: {node: '>=6'} @@ -6283,6 +6686,10 @@ packages: resolution: {integrity: sha512-bQJ0YRck5ak3LgtnpKkiabX5pNF7tMUh1BSy2ZBOTh0Dim0BUu6aPPwByIns6/A5Prh8PufSPerMDUklpzes2Q==} engines: {node: '>= 0.8.0'} + compromise@14.14.2: + resolution: {integrity: sha512-g2Qe4zn8TmL7xQFR5Tx7i1txTUnzTPxhE7hDCQM+LDIfvYcriKzqH7eD2J/apUr/hRxvfKbRJ/yYXtN1cgD+Ug==} + engines: {node: '>=12.0.0'} + concat-map@0.0.1: resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} @@ -6615,6 +7022,10 @@ packages: resolution: {integrity: sha512-0LrrStPOdJj+SPCCrGhzryycLjwcgUSHBtxNA8aIDxf0GLsRh1cKYhB00Gd1lDOS4yGH69+SNn13+TWbVHETFQ==} engines: {node: ^10 || ^12.20.0 || ^14.13.0 || >=15.0.0, npm: '>=7.0.0'} + cssstyle@4.1.0: + resolution: {integrity: sha512-h66W1URKpBS5YMI/V8PyXvTMFT8SupJ1IzoIV8IeBC/ji8WVmrO8dGlTi+2dh6whmdk6BiKJLD/ZBkhWbcg6nA==} + engines: {node: '>=18'} + csstype@3.1.3: resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==} @@ -6802,6 +7213,10 @@ packages: resolution: {integrity: sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==} engines: {node: '>= 14'} + data-urls@5.0.0: + resolution: {integrity: sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==} + engines: {node: '>=18'} + dateformat@3.0.3: resolution: {integrity: sha512-jyCETtSl3VMZMWeRo7iY1FL19ges1t55hMo5yaam4Jrsm5EPL89UQkoQRyiI+Yf4k8r2ZpdngkV8hr1lIdjb3Q==} @@ -6864,9 +7279,16 @@ packages: resolution: {integrity: sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==} engines: {node: '>=0.10.0'} + decamelize@5.0.1: + resolution: {integrity: sha512-VfxadyCECXgQlkoEAjeghAr5gY3Hf+IKjKb+X8tGVDtveCjN+USwprd2q3QXBR9T1+x2DG0XZF5/w+7HAtSaXA==} + engines: {node: '>=10'} + decimal.js-light@2.5.1: resolution: {integrity: sha512-qIMFpTMZmny+MMIitAB6D7iVPEorVw6YQRWkvarTkT4tBeSLLiHzcwj6q0MmYSFCiVpiqPJTJEYIrpcPzVEIvg==} + decimal.js@10.4.3: + resolution: {integrity: sha512-VBBaLc1MgL5XpzgIP7ny5Z6Nx3UrRkIViUkPUdtl9aya5amy3De1gsUUSB1g3+3sExYNjCAsAznmukyxCb1GRA==} + decode-named-character-reference@1.0.2: resolution: {integrity: sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==} @@ -7132,9 +7554,27 @@ packages: ecdsa-sig-formatter@1.0.11: resolution: {integrity: sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==} + echogarden@2.0.5: + resolution: {integrity: sha512-6dvmc8MUG8H+ozLbEEZTxhMursUwrfxdrK+HbYeLVRoy676nXt1t/FdktFo5SQsMT6/Zr5l6zB0UQnhSp1cJEw==} + engines: {node: '>=18'} + os: [win32, darwin, linux] + hasBin: true + peerDependencies: + '@echogarden/vosk': ^0.3.39-patched.1 + winax: ^3.4.2 + peerDependenciesMeta: + '@echogarden/vosk': + optional: true + winax: + optional: true + ee-first@1.1.1: resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==} + efrt@2.7.0: + resolution: {integrity: sha512-/RInbCy1d4P6Zdfa+TMVsf/ufZVotat5hCw3QXmWtjU+3pFEOvOQ7ibo3aIxyCJw2leIeAMjmPj+1SLJiCpdrQ==} + engines: {node: '>=12.0.0'} + ejs@3.1.10: resolution: {integrity: sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==} engines: {node: '>=0.10.0'} @@ -7453,6 +7893,9 @@ packages: event-emitter@0.3.5: resolution: {integrity: sha512-D9rRn9y7kLPnJ+hMq7S/nhvoKwwvVJahBi2BPmx3bvbsEdK3W9ii8cBSGjP+72/LnM4n6fo3+dkCX5FeTQruXA==} + event-lite@0.1.3: + resolution: {integrity: sha512-8qz9nOz5VeD2z96elrEKD2U433+L3DWdUdDkOINLGOJvx1GsMBbMn0aCeu28y8/e85A6mCigBiFlYMnTBEGlSw==} + event-target-shim@5.0.1: resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==} engines: {node: '>=6'} @@ -7577,6 +8020,10 @@ packages: fast-uri@3.0.3: resolution: {integrity: sha512-aLrHthzCjH5He4Z2H9YZ+v6Ujb9ocRuW6ZzkJQOrTxleEijANq4v1TsaPaVG1PZcuurEzrLcWRyYBYXD5cEiaw==} + fast-xml-parser@4.4.1: + resolution: {integrity: sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==} + hasBin: true + fastembed@1.14.1: resolution: {integrity: sha512-Y14v+FWZwjNUpQ7mRGYu4N5yF+hZkF7zqzPWzzLbwdIEtYsHy0DSpiVJ+Fg6Oi1fQjrBKASQt0hdSMSjw1/Wtw==} @@ -8053,6 +8500,10 @@ packages: graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + grad-school@0.0.5: + resolution: {integrity: sha512-rXunEHF9M9EkMydTBux7+IryYXEZinRk6g8OBOGDBzo/qWJjhTxy86i5q7lQYpCLHN8Sqv1XX3OIOc7ka2gtvQ==} + engines: {node: '>=8.0.0'} + graphemer@1.4.0: resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==} @@ -8213,6 +8664,10 @@ packages: hpack.js@2.1.6: resolution: {integrity: sha512-zJxVehUdMGIKsRaNt7apO2Gqp0BdqW5yaiGHXXmbpvxgBYVZnAql+BJb4RO5ad2MgpbZKn5G6nMnegrH1FcNYQ==} + html-encoding-sniffer@4.0.0: + resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==} + engines: {node: '>=18'} + html-entities@2.5.2: resolution: {integrity: sha512-K//PSRMQk4FZ78Kyau+mZurHn3FH0Vwr+H36eE0rPbeYkRRi9YxceYPhuN60UwWorxyKHhqoAJl2OFKa4BVtaA==} @@ -8306,6 +8761,10 @@ packages: resolution: {integrity: sha512-V5nVw1PAOgfI3Lmeaj2Exmeg7fenjhRUgz1lPSezy1CuhPYbgQtbQj4jZfEAEMlaL+vupsvhjqCyjzob0yxsmQ==} engines: {node: '>=10.19.0'} + https-proxy-agent@4.0.0: + resolution: {integrity: sha512-zoDhWrkR3of1l9QAL8/scJZyLu8j/gBkcwcaQOZh7Gyh/+uJQzGVETdgT30akuwkpL8HTRfssqI3BZuV18teDg==} + engines: {node: '>= 6.0.0'} + https-proxy-agent@5.0.1: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} @@ -8398,6 +8857,10 @@ packages: resolution: {integrity: sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==} engines: {node: '>=8'} + indent-string@5.0.0: + resolution: {integrity: sha512-m6FAo/spmsW2Ab2fU35JTYwtOKa2yAwXSwgjSv1TJzh4Mh7mC3lzAOVLBprb72XsTrgkEIsl7YrFNAiDiRhIGg==} + engines: {node: '>=12'} + infima@0.2.0-alpha.45: resolution: {integrity: sha512-uyH0zfr1erU1OohLk0fT4Rrb94AOhguWNOcD9uGrSpRvNB+6gZXUoJX5J0NtvzBO10YZ9PgvA4NFgt+fYg8ojw==} engines: {node: '>=12'} @@ -8437,6 +8900,9 @@ packages: resolution: {integrity: sha512-M1WuAmb7pn9zdFRtQYk26ZBoY043Sse0wVDdk4Bppr+JOXyQYybdtvK+l9wUibhtjdjvtoiNy8tk+EgsYIUqKg==} engines: {node: '>=12.0.0'} + int64-buffer@0.1.10: + resolution: {integrity: sha512-v7cSY1J8ydZ0GyjUHqF+1bshJ6cnEVLo9EnjB8p+4HDRPZc9N5jjmvUV7NvEsqQOKyH0pmIBFWXVQbiS0+OBbA==} + internmap@1.0.1: resolution: {integrity: sha512-lDB5YccMydFBtasVtxnZ3MRBHuaoE8GKsppq+EchKL2U4nK/DmEpPHNH8MZe5HkMtpSiTSOZwfN0tzYjO/lJEw==} @@ -8607,6 +9073,9 @@ packages: resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} engines: {node: '>=0.10.0'} + is-potential-custom-element-name@1.0.1: + resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==} + is-promise@2.2.2: resolution: {integrity: sha512-+lP4/6lKUBfQjZ2pdxThZvLUAafmZb8OAxFb8XXtiQmS35INgr85hdOGoEs124ez1FCnZJt6jau/T+alh58QFQ==} @@ -8693,6 +9162,9 @@ packages: isomorphic-fetch@3.0.0: resolution: {integrity: sha512-qvUtwJ3j6qwsF3jLxkZ72qCgjMysPzDfeV240JHiGZsANBYd+EEuu35v7dfrJ9Up0Ak07D7GGSkGhCHTqg/5wA==} + isomorphic-unfetch@3.1.0: + resolution: {integrity: sha512-geDJjpoZ8N0kWexiwkX8F9NkTsXhetLPVbZFQ+JTW239QNOwvB0gniuR1Wc6f0AMTn7/mFGyXvHTifrCp/GH8Q==} + isomorphic-ws@4.0.1: resolution: {integrity: sha512-BhBvN2MBpWTaSHdWRb/bwdZJ1WaehQ2L1KngkCkfLUGF0mAWAT1sQUQacEmQ0jXkFw/czDXPNQSL5u2/Krsz1w==} peerDependencies: @@ -8928,6 +9400,15 @@ packages: jsbn@1.1.0: resolution: {integrity: sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==} + jsdom@25.0.1: + resolution: {integrity: sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==} + engines: {node: '>=18'} + peerDependencies: + canvas: ^2.11.2 + peerDependenciesMeta: + canvas: + optional: true + jsesc@3.0.2: resolution: {integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==} engines: {node: '>=6'} @@ -9490,6 +9971,10 @@ packages: memory-stream@1.0.0: resolution: {integrity: sha512-Wm13VcsPIMdG96dzILfij09PvuS3APtcKNh7M28FsCA/w6+1mjR7hhPmfFNoilX9xU7wTdhsH5lJAm6XNzdtww==} + meow@10.1.5: + resolution: {integrity: sha512-/d+PQ4GKmGvM9Bee/DPa8z3mXs/pkvJE2KEThngVNOqtmljC6K7NMPxtc2JeZYTmpWb9k/TmxjeL18ez3h7vCw==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + meow@8.1.2: resolution: {integrity: sha512-r85E3NdZ+mpYk1C6RjPFEMSE+s1iZMuHtsHAqY0DT3jZczl0diWUZ8g6oU7h0M9cD2EL+PzaYghhCLzR0ZNn5Q==} engines: {node: '>=10'} @@ -9642,6 +10127,9 @@ packages: resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} + microsoft-cognitiveservices-speech-sdk@1.41.0: + resolution: {integrity: sha512-96jyuCBK5TDQm9sHriYuR0UeJ5OsE2WuggDgYSn8L72AsgmjOZxM2BlxgS5BLZuwhIOw91KSc6l1eoTqs+zwfg==} + mime-db@1.33.0: resolution: {integrity: sha512-BHJ/EKruNIqJf/QahvxwQZXKygOQ256myeN/Ew+THcAa5q+PjyTTMMeNQC4DZw5AwfvelsUrA6B67NKMqXDbzQ==} engines: {node: '>= 0.6'} @@ -9854,6 +10342,10 @@ packages: ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + msgpack-lite@0.1.26: + resolution: {integrity: sha512-SZ2IxeqZ1oRFGo0xFGbvBJWMp3yLIY9rlIJyxy8CGrwZn1f0ZK4r6jV/AM1r0FZMDUkWkglOk/eeKIL9g77Nxw==} + hasBin: true + multer@1.4.5-lts.1: resolution: {integrity: sha512-ywPWvcDMeH+z9gQq5qYHCCy+ethsk4goepZ45GLD63fOu0YcNecQxi64nDs3qluZB+murG3/D4dJ7+dGctcCQQ==} engines: {node: '>= 6.0.0'} @@ -10138,6 +10630,9 @@ packages: peerDependencies: webpack: ^4.0.0 || ^5.0.0 + nwsapi@2.2.13: + resolution: {integrity: sha512-cTGB9ptp9dY9A5VbMSe7fQBcl/tt22Vcqdq8+eN93rblOuE0aCFu4aZ2vMwct/2t+lFnosm8RkQW1I0Omb1UtQ==} + nx@20.1.2: resolution: {integrity: sha512-CvjmuQmI0RWLYZxRSIgQZmzsQv6dPp9oI0YZE3L1dagBPfTf5Cun65I0GLt7bdkDnVx2PGYkDbIoJSv2/V+83Q==} hasBin: true @@ -10492,6 +10987,11 @@ packages: path-data-parser@0.1.0: resolution: {integrity: sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==} + path-exists-cli@2.0.0: + resolution: {integrity: sha512-qGr0A87KYCznmvabblxyxnzA/MtPZ28wH+4SCMP4tjTFAbzqwvs5xpUZExAYzq5OgHe5vIswzdH5iosCb8YF/Q==} + engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + hasBin: true + path-exists@3.0.0: resolution: {integrity: sha512-bpC7GYwiDYQ4wYLe+FA8lhRjhQCMcQGuSgGGqDkg/QerRWw9CmGRT0iSOVRSZJ29NMLZgIzqaljJ63oaL4NIJQ==} engines: {node: '>=4'} @@ -11755,6 +12255,10 @@ packages: resolution: {integrity: sha512-zK0TB7Xd6JpCLmlLmufqykGE+/TlOePD6qKClNW7hHDKFh/J7/7gCWGR7joEQEW1bKq3a3yUZSObOoWLFQ4ohg==} engines: {node: '>=8'} + read-pkg-up@8.0.0: + resolution: {integrity: sha512-snVCqPczksT0HS2EC+SxUndvSzn6LRCwpfSvLrIfR5BKDQQZMaI6jPRC9dYvYFDRAuFEAnkwww8kBBNE/3VvzQ==} + engines: {node: '>=12'} + read-pkg@3.0.0: resolution: {integrity: sha512-BLq/cCO9two+lBgiTYNqD6GdtK8s4NpaWrl6/rCO9w0TUS8oJl7cmToOZfRYllKTISY6nt1U7jQ53brmKqY6BA==} engines: {node: '>=4'} @@ -11763,6 +12267,10 @@ packages: resolution: {integrity: sha512-Ug69mNOpfvKDAc2Q8DRpMjjzdtrnv9HcSMX+4VsZxD1aZ6ZzrIE7rlzXBtWTyhULSMKg076AW6WR5iZpD0JiOg==} engines: {node: '>=8'} + read-pkg@6.0.0: + resolution: {integrity: sha512-X1Fu3dPuk/8ZLsMhEj5f4wFAF0DWoK7qhGJvgaijocXxBmSToKfbFtqbxMO7bVjNA1dmE5huAzjXj/ey86iw9Q==} + engines: {node: '>=12'} + read@1.0.7: resolution: {integrity: sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ==} engines: {node: '>=0.8'} @@ -11826,6 +12334,10 @@ packages: resolution: {integrity: sha512-6tDA8g98We0zd0GvVeMT9arEOnTw9qM03L9cJXaCjrip1OO764RDBLBfrB4cwzNGDj5OA5ioymC9GkizgWJDUg==} engines: {node: '>=8'} + redent@4.0.0: + resolution: {integrity: sha512-tYkDkVVtYkSVhuQ4zBgfvciymHaeuel+zFKXShfDnFP5SyVEP7qo70Rf1jTOTCx3vGNAbnEi/xFkcfQVMIBWag==} + engines: {node: '>=12'} + redeyed@2.1.1: resolution: {integrity: sha512-FNpGGo1DycYAdnrKFxCMmKYgo/mILAqtRYbkdQD8Ep/Hk2PQ5+aEAEx+IU713RTDmuBaH0c8P5ZozurNu5ObRQ==} @@ -12041,6 +12553,9 @@ packages: rpc-websockets@9.0.4: resolution: {integrity: sha512-yWZWN0M+bivtoNLnaDbtny4XchdAIF5Q4g/ZsC5UC61Ckbp0QczwO8fg44rV3uYmY4WHd+EZQbn90W1d8ojzqQ==} + rrweb-cssom@0.7.1: + resolution: {integrity: sha512-TrEMa7JGdVm0UThDJSx7ddw5nVm3UJS9o9CCIZ72B1vSyEZoziDqBYP3XIoi/12lKrJR8rE3jeFHMok2F/Mnsg==} + rtl-detect@1.1.2: resolution: {integrity: sha512-PGMBq03+TTG/p/cRB7HCLKJ1MgDIi07+QU1faSjiYRfmY5UsAttV9Hs08jDAHVwcOwmVLcSJkpwyfXszVjWfIQ==} @@ -12077,6 +12592,10 @@ packages: safer-buffer@2.1.2: resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + sam-js@0.3.1: + resolution: {integrity: sha512-X4GUr8Q/T8RgtjnPOssSwYDknxot69PgEAVvwsJ4kB8Lz8wytuHB6n1JqsXLmpdKGD8YR9tqKptm07jmw83eWQ==} + engines: {node: '>= 18.0.0', yarn: '>= 1.22.15'} + sandwich-stream@2.0.2: resolution: {integrity: sha512-jLYV0DORrzY3xaz/S9ydJL6Iz7essZeAfnAavsJ+zsJGZ1MOnsS52yRjU3uF3pJa/lla7+wisp//fxOwOH8SKQ==} engines: {node: '>= 0.10'} @@ -12087,6 +12606,10 @@ packages: sax@1.4.1: resolution: {integrity: sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==} + saxes@6.0.0: + resolution: {integrity: sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==} + engines: {node: '>=v12.22.7'} + scheduler@0.23.2: resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==} @@ -12556,6 +13079,10 @@ packages: resolution: {integrity: sha512-laJTa3Jb+VQpaC6DseHhF7dXVqHTfJPCRDaEbid/drOhgitgYku/letMUqOXFoWV0zIIUbjpdH2t+tYj4bQMRQ==} engines: {node: '>=8'} + strip-indent@4.0.0: + resolution: {integrity: sha512-mnVSV2l+Zv6BLpSD/8V87CW/y9EmmbYzGCIavsnsI6/nwn26DwffM/yztm30Z/I2DY9wdS3vXVCMnHDgZaVNoA==} + engines: {node: '>=12'} + strip-json-comments@2.0.1: resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==} engines: {node: '>=0.10.0'} @@ -12564,6 +13091,9 @@ packages: resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} engines: {node: '>=8'} + strnum@1.0.5: + resolution: {integrity: sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==} + strong-log-transformer@2.1.0: resolution: {integrity: sha512-B3Hgul+z0L9a236FAUC9iZsL+nVHgoCJnqCbN588DjYxvGXaXaaFbfmQ/JhvKjZwsOukuR72XbHv71Qkug0HxA==} engines: {node: '>=4'} @@ -12595,6 +13125,9 @@ packages: engines: {node: '>=16 || 14 >=14.17'} hasBin: true + suffix-thumb@5.0.2: + resolution: {integrity: sha512-I5PWXAFKx3FYnI9a+dQMWNqTxoRt6vdBdb0O+BJ1sxXCWtSoQCusc13E58f+9p4MYx/qCnEMkD5jac6K2j3dgA==} + super-regex@1.0.0: resolution: {integrity: sha512-CY8u7DtbvucKuquCmOFEKhr9Besln7n9uN8eFbwcoGYWXOMW07u2o8njWaiXt11ylS3qoGF55pILjRmPlbodyg==} engines: {node: '>=18'} @@ -12647,6 +13180,9 @@ packages: peerDependencies: vue: '>=3.2.26 < 4' + symbol-tree@3.2.4: + resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==} + synckit@0.9.2: resolution: {integrity: sha512-vrozgXDQwYO72vHjUb/HnFbQx1exDjoKzqx23aXEg2a9VIg2TSFZ8FmeZpTjUCFMYw7mpX4BE2SFu8wI7asYsw==} engines: {node: ^14.18.0 || >=16.0.0} @@ -12820,6 +13356,10 @@ packages: tldts-experimental@6.1.63: resolution: {integrity: sha512-Xqxv4UvuTwC/sslspSbkw/52vvYCeZdEJwnv7VFlQEfYvK8fNuIpz5hoOvO7XuzfjqexMRRnVDYUyjqesTYESg==} + tldts@6.1.63: + resolution: {integrity: sha512-YWwhsjyn9sB/1rOkSRYxvkN/wl5LFM1QDv6F2pVR+pb/jFne4EOBxHfkKVWvDIBEAw9iGOwwubHtQTm0WRT5sQ==} + hasBin: true + tmp@0.0.33: resolution: {integrity: sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==} engines: {node: '>=0.6.0'} @@ -12871,12 +13411,20 @@ packages: resolution: {integrity: sha512-Loo5UUvLD9ScZ6jh8beX1T6sO1w2/MpCRpEP7V280GKMVUQ0Jzar2U3UJPsrdbziLEMMhu3Ujnq//rhiFuIeag==} engines: {node: '>=6'} + tough-cookie@5.0.0: + resolution: {integrity: sha512-FRKsF7cz96xIIeMZ82ehjC3xW2E+O2+v11udrDYewUbszngYhsGa8z6YUMMzO9QJZzzyd0nGGXnML/TReX6W8Q==} + engines: {node: '>=16'} + tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} tr46@1.0.1: resolution: {integrity: sha512-dTpowEjclQ7Kgx5SdBkqRzVhERQXov8/l9Ft9dVM9fmg0W0KQSVaXX9T4i6twCPNtYiZM53lpSSUAwJbFPOHxA==} + tr46@5.0.0: + resolution: {integrity: sha512-tk2G5R2KRwBd+ZN0zaEXpmzdKyOYksXwywulIX95MBODjSzMIuQnQ3m8JxgbhnL1LeVo7lqQKsYa1O3Htl7K5g==} + engines: {node: '>=18'} + tree-kill@1.2.2: resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==} hasBin: true @@ -12892,6 +13440,10 @@ packages: resolution: {integrity: sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw==} engines: {node: '>=8'} + trim-newlines@4.1.1: + resolution: {integrity: sha512-jRKj0n0jXWo6kh62nA5TEh3+4igKDXLvzBJcPpiizP7oOolUrYIxmVBG9TOtHYFHoddUk6YvAkGeGoSVTXfQXQ==} + engines: {node: '>=12'} + trough@1.0.5: resolution: {integrity: sha512-rvuRbTarPXmMb79SmzEp8aqXNKcK+y0XaB298IXueQ8I2PsrATcPBCSPyK/dDNa2iWOhKlfNnOjdAOTBU/nkFA==} @@ -13114,6 +13666,9 @@ packages: resolution: {integrity: sha512-U8uCCl2x9TK3WANvmBavymRzxbfFYG+tAu+fgx3zxQy3qdagQqBLwJVrdyO1TBfUXvfKveMKJZhpvUYoOjM+4g==} engines: {node: '>=18.17'} + unfetch@4.2.0: + resolution: {integrity: sha512-F9p7yYCn6cIW9El1zi0HI6vqpeIvBsr3dSuRO6Xuppb1u5rXpCPmMvLSyECLhybr9isec8Ohl0hPekMVrEinDA==} + unicode-canonical-property-names-ecmascript@2.0.1: resolution: {integrity: sha512-dA8WbNeb2a6oQzAQ55YlT5vQAWGV9WXOsi3SskE3bcCdM0P4SDd+24zS/OCacdRq5BkdsRj9q3Pg6YyQoxIGqg==} engines: {node: '>=4'} @@ -13447,12 +14002,19 @@ packages: typescript: optional: true + w3c-xmlserializer@5.0.0: + resolution: {integrity: sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==} + engines: {node: '>=18'} + walk-up-path@3.0.1: resolution: {integrity: sha512-9YlCL/ynK3CTlrSRrDxZvUauLzAswPCrsaCgilqFevUYpeEW0/3ScEjaa3kbW/T0ghhkEr7mv+fpjqn1Y1YuTA==} walker@1.0.8: resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} + wasm-feature-detect@1.8.0: + resolution: {integrity: sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==} + watchpack@2.4.2: resolution: {integrity: sha512-TnbFSbcOCcDgjZ4piURLCbJ3nJhznVh9kw6F6iokjiFPl8ONxe9A6nMDVXDiNbrSfLILs6vB07F7wLBrwPYzJw==} engines: {node: '>=10.13.0'} @@ -13490,6 +14052,10 @@ packages: webidl-conversions@4.0.2: resolution: {integrity: sha512-YQ+BmxuTgd6UXZW3+ICGfyqRyHXVlD5GtQr5+qjiNW7bF0cqrzX500HVXPBOvgXb5YnzDd+h0zqyv61KUD7+Sg==} + webidl-conversions@7.0.0: + resolution: {integrity: sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==} + engines: {node: '>=12'} + webpack-bundle-analyzer@4.10.2: resolution: {integrity: sha512-vJptkMm9pk5si4Bv922ZbKLV8UTT4zib4FPgXMhgzUny0bfDDkLXAVQs3ly3fS4/TN9ROFtb0NFrm04UXFE/Vw==} engines: {node: '>= 10.13.0'} @@ -13550,9 +14116,21 @@ packages: resolution: {integrity: sha512-OqedPIGOfsDlo31UNwYbCFMSaO9m9G/0faIHj5/dZFDMFqPTcx6UwqyOy3COEaEOg/9VsGIpdqn62W5KhoKSpg==} engines: {node: '>=0.8.0'} + whatwg-encoding@3.1.1: + resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} + engines: {node: '>=18'} + whatwg-fetch@3.6.20: resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==} + whatwg-mimetype@4.0.0: + resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} + engines: {node: '>=18'} + + whatwg-url@14.0.0: + resolution: {integrity: sha512-1lfMEm2IEr7RIV+f4lUNPOqfFL+pO+Xw3fJSqmjX9AbXcXcYOkCe1P6+9VBZB6n94af16NfZf+sSk0JCBZC9aw==} + engines: {node: '>=18'} + whatwg-url@5.0.0: resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} @@ -13688,6 +14266,11 @@ packages: utf-8-validate: optional: true + wtf_wikipedia@10.3.2: + resolution: {integrity: sha512-8C1eUKDK6NaosrtocTEA4fz5Lm5nO6Hb92zLUqI7S1uVVjwEtI0mvSGSdGd/xR1nfSpDYm1ckBG1aLHEAF1pBg==} + engines: {node: '>=12.0.0'} + hasBin: true + xdg-basedir@5.1.0: resolution: {integrity: sha512-GCPAHLvrIH13+c0SuacwvRYj2SxJXQ4kaVTT5xgL3kPrz56XxkF21IGhjSE1+W0aw7gpBWRGXLCPnPby6lSpmQ==} engines: {node: '>=12'} @@ -13696,6 +14279,13 @@ packages: resolution: {integrity: sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==} hasBin: true + xml-name-validator@5.0.0: + resolution: {integrity: sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==} + engines: {node: '>=18'} + + xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + xtend@4.0.2: resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} engines: {node: '>=0.4'} @@ -14143,6 +14733,511 @@ snapshots: qs: 6.13.0 starknet: 6.18.0(encoding@0.1.13) + '@aws-crypto/crc32@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.696.0 + tslib: 2.8.0 + + '@aws-crypto/sha256-browser@5.2.0': + dependencies: + '@aws-crypto/sha256-js': 5.2.0 + '@aws-crypto/supports-web-crypto': 5.2.0 + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-locate-window': 3.693.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.8.0 + + '@aws-crypto/sha256-js@5.2.0': + dependencies: + '@aws-crypto/util': 5.2.0 + '@aws-sdk/types': 3.696.0 + tslib: 2.8.0 + + '@aws-crypto/supports-web-crypto@5.2.0': + dependencies: + tslib: 2.8.0 + + '@aws-crypto/util@5.2.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/util-utf8': 2.3.0 + tslib: 2.8.0 + + '@aws-sdk/client-polly@3.696.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/client-sts': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/credential-provider-node': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/middleware-host-header': 3.696.0 + '@aws-sdk/middleware-logger': 3.696.0 + '@aws-sdk/middleware-recursion-detection': 3.696.0 + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/region-config-resolver': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@aws-sdk/util-user-agent-browser': 3.696.0 + '@aws-sdk/util-user-agent-node': 3.696.0 + '@smithy/config-resolver': 3.0.12 + '@smithy/core': 2.5.4 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/hash-node': 3.0.10 + '@smithy/invalid-dependency': 3.0.10 + '@smithy/middleware-content-length': 3.0.12 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-retry': 3.0.28 + '@smithy/middleware-serde': 3.0.10 + '@smithy/middleware-stack': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/node-http-handler': 3.3.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.28 + '@smithy/util-defaults-mode-node': 3.0.28 + '@smithy/util-endpoints': 2.1.6 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + '@smithy/util-stream': 3.3.1 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0)': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sts': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/credential-provider-node': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/middleware-host-header': 3.696.0 + '@aws-sdk/middleware-logger': 3.696.0 + '@aws-sdk/middleware-recursion-detection': 3.696.0 + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/region-config-resolver': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@aws-sdk/util-user-agent-browser': 3.696.0 + '@aws-sdk/util-user-agent-node': 3.696.0 + '@smithy/config-resolver': 3.0.12 + '@smithy/core': 2.5.4 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/hash-node': 3.0.10 + '@smithy/invalid-dependency': 3.0.10 + '@smithy/middleware-content-length': 3.0.12 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-retry': 3.0.28 + '@smithy/middleware-serde': 3.0.10 + '@smithy/middleware-stack': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/node-http-handler': 3.3.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.28 + '@smithy/util-defaults-mode-node': 3.0.28 + '@smithy/util-endpoints': 2.1.6 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sso@3.696.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/middleware-host-header': 3.696.0 + '@aws-sdk/middleware-logger': 3.696.0 + '@aws-sdk/middleware-recursion-detection': 3.696.0 + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/region-config-resolver': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@aws-sdk/util-user-agent-browser': 3.696.0 + '@aws-sdk/util-user-agent-node': 3.696.0 + '@smithy/config-resolver': 3.0.12 + '@smithy/core': 2.5.4 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/hash-node': 3.0.10 + '@smithy/invalid-dependency': 3.0.10 + '@smithy/middleware-content-length': 3.0.12 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-retry': 3.0.28 + '@smithy/middleware-serde': 3.0.10 + '@smithy/middleware-stack': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/node-http-handler': 3.3.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.28 + '@smithy/util-defaults-mode-node': 3.0.28 + '@smithy/util-endpoints': 2.1.6 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sts@3.696.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/core': 3.696.0 + '@aws-sdk/credential-provider-node': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/middleware-host-header': 3.696.0 + '@aws-sdk/middleware-logger': 3.696.0 + '@aws-sdk/middleware-recursion-detection': 3.696.0 + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/region-config-resolver': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@aws-sdk/util-user-agent-browser': 3.696.0 + '@aws-sdk/util-user-agent-node': 3.696.0 + '@smithy/config-resolver': 3.0.12 + '@smithy/core': 2.5.4 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/hash-node': 3.0.10 + '@smithy/invalid-dependency': 3.0.10 + '@smithy/middleware-content-length': 3.0.12 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-retry': 3.0.28 + '@smithy/middleware-serde': 3.0.10 + '@smithy/middleware-stack': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/node-http-handler': 3.3.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.28 + '@smithy/util-defaults-mode-node': 3.0.28 + '@smithy/util-endpoints': 2.1.6 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-transcribe-streaming@3.696.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/client-sso-oidc': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/client-sts': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/credential-provider-node': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/eventstream-handler-node': 3.696.0 + '@aws-sdk/middleware-eventstream': 3.696.0 + '@aws-sdk/middleware-host-header': 3.696.0 + '@aws-sdk/middleware-logger': 3.696.0 + '@aws-sdk/middleware-recursion-detection': 3.696.0 + '@aws-sdk/middleware-sdk-transcribe-streaming': 3.696.0 + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/middleware-websocket': 3.696.0 + '@aws-sdk/region-config-resolver': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@aws-sdk/util-user-agent-browser': 3.696.0 + '@aws-sdk/util-user-agent-node': 3.696.0 + '@smithy/config-resolver': 3.0.12 + '@smithy/core': 2.5.4 + '@smithy/eventstream-serde-browser': 3.0.13 + '@smithy/eventstream-serde-config-resolver': 3.0.10 + '@smithy/eventstream-serde-node': 3.0.12 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/hash-node': 3.0.10 + '@smithy/invalid-dependency': 3.0.10 + '@smithy/middleware-content-length': 3.0.12 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-retry': 3.0.28 + '@smithy/middleware-serde': 3.0.10 + '@smithy/middleware-stack': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/node-http-handler': 3.3.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-base64': 3.0.0 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-body-length-node': 3.0.0 + '@smithy/util-defaults-mode-browser': 3.0.28 + '@smithy/util-defaults-mode-node': 3.0.28 + '@smithy/util-endpoints': 2.1.6 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/core@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/core': 2.5.4 + '@smithy/node-config-provider': 3.1.11 + '@smithy/property-provider': 3.1.10 + '@smithy/protocol-http': 4.1.7 + '@smithy/signature-v4': 4.2.3 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/util-middleware': 3.0.10 + fast-xml-parser: 4.4.1 + tslib: 2.8.0 + + '@aws-sdk/credential-provider-env@3.696.0': + dependencies: + '@aws-sdk/core': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/credential-provider-http@3.696.0': + dependencies: + '@aws-sdk/core': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/node-http-handler': 3.3.1 + '@smithy/property-provider': 3.1.10 + '@smithy/protocol-http': 4.1.7 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/util-stream': 3.3.1 + tslib: 2.8.0 + + '@aws-sdk/credential-provider-ini@3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0)': + dependencies: + '@aws-sdk/client-sts': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/credential-provider-env': 3.696.0 + '@aws-sdk/credential-provider-http': 3.696.0 + '@aws-sdk/credential-provider-process': 3.696.0 + '@aws-sdk/credential-provider-sso': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0)) + '@aws-sdk/credential-provider-web-identity': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/types': 3.696.0 + '@smithy/credential-provider-imds': 3.2.7 + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-node@3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0)': + dependencies: + '@aws-sdk/credential-provider-env': 3.696.0 + '@aws-sdk/credential-provider-http': 3.696.0 + '@aws-sdk/credential-provider-ini': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/credential-provider-process': 3.696.0 + '@aws-sdk/credential-provider-sso': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0)) + '@aws-sdk/credential-provider-web-identity': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/types': 3.696.0 + '@smithy/credential-provider-imds': 3.2.7 + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - '@aws-sdk/client-sts' + - aws-crt + + '@aws-sdk/credential-provider-process@3.696.0': + dependencies: + '@aws-sdk/core': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/credential-provider-sso@3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))': + dependencies: + '@aws-sdk/client-sso': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/token-providers': 3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0)) + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' + - aws-crt + + '@aws-sdk/credential-provider-web-identity@3.696.0(@aws-sdk/client-sts@3.696.0)': + dependencies: + '@aws-sdk/client-sts': 3.696.0 + '@aws-sdk/core': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/eventstream-handler-node@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/eventstream-codec': 3.1.9 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-eventstream@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-host-header@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-logger@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-recursion-detection@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-sdk-transcribe-streaming@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-format-url': 3.696.0 + '@smithy/eventstream-serde-browser': 3.0.13 + '@smithy/protocol-http': 4.1.7 + '@smithy/signature-v4': 4.2.3 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + uuid: 9.0.1 + + '@aws-sdk/middleware-signing@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/protocol-http': 4.1.7 + '@smithy/signature-v4': 4.2.3 + '@smithy/types': 3.7.1 + '@smithy/util-middleware': 3.0.10 + tslib: 2.8.0 + + '@aws-sdk/middleware-user-agent@3.696.0': + dependencies: + '@aws-sdk/core': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-endpoints': 3.696.0 + '@smithy/core': 2.5.4 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/middleware-websocket@3.696.0': + dependencies: + '@aws-sdk/middleware-signing': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@aws-sdk/util-format-url': 3.696.0 + '@smithy/eventstream-codec': 3.1.9 + '@smithy/eventstream-serde-browser': 3.0.13 + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/protocol-http': 4.1.7 + '@smithy/signature-v4': 4.2.3 + '@smithy/types': 3.7.1 + '@smithy/util-hex-encoding': 3.0.0 + tslib: 2.8.0 + + '@aws-sdk/region-config-resolver@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/node-config-provider': 3.1.11 + '@smithy/types': 3.7.1 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.10 + tslib: 2.8.0 + + '@aws-sdk/token-providers@3.696.0(@aws-sdk/client-sso-oidc@3.696.0(@aws-sdk/client-sts@3.696.0))': + dependencies: + '@aws-sdk/client-sso-oidc': 3.696.0(@aws-sdk/client-sts@3.696.0) + '@aws-sdk/types': 3.696.0 + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/types@3.696.0': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/util-endpoints@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/types': 3.7.1 + '@smithy/util-endpoints': 2.1.6 + tslib: 2.8.0 + + '@aws-sdk/util-format-url@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/querystring-builder': 3.0.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@aws-sdk/util-locate-window@3.693.0': + dependencies: + tslib: 2.8.0 + + '@aws-sdk/util-user-agent-browser@3.696.0': + dependencies: + '@aws-sdk/types': 3.696.0 + '@smithy/types': 3.7.1 + bowser: 2.11.0 + tslib: 2.8.0 + + '@aws-sdk/util-user-agent-node@3.696.0': + dependencies: + '@aws-sdk/middleware-user-agent': 3.696.0 + '@aws-sdk/types': 3.696.0 + '@smithy/node-config-provider': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + '@babel/code-frame@7.26.2': dependencies: '@babel/helper-validator-identifier': 7.25.9 @@ -16295,12 +17390,39 @@ snapshots: - uglify-js - webpack-cli + '@echogarden/audio-io@0.2.3': {} + '@echogarden/espeak-ng-emscripten@0.3.0': {} + '@echogarden/espeak-ng-emscripten@0.3.3': {} + + '@echogarden/fasttext-wasm@0.1.0': {} + + '@echogarden/flite-wasi@0.1.1': {} + + '@echogarden/fvad-wasm@0.2.0': {} + '@echogarden/kissfft-wasm@0.2.0': {} + '@echogarden/pffft-wasm@0.4.2': {} + + '@echogarden/rnnoise-wasm@0.2.0': {} + + '@echogarden/rubberband-wasm@0.2.0': {} + + '@echogarden/sonic-wasm@0.2.0': {} + '@echogarden/speex-resampler-wasm@0.2.1': {} + '@echogarden/speex-resampler-wasm@0.3.0': {} + + '@echogarden/svoxpico-wasm@0.2.0': {} + + '@echogarden/transformers-nodejs-lite@2.17.1-lite.3(onnxruntime-node@1.20.0)': + dependencies: + '@huggingface/jinja': 0.2.2 + onnxruntime-node: 1.20.0 + '@emnapi/core@1.3.1': dependencies: '@emnapi/wasi-threads': 1.0.1 @@ -16618,6 +17740,8 @@ snapshots: dependencies: '@hapi/hoek': 9.3.0 + '@huggingface/jinja@0.2.2': {} + '@huggingface/jinja@0.3.2': {} '@huggingface/transformers@3.0.1': @@ -17127,6 +18251,8 @@ snapshots: dependencies: langium: 3.0.0 + '@mozilla/readability@0.5.0': {} + '@napi-rs/wasm-runtime@0.2.4': dependencies: '@emnapi/core': 1.3.1 @@ -17599,7 +18725,7 @@ snapshots: '@octokit/request-error': 3.0.3 '@octokit/types': 9.3.2 is-plain-object: 5.0.0 - node-fetch: 2.6.7(encoding@0.1.13) + node-fetch: 2.7.0(encoding@0.1.13) universal-user-agent: 6.0.1 transitivePeerDependencies: - encoding @@ -18202,6 +19328,303 @@ snapshots: micromark-util-character: 1.2.0 micromark-util-symbol: 1.1.0 + '@smithy/abort-controller@3.1.8': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/config-resolver@3.0.12': + dependencies: + '@smithy/node-config-provider': 3.1.11 + '@smithy/types': 3.7.1 + '@smithy/util-config-provider': 3.0.0 + '@smithy/util-middleware': 3.0.10 + tslib: 2.8.0 + + '@smithy/core@2.5.4': + dependencies: + '@smithy/middleware-serde': 3.0.10 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + '@smithy/util-body-length-browser': 3.0.0 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-stream': 3.3.1 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + + '@smithy/credential-provider-imds@3.2.7': + dependencies: + '@smithy/node-config-provider': 3.1.11 + '@smithy/property-provider': 3.1.10 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + tslib: 2.8.0 + + '@smithy/eventstream-codec@3.1.9': + dependencies: + '@aws-crypto/crc32': 5.2.0 + '@smithy/types': 3.7.1 + '@smithy/util-hex-encoding': 3.0.0 + tslib: 2.8.0 + + '@smithy/eventstream-serde-browser@3.0.13': + dependencies: + '@smithy/eventstream-serde-universal': 3.0.12 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/eventstream-serde-config-resolver@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/eventstream-serde-node@3.0.12': + dependencies: + '@smithy/eventstream-serde-universal': 3.0.12 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/eventstream-serde-universal@3.0.12': + dependencies: + '@smithy/eventstream-codec': 3.1.9 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/fetch-http-handler@4.1.1': + dependencies: + '@smithy/protocol-http': 4.1.7 + '@smithy/querystring-builder': 3.0.10 + '@smithy/types': 3.7.1 + '@smithy/util-base64': 3.0.0 + tslib: 2.8.0 + + '@smithy/hash-node@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + + '@smithy/invalid-dependency@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/is-array-buffer@2.2.0': + dependencies: + tslib: 2.8.0 + + '@smithy/is-array-buffer@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/middleware-content-length@3.0.12': + dependencies: + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/middleware-endpoint@3.2.4': + dependencies: + '@smithy/core': 2.5.4 + '@smithy/middleware-serde': 3.0.10 + '@smithy/node-config-provider': 3.1.11 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + '@smithy/url-parser': 3.0.10 + '@smithy/util-middleware': 3.0.10 + tslib: 2.8.0 + + '@smithy/middleware-retry@3.0.28': + dependencies: + '@smithy/node-config-provider': 3.1.11 + '@smithy/protocol-http': 4.1.7 + '@smithy/service-error-classification': 3.0.10 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-retry': 3.0.10 + tslib: 2.8.0 + uuid: 9.0.1 + + '@smithy/middleware-serde@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/middleware-stack@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/node-config-provider@3.1.11': + dependencies: + '@smithy/property-provider': 3.1.10 + '@smithy/shared-ini-file-loader': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/node-http-handler@3.3.1': + dependencies: + '@smithy/abort-controller': 3.1.8 + '@smithy/protocol-http': 4.1.7 + '@smithy/querystring-builder': 3.0.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/property-provider@3.1.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/protocol-http@4.1.7': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/querystring-builder@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + '@smithy/util-uri-escape': 3.0.0 + tslib: 2.8.0 + + '@smithy/querystring-parser@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/service-error-classification@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + + '@smithy/shared-ini-file-loader@3.1.11': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/signature-v4@4.2.3': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + '@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-middleware': 3.0.10 + '@smithy/util-uri-escape': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + + '@smithy/smithy-client@3.4.5': + dependencies: + '@smithy/core': 2.5.4 + '@smithy/middleware-endpoint': 3.2.4 + '@smithy/middleware-stack': 3.0.10 + '@smithy/protocol-http': 4.1.7 + '@smithy/types': 3.7.1 + '@smithy/util-stream': 3.3.1 + tslib: 2.8.0 + + '@smithy/types@3.7.1': + dependencies: + tslib: 2.8.0 + + '@smithy/url-parser@3.0.10': + dependencies: + '@smithy/querystring-parser': 3.0.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/util-base64@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + + '@smithy/util-body-length-browser@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/util-body-length-node@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/util-buffer-from@2.2.0': + dependencies: + '@smithy/is-array-buffer': 2.2.0 + tslib: 2.8.0 + + '@smithy/util-buffer-from@3.0.0': + dependencies: + '@smithy/is-array-buffer': 3.0.0 + tslib: 2.8.0 + + '@smithy/util-config-provider@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/util-defaults-mode-browser@3.0.28': + dependencies: + '@smithy/property-provider': 3.1.10 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + bowser: 2.11.0 + tslib: 2.8.0 + + '@smithy/util-defaults-mode-node@3.0.28': + dependencies: + '@smithy/config-resolver': 3.0.12 + '@smithy/credential-provider-imds': 3.2.7 + '@smithy/node-config-provider': 3.1.11 + '@smithy/property-provider': 3.1.10 + '@smithy/smithy-client': 3.4.5 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/util-endpoints@2.1.6': + dependencies: + '@smithy/node-config-provider': 3.1.11 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/util-hex-encoding@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/util-middleware@3.0.10': + dependencies: + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/util-retry@3.0.10': + dependencies: + '@smithy/service-error-classification': 3.0.10 + '@smithy/types': 3.7.1 + tslib: 2.8.0 + + '@smithy/util-stream@3.3.1': + dependencies: + '@smithy/fetch-http-handler': 4.1.1 + '@smithy/node-http-handler': 3.3.1 + '@smithy/types': 3.7.1 + '@smithy/util-base64': 3.0.0 + '@smithy/util-buffer-from': 3.0.0 + '@smithy/util-hex-encoding': 3.0.0 + '@smithy/util-utf8': 3.0.0 + tslib: 2.8.0 + + '@smithy/util-uri-escape@3.0.0': + dependencies: + tslib: 2.8.0 + + '@smithy/util-utf8@2.3.0': + dependencies: + '@smithy/util-buffer-from': 2.2.0 + tslib: 2.8.0 + + '@smithy/util-utf8@3.0.0': + dependencies: + '@smithy/util-buffer-from': 3.0.0 + tslib: 2.8.0 + '@solana/buffer-layout-utils@0.2.0(bufferutil@4.0.8)(encoding@0.1.13)(utf-8-validate@5.0.10)': dependencies: '@solana/buffer-layout': 4.0.1 @@ -19062,6 +20485,8 @@ snapshots: '@types/wav-encoder@1.3.3': {} + '@types/webrtc@0.0.37': {} + '@types/ws@7.4.7': dependencies: '@types/node': 22.8.4 @@ -19516,6 +20941,8 @@ snapshots: aes-js@4.0.0-beta.5: {} + agent-base@5.1.1: {} + agent-base@6.0.2: dependencies: debug: 4.3.7(supports-color@5.5.0) @@ -19968,6 +21395,12 @@ snapshots: before-after-hook@3.0.2: {} + bent@7.3.12: + dependencies: + bytesish: 0.4.4 + caseless: 0.12.0 + is-stream: 2.0.1 + better-sqlite3@11.5.0: dependencies: bindings: 1.5.0 @@ -20051,6 +21484,8 @@ snapshots: bottleneck@2.19.5: {} + bowser@2.11.0: {} + boxen@6.2.1: dependencies: ansi-align: 3.0.1 @@ -20156,6 +21591,8 @@ snapshots: bytes@3.1.2: {} + bytesish@0.4.4: {} + c12@2.0.1: dependencies: chokidar: 4.0.1 @@ -20223,6 +21660,13 @@ snapshots: map-obj: 4.3.0 quick-lru: 4.0.1 + camelcase-keys@7.0.2: + dependencies: + camelcase: 6.3.0 + map-obj: 4.3.0 + quick-lru: 5.1.1 + type-fest: 1.4.0 + camelcase@5.3.1: {} camelcase@6.3.0: {} @@ -20570,6 +22014,12 @@ snapshots: transitivePeerDependencies: - supports-color + compromise@14.14.2: + dependencies: + efrt: 2.7.0 + grad-school: 0.0.5 + suffix-thumb: 5.0.2 + concat-map@0.0.1: {} concat-stream@1.6.2: @@ -20990,6 +22440,10 @@ snapshots: dependencies: css-tree: 2.2.1 + cssstyle@4.1.0: + dependencies: + rrweb-cssom: 0.7.1 + csstype@3.1.3: {} csv-writer@1.6.0: {} @@ -21199,6 +22653,11 @@ snapshots: data-uri-to-buffer@6.0.2: {} + data-urls@5.0.0: + dependencies: + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + dateformat@3.0.3: {} dayjs@1.11.13: {} @@ -21248,8 +22707,12 @@ snapshots: decamelize@1.2.0: {} + decamelize@5.0.1: {} + decimal.js-light@2.5.1: {} + decimal.js@10.4.3: {} + decode-named-character-reference@1.0.2: dependencies: character-entities: 2.0.2 @@ -21518,8 +22981,62 @@ snapshots: dependencies: safe-buffer: 5.2.1 + echogarden@2.0.5(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(encoding@0.1.13)(utf-8-validate@5.0.10)(zod@3.23.8): + dependencies: + '@aws-sdk/client-polly': 3.696.0 + '@aws-sdk/client-transcribe-streaming': 3.696.0 + '@echogarden/audio-io': 0.2.3 + '@echogarden/espeak-ng-emscripten': 0.3.3 + '@echogarden/fasttext-wasm': 0.1.0 + '@echogarden/flite-wasi': 0.1.1 + '@echogarden/fvad-wasm': 0.2.0 + '@echogarden/pffft-wasm': 0.4.2 + '@echogarden/rnnoise-wasm': 0.2.0 + '@echogarden/rubberband-wasm': 0.2.0 + '@echogarden/sonic-wasm': 0.2.0 + '@echogarden/speex-resampler-wasm': 0.3.0 + '@echogarden/svoxpico-wasm': 0.2.0 + '@echogarden/transformers-nodejs-lite': 2.17.1-lite.3(onnxruntime-node@1.20.0) + '@mozilla/readability': 0.5.0 + alawmulaw: 6.0.0 + chalk: 5.3.0 + cldr-segmentation: 2.2.1 + command-exists: 1.2.9 + compromise: 14.14.2 + fs-extra: 11.2.0 + gaxios: 6.7.1(encoding@0.1.13) + graceful-fs: 4.2.11 + html-to-text: 9.0.5 + import-meta-resolve: 4.1.0 + jieba-wasm: 2.2.0 + jsdom: 25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10) + json5: 2.2.3 + kuromoji: 0.1.2 + microsoft-cognitiveservices-speech-sdk: 1.41.0(bufferutil@4.0.8)(utf-8-validate@5.0.10) + msgpack-lite: 0.1.26 + onnxruntime-node: 1.20.0 + openai: 4.73.0(encoding@0.1.13)(zod@3.23.8) + sam-js: 0.3.1 + strip-ansi: 7.1.0 + tar: 7.4.3 + tiktoken: 1.0.17 + tinyld: 1.3.4 + wasm-feature-detect: 1.8.0 + ws: 8.18.0(bufferutil@4.0.8)(utf-8-validate@5.0.10) + wtf_wikipedia: 10.3.2(encoding@0.1.13) + transitivePeerDependencies: + - aws-crt + - bufferutil + - canvas + - encoding + - supports-color + - utf-8-validate + - zod + ee-first@1.1.1: {} + efrt@2.7.0: {} + ejs@3.1.10: dependencies: jake: 10.9.2 @@ -21920,6 +23437,8 @@ snapshots: d: 1.0.2 es5-ext: 0.10.64 + event-lite@0.1.3: {} + event-target-shim@5.0.1: {} eventemitter2@0.4.14: {} @@ -22044,7 +23563,7 @@ snapshots: extract-zip@2.0.1: dependencies: - debug: 4.3.4 + debug: 4.3.7(supports-color@5.5.0) get-stream: 5.2.0 yauzl: 2.10.0 optionalDependencies: @@ -22086,6 +23605,10 @@ snapshots: fast-uri@3.0.3: {} + fast-xml-parser@4.4.1: + dependencies: + strnum: 1.0.5 + fastembed@1.14.1: dependencies: '@anush008/tokenizers': 0.0.0 @@ -22649,6 +24172,8 @@ snapshots: graceful-fs@4.2.11: {} + grad-school@0.0.5: {} + graphemer@1.4.0: {} gray-matter@4.0.3: @@ -22913,6 +24438,10 @@ snapshots: readable-stream: 2.3.8 wbuf: 1.7.3 + html-encoding-sniffer@4.0.0: + dependencies: + whatwg-encoding: 3.1.1 + html-entities@2.5.2: {} html-escaper@2.0.2: {} @@ -23038,6 +24567,13 @@ snapshots: quick-lru: 5.1.1 resolve-alpn: 1.2.1 + https-proxy-agent@4.0.0: + dependencies: + agent-base: 5.1.1 + debug: 4.3.7(supports-color@5.5.0) + transitivePeerDependencies: + - supports-color + https-proxy-agent@5.0.1: dependencies: agent-base: 6.0.2 @@ -23115,6 +24651,8 @@ snapshots: indent-string@4.0.0: {} + indent-string@5.0.0: {} + infima@0.2.0-alpha.45: {} inflight@1.0.6: @@ -23166,6 +24704,8 @@ snapshots: through: 2.3.8 wrap-ansi: 6.2.0 + int64-buffer@0.1.10: {} + internmap@1.0.1: {} internmap@2.0.3: {} @@ -23301,6 +24841,8 @@ snapshots: is-plain-object@5.0.0: {} + is-potential-custom-element-name@1.0.1: {} + is-promise@2.2.2: {} is-reference@1.2.1: @@ -23364,6 +24906,13 @@ snapshots: transitivePeerDependencies: - encoding + isomorphic-unfetch@3.1.0(encoding@0.1.13): + dependencies: + node-fetch: 2.7.0(encoding@0.1.13) + unfetch: 4.2.0 + transitivePeerDependencies: + - encoding + isomorphic-ws@4.0.1(ws@7.5.10(bufferutil@4.0.8)(utf-8-validate@5.0.10)): dependencies: ws: 7.5.10(bufferutil@4.0.8)(utf-8-validate@5.0.10) @@ -23811,6 +25360,36 @@ snapshots: jsbn@1.1.0: {} + jsdom@25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10): + dependencies: + cssstyle: 4.1.0 + data-urls: 5.0.0 + decimal.js: 10.4.3 + form-data: 4.0.1 + html-encoding-sniffer: 4.0.0 + http-proxy-agent: 7.0.2 + https-proxy-agent: 7.0.5 + is-potential-custom-element-name: 1.0.1 + nwsapi: 2.2.13 + parse5: 7.2.1 + rrweb-cssom: 0.7.1 + saxes: 6.0.0 + symbol-tree: 3.2.4 + tough-cookie: 5.0.0 + w3c-xmlserializer: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-encoding: 3.1.1 + whatwg-mimetype: 4.0.0 + whatwg-url: 14.0.0 + ws: 8.18.0(bufferutil@4.0.8)(utf-8-validate@5.0.10) + xml-name-validator: 5.0.0 + optionalDependencies: + canvas: 2.11.2(encoding@0.1.13) + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + jsesc@3.0.2: {} json-bigint@1.0.0: @@ -24578,6 +26157,21 @@ snapshots: dependencies: readable-stream: 3.6.2 + meow@10.1.5: + dependencies: + '@types/minimist': 1.2.5 + camelcase-keys: 7.0.2 + decamelize: 5.0.1 + decamelize-keys: 1.1.1 + hard-rejection: 2.1.0 + minimist-options: 4.1.0 + normalize-package-data: 3.0.3 + read-pkg-up: 8.0.0 + redent: 4.0.0 + trim-newlines: 4.1.1 + type-fest: 1.4.0 + yargs-parser: 20.2.9 + meow@8.1.2: dependencies: '@types/minimist': 1.2.5 @@ -24934,6 +26528,19 @@ snapshots: braces: 3.0.3 picomatch: 2.3.1 + microsoft-cognitiveservices-speech-sdk@1.41.0(bufferutil@4.0.8)(utf-8-validate@5.0.10): + dependencies: + '@types/webrtc': 0.0.37 + agent-base: 6.0.2 + bent: 7.3.12 + https-proxy-agent: 4.0.0 + uuid: 9.0.1 + ws: 7.5.10(bufferutil@4.0.8)(utf-8-validate@5.0.10) + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + mime-db@1.33.0: {} mime-db@1.52.0: {} @@ -25113,6 +26720,13 @@ snapshots: ms@2.1.3: {} + msgpack-lite@0.1.26: + dependencies: + event-lite: 0.1.3 + ieee754: 1.2.1 + int64-buffer: 0.1.10 + isarray: 1.0.0 + multer@1.4.5-lts.1: dependencies: append-field: 1.0.0 @@ -25453,6 +27067,8 @@ snapshots: schema-utils: 3.3.0 webpack: 5.96.1(@swc/core@1.9.2(@swc/helpers@0.5.15)) + nwsapi@2.2.13: {} + nx@20.1.2(@swc/core@1.9.2(@swc/helpers@0.5.15)): dependencies: '@napi-rs/wasm-runtime': 0.2.4 @@ -25663,7 +27279,7 @@ snapshots: ora@5.3.0: dependencies: bl: 4.1.0 - chalk: 4.1.0 + chalk: 4.1.2 cli-cursor: 3.1.0 cli-spinners: 2.6.1 is-interactive: 1.0.0 @@ -25918,6 +27534,11 @@ snapshots: path-data-parser@0.1.0: {} + path-exists-cli@2.0.0: + dependencies: + meow: 10.1.5 + path-exists: 5.0.0 + path-exists@3.0.0: {} path-exists@4.0.0: {} @@ -27278,6 +28899,12 @@ snapshots: read-pkg: 5.2.0 type-fest: 0.8.1 + read-pkg-up@8.0.0: + dependencies: + find-up: 5.0.0 + read-pkg: 6.0.0 + type-fest: 1.4.0 + read-pkg@3.0.0: dependencies: load-json-file: 4.0.0 @@ -27291,6 +28918,13 @@ snapshots: parse-json: 5.2.0 type-fest: 0.6.0 + read-pkg@6.0.0: + dependencies: + '@types/normalize-package-data': 2.4.4 + normalize-package-data: 3.0.3 + parse-json: 5.2.0 + type-fest: 1.4.0 + read@1.0.7: dependencies: mute-stream: 0.0.8 @@ -27384,6 +29018,11 @@ snapshots: indent-string: 4.0.0 strip-indent: 3.0.0 + redent@4.0.0: + dependencies: + indent-string: 5.0.0 + strip-indent: 4.0.0 + redeyed@2.1.1: dependencies: esprima: 4.0.1 @@ -27691,6 +29330,8 @@ snapshots: bufferutil: 4.0.8 utf-8-validate: 5.0.10 + rrweb-cssom@0.7.1: {} + rtl-detect@1.1.2: {} rtlcss@4.3.0: @@ -27724,6 +29365,8 @@ snapshots: safer-buffer@2.1.2: {} + sam-js@0.3.1: {} + sandwich-stream@2.0.2: {} save-pixels-jpeg-js-upgrade@2.3.4-jpeg-js-upgrade.0: @@ -27738,6 +29381,10 @@ snapshots: sax@1.4.1: {} + saxes@6.0.0: + dependencies: + xmlchars: 2.2.0 + scheduler@0.23.2: dependencies: loose-envify: 1.4.0 @@ -28318,10 +29965,16 @@ snapshots: dependencies: min-indent: 1.0.1 + strip-indent@4.0.0: + dependencies: + min-indent: 1.0.1 + strip-json-comments@2.0.1: {} strip-json-comments@3.1.1: {} + strnum@1.0.5: {} + strong-log-transformer@2.1.0: dependencies: duplexer: 0.1.2 @@ -28360,6 +30013,8 @@ snapshots: pirates: 4.0.6 ts-interface-checker: 0.1.13 + suffix-thumb@5.0.2: {} + super-regex@1.0.0: dependencies: function-timeout: 1.0.2 @@ -28423,6 +30078,8 @@ snapshots: dependencies: vue: 3.5.13(typescript@5.6.3) + symbol-tree@3.2.4: {} + synckit@0.9.2: dependencies: '@pkgr/core': 0.1.1 @@ -28621,6 +30278,10 @@ snapshots: dependencies: tldts-core: 6.1.63 + tldts@6.1.63: + dependencies: + tldts-core: 6.1.63 + tmp@0.0.33: dependencies: os-tmpdir: 1.0.2 @@ -28674,12 +30335,20 @@ snapshots: universalify: 0.2.0 url-parse: 1.5.10 + tough-cookie@5.0.0: + dependencies: + tldts: 6.1.63 + tr46@0.0.3: {} tr46@1.0.1: dependencies: punycode: 2.3.1 + tr46@5.0.0: + dependencies: + punycode: 2.3.1 + tree-kill@1.2.2: {} treeverse@3.0.0: {} @@ -28688,6 +30357,8 @@ snapshots: trim-newlines@3.0.1: {} + trim-newlines@4.1.1: {} + trough@1.0.5: {} trough@2.2.0: {} @@ -28914,6 +30585,8 @@ snapshots: undici@6.19.8: {} + unfetch@4.2.0: {} + unicode-canonical-property-names-ecmascript@2.0.1: {} unicode-emoji-modifier-base@1.0.0: {} @@ -29207,7 +30880,7 @@ snapshots: fsevents: 2.3.3 terser: 5.36.0 - vitest@2.1.5(@types/node@22.8.4)(terser@5.36.0): + vitest@2.1.5(@types/node@22.8.4)(jsdom@25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10))(terser@5.36.0): dependencies: '@vitest/expect': 2.1.5 '@vitest/mocker': 2.1.5(vite@5.4.11(@types/node@22.8.4)(terser@5.36.0)) @@ -29231,6 +30904,7 @@ snapshots: why-is-node-running: 2.3.0 optionalDependencies: '@types/node': 22.8.4 + jsdom: 25.0.1(bufferutil@4.0.8)(canvas@2.11.2(encoding@0.1.13))(utf-8-validate@5.0.10) transitivePeerDependencies: - less - lightningcss @@ -29276,12 +30950,18 @@ snapshots: optionalDependencies: typescript: 5.6.3 + w3c-xmlserializer@5.0.0: + dependencies: + xml-name-validator: 5.0.0 + walk-up-path@3.0.1: {} walker@1.0.8: dependencies: makeerror: 1.0.12 + wasm-feature-detect@1.8.0: {} + watchpack@2.4.2: dependencies: glob-to-regexp: 0.4.1 @@ -29319,6 +30999,8 @@ snapshots: webidl-conversions@4.0.2: {} + webidl-conversions@7.0.0: {} + webpack-bundle-analyzer@4.10.2(bufferutil@4.0.8)(utf-8-validate@5.0.10): dependencies: '@discoveryjs/json-ext': 0.5.7 @@ -29450,8 +31132,19 @@ snapshots: websocket-extensions@0.1.4: {} + whatwg-encoding@3.1.1: + dependencies: + iconv-lite: 0.6.3 + whatwg-fetch@3.6.20: {} + whatwg-mimetype@4.0.0: {} + + whatwg-url@14.0.0: + dependencies: + tr46: 5.0.0 + webidl-conversions: 7.0.0 + whatwg-url@5.0.0: dependencies: tr46: 0.0.3 @@ -29580,12 +31273,23 @@ snapshots: bufferutil: 4.0.8 utf-8-validate: 5.0.10 + wtf_wikipedia@10.3.2(encoding@0.1.13): + dependencies: + isomorphic-unfetch: 3.1.0(encoding@0.1.13) + path-exists-cli: 2.0.0 + transitivePeerDependencies: + - encoding + xdg-basedir@5.1.0: {} xml-js@1.6.11: dependencies: sax: 1.4.1 + xml-name-validator@5.0.0: {} + + xmlchars@2.2.0: {} + xtend@4.0.2: {} y18n@5.0.8: {} From 79f3ce42d75b3d188eb78dac585bb1ea6fd919b7 Mon Sep 17 00:00:00 2001 From: ponderingdemocritus Date: Fri, 22 Nov 2024 16:20:29 +1100 Subject: [PATCH 2/2] cleanup --- packages/plugin-node/src/services/speech.ts | 47 ++++++++++----------- pnpm-lock.yaml | 8 ++-- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/packages/plugin-node/src/services/speech.ts b/packages/plugin-node/src/services/speech.ts index 8170e9fc7d..a8a81e99a5 100644 --- a/packages/plugin-node/src/services/speech.ts +++ b/packages/plugin-node/src/services/speech.ts @@ -37,32 +37,31 @@ async function textToSpeech(runtime: IAgentRuntime, text: string) { await validateNodeConfig(runtime); try { - const body = { - model_id: runtime.getSetting("ELEVENLABS_MODEL_ID"), - text: text, - voice_settings: { - similarity_boost: runtime.getSetting( - "ELEVENLABS_VOICE_SIMILARITY_BOOST" - ), - stability: runtime.getSetting("ELEVENLABS_VOICE_STABILITY"), - style: runtime.getSetting("ELEVENLABS_VOICE_STYLE"), - use_speaker_boost: runtime.getSetting( - "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" - ), - }, - }; - const options = { - method: "POST", - headers: { - "Content-Type": "application/json", - "xi-api-key": runtime.getSetting("ELEVENLABS_XI_API_KEY"), - }, - body: JSON.stringify(body), - }; - const response = await fetch( `https://api.elevenlabs.io/v1/text-to-speech/${runtime.getSetting("ELEVENLABS_VOICE_ID")}/stream?optimize_streaming_latency=${runtime.getSetting("ELEVENLABS_OPTIMIZE_STREAMING_LATENCY")}&output_format=${runtime.getSetting("ELEVENLABS_OUTPUT_FORMAT")}`, - options + { + method: "POST", + headers: { + "Content-Type": "application/json", + "xi-api-key": runtime.getSetting("ELEVENLABS_XI_API_KEY"), + }, + body: JSON.stringify({ + model_id: runtime.getSetting("ELEVENLABS_MODEL_ID"), + text: text, + voice_settings: { + similarity_boost: runtime.getSetting( + "ELEVENLABS_VOICE_SIMILARITY_BOOST" + ), + stability: runtime.getSetting( + "ELEVENLABS_VOICE_STABILITY" + ), + style: runtime.getSetting("ELEVENLABS_VOICE_STYLE"), + use_speaker_boost: runtime.getSetting( + "ELEVENLABS_VOICE_USE_SPEAKER_BOOST" + ), + }, + }), + } ); const status = response.status; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ee813316b3..8767a3700b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -18725,7 +18725,7 @@ snapshots: '@octokit/request-error': 3.0.3 '@octokit/types': 9.3.2 is-plain-object: 5.0.0 - node-fetch: 2.7.0(encoding@0.1.13) + node-fetch: 2.6.7(encoding@0.1.13) universal-user-agent: 6.0.1 transitivePeerDependencies: - encoding @@ -23563,7 +23563,7 @@ snapshots: extract-zip@2.0.1: dependencies: - debug: 4.3.7(supports-color@5.5.0) + debug: 4.3.4 get-stream: 5.2.0 yauzl: 2.10.0 optionalDependencies: @@ -25801,7 +25801,7 @@ snapshots: log-symbols@4.1.0: dependencies: - chalk: 4.1.2 + chalk: 4.1.0 is-unicode-supported: 0.1.0 log-symbols@6.0.0: @@ -27279,7 +27279,7 @@ snapshots: ora@5.3.0: dependencies: bl: 4.1.0 - chalk: 4.1.2 + chalk: 4.1.0 cli-cursor: 3.1.0 cli-spinners: 2.6.1 is-interactive: 1.0.0