Skip to content

Commit

Permalink
chore: reformatted
Browse files Browse the repository at this point in the history
  • Loading branch information
ThEditor committed Jul 7, 2024
1 parent cf00c16 commit 211b98d
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 76 deletions.
171 changes: 97 additions & 74 deletions packages/_common/src/real-time-vad.ts
Original file line number Diff line number Diff line change
@@ -1,133 +1,156 @@
import * as vad from '../';
import { EventEmitter } from 'events';
import * as vad from "../"
import { EventEmitter } from "events"

/**
 * Options for {@link RealTimeVAD}. All durations are in seconds.
 */
export interface RealTimeVADOptions {
  /** Input sample rate in Hz (default 16000). */
  sampleRate?: number
  /** Minimum buffered audio before VAD runs (default 1s). */
  minBufferDuration?: number
  /** Maximum audio retained in the rolling buffer (default 5s). */
  maxBufferDuration?: number
  /** Audio carried over between VAD runs (default 0.1s). */
  overlapDuration?: number
  /** Silence required after speech before "end" is emitted (default 0.5s). */
  silenceThreshold?: number
}

/** Payload of the "start" event — timestamp where speech began. */
export interface SpeechSegmentStart {
  start: number
}

/** Payload of the "data" event — one detected speech segment. */
export interface SpeechSegmentData {
  /** Audio in the same representation the caller supplied (Float32Array or 16-bit PCM Buffer). */
  audio: Float32Array | Buffer
  start: number
  end: number
}

/** Payload of the "end" event — timestamp where speech ended. */
export interface SpeechSegmentEnd {
  end: number
}

export class RealTimeVAD extends EventEmitter {
private sampleRateHertz: number;
private minBufferSize: number;
private maxBufferSize: number;
private overlapDuration: number;
private audioBuffer: Float32Array;
private vadInstance: vad.NonRealTimeVAD | null;
private inputType: 'float32' | 'buffer' | null;
private isSpeechOngoing: boolean;
private silenceThreshold: number;
private lastSpeechEnd: number;
private currentTime: number;
private sampleRateHertz: number
private minBufferSize: number
private maxBufferSize: number
private overlapDuration: number
private audioBuffer: Float32Array
private vadInstance: vad.NonRealTimeVAD | null
private inputType: "float32" | "buffer" | null
private isSpeechOngoing: boolean
private silenceThreshold: number
private lastSpeechEnd: number
private currentTime: number

constructor(options: RealTimeVADOptions = {}) {
super();
this.sampleRateHertz = options.sampleRate || 16000;
this.minBufferSize = this.sampleRateHertz * (options.minBufferDuration || 1);
this.maxBufferSize = this.sampleRateHertz * (options.maxBufferDuration || 5);
this.overlapDuration = options.overlapDuration || 0.1;
this.audioBuffer = new Float32Array(0);
this.vadInstance = null;
this.inputType = null;
this.isSpeechOngoing = false;
this.silenceThreshold = options.silenceThreshold || 0.5; // seconds
this.lastSpeechEnd = 0;
this.currentTime = 0;
super()
this.sampleRateHertz = options.sampleRate || 16000
this.minBufferSize = this.sampleRateHertz * (options.minBufferDuration || 1)
this.maxBufferSize = this.sampleRateHertz * (options.maxBufferDuration || 5)
this.overlapDuration = options.overlapDuration || 0.1
this.audioBuffer = new Float32Array(0)
this.vadInstance = null
this.inputType = null
this.isSpeechOngoing = false
this.silenceThreshold = options.silenceThreshold || 0.5 // seconds
this.lastSpeechEnd = 0
this.currentTime = 0
}

async init(): Promise<void> {
this.vadInstance = await vad.NonRealTimeVAD.new();
this.vadInstance = await vad.NonRealTimeVAD.new()
}

async processAudio(audioChunk: Float32Array | Buffer): Promise<void> {
let newAudioData: Float32Array;
let newAudioData: Float32Array
if (audioChunk instanceof Float32Array) {
this.inputType = 'float32';
newAudioData = audioChunk;
this.inputType = "float32"
newAudioData = audioChunk
} else if (audioChunk instanceof Buffer) {
this.inputType = 'buffer';
newAudioData = new Float32Array(audioChunk.length / 2);
this.inputType = "buffer"
newAudioData = new Float32Array(audioChunk.length / 2)
for (let i = 0; i < audioChunk.length; i += 2) {
newAudioData[i / 2] = audioChunk.readInt16LE(i) / 32768;
newAudioData[i / 2] = audioChunk.readInt16LE(i) / 32768
}
} else {
throw new Error('Unsupported audio format. Please provide Float32Array or Buffer.');
throw new Error(
"Unsupported audio format. Please provide Float32Array or Buffer."
)
}

this.audioBuffer = Float32Array.from([...this.audioBuffer, ...newAudioData]);
this.currentTime += newAudioData.length / this.sampleRateHertz;
this.audioBuffer = Float32Array.from([...this.audioBuffer, ...newAudioData])
this.currentTime += newAudioData.length / this.sampleRateHertz

if (this.audioBuffer.length > this.maxBufferSize) {
this.audioBuffer = this.audioBuffer.slice(-this.maxBufferSize);
this.audioBuffer = this.audioBuffer.slice(-this.maxBufferSize)
}

if (this.audioBuffer.length >= this.minBufferSize) {
if (!this.vadInstance) {
await this.init();
await this.init()
}

try {
const vadResult = await this.vadInstance!.run(this.audioBuffer, this.sampleRateHertz);

let hasSpeech = false;
if (vadResult && typeof vadResult[Symbol.asyncIterator] === "function") {
const vadResult = await this.vadInstance!.run(
this.audioBuffer,
this.sampleRateHertz
)

let hasSpeech = false
if (
vadResult &&
typeof vadResult[Symbol.asyncIterator] === "function"
) {
for await (const segment of vadResult) {
hasSpeech = true;
const startTime = this.currentTime - (this.audioBuffer.length / this.sampleRateHertz) + (segment.start / this.sampleRateHertz);
const endTime = this.currentTime - (this.audioBuffer.length / this.sampleRateHertz) + (segment.end / this.sampleRateHertz);

hasSpeech = true
const startTime =
this.currentTime -
this.audioBuffer.length / this.sampleRateHertz +
segment.start / this.sampleRateHertz
const endTime =
this.currentTime -
this.audioBuffer.length / this.sampleRateHertz +
segment.end / this.sampleRateHertz

if (!this.isSpeechOngoing) {
this.isSpeechOngoing = true;
this.emit('start', { start: startTime } as SpeechSegmentStart);
this.isSpeechOngoing = true
this.emit("start", { start: startTime } as SpeechSegmentStart)
}

const audio = this.convertAudioToInputType(segment.audio);
this.emit('data', { audio, start: startTime, end: endTime } as SpeechSegmentData);
const audio = this.convertAudioToInputType(segment.audio)
this.emit("data", {
audio,
start: startTime,
end: endTime,
} as SpeechSegmentData)

this.lastSpeechEnd = endTime;
this.lastSpeechEnd = endTime
}
}

if (!hasSpeech && this.isSpeechOngoing &&
(this.currentTime - this.lastSpeechEnd) > this.silenceThreshold) {
this.isSpeechOngoing = false;
this.emit('end', { end: this.lastSpeechEnd } as SpeechSegmentEnd);
if (
!hasSpeech &&
this.isSpeechOngoing &&
this.currentTime - this.lastSpeechEnd > this.silenceThreshold
) {
this.isSpeechOngoing = false
this.emit("end", { end: this.lastSpeechEnd } as SpeechSegmentEnd)
}

const overlapSize = Math.floor(this.overlapDuration * this.sampleRateHertz);
this.audioBuffer = this.audioBuffer.slice(-overlapSize);
const overlapSize = Math.floor(
this.overlapDuration * this.sampleRateHertz
)
this.audioBuffer = this.audioBuffer.slice(-overlapSize)
} catch (error) {
this.emit('error', error);
this.emit("error", error)
}
}
}

private convertAudioToInputType(audio: Float32Array): Float32Array | Buffer {
if (this.inputType === 'float32') {
return audio;
} else if (this.inputType === 'buffer') {
const buffer = Buffer.alloc(audio.length * 2);
if (this.inputType === "float32") {
return audio
} else if (this.inputType === "buffer") {
const buffer = Buffer.alloc(audio.length * 2)
for (let i = 0; i < audio.length; i++) {
buffer.writeInt16LE(Math.floor(audio[i] ?? 0 * 32767), i * 2);
buffer.writeInt16LE(Math.floor(audio[i] ?? 0 * 32767), i * 2)
}
return buffer;
return buffer
}
throw new Error('Invalid input type');
throw new Error("Invalid input type")
}
}
18 changes: 16 additions & 2 deletions packages/node/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,19 @@ class NonRealTimeVAD extends PlatformAgnosticNonRealTimeVAD {
}
}

// Public surface of the node package: runtime values and type-only exports
// are split so bundlers can erase the latter (isolatedModules-safe).
export {
  utils,
  Resampler,
  FrameProcessor,
  Message,
  NonRealTimeVAD,
  RealTimeVAD,
}
export type {
  FrameProcessorOptions,
  NonRealTimeVADOptions,
  RealTimeVADOptions,
  SpeechSegmentStart,
  SpeechSegmentData,
  SpeechSegmentEnd,
}

0 comments on commit 211b98d

Please sign in to comment.