chore: reformatted

ThEditor · Jul 7, 2024 · 211b98d
1 parent cf00c16
commit 211b98d
Show file tree

Hide file tree

Showing 2 changed files with 113 additions and 76 deletions.
diff --git a/packages/_common/src/real-time-vad.ts b/packages/_common/src/real-time-vad.ts
@@ -1,133 +1,156 @@
-import * as vad from '../';
-import { EventEmitter } from 'events';
+import * as vad from "../"
+import { EventEmitter } from "events"
 
 export interface RealTimeVADOptions {
-  sampleRate?: number;
-  minBufferDuration?: number;
-  maxBufferDuration?: number;
-  overlapDuration?: number;
-  silenceThreshold?: number;
+  sampleRate?: number
+  minBufferDuration?: number
+  maxBufferDuration?: number
+  overlapDuration?: number
+  silenceThreshold?: number
 }
 
 export interface SpeechSegmentStart {
-  start: number;
+  start: number
 }
 
 export interface SpeechSegmentData {
-  audio: Float32Array | Buffer;
-  start: number;
-  end: number;
+  audio: Float32Array | Buffer
+  start: number
+  end: number
 }
 
 export interface SpeechSegmentEnd {
-  end: number;
+  end: number
 }
 
 export class RealTimeVAD extends EventEmitter {
-  private sampleRateHertz: number;
-  private minBufferSize: number;
-  private maxBufferSize: number;
-  private overlapDuration: number;
-  private audioBuffer: Float32Array;
-  private vadInstance: vad.NonRealTimeVAD | null;
-  private inputType: 'float32' | 'buffer' | null;
-  private isSpeechOngoing: boolean;
-  private silenceThreshold: number;
-  private lastSpeechEnd: number;
-  private currentTime: number;
+  private sampleRateHertz: number
+  private minBufferSize: number
+  private maxBufferSize: number
+  private overlapDuration: number
+  private audioBuffer: Float32Array
+  private vadInstance: vad.NonRealTimeVAD | null
+  private inputType: "float32" | "buffer" | null
+  private isSpeechOngoing: boolean
+  private silenceThreshold: number
+  private lastSpeechEnd: number
+  private currentTime: number
 
   constructor(options: RealTimeVADOptions = {}) {
-    super();
-    this.sampleRateHertz = options.sampleRate || 16000;
-    this.minBufferSize = this.sampleRateHertz * (options.minBufferDuration || 1);
-    this.maxBufferSize = this.sampleRateHertz * (options.maxBufferDuration || 5);
-    this.overlapDuration = options.overlapDuration || 0.1;
-    this.audioBuffer = new Float32Array(0);
-    this.vadInstance = null;
-    this.inputType = null;
-    this.isSpeechOngoing = false;
-    this.silenceThreshold = options.silenceThreshold || 0.5; // seconds
-    this.lastSpeechEnd = 0;
-    this.currentTime = 0;
+    super()
+    this.sampleRateHertz = options.sampleRate || 16000
+    this.minBufferSize = this.sampleRateHertz * (options.minBufferDuration || 1)
+    this.maxBufferSize = this.sampleRateHertz * (options.maxBufferDuration || 5)
+    this.overlapDuration = options.overlapDuration || 0.1
+    this.audioBuffer = new Float32Array(0)
+    this.vadInstance = null
+    this.inputType = null
+    this.isSpeechOngoing = false
+    this.silenceThreshold = options.silenceThreshold || 0.5 // seconds
+    this.lastSpeechEnd = 0
+    this.currentTime = 0
   }
 
   async init(): Promise<void> {
-    this.vadInstance = await vad.NonRealTimeVAD.new();
+    this.vadInstance = await vad.NonRealTimeVAD.new()
   }
 
   async processAudio(audioChunk: Float32Array | Buffer): Promise<void> {
-    let newAudioData: Float32Array;
+    let newAudioData: Float32Array
     if (audioChunk instanceof Float32Array) {
-      this.inputType = 'float32';
-      newAudioData = audioChunk;
+      this.inputType = "float32"
+      newAudioData = audioChunk
     } else if (audioChunk instanceof Buffer) {
-      this.inputType = 'buffer';
-      newAudioData = new Float32Array(audioChunk.length / 2);
+      this.inputType = "buffer"
+      newAudioData = new Float32Array(audioChunk.length / 2)
       for (let i = 0; i < audioChunk.length; i += 2) {
-        newAudioData[i / 2] = audioChunk.readInt16LE(i) / 32768;
+        newAudioData[i / 2] = audioChunk.readInt16LE(i) / 32768
       }
     } else {
-      throw new Error('Unsupported audio format. Please provide Float32Array or Buffer.');
+      throw new Error(
+        "Unsupported audio format. Please provide Float32Array or Buffer."
+      )
     }
 
-    this.audioBuffer = Float32Array.from([...this.audioBuffer, ...newAudioData]);
-    this.currentTime += newAudioData.length / this.sampleRateHertz;
+    this.audioBuffer = Float32Array.from([...this.audioBuffer, ...newAudioData])
+    this.currentTime += newAudioData.length / this.sampleRateHertz
 
     if (this.audioBuffer.length > this.maxBufferSize) {
-      this.audioBuffer = this.audioBuffer.slice(-this.maxBufferSize);
+      this.audioBuffer = this.audioBuffer.slice(-this.maxBufferSize)
     }
 
     if (this.audioBuffer.length >= this.minBufferSize) {
       if (!this.vadInstance) {
-        await this.init();
+        await this.init()
       }
 
       try {
-        const vadResult = await this.vadInstance!.run(this.audioBuffer, this.sampleRateHertz);
-
-        let hasSpeech = false;
-        if (vadResult && typeof vadResult[Symbol.asyncIterator] === "function") {
+        const vadResult = await this.vadInstance!.run(
+          this.audioBuffer,
+          this.sampleRateHertz
+        )
+
+        let hasSpeech = false
+        if (
+          vadResult &&
+          typeof vadResult[Symbol.asyncIterator] === "function"
+        ) {
           for await (const segment of vadResult) {
-            hasSpeech = true;
-            const startTime = this.currentTime - (this.audioBuffer.length / this.sampleRateHertz) + (segment.start / this.sampleRateHertz);
-            const endTime = this.currentTime - (this.audioBuffer.length / this.sampleRateHertz) + (segment.end / this.sampleRateHertz);
-
+            hasSpeech = true
+            const startTime =
+              this.currentTime -
+              this.audioBuffer.length / this.sampleRateHertz +
+              segment.start / this.sampleRateHertz
+            const endTime =
+              this.currentTime -
+              this.audioBuffer.length / this.sampleRateHertz +
+              segment.end / this.sampleRateHertz
+
             if (!this.isSpeechOngoing) {
-              this.isSpeechOngoing = true;
-              this.emit('start', { start: startTime } as SpeechSegmentStart);
+              this.isSpeechOngoing = true
+              this.emit("start", { start: startTime } as SpeechSegmentStart)
             }
 
-            const audio = this.convertAudioToInputType(segment.audio);
-            this.emit('data', { audio, start: startTime, end: endTime } as SpeechSegmentData);
+            const audio = this.convertAudioToInputType(segment.audio)
+            this.emit("data", {
+              audio,
+              start: startTime,
+              end: endTime,
+            } as SpeechSegmentData)
 
-            this.lastSpeechEnd = endTime;
+            this.lastSpeechEnd = endTime
           }
         }
 
-        if (!hasSpeech && this.isSpeechOngoing && 
-            (this.currentTime - this.lastSpeechEnd) > this.silenceThreshold) {
-          this.isSpeechOngoing = false;
-          this.emit('end', { end: this.lastSpeechEnd } as SpeechSegmentEnd);
+        if (
+          !hasSpeech &&
+          this.isSpeechOngoing &&
+          this.currentTime - this.lastSpeechEnd > this.silenceThreshold
+        ) {
+          this.isSpeechOngoing = false
+          this.emit("end", { end: this.lastSpeechEnd } as SpeechSegmentEnd)
         }
 
-        const overlapSize = Math.floor(this.overlapDuration * this.sampleRateHertz);
-        this.audioBuffer = this.audioBuffer.slice(-overlapSize);
+        const overlapSize = Math.floor(
+          this.overlapDuration * this.sampleRateHertz
+        )
+        this.audioBuffer = this.audioBuffer.slice(-overlapSize)
       } catch (error) {
-        this.emit('error', error);
+        this.emit("error", error)
       }
     }
   }
 
   private convertAudioToInputType(audio: Float32Array): Float32Array | Buffer {
-    if (this.inputType === 'float32') {
-      return audio;
-    } else if (this.inputType === 'buffer') {
-      const buffer = Buffer.alloc(audio.length * 2);
+    if (this.inputType === "float32") {
+      return audio
+    } else if (this.inputType === "buffer") {
+      const buffer = Buffer.alloc(audio.length * 2)
       for (let i = 0; i < audio.length; i++) {
-        buffer.writeInt16LE(Math.floor(audio[i] ?? 0 * 32767), i * 2);
+        buffer.writeInt16LE(Math.floor(audio[i] ?? 0 * 32767), i * 2)
       }
-      return buffer;
+      return buffer
     }
-    throw new Error('Invalid input type');
+    throw new Error("Invalid input type")
   }
 }
diff --git a/packages/node/src/index.ts b/packages/node/src/index.ts
@@ -30,5 +30,19 @@ class NonRealTimeVAD extends PlatformAgnosticNonRealTimeVAD {
   }
 }
 
-export { utils, Resampler, FrameProcessor, Message, NonRealTimeVAD, RealTimeVAD }
-export type { FrameProcessorOptions, NonRealTimeVADOptions, RealTimeVADOptions, SpeechSegmentStart, SpeechSegmentData, SpeechSegmentEnd}
+export {
+  utils,
+  Resampler,
+  FrameProcessor,
+  Message,
+  NonRealTimeVAD,
+  RealTimeVAD,
+}
+export type {
+  FrameProcessorOptions,
+  NonRealTimeVADOptions,
+  RealTimeVADOptions,
+  SpeechSegmentStart,
+  SpeechSegmentData,
+  SpeechSegmentEnd,
+}