From 5c0b8fcc24b637f8182b5107b00fc3deb379db5f Mon Sep 17 00:00:00 2001
From: lraton <notari.filippo@outlook.it>
Date: Mon, 2 Oct 2023 13:36:27 +0200
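Subject: [PATCH] Add sounddevice dependency and import to resolve ALSA problem

Add the sounddevice package to requirements.txt and import it in
transcribe_demo.py. Also strip trailing whitespace in transcribe_demo.py
and add the missing end-of-file newline to both files.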

---
 requirements.txt   |  3 ++-
 transcribe_demo.py | 17 +++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ae172ba..d496865 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 pyaudio
 SpeechRecognition
+sounddevice
 --extra-index-url https://download.pytorch.org/whl/cu116
 torch
-git+https://github.com/openai/whisper.git
\ No newline at end of file
+git+https://github.com/openai/whisper.git
diff --git a/transcribe_demo.py b/transcribe_demo.py
index 6dd8972..3e36949 100644
--- a/transcribe_demo.py
+++ b/transcribe_demo.py
@@ -6,6 +6,7 @@
 import speech_recognition as sr
 import whisper
 import torch
+import sounddevice
 
 from datetime import datetime, timedelta
 from queue import Queue
@@ -26,13 +27,13 @@ def main():
                         help="How real time the recording is in seconds.", type=float)
     parser.add_argument("--phrase_timeout", default=3,
                         help="How much empty space between recordings before we "
-                             "consider it a new line in the transcription.", type=float)  
+                             "consider it a new line in the transcription.", type=float)
     if 'linux' in platform:
         parser.add_argument("--default_microphone", default='pulse',
                             help="Default microphone name for SpeechRecognition. "
                                  "Run this with 'list' to view available Microphones.", type=str)
     args = parser.parse_args()
-    
+
     # The last time a recording was retreived from the queue.
     phrase_time = None
     # Current raw audio bytes.
@@ -44,15 +45,15 @@ def main():
     recorder.energy_threshold = args.energy_threshold
     # Definitely do this, dynamic energy compensation lowers the energy threshold dramtically to a point where the SpeechRecognizer never stops recording.
     recorder.dynamic_energy_threshold = False
-    
-    # Important for linux users. 
+
+    # Important for linux users.
     # Prevents permanent application hang and crash by using the wrong Microphone
     if 'linux' in platform:
         mic_name = args.default_microphone
         if not mic_name or mic_name == 'list':
             print("Available microphone devices are: ")
             for index, name in enumerate(sr.Microphone.list_microphone_names()):
-                print(f"Microphone with name \"{name}\" found")   
+                print(f"Microphone with name \"{name}\" found")
             return
         else:
             for index, name in enumerate(sr.Microphone.list_microphone_names()):
@@ -61,7 +62,7 @@ def main():
                     break
     else:
         source = sr.Microphone(sample_rate=16000)
-        
+
     # Load / Download model
     model = args.model
     if args.model != "large" and not args.non_english:
@@ -73,7 +74,7 @@ def main():
 
     temp_file = NamedTemporaryFile().name
     transcription = ['']
-    
+
     with source:
         recorder.adjust_for_ambient_noise(source)
 
@@ -149,4 +150,4 @@ def record_callback(_, audio:sr.AudioData) -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()