From 5c0b8fcc24b637f8182b5107b00fc3deb379db5f Mon Sep 17 00:00:00 2001 From: lraton Date: Mon, 2 Oct 2023 13:36:27 +0200 Subject: [PATCH] Add sound device to resolve ALSA problem --- requirements.txt | 3 ++- transcribe_demo.py | 17 +++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index ae172ba..d496865 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ pyaudio SpeechRecognition +sounddevice --extra-index-url https://download.pytorch.org/whl/cu116 torch -git+https://github.com/openai/whisper.git \ No newline at end of file +git+https://github.com/openai/whisper.git diff --git a/transcribe_demo.py b/transcribe_demo.py index 6dd8972..3e36949 100644 --- a/transcribe_demo.py +++ b/transcribe_demo.py @@ -6,6 +6,7 @@ import speech_recognition as sr import whisper import torch +import sounddevice from datetime import datetime, timedelta from queue import Queue @@ -26,13 +27,13 @@ def main(): help="How real time the recording is in seconds.", type=float) parser.add_argument("--phrase_timeout", default=3, help="How much empty space between recordings before we " - "consider it a new line in the transcription.", type=float) + "consider it a new line in the transcription.", type=float) if 'linux' in platform: parser.add_argument("--default_microphone", default='pulse', help="Default microphone name for SpeechRecognition. " "Run this with 'list' to view available Microphones.", type=str) args = parser.parse_args() - + # The last time a recording was retreived from the queue. phrase_time = None # Current raw audio bytes. @@ -44,15 +45,15 @@ def main(): recorder.energy_threshold = args.energy_threshold # Definitely do this, dynamic energy compensation lowers the energy threshold dramtically to a point where the SpeechRecognizer never stops recording. recorder.dynamic_energy_threshold = False - - # Important for linux users. + + # Important for linux users. # Prevents permanent application hang and crash by using the wrong Microphone if 'linux' in platform: mic_name = args.default_microphone if not mic_name or mic_name == 'list': print("Available microphone devices are: ") for index, name in enumerate(sr.Microphone.list_microphone_names()): - print(f"Microphone with name \"{name}\" found") + print(f"Microphone with name \"{name}\" found") return else: for index, name in enumerate(sr.Microphone.list_microphone_names()): @@ -61,7 +62,7 @@ def main(): break else: source = sr.Microphone(sample_rate=16000) - + # Load / Download model model = args.model if args.model != "large" and not args.non_english: @@ -73,7 +74,7 @@ def main(): temp_file = NamedTemporaryFile().name transcription = [''] - + with source: recorder.adjust_for_ambient_noise(source) @@ -149,4 +150,4 @@ def record_callback(_, audio:sr.AudioData) -> None: if __name__ == "__main__": - main() \ No newline at end of file + main()