From 2c25c12f5e7b00b3d137c3d4ded22e2f10e3b8dd Mon Sep 17 00:00:00 2001
From: Ondrej Platek
Date: Fri, 16 Aug 2024 17:16:17 +0200
Subject: [PATCH 1/2] add sounddevice multiplatform client

---
Review notes (placed below the "---" separator, so they are not part of the
commit message):

* This message was stored flattened onto a single line. Line breaks and the
  Python indentation were restored from the syntax; the added-line count
  still matches the hunk header (52). Trailing whitespace could not be
  recovered, so the blob id on the "index" line may not match these bytes.
* NOTE(review): the From: header carries no e-mail address; `git am` may
  refuse the message until one is supplied (`git apply` is unaffected).
* The docstring example pipes to port 43001, but --port defaults to 43007.
* --chunk is described as "Chunk size in ms", yet its value is only passed
  to sd.sleep() in the main loop; the capture block size is the constant
  CHUNK = 1024 given to InputStream as blocksize.
* DTYPE is assigned but InputStream receives the literal 'int16'.
* sock.connect() runs before the try/finally, so a failed connect never
  reaches sock.close().
* callback() calls the blocking sock.sendall(). NOTE(review): confirm that
  blocking inside the audio callback is acceptable, and decide what should
  happen when the server closes the connection while the main loop keeps
  sleeping.

 sounddevice_client.py | 52 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 sounddevice_client.py

diff --git a/sounddevice_client.py b/sounddevice_client.py
new file mode 100644
index 0000000..8e948b0
--- /dev/null
+++ b/sounddevice_client.py
@@ -0,0 +1,52 @@
+"""
+Simple (hopefully) multiplatform client mimicing the linux command
+
+    arecord -f S16_LE -c1 -r 16000 -t raw -D default | nc localhost 43001
+
+which streams audio data from the microphone to the server.
+Tested on Mac Os.
+"""
+import sys
+import sounddevice as sd
+import socket
+import numpy as np
+from argparse import ArgumentParser
+
+
+parser = ArgumentParser(__doc__)
+parser.add_argument('--host', type=str, default='localhost', help='Host name')
+parser.add_argument('--port', type=int, default=43007, help='Port number')
+parser.add_argument('--chunk', type=int, default=1000, help='Chunk size in ms')
+args = parser.parse_args()
+
+
+
+# Audio configuration needed for whisper_online_server.py
+SAMPLE_RATE = 16000
+CHANNELS = 1
+CHUNK = 1024
+# SIGNED INT16 LITTLE ENDIAN is setup as for sounddevice
+DTYPE = np.int16
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+sock.connect((args.host, args.port))
+
+print("Recording and streaming audio...")
+
+def callback(indata, frames, time, status):
+    if status:
+        print(status, file=sys.stderr)
+    # Convert the audio data to bytes and send it over the socket
+    sock.sendall(indata.tobytes())
+
+try:
+    # Open the audio stream
+    with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, dtype='int16', callback=callback, blocksize=CHUNK):
+        print("Press Ctrl+C to stop the recording")
+        while True:
+            sd.sleep(args.chunk)
+except KeyboardInterrupt:
+    print("Stopping...")
+finally:
+    # Close the socket
+    sock.close()
From fd1e40e26d90d41271284612ac57fc9376b3855c Mon Sep 17 00:00:00 2001
From: Ondrej Platek
Date: Mon, 26 Aug 2024 16:37:45 +0200
Subject: [PATCH 2/2] fix unused DTYPE plus whitespace

---
Review notes (placed below the "---" separator, so they are not part of the
commit message):

* This message was stored flattened onto a single line. Line breaks and the
  Python indentation were restored from the syntax; both hunks still match
  their headers (6/6 and 7/7 lines).
* First hunk: the removed and added lines differ only in trailing
  whitespace, which was lost in the flattening, so each -/+ pair reads
  identically here. The blob ids on the "index" line refer to the original
  bytes.
* Second hunk: InputStream now receives the module-level DTYPE constant
  instead of the literal 'int16', so the constant is no longer unused.
* NOTE(review): the From: header carries no e-mail address; `git am` may
  refuse the message until one is supplied (`git apply` is unaffected).

 sounddevice_client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sounddevice_client.py b/sounddevice_client.py
index 8e948b0..66a1adb 100644
--- a/sounddevice_client.py
+++ b/sounddevice_client.py
@@ -1,6 +1,6 @@
 """
-Simple (hopefully) multiplatform client mimicing the linux command
-
+Simple (hopefully) multiplatform client mimicing the linux command
+
     arecord -f S16_LE -c1 -r 16000 -t raw -D default | nc localhost 43001
 
 which streams audio data from the microphone to the server.
@@ -41,7 +41,7 @@ def callback(indata, frames, time, status):
 
 try:
     # Open the audio stream
-    with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, dtype='int16', callback=callback, blocksize=CHUNK):
+    with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS, dtype=DTYPE, callback=callback, blocksize=CHUNK):
         print("Press Ctrl+C to stop the recording")
         while True:
             sd.sleep(args.chunk)