real_time_processing.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 23 16:23:15 2020
@author: nils
"""
import soundfile as sf
import numpy as np
import tensorflow as tf
##########################
# These values are fixed; if you need other values, you have to retrain the model.
# The sampling rate of 16 kHz is also fixed.
block_len = 512
block_shift = 128
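# At 16 kHz these defaults correspond to a 32 ms block (512 samples) with an
# 8 ms shift (128 samples), i.e. 75 % overlap between consecutive blocks.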
# load model
model = tf.saved_model.load('./pretrained_model/dtln_saved_model')
infer = model.signatures["serving_default"]
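# The signature takes a float32 tensor of shape [1, block_len] (one time-domain
# block with a batch dimension) and returns the enhanced block under the output
# key 'conv1d_1' (see the inference call in the loop below).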
# load the audio file at 16 kHz sampling rate (please change the path)
audio, fs = sf.read('path_to_your_favorite_audio.wav')
# check for sampling rate
if fs != 16000:
    raise ValueError('This model only supports 16k sampling rate.')
# preallocate output audio
out_file = np.zeros((len(audio)))
# create buffer
in_buffer = np.zeros((block_len))
out_buffer = np.zeros((block_len))
# calculate number of blocks
num_blocks = (audio.shape[0] - (block_len-block_shift)) // block_shift
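# The loop below simulates block-wise real-time processing: each iteration
# shifts block_shift fresh samples into the input buffer, runs the model on the
# full block, overlap-adds the result into the output buffer and writes out the
# oldest block_shift samples, which have received all their contributions at
# that point.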
# iterate over the number of blocks
for idx in range(num_blocks):
    # shift the input buffer and append the next block_shift input samples
    in_buffer[:-block_shift] = in_buffer[block_shift:]
    in_buffer[-block_shift:] = audio[idx*block_shift:(idx*block_shift)+block_shift]
    # create a batch dimension of one
    in_block = np.expand_dims(in_buffer, axis=0).astype('float32')
    # process one block
    out_block = infer(tf.constant(in_block))['conv1d_1']
    # shift the output buffer, clear the newly exposed samples and overlap-add the model output
    out_buffer[:-block_shift] = out_buffer[block_shift:]
    out_buffer[-block_shift:] = np.zeros((block_shift))
    out_buffer += np.squeeze(out_block)
    # write the finished block to the output signal
    out_file[idx*block_shift:(idx*block_shift)+block_shift] = out_buffer[:block_shift]
# write to .wav file
sf.write('out.wav', out_file, fs)
print('Processing finished.')