forked from microsoft/MS-SNSD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
audiolib.py
73 lines (61 loc) · 2.14 KB
/
audiolib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 26 15:54:05 2019
@author: chkarada
"""
import soundfile as sf
import os
import numpy as np
# Function to read audio
def audioread(path, norm=True, start=0, stop=None):
    """Read an audio file, downmix it to mono and optionally normalise it.

    Args:
        path: Location of the audio file; resolved to an absolute path
            before use.
        norm: When true, scale the signal so that its RMS equals
            ``10 ** (-25 / 20)`` (-25 dB relative to full scale).
        start: Forwarded unchanged to ``sf.read`` as ``start``.
        stop: Forwarded unchanged to ``sf.read`` as ``stop``.

    Returns:
        A ``(x, sr)`` tuple: the one-dimensional sample array and the
        sample rate returned by ``sf.read``.

    Raises:
        ValueError: If ``path`` does not exist.
        RuntimeError: If ``sf.read`` cannot decode the file.
    """
    path = os.path.abspath(path)
    if not os.path.exists(path):
        raise ValueError("[{}] does not exist!".format(path))

    try:
        x, sr = sf.read(path, start=start, stop=stop)
    except RuntimeError as err:  # e.g. sph pcm-embedded shortened v2
        print('WARNING: Audio type not supported')
        # There is no data to return, so fail with a clear error instead of
        # falling through to code that would touch the unbound name ``x``.
        raise RuntimeError(
            "[{}] could not be read: {}".format(path, err)
        ) from err

    if len(x.shape) != 1:
        # Multi-channel: average the channels into a single mono signal.
        x = x.T
        x = x.sum(axis=0) / x.shape[0]

    if norm:
        rms = (x ** 2).mean() ** 0.5
        # Floor the RMS so an all-zero signal stays zero instead of
        # becoming NaN through a division by zero.
        rms = max(rms, np.finfo(float).eps)
        x = x * (10 ** (-25 / 20) / rms)

    return x, sr
# Function to write audio
def audiowrite(data, fs, destpath, norm=False):
    """Write audio samples to ``destpath``, creating its directory if needed.

    Args:
        data: Audio samples to write. When ``norm`` is true it is converted
            with ``np.asarray`` before scaling.
        fs: Sample rate, passed to ``sf.write``.
        destpath: Output file path; resolved to an absolute path.
        norm: When true, scale the signal so that its RMS equals
            ``10 ** (-25 / 20)`` (-25 dB relative to full scale) and, if the
            scaled peak reaches or exceeds 1, divide by the peak.

    Returns:
        None.
    """
    if norm:
        eps = np.finfo(float).eps
        data = np.asarray(data)
        rms = (data ** 2).mean() ** 0.5
        # Amplitude scaling, so the dB value is divided by 20; eps avoids a
        # division by zero for an all-zero signal.
        data = data * (10 ** (-25 / 20) / (rms + eps))
        peak = np.max(np.abs(data))
        if peak >= 1:
            # Rescale so the largest sample magnitude is exactly 1.
            data = data / peak

    destpath = os.path.abspath(destpath)
    # exist_ok avoids a race between checking for and creating the folder.
    os.makedirs(os.path.dirname(destpath), exist_ok=True)
    sf.write(destpath, data, fs)
# Function to mix clean speech and noise at various SNR levels
def snr_mixer(clean, noise, snr):
    """Mix clean speech with noise at the requested signal-to-noise ratio.

    Both inputs are first scaled so that their RMS equals
    ``10 ** (-25 / 20)`` (-25 dB relative to full scale). The noise is then
    attenuated or amplified so that ``20 * log10(rms(clean) / rms(noise))``
    equals ``snr``, and the two signals are added.

    Args:
        clean: Array of clean speech samples.
        noise: Array of noise samples. It is added element-wise to
            ``clean``, so the shapes must be compatible for that addition.
        snr: Desired signal-to-noise ratio in dB.

    Returns:
        A ``(clean, noisenewlevel, noisyspeech)`` tuple: the normalised
        clean signal, the rescaled noise, and their sum.
    """
    eps = np.finfo(float).eps  # floor for RMS values; avoids dividing by 0
    target_rms = 10 ** (-25 / 20)

    # Normalizing to -25 dB FS
    rmsclean = (clean ** 2).mean() ** 0.5
    clean = clean * (target_rms / max(rmsclean, eps))
    rmsclean = (clean ** 2).mean() ** 0.5

    rmsnoise = (noise ** 2).mean() ** 0.5
    noise = noise * (target_rms / max(rmsnoise, eps))
    rmsnoise = (noise ** 2).mean() ** 0.5

    # Set the noise level for a given SNR. This is an amplitude gain, so it
    # is applied directly; taking its square root would halve the SNR in dB.
    noisescalar = rmsclean / (10 ** (snr / 20)) / max(rmsnoise, eps)
    noisenewlevel = noise * noisescalar
    noisyspeech = clean + noisenewlevel
    return clean, noisenewlevel, noisyspeech