-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feature-cd-loss' into main
- Loading branch information
Showing
13 changed files
with
2,054 additions
and
585 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
import numpy as np | ||
from scipy import signal | ||
import copy | ||
|
||
''' | ||
Hemlata Tak, Madhu Kamble, Jose Patino, Massimiliano Todisco, Nicholas Evans. | ||
RawBoost: A Raw Data Boosting and Augmentation Method applied to Automatic Speaker Verification Anti-Spoofing. | ||
In Proc. ICASSP 2022, pp:6382--6386. | ||
''' | ||
|
||
def randRange(x1, x2, integer):
    """Draw one sample uniformly from the half-open interval [x1, x2).

    Args:
        x1: Lower bound of the interval.
        x2: Upper bound of the interval.
        integer: If truthy, the sample is truncated toward zero and
            returned as a plain Python ``int``.

    Returns:
        A Python ``int`` when ``integer`` is truthy, otherwise a NumPy
        array of shape ``(1,)`` holding the sampled float.
    """
    y = np.random.uniform(low=x1, high=x2, size=(1,))
    if integer:
        # Index the single element explicitly: calling int() directly on a
        # 1-element array is deprecated since NumPy 1.25.
        y = int(y[0])
    return y
|
||
def normWav(x, always):
    """Peak-normalise a waveform so its largest magnitude is at most 1.

    Args:
        x: NumPy array of samples.
        always: If truthy, always scale so that the peak magnitude is 1;
            otherwise scale only when the peak magnitude exceeds 1.

    Returns:
        The scaled array, or ``x`` itself when no scaling is applied.
    """
    peak = np.amax(np.abs(x))
    # A silent (all-zero) signal has nothing to scale; dividing by its zero
    # peak would fill the output with NaNs.
    if peak == 0:
        return x
    if always or peak > 1:
        x = x / peak
    return x
|
||
|
||
def genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs):
    """Build the coefficients of a random multi-band notch FIR filter.

    For each of the ``nBands`` bands a centre frequency, a bandwidth and a
    tap count are drawn at random, a two-cutoff ``firwin`` filter is designed
    for that band, and the per-band filters are cascaded by convolution.
    The cascade is then scaled so its peak magnitude response equals a
    randomly drawn gain.

    Args:
        nBands: Number of bands to cascade.
        minF, maxF: Range for the random centre frequency.
        minBW, maxBW: Range for the random bandwidth.
        minCoeff, maxCoeff: Range for the random number of taps per band.
        minG, maxG: Range for the random overall gain in dB.
        fs: Sampling frequency passed to the filter design routines.

    Returns:
        1-D NumPy array of FIR coefficients.
    """
    b = 1
    for _ in range(nBands):
        # randRange returns 1-element arrays for float draws; .item() turns
        # them into plain scalars so no implicit array-to-scalar conversion
        # (deprecated since NumPy 1.25) happens further down.
        fc = np.asarray(randRange(minF, maxF, 0)).item()
        bw = np.asarray(randRange(minBW, maxBW, 0)).item()
        c = randRange(minCoeff, maxCoeff, 1)

        # Force an odd tap count for the two-cutoff firwin design.
        if c % 2 == 0:
            c = c + 1

        # Keep both band edges strictly inside (0, fs/2).
        f1 = fc - bw / 2
        f2 = fc + bw / 2
        if f1 <= 0:
            f1 = 1 / 1000
        if f2 >= fs / 2:
            f2 = fs / 2 - 1 / 1000

        band = signal.firwin(c, [float(f1), float(f2)], window='hamming', fs=fs)
        b = np.convolve(band, b)

    G = np.asarray(randRange(minG, maxG, 0)).item()
    _, h = signal.freqz(b, 1, fs=fs)
    # Normalise the peak magnitude response to 1, then apply the gain in dB.
    b = pow(10, G / 20) * b / np.amax(abs(h))
    # atleast_1d keeps the result an array even when nBands == 0.
    return np.atleast_1d(b)
|
||
|
||
def filterFIR(x, b):
    """Filter ``x`` with FIR coefficients ``b`` and trim the result.

    The input is zero-padded at the end by ``len(b) + 1`` samples, filtered
    with ``scipy.signal.lfilter``, and then half of that margin is cut from
    each end, so a 1-D input yields an output of the same length.

    Args:
        x: 1-D NumPy array of samples.
        b: 1-D NumPy array of FIR coefficients.

    Returns:
        The filtered and trimmed signal.
    """
    margin = b.shape[0] + 1
    padded = np.pad(x, (0, margin), 'constant')
    filtered = signal.lfilter(b, 1, padded)
    start = int(margin / 2)
    stop = int(filtered.shape[0] - margin / 2)
    return filtered[start:stop]
|
||
# Linear and non-linear convolutive noise
def LnL_convolutive_noise(x, N_f, nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, minBiasLinNonLin, maxBiasLinNonLin, fs):
    """Sum ``N_f`` randomly notch-filtered powers of the input signal.

    Branch ``k`` (1-based) raises ``x`` to the k-th power, filters it with a
    freshly generated random notch filter and adds it to the running sum.
    When the second branch is reached the gain range is lowered once by
    ``minBiasLinNonLin`` / ``maxBiasLinNonLin``; the lowered range is then
    used for all remaining branches. The sum is mean-removed and scaled down
    only if its peak magnitude exceeds 1.

    Args:
        x: 1-D NumPy array of samples.
        N_f: Number of branches (1 = linear only).
        nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG:
            Forwarded to ``genNotchCoeffs``.
        minBiasLinNonLin, maxBiasLinNonLin: Amounts subtracted from
            ``minG`` / ``maxG`` from the second branch onward.
        fs: Sampling frequency.

    Returns:
        The processed waveform.
    """
    total = np.zeros(x.shape[0])
    for order in range(1, N_f + 1):
        if order == 2:
            minG = minG - minBiasLinNonLin
            maxG = maxG - maxBiasLinNonLin
        coeffs = genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs)
        total = total + filterFIR(np.power(x, order), coeffs)
    centred = total - np.mean(total)
    return normWav(centred, 0)
|
||
|
||
# Impulsive signal dependent noise
def ISD_additive_noise(x, P, g_sd):
    """Perturb a random subset of samples with signal-dependent noise.

    A percentage ``beta`` is drawn uniformly from [0, P). That share of the
    sample positions is chosen at random, and each chosen sample ``x[p]`` is
    replaced by ``x[p] + g_sd * x[p] * f_r``, where ``f_r`` is the product of
    two independent uniform draws from [-1, 1). The result is scaled down
    only if its peak magnitude exceeds 1.

    Args:
        x: 1-D NumPy array of samples.
        P: Upper bound, in percent, on the share of samples to perturb.
        g_sd: Gain applied to the signal-dependent noise term.

    Returns:
        A new array; ``x`` itself is not modified.
    """
    # randRange returns a 1-element array for float draws; take the scalar so
    # the int() below does not rely on implicit array-to-scalar conversion
    # (deprecated since NumPy 1.25).
    beta = np.asarray(randRange(0, P, 0)).item()

    y = x.copy()
    x_len = x.shape[0]
    n = int(x_len * (beta / 100))
    p = np.random.permutation(x_len)[:n]
    f_r = (2 * np.random.rand(p.shape[0]) - 1) * (2 * np.random.rand(p.shape[0]) - 1)
    r = g_sd * x[p] * f_r
    y[p] = x[p] + r
    return normWav(y, 0)
|
||
|
||
# Stationary signal independent noise
def SSI_additive_noise(x, SNRmin, SNRmax, nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs):
    """Add randomly coloured Gaussian noise to ``x`` at a random SNR.

    White Gaussian noise of the same length as ``x`` is shaped by a random
    notch filter, peak-normalised, and then rescaled so that the ratio of
    the L2 norm of ``x`` to that of the noise corresponds to an SNR (in dB)
    drawn uniformly from [SNRmin, SNRmax).

    Args:
        x: 1-D NumPy array of samples.
        SNRmin, SNRmax: Range for the random SNR in dB.
        nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG:
            Forwarded to ``genNotchCoeffs``.
        fs: Sampling frequency.

    Returns:
        ``x`` plus the scaled noise.
    """
    white = np.random.normal(0, 1, x.shape[0])
    coeffs = genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs)
    coloured = normWav(filterFIR(white, coeffs), 1)
    snr_db = randRange(SNRmin, SNRmax, 0)
    unit_noise = coloured / np.linalg.norm(coloured, 2)
    scaled_noise = unit_noise * np.linalg.norm(x, 2) / 10.0**(0.05 * snr_db)
    return x + scaled_noise
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import os | ||
import numpy as np | ||
import torch | ||
import torch.nn as nn | ||
from torch import Tensor | ||
import librosa | ||
from torch.utils.data import Dataset | ||
from RawBoost import ISD_additive_noise,LnL_convolutive_noise,SSI_additive_noise,normWav | ||
from random import randrange | ||
import random | ||
|
||
|
||
# Module metadata. The conventional dunder is ``__author__`` (two leading
# underscores); the original three-underscore spelling is kept as an alias
# in case anything reads it.
__author__ = "Hemlata Tak"
___author__ = __author__
# NOTE(review): this address looks like a redaction placeholder rather than
# a real e-mail address - confirm and restore the intended contact.
__email__ = "[email protected]"
|
||
|
||
def genSpoof_list(dir_meta, is_train=False, is_eval=False):
    """Read a protocol/metadata file and return utterance keys (and labels).

    In eval mode every non-blank line is taken verbatim as an utterance key.
    Otherwise each non-blank line must hold five whitespace-separated
    fields; the second is the utterance key and the fifth is the label.

    Args:
        dir_meta: Path of the metadata file.
        is_train: Parse labelled lines. Takes precedence over ``is_eval``.
        is_eval: Parse key-only lines (used only when ``is_train`` is false).

    Returns:
        ``file_list`` in eval mode, otherwise ``(d_meta, file_list)`` where
        ``d_meta`` maps each key to 1 for ``'bonafide'`` and 0 for any
        other label.

    Raises:
        ValueError: If a labelled line does not have exactly five fields.
    """
    with open(dir_meta, 'r') as f:
        stripped = [line.strip() for line in f]
    # Blank lines (e.g. a trailing newline at the end of the file) carry no
    # utterance and would otherwise break the five-field unpacking below.
    rows = [row for row in stripped if row]

    if is_eval and not is_train:
        return rows

    # The train and default (dev) modes share the same labelled format.
    d_meta = {}
    file_list = []
    for row in rows:
        _, key, _, _, label = row.split()
        file_list.append(key)
        d_meta[key] = 1 if label == 'bonafide' else 0
    return d_meta, file_list
|
||
|
||
|
||
def pad(x, max_len=64600):
    """Return exactly ``max_len`` samples of ``x``.

    Inputs that are long enough are truncated; shorter inputs are repeated
    end-to-end until ``max_len`` samples are available.

    Args:
        x: 1-D NumPy array of samples.
        max_len: Number of samples in the output.

    Returns:
        Array holding the first ``max_len`` samples of the (repeated) input.
    """
    n_samples = x.shape[0]
    if n_samples < max_len:
        # Repeat the signal enough times to cover max_len, then cut.
        reps = int(max_len / n_samples) + 1
        tiled = np.tile(x, (1, reps))
        return tiled[0, :max_len]
    return x[:max_len]
|
||
|
||
class Dataset_ASVspoof2019_train(Dataset):
    """Labelled dataset that loads, augments and length-normalises audio."""

    def __init__(self, args, list_IDs, labels, base_dir, algo):
        """Store the dataset configuration.

        Args:
            args: Object carrying the RawBoost parameters; passed through
                to ``process_Rawboost_feature``.
            list_IDs: List of strings (each string: utt key).
            labels: Dictionary (key: utt key, value: label integer).
            base_dir: Directory that contains the ``flac`` sub-directory.
            algo: RawBoost algorithm selector passed through to
                ``process_Rawboost_feature``.
        """
        self.list_IDs = list_IDs
        self.labels = labels
        self.base_dir = base_dir
        self.algo = algo
        self.args = args
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        """Return the number of utterances."""
        return len(self.list_IDs)

    def _utt_path(self, utt_id):
        """Return the path of the .flac file for ``utt_id``."""
        # os.path.join works whether or not base_dir ends with a separator;
        # plain string concatenation silently built a wrong path without one.
        return os.path.join(self.base_dir, 'flac', utt_id + '.flac')

    def __getitem__(self, index):
        """Return ``(waveform_tensor, label)`` for the utterance at ``index``."""
        utt_id = self.list_IDs[index]
        X, fs = librosa.load(self._utt_path(utt_id), sr=16000)
        Y = process_Rawboost_feature(X, fs, self.args, self.algo)
        X_pad = pad(Y, self.cut)
        x_inp = Tensor(X_pad)
        target = self.labels[utt_id]
        return x_inp, target
|
||
|
||
class Dataset_ASVspoof2021_eval(Dataset):
    """Unlabelled dataset that loads and length-normalises audio."""

    def __init__(self, list_IDs, base_dir):
        """Store the dataset configuration.

        Args:
            list_IDs: List of strings (each string: utt key).
            base_dir: Directory that contains the ``flac`` sub-directory.
        """
        self.list_IDs = list_IDs
        self.base_dir = base_dir
        self.cut = 64600  # take ~4 sec audio (64600 samples)

    def __len__(self):
        """Return the number of utterances."""
        return len(self.list_IDs)

    def _utt_path(self, utt_id):
        """Return the path of the .flac file for ``utt_id``."""
        # os.path.join works whether or not base_dir ends with a separator;
        # plain string concatenation silently built a wrong path without one.
        return os.path.join(self.base_dir, 'flac', utt_id + '.flac')

    def __getitem__(self, index):
        """Return ``(waveform_tensor, utt_id)`` for the utterance at ``index``."""
        utt_id = self.list_IDs[index]
        X, fs = librosa.load(self._utt_path(utt_id), sr=16000)
        X_pad = pad(X, self.cut)
        x_inp = Tensor(X_pad)
        return x_inp, utt_id
|
||
|
||
|
||
|
||
#--------------RawBoost data augmentation algorithms---------------------------##

def process_Rawboost_feature(feature, sr, args, algo):
    """Apply the RawBoost augmentation selected by ``algo`` to ``feature``.

    Selector values:
        1: convolutive noise
        2: impulsive noise
        3: coloured additive noise
        4: 1, 2 and 3 in series
        5: 1 and 2 in series
        6: 1 and 3 in series
        7: 2 and 3 in series
        8: 1 and 2 in parallel (outputs summed, then normalised)
        anything else: no processing

    Args:
        feature: Waveform to augment.
        sr: Sampling rate, forwarded as ``fs`` to the filters.
        args: Object exposing the RawBoost parameters as attributes.
        algo: Selector, see above.

    Returns:
        The augmented waveform, or ``feature`` unchanged for other selectors.
    """
    def convolutive(wav):
        # 1st algo: linear and non-linear convolutive noise
        return LnL_convolutive_noise(wav, args.N_f, args.nBands, args.minF, args.maxF,
                                     args.minBW, args.maxBW, args.minCoeff, args.maxCoeff,
                                     args.minG, args.maxG, args.minBiasLinNonLin,
                                     args.maxBiasLinNonLin, sr)

    def impulsive(wav):
        # 2nd algo: impulsive signal-dependent noise
        return ISD_additive_noise(wav, args.P, args.g_sd)

    def coloured(wav):
        # 3rd algo: stationary signal-independent (coloured) additive noise
        return SSI_additive_noise(wav, args.SNRmin, args.SNRmax, args.nBands, args.minF,
                                  args.maxF, args.minBW, args.maxBW, args.minCoeff,
                                  args.maxCoeff, args.minG, args.maxG, sr)

    # Stages applied one after another for each serial selector.
    serial_stages = {
        1: (convolutive,),
        2: (impulsive,),
        3: (coloured,),
        4: (convolutive, impulsive, coloured),
        5: (convolutive, impulsive),
        6: (convolutive, coloured),
        7: (impulsive, coloured),
    }

    if algo == 8:
        # Parallel mode: both branches see the same input.
        branch_conv = convolutive(feature)
        branch_imp = impulsive(feature)
        return normWav(branch_conv + branch_imp, 0)  # normalized resultant waveform

    # Unknown selectors map to an empty pipeline: original data, unprocessed.
    for stage in serial_stages.get(algo, ()):
        feature = stage(feature)
    return feature
Oops, something went wrong.