Merge branch 'feature-cd-loss' into main

nguyenvulong · Dec 7, 2023 · a1870cf · a1870cf
2 parents 8bfbff1 + 7f0ff30
commit a1870cf
Show file tree

Hide file tree

Showing 13 changed files with 2,054 additions and 585 deletions.
diff --git a/RawBoost.py b/RawBoost.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+from scipy import signal
+import copy
+
+'''
+   Hemlata Tak, Madhu Kamble, Jose Patino, Massimiliano Todisco, Nicholas Evans.
+   RawBoost: A Raw Data Boosting and Augmentation Method applied to Automatic Speaker Verification Anti-Spoofing.
+   In Proc. ICASSP 2022, pp:6382--6386.
+'''
+
+def randRange(x1, x2, integer):
+    '''Draw one sample uniformly from the half-open interval [x1, x2).
+
+    Parameters
+    ----------
+    x1, x2 : float
+        Lower (inclusive) and upper (exclusive) bounds handed to
+        ``np.random.uniform``.
+    integer : bool or int
+        When truthy the sample is truncated towards zero and returned as a
+        plain Python ``int``; otherwise the 1-element array is returned as is.
+
+    Returns
+    -------
+    int or numpy.ndarray of shape (1,)
+    '''
+    y = np.random.uniform(low=x1, high=x2, size=(1,))
+    if integer:
+        # Index the element explicitly: calling int() on an array with
+        # ndim > 0 is deprecated since NumPy 1.25 and slated to become an error.
+        y = int(y[0])
+    return y
+
+def normWav(x, always):
+    '''Scale a waveform by its peak absolute value.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Input samples.
+    always : bool or int
+        If truthy, always divide by the peak so the result peaks at 1.
+        Otherwise divide only when the peak exceeds 1.
+
+    Returns
+    -------
+    numpy.ndarray
+        The scaled samples, or ``x`` itself when no scaling is applied.
+    '''
+    peak = np.amax(np.abs(x))
+    # A silent (all-zero) signal has nothing to scale; dividing by a zero
+    # peak would fill the output with NaNs, so it is returned unchanged.
+    if peak > 0 and (always or peak > 1):
+        x = x / peak
+    return x
+
+
+def genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs):
+    '''Build the taps of a random multi-band FIR filter.
+
+    For each of ``nBands`` bands a centre frequency, a bandwidth and a tap
+    count are drawn with ``randRange``; a two-cutoff ``signal.firwin`` filter
+    (Hamming window, default ``pass_zero``) is designed for the band and
+    convolved into the running filter. The result is scaled so that its peak
+    magnitude response equals a randomly drawn gain of ``G`` dB.
+
+    Parameters
+    ----------
+    nBands : int
+        Number of bands (firwin filters) cascaded together.
+    minF, maxF : float
+        Range of the band centre frequency, in the same units as ``fs``.
+    minBW, maxBW : float
+        Range of the band width, in the same units as ``fs``.
+    minCoeff, maxCoeff : int
+        Range of the number of taps of each per-band filter.
+    minG, maxG : float
+        Range of the overall gain in dB.
+    fs : float
+        Sampling frequency passed to ``firwin`` and ``freqz``.
+
+    Returns
+    -------
+    numpy.ndarray
+        1-D array of FIR coefficients.
+    '''
+    def _draw(low, high):
+        # randRange returns a 1-element array for non-integer draws; take the
+        # element explicitly because float() on an ndim > 0 array is
+        # deprecated since NumPy 1.25.
+        return float(np.ravel(randRange(low, high, 0))[0])
+
+    nyquist = fs / 2
+    b = 1
+    for _ in range(nBands):
+        fc = _draw(minF, maxF)
+        bw = _draw(minBW, maxBW)
+        c = randRange(minCoeff, maxCoeff, 1)
+
+        # The tap count is forced to be odd before it is passed to firwin.
+        if c % 2 == 0:
+            c = c + 1
+        # Clamp the band edges strictly inside (0, fs/2).
+        f1 = fc - bw / 2
+        f2 = fc + bw / 2
+        if f1 <= 0:
+            f1 = 1 / 1000
+        if f2 >= nyquist:
+            f2 = nyquist - 1 / 1000
+        b = np.convolve(signal.firwin(c, [f1, f2], window='hamming', fs=fs), b)
+
+    G = _draw(minG, maxG)
+    _, h = signal.freqz(b, 1, fs=fs)
+    # Normalise the peak magnitude response to 1, then apply the gain in dB.
+    # atleast_1d keeps the result 1-D even when nBands == 0.
+    b = np.atleast_1d(pow(10, G / 20) * b / np.amax(abs(h)))
+    return b
+
+
+def filterFIR(x, b):
+    '''Apply the FIR taps ``b`` to ``x`` and return a trimmed result.
+
+    ``x`` is zero-padded at its end by ``len(b) + 1`` samples, filtered with
+    ``signal.lfilter``, and roughly half of that padding length is then
+    dropped from each end, so a 1-D input yields an output of the same length.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Input samples.
+    b : numpy.ndarray
+        FIR filter coefficients.
+
+    Returns
+    -------
+    numpy.ndarray
+        The filtered, trimmed signal.
+    '''
+    margin = b.shape[0] + 1
+    padded = np.pad(x, (0, margin), 'constant')
+    filtered = signal.lfilter(b, 1, padded)
+    start = int(margin / 2)
+    stop = int(filtered.shape[0] - margin / 2)
+    return filtered[start:stop]
+
+# Linear and non-linear convolutive noise
+def LnL_convolutive_noise(x,N_f,nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,minBiasLinNonLin,maxBiasLinNonLin,fs):
+    '''Sum randomly filtered powers of ``x`` (linear + non-linear terms).
+
+    For each order 1..``N_f`` the signal raised to that power is filtered
+    with a fresh random FIR filter from ``genNotchCoeffs`` and accumulated.
+    When the second term is reached, the gain range is lowered once by
+    ``minBiasLinNonLin`` / ``maxBiasLinNonLin`` and stays lowered for all
+    later terms. The sum has its mean removed and is passed through
+    ``normWav(..., 0)``.
+
+    Returns
+    -------
+    numpy.ndarray
+        The processed waveform.
+    '''
+    total = [0] * x.shape[0]
+    for order in range(1, N_f + 1):
+        if order == 2:
+            # Shift the gain range for the non-linear terms.
+            minG = minG - minBiasLinNonLin
+            maxG = maxG - maxBiasLinNonLin
+        taps = genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs)
+        total = total + filterFIR(np.power(x, order), taps)
+    centred = total - np.mean(total)
+    return normWav(centred, 0)
+
+
+# Impulsive signal dependent noise
+def ISD_additive_noise(x, P, g_sd):
+    '''Add signal-dependent noise to a random subset of samples.
+
+    A percentage ``beta`` is drawn uniformly from [0, P); that share of the
+    sample positions is picked at random, and each picked sample ``x[p]`` has
+    ``g_sd * x[p] * f_r`` added to it, where ``f_r`` is the product of two
+    independent uniform(-1, 1) draws. The result goes through
+    ``normWav(..., 0)``.
+
+    Parameters
+    ----------
+    x : numpy.ndarray
+        Input waveform; it is not modified.
+    P : float
+        Upper bound, in percent, on the share of samples that are altered.
+    g_sd : float
+        Gain applied to the added noise.
+
+    Returns
+    -------
+    numpy.ndarray
+        The noisy waveform.
+    '''
+    # randRange returns a 1-element array here; take the element explicitly
+    # because int() on an ndim > 0 array is deprecated since NumPy 1.25.
+    beta = float(np.ravel(randRange(0, P, 0))[0])
+
+    y = x.copy()
+    x_len = x.shape[0]
+    n = int(x_len * (beta / 100))
+    p = np.random.permutation(x_len)[:n]
+    f_r = (2 * np.random.rand(p.shape[0]) - 1) * (2 * np.random.rand(p.shape[0]) - 1)
+    r = g_sd * x[p] * f_r
+    y[p] = x[p] + r
+    return normWav(y, 0)
+
+
+# Stationary signal independent noise
+
+def SSI_additive_noise(x,SNRmin,SNRmax,nBands,minF,maxF,minBW,maxBW,minCoeff,maxCoeff,minG,maxG,fs):
+    '''Add filtered Gaussian noise to ``x`` at a random SNR.
+
+    White Gaussian noise of the same length as ``x`` is filtered with a
+    random FIR filter from ``genNotchCoeffs``, peak-normalised, and rescaled
+    so that the ratio of the L2 norm of ``x`` to that of the noise equals
+    ``10 ** (SNR / 20)``, with ``SNR`` drawn uniformly from
+    [``SNRmin``, ``SNRmax``).
+
+    Returns
+    -------
+    numpy.ndarray
+        ``x`` plus the scaled noise.
+    '''
+    white = np.random.normal(0, 1, x.shape[0])
+    taps = genNotchCoeffs(nBands, minF, maxF, minBW, maxBW, minCoeff, maxCoeff, minG, maxG, fs)
+    coloured = normWav(filterFIR(white, taps), 1)
+    SNR = randRange(SNRmin, SNRmax, 0)
+    noise_norm = np.linalg.norm(coloured, 2)
+    signal_norm = np.linalg.norm(x, 2)
+    attenuation = 10.0 ** (0.05 * SNR)
+    scaled = coloured / noise_norm * signal_norm / attenuation
+    return x + scaled
+
diff --git a/data_utils_SSL.py b/data_utils_SSL.py
@@ -0,0 +1,173 @@
+import os
+import numpy as np
+import torch
+import torch.nn as nn
+from torch import Tensor
+import librosa
+from torch.utils.data import Dataset
+from RawBoost import ISD_additive_noise,LnL_convolutive_noise,SSI_additive_noise,normWav
+from random import randrange
+import random
+
+
+# Module metadata.
+__author__ = "Hemlata Tak"
+# NOTE(review): the address below looks like a redaction placeholder left by
+# a web scrape; confirm and restore the real contact address.
+__email__ = "[email protected]"
+
+
+def genSpoof_list(dir_meta, is_train=False, is_eval=False):
+    '''Read a protocol/metadata file and return the utterance keys.
+
+    Parameters
+    ----------
+    dir_meta : str
+        Path of the metadata text file.
+    is_train : bool
+        Labelled mode; takes precedence over ``is_eval`` when both are set.
+    is_eval : bool
+        Unlabelled mode: every non-blank line is taken verbatim (stripped)
+        as an utterance key.
+
+    Returns
+    -------
+    list of str
+        In eval mode, the list of keys.
+    (dict, list of str)
+        Otherwise, ``(d_meta, file_list)`` where each line holds five
+        whitespace-separated fields, the second being the key and the fifth
+        the label; ``d_meta[key]`` is 1 for ``'bonafide'`` and 0 for anything
+        else.
+
+    Raises
+    ------
+    ValueError
+        In labelled mode, if a non-blank line does not have exactly five
+        fields.
+    '''
+    with open(dir_meta, 'r') as f:
+        # Skip blank lines (e.g. a trailing newline at end of file): they
+        # would otherwise break the 5-field unpack or yield an empty key.
+        rows = [line.strip() for line in f if line.strip()]
+
+    if is_eval and not is_train:
+        return rows
+
+    # The train and default modes parse the file identically.
+    d_meta = {}
+    file_list = []
+    for row in rows:
+        _, key, _, _, label = row.split()
+        file_list.append(key)
+        d_meta[key] = 1 if label == 'bonafide' else 0
+    return d_meta, file_list
+
+
+
+def pad(x, max_len=64600):
+    '''Return ``x`` cut or repeated to exactly ``max_len`` samples.
+
+    Inputs that are long enough are truncated to their first ``max_len``
+    samples; shorter inputs are tiled end to end and then truncated.
+    '''
+    n_samples = x.shape[0]
+    if n_samples < max_len:
+        # Repeat often enough to cover max_len, then keep the leading part.
+        reps = int(max_len / n_samples) + 1
+        tiled = np.tile(x, (1, reps))
+        return tiled[0, :max_len]
+    return x[:max_len]
+
+
+class Dataset_ASVspoof2019_train(Dataset):
+    '''Training dataset yielding (RawBoost-processed waveform, label) pairs.'''
+
+    def __init__(self, args, list_IDs, labels, base_dir, algo):
+        '''
+        args     : object whose attributes hold the RawBoost parameters read
+                   by process_Rawboost_feature
+        list_IDs : list of strings (each string: utt key)
+        labels   : dictionary (key: utt key, value: label integer)
+        base_dir : directory containing the 'flac' sub-directory
+        algo     : RawBoost algorithm selector passed to
+                   process_Rawboost_feature
+        '''
+        self.list_IDs = list_IDs
+        self.labels = labels
+        self.base_dir = base_dir
+        self.algo = algo
+        self.args = args
+        self.cut = 64600  # take ~4 sec audio (64600 samples)
+
+    def __len__(self):
+        return len(self.list_IDs)
+
+    def _flac_path(self, utt_id):
+        # Build <base_dir>/flac/<utt_id>.flac with os.path.join so base_dir
+        # works with or without a trailing path separator.
+        return os.path.join(self.base_dir, 'flac', utt_id + '.flac')
+
+    def __getitem__(self, index):
+        '''Load one utterance at 16 kHz, augment it, and fix its length.
+
+        Returns a tuple (Tensor of self.cut samples, integer label).
+        '''
+        utt_id = self.list_IDs[index]
+        X, fs = librosa.load(self._flac_path(utt_id), sr=16000)
+        Y = process_Rawboost_feature(X, fs, self.args, self.algo)
+        X_pad = pad(Y, self.cut)
+        x_inp = Tensor(X_pad)
+        target = self.labels[utt_id]
+
+        return x_inp, target
+
+
+class Dataset_ASVspoof2021_eval(Dataset):
+    '''Evaluation dataset yielding (waveform, utterance key) pairs.'''
+
+    def __init__(self, list_IDs, base_dir):
+        '''
+        list_IDs : list of strings (each string: utt key)
+        base_dir : directory containing the 'flac' sub-directory
+        '''
+        self.list_IDs = list_IDs
+        self.base_dir = base_dir
+        self.cut = 64600  # take ~4 sec audio (64600 samples)
+
+    def __len__(self):
+        return len(self.list_IDs)
+
+    def _flac_path(self, utt_id):
+        # Build <base_dir>/flac/<utt_id>.flac with os.path.join so base_dir
+        # works with or without a trailing path separator.
+        return os.path.join(self.base_dir, 'flac', utt_id + '.flac')
+
+    def __getitem__(self, index):
+        '''Load one utterance at 16 kHz and fix its length.
+
+        Returns a tuple (Tensor of self.cut samples, utterance key).
+        '''
+        utt_id = self.list_IDs[index]
+        X, fs = librosa.load(self._flac_path(utt_id), sr=16000)
+        X_pad = pad(X, self.cut)
+        x_inp = Tensor(X_pad)
+        return x_inp, utt_id
+
+
+
+
+#--------------RawBoost data augmentation algorithms---------------------------##
+
+def process_Rawboost_feature(feature, sr,args,algo):
+    '''Apply the RawBoost augmentation selected by ``algo`` to ``feature``.
+
+    algo values:
+      1 : convolutive noise (LnL)
+      2 : impulsive noise (ISD)
+      3 : coloured additive noise (SSI)
+      4 : 1 + 2 + 3 in series
+      5 : 1 + 2 in series
+      6 : 1 + 3 in series
+      7 : 2 + 3 in series
+      8 : 1 and 2 in parallel (outputs summed, then normWav(..., 0))
+      anything else : ``feature`` is returned untouched
+
+    ``args`` supplies the RawBoost parameters as attributes and ``sr`` is
+    passed on as the sampling frequency.
+    '''
+    def convolutive(wav):
+        return LnL_convolutive_noise(
+            wav, args.N_f, args.nBands, args.minF, args.maxF,
+            args.minBW, args.maxBW, args.minCoeff, args.maxCoeff,
+            args.minG, args.maxG,
+            args.minBiasLinNonLin, args.maxBiasLinNonLin, sr)
+
+    def impulsive(wav):
+        return ISD_additive_noise(wav, args.P, args.g_sd)
+
+    def coloured(wav):
+        return SSI_additive_noise(
+            wav, args.SNRmin, args.SNRmax, args.nBands, args.minF, args.maxF,
+            args.minBW, args.maxBW, args.minCoeff, args.maxCoeff,
+            args.minG, args.maxG, sr)
+
+    # Parallel mode: both branches see the original input.
+    if algo == 8:
+        summed = convolutive(feature) + impulsive(feature)
+        return normWav(summed, 0)  # normalized resultant waveform
+
+    # Serial modes always run in the order 1 -> 2 -> 3; each stage is
+    # enabled only for the algo values that include it.
+    if algo in (1, 4, 5, 6):
+        feature = convolutive(feature)
+    if algo in (2, 4, 5, 7):
+        feature = impulsive(feature)
+    if algo in (3, 4, 6, 7):
+        feature = coloured(feature)
+
+    return feature