Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sklearn to scikit-learn in setup.py #17

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions build/lib/timbral_models/Timbral_Booming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from __future__ import division
import numpy as np
import soundfile as sf
from . import timbral_util


def boominess_calculate(loudspec):
    """
    Compute the Booming Index of a specific-loudness spectrum.

    Implementation of Hatano, S., and Hashimoto, T. "Booming index as a measure for
    evaluating booming sensation", The 29th International Congress and Exhibition on
    Noise Control Engineering, 2000.

    :param loudspec: specific loudness per critical band, sampled from 0.1 to 24 Bark
                     in 0.1 Bark steps (as produced by the loudness_1991 code).
    :return: float, the booming index.
    """
    # Bark positions matching the loudness_1991 output: 0.1 to 24 Bark in 0.1 steps
    bark_scale = np.arange(0.1, 24.05, 0.1)
    # equivalent frequency of each Bark position
    band_freqs = 600 * np.sinh(bark_scale / 6.0)

    # centre frequencies of the third-octave bands the weighting curve is defined over
    third_octave_centres = [25, 31.5, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 630, 800, 1000,
                            1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300, 8000, 10000, 12500]

    # map each Bark-derived frequency onto the (log-spaced) third-octave index scale
    log_centres = np.log10(third_octave_centres)
    log_step = log_centres[1] - log_centres[0]  # step between adjacent bands on the log axis
    band_index = ((np.log10(band_freqs) - log_centres[0]) / float(log_step)) + 1

    # weighting curve evaluated at the estimated third-octave indexes
    weighting = 2.13 * np.exp(-0.151 * band_index)

    # override the low-frequency end so the curve rolls off (first value is estimated)
    weighting[:4] = [0.8, 1.05, 1.10, 1.18]

    # first index at or above 280 Hz: everything before it counts as low frequency
    lf_limit = np.where(band_freqs >= 280)[0][0]

    weighted_loudness = loudspec * weighting
    total_loudness = np.sum(loudspec)
    low_freq_loudness = np.sum(loudspec[:lf_limit])

    # combine the weighted bands and scale by the low-frequency proportion of loudness
    band_sum = timbral_util.log_sum(weighted_loudness)
    return band_sum * (low_freq_loudness / total_loudness)


def timbral_booming(fname, fs=0, dev_output=False, phase_correction=False, clip_output=False):
    """
    This is an implementation of the Hashimoto booming index feature.
    There are a few fudge factors with the code to convert between the internal representation of the sound using
    the same loudness calculation as the sharpness code.  The equation for calculating the booming index is not
    specifically quoted anywhere so I've done the best I can with the code that was presented.

    Shin, SH, Ih, JG, Hashimoto, T., and Hatano, S.: "Sound quality evaluation of the booming sensation for
    passenger cars", Applied Acoustics, Vol. 70, 2009.

    Hatano, S., and Hashimoto, T. "Booming index as a measure for evaluating booming sensation",
    The 29th International Congress and Exhibition on Noise Control Engineering, 2000.

    This function calculates the apparent Boominess of an audio file.

    This version of timbral_booming contains self loudness normalising methods and can accept arrays as an input
    instead of a string filename.

    Version 0.4

    Required parameter
    :param fname: string or numpy array
                  string, audio filename to be analysed, including full file path and extension.
                  numpy array, array of audio samples, requires fs to be set to the sample rate.

    Optional parameters
    :param fs: int/float, when fname is a numpy array, this is required to be the sample rate.
               Defaults to 0.
    :param dev_output: bool, when False return the boominess, when True return all extracted features.
                       Defaults to False.
    :param phase_correction: bool, if the inter-channel phase should be estimated when performing a mono sum.
                             Defaults to False.
    :param clip_output: bool, force the output to be between 0 and 100.
                        Defaults to False.

    :return: float, apparent boominess of the audio file.

    Copyright 2018 Andy Pearce, Institute of Sound Recording, University of Surrey, UK.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    """
    '''
    Read input
    '''
    audio_samples, fs = timbral_util.file_read(fname, fs, phase_correction=phase_correction)


    # window the audio file into 4096 sample sections
    windowed_audio = timbral_util.window_audio(audio_samples, window_length=4096)

    windowed_booming = []  # booming index per time window
    windowed_rms = []  # RMS level per time window, used as weights for the average later
    for i in range(windowed_audio.shape[0]):
        samples = windowed_audio[i, :]  # the current time window
        # get the rms value and append to list
        windowed_rms.append(np.sqrt(np.mean(samples * samples)))

        # calculate the specific loudness
        N_entire, N_single = timbral_util.specific_loudness(samples, Pref=100.0, fs=fs, Mod=0)

        # only calculate the booming index when the window contains any loudness
        if N_entire > 0:
            BoomingIndex = boominess_calculate(N_single)
        else:
            BoomingIndex = 0

        windowed_booming.append(BoomingIndex)

    # get level of low frequencies
    # presumably the 0-70 Hz band given the arguments — confirm against timbral_util.weighted_bark_level
    ll, w_ll = timbral_util.weighted_bark_level(audio_samples, fs, 0, 70)

    # NOTE(review): ll == 0 would give -inf here; presumably never occurs for real audio — confirm
    ll = np.log10(ll)
    # convert to numpy arrays for fancy indexing
    windowed_booming = np.array(windowed_booming)
    windowed_rms = np.array(windowed_rms)

    # get the weighted average: each window's booming index weighted by its energy (RMS squared)
    rms_boom = np.average(windowed_booming, weights=(windowed_rms * windowed_rms))
    rms_boom = np.log10(rms_boom)

    if dev_output:
        return [rms_boom, ll]
    else:

        # perform the linear regression
        all_metrics = np.ones(3)
        all_metrics[0] = rms_boom
        all_metrics[1] = ll

        # pre-trained regression coefficients; the last one multiplies the constant 1
        # left in all_metrics[2] and therefore acts as the intercept
        coefficients = np.array([43.67402696195865, -10.90054738389845, 26.836530575185435])

        boominess = np.sum(all_metrics * coefficients)

        if clip_output:
            boominess = timbral_util.output_clip(boominess)

        return boominess
186 changes: 186 additions & 0 deletions build/lib/timbral_models/Timbral_Brightness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from __future__ import division
import numpy as np
import soundfile as sf
from . import timbral_util
from scipy.signal import spectrogram


def timbral_brightness(fname, fs=0, dev_output=False, clip_output=False, phase_correction=False, threshold=0,
                       ratio_crossover=2000, centroid_crossover=100, stepSize=1024, blockSize=2048, minFreq=20):
    """
    This function calculates the apparent Brightness of an audio file.
    This version of timbral_brightness contains self loudness normalising methods and can accept arrays as an input
    instead of a string filename.

    Brightness is modelled from two features combined in a linear regression: the ratio of energy above
    ratio_crossover to the total energy, and the spectral centroid of the signal highpassed at centroid_crossover.

    Version 0.4

    Required parameter
    :param fname: string or numpy array
                  string, audio filename to be analysed, including full file path and extension.
                  numpy array, array of audio samples, requires fs to be set to the sample rate.

    Optional parameters
    :param fs: int/float, when fname is a numpy array, this is required to be the sample rate.
               Defaults to 0.
    :param dev_output: bool, when False return the brightness, when True return all extracted features.
    :param clip_output: bool, force the output to be between 0 and 100.
    :param phase_correction: bool, perform phase checking before summing to mono.
    :param threshold: threshold (dB relative to the maximum window power) below which to ignore the energy in a
                      time window, default to 0 (no gating).
    :param ratio_crossover: crossover frequency for calculating the HF energy ratio, default to 2000 Hz.
    :param centroid_crossover: highpass frequency for calculating the spectral centroid, default to 100 Hz.
    :param stepSize: step size for calculating spectrogram, default to 1024.
                     NOTE(review): currently unused — the hop is derived from blockSize below; confirm intent.
    :param blockSize: block size (fft length) for calculating spectrogram, default to 2048.
    :param minFreq: frequency for high-pass filtering audio prior to all analysis, default to 20 Hz.

    :return: Apparent brightness of audio file, float.

    Copyright 2018 Andy Pearce, Institute of Sound Recording, University of Surrey, UK.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    """
    '''
    Read input
    '''
    audio_samples, fs = timbral_util.file_read(fname, fs, phase_correction=phase_correction)

    '''
    Filter audio
    '''
    # highpass audio at minimum frequency
    # each filter is applied three times, presumably to steepen the effective roll-off — confirm
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)

    # get highpass audio at ratio crossover
    ratio_highpass_audio = timbral_util.filter_audio_highpass(audio_samples, ratio_crossover, fs)
    ratio_highpass_audio = timbral_util.filter_audio_highpass(ratio_highpass_audio, ratio_crossover, fs)
    ratio_highpass_audio = timbral_util.filter_audio_highpass(ratio_highpass_audio, ratio_crossover, fs)

    # get highpass audio at centroid crossover
    centroid_highpass_audio = timbral_util.filter_audio_highpass(audio_samples, centroid_crossover, fs)
    centroid_highpass_audio = timbral_util.filter_audio_highpass(centroid_highpass_audio, centroid_crossover, fs)
    centroid_highpass_audio = timbral_util.filter_audio_highpass(centroid_highpass_audio, centroid_crossover, fs)

    '''
    Get spectrograms
    '''
    # normalise audio to the maximum value in the unfiltered audio
    # NOTE(review): an all-zero (silent) input would divide by zero here — confirm upstream guards
    ratio_highpass_audio *= (1.0 / max(abs(audio_samples)))
    centroid_highpass_audio *= (1.0 / max(abs(audio_samples)))
    audio_samples *= (1.0 / max(abs(audio_samples)))


    # set FFT parameters
    nfft = blockSize
    # NOTE(review): despite the name, this is passed to scipy's noverlap argument below,
    # so the true hop is nfft/4 with 75% overlap — confirm against scipy.signal.spectrogram
    hop_size = int(3 * nfft / 4)

    # check that audio is long enough to generate spectrograms
    if len(audio_samples) >= nfft:
        # get spectrogram
        # positional args: (x, fs, window, nperseg, noverlap, nfft, detrend, return_onesided, scaling)
        ratio_all_freq, ratio_all_time, ratio_all_spec = spectrogram(audio_samples, fs, 'hamming', nfft,
                                                                     hop_size, nfft, 'constant', True, 'spectrum')
        ratio_hp_freq, ratio_hp_time, ratio_hp_spec = spectrogram(ratio_highpass_audio, fs, 'hamming', nfft,
                                                                  hop_size, nfft, 'constant', True, 'spectrum')
        centroid_hp_freq, centroid_hp_time, centroid_hp_spec = spectrogram(centroid_highpass_audio, fs, 'hamming', nfft,
                                                                           hop_size, nfft, 'constant', True, 'spectrum')
    else:
        # audio shorter than one block: use the whole signal as a single analysis segment
        ratio_all_freq, ratio_all_time, ratio_all_spec = spectrogram(audio_samples, fs, 'hamming',
                                                                     len(audio_samples),
                                                                     len(audio_samples)-1,
                                                                     nfft, 'constant', True, 'spectrum')
        ratio_hp_freq, ratio_hp_time, ratio_hp_spec = spectrogram(ratio_highpass_audio, fs, 'hamming',
                                                                  len(ratio_highpass_audio),
                                                                  len(ratio_highpass_audio)-1,
                                                                  nfft, 'constant', True, 'spectrum')
        centroid_hp_freq, centroid_hp_time, centroid_hp_spec = spectrogram(centroid_highpass_audio, fs, 'hamming',
                                                                           len(centroid_highpass_audio),
                                                                           len(centroid_highpass_audio)-1,
                                                                           nfft, 'constant', True, 'spectrum')

    # initialise variables for storing data
    all_ratio = []  # HF energy ratio per window
    all_hp_centroid = []  # spectral centroid of the highpassed signal per window
    all_tpower = []  # total power per window (weights for the ratio average)
    all_hp_centroid_tpower = []  # highpassed power per window (weights for the centroid average)

    # set the energy gate: zero means no gating, otherwise relative to the loudest window
    threshold_db = threshold
    if threshold_db == 0:
        threshold = 0
        hp_threshold = 0
    else:
        max_power = max(np.sum(ratio_all_spec, axis=1))
        threshold = max_power * timbral_util.db2mag(threshold_db)

    '''
      Calculate features for each time window
    '''
    for idx in range(len(ratio_hp_time)):
        # get the current spectrum for this time window
        current_ratio_hp_spec = ratio_hp_spec[:, idx]
        current_ratio_all_spec = ratio_all_spec[:, idx]
        current_centroid_hp_spec = centroid_hp_spec[:, idx]

        # get the power within each spectrum
        tpower = np.sum(current_ratio_all_spec)
        hp_tpower = np.sum(current_ratio_hp_spec)
        # check there is energy in the time window before calculating the ratio (greater than 0)
        if tpower > threshold:
            # get the ratio
            all_ratio.append(hp_tpower / tpower)
            # store the power for weighting
            all_tpower.append(tpower)

        # get the tpower to assure greater than zero
        hp_centroid_tpower = np.sum(current_centroid_hp_spec)
        if hp_centroid_tpower > 0.0:
            # get the centroid (power-weighted mean frequency)
            all_hp_centroid.append(np.sum(current_centroid_hp_spec * centroid_hp_freq[:len(current_centroid_hp_spec)]) /
                                   np.sum(current_centroid_hp_spec))
            # store the tpower for weighting
            all_hp_centroid_tpower.append(hp_centroid_tpower)

    '''
      Get mean and weighted average values
    '''
    # NOTE(review): the unweighted means are computed but not used in the output below — confirm intent
    mean_ratio = np.mean(all_ratio)
    mean_hp_centroid = np.mean(all_hp_centroid)

    weighted_mean_ratio = np.average(all_ratio, weights=all_tpower)
    weighted_mean_hp_centroid = np.average(all_hp_centroid, weights=all_hp_centroid_tpower)

    if dev_output:
        # return the ratio and centroid
        return np.log10(weighted_mean_ratio), np.log10(weighted_mean_hp_centroid)
    else:
        # perform the linear regression
        all_metrics = np.ones(3)
        all_metrics[0] = np.log10(weighted_mean_ratio)
        all_metrics[1] = np.log10(weighted_mean_hp_centroid)

        # pre-trained regression coefficients; the last one multiplies the constant 1
        # left in all_metrics[2] and therefore acts as the intercept
        coefficients = np.array([4.613128018020465, 17.378889309312974, 17.434733750553022])

        bright = np.sum(all_metrics * coefficients)

        if clip_output:
            bright = timbral_util.output_clip(bright)

        return bright
Loading