Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update sklearn to scikit-learn in setup.py #17

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions build/lib/timbral_models/Timbral_Booming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
from __future__ import division
import numpy as np
import soundfile as sf
from . import timbral_util


def boominess_calculate(loudspec):
    """
    Compute the Booming Index of a specific-loudness spectrum.

    Implementation of Hatano, S., and Hashimoto, T. "Booming index as a measure for
    evaluating booming sensation", The 29th International Congress and Exhibition on
    Noise Control Engineering, 2000.

    :param loudspec: specific loudness per critical band, sampled from 0.1 to 24 Bark
                     in 0.1 Bark steps (as produced by the loudness_1991 code).
    :return: float, the booming index.
    """
    # Bark positions matching the loudness_1991 output: 0.1 to 24 Bark in 0.1 steps
    bark_scale = np.arange(0.1, 24.05, 0.1)
    # equivalent frequency of each Bark position
    band_freqs = 600 * np.sinh(bark_scale / 6.0)

    # centre frequencies of the third-octave bands the weighting curve is defined over
    third_octave_centres = [25, 31.5, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400, 500, 630, 800, 1000,
                            1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300, 8000, 10000, 12500]

    # map each Bark-derived frequency onto the (log-spaced) third-octave index scale
    log_centres = np.log10(third_octave_centres)
    log_step = log_centres[1] - log_centres[0]  # step between adjacent bands on the log axis
    band_index = ((np.log10(band_freqs) - log_centres[0]) / float(log_step)) + 1

    # weighting curve evaluated at the estimated third-octave indexes
    weighting = 2.13 * np.exp(-0.151 * band_index)

    # override the low-frequency end so the curve rolls off (first value is estimated)
    weighting[:4] = [0.8, 1.05, 1.10, 1.18]

    # first index at or above 280 Hz: everything before it counts as low frequency
    lf_limit = np.where(band_freqs >= 280)[0][0]

    weighted_loudness = loudspec * weighting
    total_loudness = np.sum(loudspec)
    low_freq_loudness = np.sum(loudspec[:lf_limit])

    # combine the weighted bands and scale by the low-frequency proportion of loudness
    band_sum = timbral_util.log_sum(weighted_loudness)
    return band_sum * (low_freq_loudness / total_loudness)


def timbral_booming(fname, fs=0, dev_output=False, phase_correction=False, clip_output=False):
    """
    This is an implementation of the Hashimoto booming index feature.
    There are a few fudge factors with the code to convert between the internal representation of the sound using
    the same loudness calculation as the sharpness code.  The equation for calculating the booming index is not
    specifically quoted anywhere so I've done the best I can with the code that was presented.

    Shin, SH, Ih, JG, Hashimoto, T., and Hatano, S.: "Sound quality evaluation of the booming sensation for
    passenger cars", Applied Acoustics, Vol. 70, 2009.

    Hatano, S., and Hashimoto, T. "Booming index as a measure for evaluating booming sensation",
    The 29th International Congress and Exhibition on Noise Control Engineering, 2000.

    This function calculates the apparent Boominess of an audio file.

    This version of timbral_booming contains self loudness normalising methods and can accept arrays as an input
    instead of a string filename.

    Version 0.4

    Required parameter
    :param fname: string or numpy array
                  string, audio filename to be analysed, including full file path and extension.
                  numpy array, array of audio samples, requires fs to be set to the sample rate.

    Optional parameters
    :param fs: int/float, when fname is a numpy array, this is required to be the sample rate.
               Defaults to 0.
    :param dev_output: bool, when False return the boominess, when True return all extracted features.
                       Defaults to False.
    :param phase_correction: bool, if the inter-channel phase should be estimated when performing a mono sum.
                             Defaults to False.
    :param clip_output: bool, force the output to be between 0 and 100.
                        Defaults to False.

    :return: float, apparent boominess of the audio file.

    Copyright 2018 Andy Pearce, Institute of Sound Recording, University of Surrey, UK.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    """
    '''
    Read input
    '''
    audio_samples, fs = timbral_util.file_read(fname, fs, phase_correction=phase_correction)


    # window the audio file into 4096 sample sections
    windowed_audio = timbral_util.window_audio(audio_samples, window_length=4096)

    windowed_booming = []  # booming index per time window
    windowed_rms = []  # RMS level per time window, used as weights for the average later
    for i in range(windowed_audio.shape[0]):
        samples = windowed_audio[i, :]  # the current time window
        # get the rms value and append to list
        windowed_rms.append(np.sqrt(np.mean(samples * samples)))

        # calculate the specific loudness
        N_entire, N_single = timbral_util.specific_loudness(samples, Pref=100.0, fs=fs, Mod=0)

        # only calculate the booming index when the window contains any loudness
        if N_entire > 0:
            BoomingIndex = boominess_calculate(N_single)
        else:
            BoomingIndex = 0

        windowed_booming.append(BoomingIndex)

    # get level of low frequencies
    # presumably the 0-70 Hz band given the arguments — confirm against timbral_util.weighted_bark_level
    ll, w_ll = timbral_util.weighted_bark_level(audio_samples, fs, 0, 70)

    # NOTE(review): ll == 0 would give -inf here; presumably never occurs for real audio — confirm
    ll = np.log10(ll)
    # convert to numpy arrays for fancy indexing
    windowed_booming = np.array(windowed_booming)
    windowed_rms = np.array(windowed_rms)

    # get the weighted average: each window's booming index weighted by its energy (RMS squared)
    rms_boom = np.average(windowed_booming, weights=(windowed_rms * windowed_rms))
    rms_boom = np.log10(rms_boom)

    if dev_output:
        return [rms_boom, ll]
    else:

        # perform the linear regression
        all_metrics = np.ones(3)
        all_metrics[0] = rms_boom
        all_metrics[1] = ll

        # pre-trained regression coefficients; the last one multiplies the constant 1
        # left in all_metrics[2] and therefore acts as the intercept
        coefficients = np.array([43.67402696195865, -10.90054738389845, 26.836530575185435])

        boominess = np.sum(all_metrics * coefficients)

        if clip_output:
            boominess = timbral_util.output_clip(boominess)

        return boominess
186 changes: 186 additions & 0 deletions build/lib/timbral_models/Timbral_Brightness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
from __future__ import division
import numpy as np
import soundfile as sf
from . import timbral_util
from scipy.signal import spectrogram


def timbral_brightness(fname, fs=0, dev_output=False, clip_output=False, phase_correction=False, threshold=0,
                       ratio_crossover=2000, centroid_crossover=100, stepSize=1024, blockSize=2048, minFreq=20):
    """
    This function calculates the apparent Brightness of an audio file.
    This version of timbral_brightness contains self loudness normalising methods and can accept arrays as an input
    instead of a string filename.

    Brightness is modelled from two features combined in a linear regression: the ratio of energy above
    ratio_crossover to the total energy, and the spectral centroid of the signal highpassed at centroid_crossover.

    Version 0.4

    Required parameter
    :param fname: string or numpy array
                  string, audio filename to be analysed, including full file path and extension.
                  numpy array, array of audio samples, requires fs to be set to the sample rate.

    Optional parameters
    :param fs: int/float, when fname is a numpy array, this is required to be the sample rate.
               Defaults to 0.
    :param dev_output: bool, when False return the brightness, when True return all extracted features.
    :param clip_output: bool, force the output to be between 0 and 100.
    :param phase_correction: bool, perform phase checking before summing to mono.
    :param threshold: threshold (dB relative to the maximum window power) below which to ignore the energy in a
                      time window, default to 0 (no gating).
    :param ratio_crossover: crossover frequency for calculating the HF energy ratio, default to 2000 Hz.
    :param centroid_crossover: highpass frequency for calculating the spectral centroid, default to 100 Hz.
    :param stepSize: step size for calculating spectrogram, default to 1024.
                     NOTE(review): currently unused — the hop is derived from blockSize below; confirm intent.
    :param blockSize: block size (fft length) for calculating spectrogram, default to 2048.
    :param minFreq: frequency for high-pass filtering audio prior to all analysis, default to 20 Hz.

    :return: Apparent brightness of audio file, float.

    Copyright 2018 Andy Pearce, Institute of Sound Recording, University of Surrey, UK.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
    """
    '''
    Read input
    '''
    audio_samples, fs = timbral_util.file_read(fname, fs, phase_correction=phase_correction)

    '''
    Filter audio
    '''
    # highpass audio at minimum frequency
    # each filter is applied three times, presumably to steepen the effective roll-off — confirm
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)
    audio_samples = timbral_util.filter_audio_highpass(audio_samples, crossover=minFreq, fs=fs)

    # get highpass audio at ratio crossover
    ratio_highpass_audio = timbral_util.filter_audio_highpass(audio_samples, ratio_crossover, fs)
    ratio_highpass_audio = timbral_util.filter_audio_highpass(ratio_highpass_audio, ratio_crossover, fs)
    ratio_highpass_audio = timbral_util.filter_audio_highpass(ratio_highpass_audio, ratio_crossover, fs)

    # get highpass audio at centroid crossover
    centroid_highpass_audio = timbral_util.filter_audio_highpass(audio_samples, centroid_crossover, fs)
    centroid_highpass_audio = timbral_util.filter_audio_highpass(centroid_highpass_audio, centroid_crossover, fs)
    centroid_highpass_audio = timbral_util.filter_audio_highpass(centroid_highpass_audio, centroid_crossover, fs)

    '''
    Get spectrograms
    '''
    # normalise audio to the maximum value in the unfiltered audio
    # NOTE(review): an all-zero (silent) input would divide by zero here — confirm upstream guards
    ratio_highpass_audio *= (1.0 / max(abs(audio_samples)))
    centroid_highpass_audio *= (1.0 / max(abs(audio_samples)))
    audio_samples *= (1.0 / max(abs(audio_samples)))


    # set FFT parameters
    nfft = blockSize
    # NOTE(review): despite the name, this is passed to scipy's noverlap argument below,
    # so the true hop is nfft/4 with 75% overlap — confirm against scipy.signal.spectrogram
    hop_size = int(3 * nfft / 4)

    # check that audio is long enough to generate spectrograms
    if len(audio_samples) >= nfft:
        # get spectrogram
        # positional args: (x, fs, window, nperseg, noverlap, nfft, detrend, return_onesided, scaling)
        ratio_all_freq, ratio_all_time, ratio_all_spec = spectrogram(audio_samples, fs, 'hamming', nfft,
                                                                     hop_size, nfft, 'constant', True, 'spectrum')
        ratio_hp_freq, ratio_hp_time, ratio_hp_spec = spectrogram(ratio_highpass_audio, fs, 'hamming', nfft,
                                                                  hop_size, nfft, 'constant', True, 'spectrum')
        centroid_hp_freq, centroid_hp_time, centroid_hp_spec = spectrogram(centroid_highpass_audio, fs, 'hamming', nfft,
                                                                           hop_size, nfft, 'constant', True, 'spectrum')
    else:
        # audio shorter than one block: use the whole signal as a single analysis segment
        ratio_all_freq, ratio_all_time, ratio_all_spec = spectrogram(audio_samples, fs, 'hamming',
                                                                     len(audio_samples),
                                                                     len(audio_samples)-1,
                                                                     nfft, 'constant', True, 'spectrum')
        ratio_hp_freq, ratio_hp_time, ratio_hp_spec = spectrogram(ratio_highpass_audio, fs, 'hamming',
                                                                  len(ratio_highpass_audio),
                                                                  len(ratio_highpass_audio)-1,
                                                                  nfft, 'constant', True, 'spectrum')
        centroid_hp_freq, centroid_hp_time, centroid_hp_spec = spectrogram(centroid_highpass_audio, fs, 'hamming',
                                                                           len(centroid_highpass_audio),
                                                                           len(centroid_highpass_audio)-1,
                                                                           nfft, 'constant', True, 'spectrum')

    # initialise variables for storing data
    all_ratio = []  # HF energy ratio per window
    all_hp_centroid = []  # spectral centroid of the highpassed signal per window
    all_tpower = []  # total power per window (weights for the ratio average)
    all_hp_centroid_tpower = []  # highpassed power per window (weights for the centroid average)

    # set the energy gate: zero means no gating, otherwise relative to the loudest window
    threshold_db = threshold
    if threshold_db == 0:
        threshold = 0
        hp_threshold = 0
    else:
        max_power = max(np.sum(ratio_all_spec, axis=1))
        threshold = max_power * timbral_util.db2mag(threshold_db)

    '''
      Calculate features for each time window
    '''
    for idx in range(len(ratio_hp_time)):
        # get the current spectrum for this time window
        current_ratio_hp_spec = ratio_hp_spec[:, idx]
        current_ratio_all_spec = ratio_all_spec[:, idx]
        current_centroid_hp_spec = centroid_hp_spec[:, idx]

        # get the power within each spectrum
        tpower = np.sum(current_ratio_all_spec)
        hp_tpower = np.sum(current_ratio_hp_spec)
        # check there is energy in the time window before calculating the ratio (greater than 0)
        if tpower > threshold:
            # get the ratio
            all_ratio.append(hp_tpower / tpower)
            # store the power for weighting
            all_tpower.append(tpower)

        # get the tpower to assure greater than zero
        hp_centroid_tpower = np.sum(current_centroid_hp_spec)
        if hp_centroid_tpower > 0.0:
            # get the centroid (power-weighted mean frequency)
            all_hp_centroid.append(np.sum(current_centroid_hp_spec * centroid_hp_freq[:len(current_centroid_hp_spec)]) /
                                   np.sum(current_centroid_hp_spec))
            # store the tpower for weighting
            all_hp_centroid_tpower.append(hp_centroid_tpower)

    '''
      Get mean and weighted average values
    '''
    # NOTE(review): the unweighted means are computed but not used in the output below — confirm intent
    mean_ratio = np.mean(all_ratio)
    mean_hp_centroid = np.mean(all_hp_centroid)

    weighted_mean_ratio = np.average(all_ratio, weights=all_tpower)
    weighted_mean_hp_centroid = np.average(all_hp_centroid, weights=all_hp_centroid_tpower)

    if dev_output:
        # return the ratio and centroid
        return np.log10(weighted_mean_ratio), np.log10(weighted_mean_hp_centroid)
    else:
        # perform the linear regression
        all_metrics = np.ones(3)
        all_metrics[0] = np.log10(weighted_mean_ratio)
        all_metrics[1] = np.log10(weighted_mean_hp_centroid)

        # pre-trained regression coefficients; the last one multiplies the constant 1
        # left in all_metrics[2] and therefore acts as the intercept
        coefficients = np.array([4.613128018020465, 17.378889309312974, 17.434733750553022])

        bright = np.sum(all_metrics * coefficients)

        if clip_output:
            bright = timbral_util.output_clip(bright)

        return bright
Loading