Skip to content

Commit

Permalink
Add extensive summary notes on the two oscillation algorithms, Oscilla…
Browse files Browse the repository at this point in the history
…tions2012 and Oscillations2019.

Issue #500 Add extensive summary notes on the two oscillation algorithms, Oscillations2012 and Oscillations2019.
  • Loading branch information
towsey committed Jul 1, 2021
1 parent 8163d1d commit 94b9aaf
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 69 deletions.
61 changes: 60 additions & 1 deletion src/AudioAnalysisTools/Ocillations/OscillationParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,61 @@ namespace AnalysisPrograms.Recognizers.Base
using AudioAnalysisTools;
using AudioAnalysisTools.Events;
using AudioAnalysisTools.StandardSpectrograms;
using SixLabors.ImageSharp;
using TowseyLibrary;

/// <summary>
/// There are currently two algorithms implemented in AnalysisPrograms to detect temporal oscillations in a spectrogram, "Standard" and "Hits".
/// At the heart of both is a Discrete Cosine Transform (DCT) which identifies an oscillation and determines its oscillation rate or the inverse, its periodicity.
/// Note that other algorithms could also be used to identify an oscillation, in particular a Discrete Fourier Transform, but currently this is not implemented.
/// The Standard DCT algorithm is implemented in the class Oscillations2019. The Hits algorithm is implemented in the class Oscillations2012.
/// Given a spectrogram, a search band and other constraints, these algorithms identify acoustic events containing temporal oscillations.
/// Eight of the ten parameters required for these algorithms are the same - just two differences.
/// The identical parameters are as follows:
/// (1) MinDuration, (2) MaxDuration, (3) MinHertz, (4) MaxHertz. These constrain the size of the event within the spectrogram.
/// MinHertz and MaxHertz identify the search band. All discovered events will occupy this band.
/// (5) MinOscillationFrequency and (6) MaxOscillationFrequency set the minimum and maximum acceptable oscillation rate.
/// Although these rates are defined as "oscillations per second" the calculations are done using periodicity. Periodicity = 1/OscillationRate.
/// (7) DctDuration and (8) DctThreshold. These parameters determine how the DCT is implemented.
/// DctDuration sets the time span (in seconds) of the DCT. Typically, for reliable detection, you would want several oscillations to occur within the DCT duration.
/// DctThreshold (a value in [0,1]) sets the minimum required amplitude for the oscillation to be accepted.
/// The final two parameters are used differently by each algorithm: (9) EventThreshold and (10) DecibelThresholds. ############### SMOOTHING WINDOW
/// The steps for each algorithm are outlined below. The first three steps and last two steps are identical for each.
///
/// ### THE STANDARD algorithm for detecting oscillations - This is implemented in the class Oscillations2019.
/// STEP 1: smooth the spectrum in each timeframe. This is intended to make oscillations more regular. Currently a smoothing window of 3 is used by default.
/// STEP 2: extract an array of decibel values, frame averaged over the search band.
/// decibelArray = SNR.CalculateFreqBandAvIntensity(sonogram.Data, minHz, maxHz, spectrogram.NyquistFrequency);
/// STEP 3: prepare a set of cosine basis functions.
/// STEP 4: DETECT OSCILLATIONS in the extracted array of average decibel values.
/// DetectOscillations(decibelArray, framesPerSecond, decibelThreshold, dctDuration, minOscFreq, maxOscFreq, dctThreshold, out var dctScores, out var oscFreq);
/// STEP 5: Scan the decibel array for peak values and do a DCT starting at each peak whose amplitude exceeds the current DecibelThreshold.
/// STEP 6: Ignore first four coefficients. Assign value of largest coefficient to the corresponding positions in the dctScores array only if its value is greater than the DctThreshold and greater than that in the dctScores array.
/// This becomes the array of oscillation scores.
/// STEP 7: Apply a smoothing window to the array of oscillation scores - window=11 has been the DEFAULT. Now letting user set this.
/// dctScores = DataTools.filterMovingAverage(dctScores, SmoothingWindow);
/// STEP 8: Search the array of DCT scores to find events that satisfy the constraints set by parameters (1) to (6) and by EventThreshold.
/// events = OscillationEvent.ConvertOscillationScores2Events(spectrogram, minDuration, maxDuration, minHz, maxHz, minOscilFrequency, maxOscilFrequency, oscScores, eventThreshold, segmentStartOffset);
/// ###
/// ### THE HITS algorithm for detecting oscillations - This is implemented in the class Oscillations2012.
/// STEP 1: smooth the spectrum in each timeframe. This is intended to make oscillations more regular. Currently a smoothing window of 3 is used by default.
/// STEP 2: extract an array of decibel values, frame averaged over the search band.
/// decibelArray = SNR.CalculateFreqBandAvIntensity(spectrogram.Data, minHz, maxHz, spectrogram.NyquistFrequency);
/// STEP 3: prepare a set of cosine basis functions.
/// STEP 4: DETECT OSCILLATIONS in each frequency bin separately. A 'hit' occurs where the DCT coefficient is greater than the DctThreshold and falls within the acceptable oscillation rate.
/// hits = DetectOscillations(spectrogram, minHz, maxHz, dctDuration, minOscilFrequency.Value, maxOscilFrequency.Value, dctThreshold);
/// STEP 5: Remove isolated oscillations
/// hits = RemoveIsolatedOscillations(hits);
/// STEP 6: Calculate an array of oscillation scores as the fraction of frequency bins in the search frequency band that contain a DCT coefficient value greater than zero.
/// oscScores = GetOscillationScores(hits, minHz, maxHz, spectrogram.FBinWidth);
/// STEP 7: Apply a smoothing window to the array of oscillation scores. Parameter: SmoothingWindow
/// STEP 7 is implemented by the same method as STEP 7 in the STANDARD algorithm.
/// oscScores = DataTools.filterMovingAverage(oscScores, smoothingWindow);
/// STEP 8: Search the array of DCT scores to find events that satisfy the constraints set by parameters (1) to (6) and by EventThreshold.
/// STEP 8 is implemented by the same method as STEP 8 in the STANDARD algorithm.
/// events = OscillationEvent.ConvertOscillationScores2Events(spectrogram, minDuration, maxDuration, minHz, maxHz, minOscilFrequency, maxOscilFrequency, oscScores, eventThreshold, segmentStartOffset);
/// ###.
/// </summary>
public enum OscillationAlgorithm
{
Standard,
Expand Down Expand Up @@ -57,10 +110,11 @@ public static (List<EventCommon> OscillEvents, List<Plot> Plots) GetOscillationE

List<EventCommon> events;
List<Plot> plots;
double[,] hits = null;

if (algorithm == OscillationAlgorithm.Hits)
{
(events, plots) = Oscillations2012.GetComponentsWithOscillations(
(events, plots, hits) = Oscillations2012.GetComponentsWithOscillations(
spectrogram,
op,
decibelThreshold,
Expand All @@ -78,6 +132,11 @@ public static (List<EventCommon> OscillEvents, List<Plot> Plots) GetOscillationE
profileName);
}

// save a debug image of the spectrogram which includes the HITS overlay.
var image3 = SpectrogramTools.GetSonogramPlusCharts(spectrogram, events, plots, hits, profileName + " Oscillations");
var path = "C:\\temp\\oscillationsImage.png";
image3.Save(path);

return (events, plots);
}
}
Expand Down
93 changes: 39 additions & 54 deletions src/AudioAnalysisTools/Ocillations/Oscillations2012.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ namespace AudioAnalysisTools
/// </summary>
public static class Oscillations2012
{
public static (List<EventCommon> SpectralEvents, List<Plot> DecibelPlots) GetComponentsWithOscillations(
public static (List<EventCommon> SpectralEvents, List<Plot> DecibelPlots, double[,] Hits) GetComponentsWithOscillations(
SpectrogramStandard spectrogram,
OscillationParameters op,
double? decibelThreshold,
TimeSpan segmentStartOffset,
string profileName)
{
var oscEvents = new List<EventCommon>();
var plots = new List<Plot>();

Oscillations2012.Execute(
spectrogram,
Expand All @@ -48,23 +47,19 @@ public static (List<EventCommon> SpectralEvents, List<Plot> DecibelPlots) GetCom
out var bandDecibels,
out var oscScores,
out var oscillationEvents,
out var hits, // keep this for debuggin purposes. See below.
out var hits, // return this for debugging purposes.
segmentStartOffset);

oscEvents.AddRange(oscillationEvents);

// prepare plot of resultant Harmonics decibel array.
// prepare plot of resultant decibel and score arrays.
var plots = new List<Plot>();
var plot1 = Plot.PreparePlot(bandDecibels, $"{profileName} (Oscillations:{decibelThreshold:F0}db)", decibelThreshold.Value);
plots.Add(plot1);
var plot2 = Plot.PreparePlot(oscScores, $"{profileName} (Oscillation Event Score:{op.EventThreshold:F2})", op.EventThreshold);
plots.Add(plot2);

// save a debug image
//var image3 = SpectrogramTools.GetSonogramPlusCharts(spectrogram, oscEvents, plots, hits, profileName + " Oscillations");
//var path = "C:\\temp\\oscillationsImage.png";
//image3.Save(path);

return (oscEvents, plots);
return (oscEvents, plots, hits);
}

public static void Execute(
Expand All @@ -77,7 +72,7 @@ public static void Execute(
double? maxOscillationFrequency,
double dctDuration,
double dctThreshold,
double scoreThreshold,
double eventThreshold,
out double[] bandDecibels,
out double[] oscScores,
out List<OscillationEvent> events,
Expand All @@ -96,7 +91,7 @@ public static void Execute(
maxOscillationFrequency,
dctDuration,
dctThreshold,
scoreThreshold,
eventThreshold,
scoreSmoothingWindow,
out bandDecibels,
out oscScores,
Expand All @@ -106,7 +101,7 @@ public static void Execute(
}

public static void Execute(
SpectrogramStandard sonogram,
SpectrogramStandard spectrogram,
double minDuration,
double maxDuration,
int minHz,
Expand All @@ -115,23 +110,22 @@ public static void Execute(
double? maxOscilFrequency,
double dctDuration,
double dctThreshold,
double scoreThreshold,
double eventThreshold,
int smoothingWindow,
out double[] bandDecibels,
out double[] decibelArray,
out double[] oscScores,
out List<OscillationEvent> events,
out double[,] hits,
TimeSpan segmentStartOffset)
{
int minBin = (int)(minHz / sonogram.FBinWidth);
int maxBin = (int)(maxHz / sonogram.FBinWidth);
bandDecibels = MatrixTools.GetRowAveragesOfSubmatrix(sonogram.Data, 0, minBin, sonogram.Data.GetLength(0) - 1, maxBin);
// smooth the spectra in all time-frames.
spectrogram.Data = MatrixTools.SmoothRows(spectrogram.Data, 3);

// smooth the time frames to make oscillations more regular.
sonogram.Data = MatrixTools.SmoothRows(sonogram.Data, 5);
// extract array of decibel values, frame averaged over required frequency band
decibelArray = SNR.CalculateFreqBandAvIntensity(spectrogram.Data, minHz, maxHz, spectrogram.NyquistFrequency);

//DETECT OSCILLATIONS
hits = DetectOscillations(sonogram, minHz, maxHz, dctDuration, minOscilFrequency.Value, maxOscilFrequency.Value, dctThreshold);
//DETECT OSCILLATIONS in the search band.
hits = DetectOscillations(spectrogram, minHz, maxHz, dctDuration, minOscilFrequency.Value, maxOscilFrequency.Value, dctThreshold);
if (hits == null)
{
oscScores = null;
Expand All @@ -142,20 +136,20 @@ public static void Execute(
hits = RemoveIsolatedOscillations(hits);

//EXTRACT SCORES AND ACOUSTIC EVENTS
oscScores = GetOscillationScores(hits, minHz, maxHz, sonogram.FBinWidth);
oscScores = GetOscillationScores(hits, minHz, maxHz, spectrogram.FBinWidth);

// smooth the scores - window=11 has been the DEFAULT. Now letting user set this.
oscScores = DataTools.filterMovingAverage(oscScores, smoothingWindow);
events = OscillationEvent.ConvertOscillationScores2Events(
sonogram,
spectrogram,
minDuration,
maxDuration,
minHz,
maxHz,
minOscilFrequency,
maxOscilFrequency,
oscScores,
scoreThreshold,
eventThreshold,
segmentStartOffset);
}

Expand Down Expand Up @@ -199,26 +193,38 @@ public static void Execute(
double[,] hits = new double[rows, cols];
double[,] matrix = sonogram.Data;

double[,] cosines = MFCCStuff.Cosines(dctLength, dctLength); //set up the cosine coefficients
double[,] cosines = DctMethods.Cosines(dctLength, dctLength); //set up the cosine coefficients

//traverse columns - skip DC column
for (int c = minBin; c <= maxBin; c++)
{
var dctArray = new double[dctLength];

for (int r = 0; r < rows - dctLength; r++)
for (int r = 1; r < rows - dctLength; r++)
{
// only stop if current location is a peak
if (matrix[r, c] < matrix[r - 1, c] || matrix[r, c] < matrix[r + 1, c])
{
continue;
}

// ... AND if current peak is above a decibel threshold.
if (matrix[r, c] < 3.0)
{
continue;
}

// extract array and ready for DCT
for (int i = 0; i < dctLength; i++)
{
dctArray[i] = matrix[r + i, c];
}

int lowerDctBound = minIndex / 4;
var dctCoeff = DoDct(dctArray, cosines, lowerDctBound);
var dctCoeff = DctMethods.DoDct(dctArray, cosines, lowerDctBound);
int indexOfMaxValue = DataTools.GetMaxIndex(dctCoeff);

//mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
// mark DCT location with oscillation freq, only if oscillation freq is in correct range and amplitude
if (indexOfMaxValue >= minIndex && indexOfMaxValue <= maxIndex && dctCoeff[indexOfMaxValue] > dctThreshold)
{
for (int i = 0; i < dctLength; i++)
Expand All @@ -227,38 +233,17 @@ public static void Execute(
}
}

r += 5; //skip rows i.e. do every sixth time frame.
// skip rows i.e. do every sixth time frame.
//r += 5;
}

c++; //do alternate columns i.e. every second frequency bin.
// do alternate columns i.e. every second frequency bin.
c++;
}

return hits;
}

public static double[] DoDct(double[] vector, double[,] cosines, int lowerDctBound)
{
//var dctArray = DataTools.Vector2Zscores(dctArray);
var dctArray = DataTools.SubtractMean(vector);
int dctLength = dctArray.Length;
double[] dctCoeff = MFCCStuff.DCT(dctArray, cosines);

// convert to absolute values because not interested in negative values due to phase.
for (int i = 0; i < dctLength; i++)
{
dctCoeff[i] = Math.Abs(dctCoeff[i]);
}

// remove lower coefficients from consideration because they dominate
for (int i = 0; i < lowerDctBound; i++)
{
dctCoeff[i] = 0.0;
}

dctCoeff = DataTools.normalise2UnitLength(dctCoeff);
return dctCoeff;
}

/// <summary>
/// Removes single lines of hits from Oscillation matrix.
/// </summary>
Expand Down
Loading

0 comments on commit 94b9aaf

Please sign in to comment.