Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP - Add pitch estimation #263

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
soundfile = "*"
numpy = "*"
matplotlib = "*"

[dev-packages]

[requires]
python_version = "3.9"
338 changes: 338 additions & 0 deletions Pipfile.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions crates/augmented/audio/audio-processor-analysis/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ image = "0.24.3"
clap = "2.34.0"
piet = "0.5.0"
piet-common = { version = "0.5.0", features = ["png"] }
augmented_oscillator = { version = "1.0.0-alpha.6", path = "../oscillator" }

plotters = "0.3.1"
plotters-svg = "0.3.1"

[package.metadata.augmented]
private = false
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified crates/augmented/audio/audio-processor-analysis/screen.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
40 changes: 40 additions & 0 deletions crates/augmented/audio/audio-processor-analysis/spikes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import matplotlib.pyplot as plt
import soundfile


def autocorrelation(signal, lag=1, window_size=10):
result = []
for i in range(len(signal)):
s = 0
for j in range(window_size):
x1 = signal[i + j] if i + j < len(signal) else 0.0
x2 = signal[i + j + lag] if i + j + lag < len(signal) else 0.0
s += x1 * x2
result.append(s)
return result


def main():
input_file, _sample_rate = soundfile.read("../../../../../input-files/piano-a440.wav")
input_file = [x[0] for x in input_file[0:400]]

fig = plt.figure()

axs = fig.add_subplot(projection="3d")
for i in range(0, len(input_file), 20):
acf = autocorrelation(input_file, lag=i)
x_data = [x for x in range(len(input_file))]
y_data = [i for _j in range(len(input_file))]
z_data = [acf[z] for z in range(len(input_file))]
axs.plot(x_data, y_data, z_data, "b-", alpha=0.8)

x_data = [i for i in range(len(input_file))]
y_data = [0 for _j in range(len(input_file))]
z_data = [input_file[j] for j in range(len(input_file))]
axs.plot(x_data, y_data, z_data, "r")

plt.show()


if __name__ == "__main__":
main()
3 changes: 3 additions & 0 deletions crates/augmented/audio/audio-processor-analysis/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,6 @@ pub mod transient_detection;

/// Many window functions
pub mod window_functions;

/// Pitch-detection
pub mod pitch_detection;
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use super::utils;

/// Implements pitch-detection by finding the maximum bin in a FFT buffer and calculating its
/// corresponding frequency.
///
/// This is not accurate and susceptible to jumping around harmonics.
pub fn estimate_frequency(
sample_rate: f32,
fft_real_buffer: impl ExactSizeIterator<Item = f32>,
) -> Option<f32> {
let bin_count = fft_real_buffer.len();
let max_bin = utils::maximum_index(fft_real_buffer)?;

Some(utils::frequency_from_location(
sample_rate,
max_bin as f32,
bin_count,
))
}

#[cfg(test)]
mod test {
use audio_processor_testing_helpers::assert_f_eq;

use super::*;

#[test]
fn test_maximum_estimate_location() {
let iterator: Vec<f32> = vec![10.0, 30.0, 20.0, 5.0];
let freq = estimate_frequency(1000.0, iterator.iter().cloned()).unwrap();
// Maximum bin 1, sample rate is 1000Hz, num bins is 4
// Estimated freq should be 250Hz
assert_f_eq!(freq, 250.0);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
//! This module implements multiple pitch-detection strategies.
//!
//! References:
//!
//! * YIN, a fundamental frequency estimator for speech and music, Alain de Cheveigne and Hideki Kawahara
//! * Probabilistic Modelling of Note Events in the Transcription of Monophonic Melodies, Matti Ryynanen
//! * HMM Decoding: Viterbi Algorithm, Shallow Processing Techniques for NLP Ling570
//! * Parabolic Interpolation, http://fourier.eng.hmc.edu/

use audio_processor_traits::{AudioProcessorSettings, SimpleAudioProcessor};

use crate::fft_processor::FftProcessor;

mod max_value_estimator;
mod quadratic_estimator;
mod utils;
mod yin_estimator;

enum EstimationStrategy {
MaxValue,
Quadratic,
YIN,
}

/// Real-time pitch-detection processor. Pitch will be detected with latency from the FFT size.
///
/// Multiple pitch-detection strategies are implemented defaults to the best strategy.
pub struct PitchDetectorProcessor {
fft: FftProcessor,
strategy: EstimationStrategy,
sample_rate: f32,
estimate: f32,
}

impl From<EstimationStrategy> for PitchDetectorProcessor {
fn from(strategy: EstimationStrategy) -> Self {
let mut processor = Self::default();
processor.strategy = strategy;
processor
}
}

impl Default for PitchDetectorProcessor {
fn default() -> Self {
PitchDetectorProcessor {
fft: FftProcessor::default(),
strategy: EstimationStrategy::MaxValue,
sample_rate: 0.0,
estimate: 0.0,
}
}
}

impl PitchDetectorProcessor {
fn on_fft(&mut self) {
let fft_buffer = self.fft.buffer();
let fft_real_buffer = fft_buffer.iter().map(|f| f.re).take(fft_buffer.len() / 2);
let estimate = match self.strategy {
EstimationStrategy::MaxValue => {
max_value_estimator::estimate_frequency(self.sample_rate, fft_real_buffer)
}
EstimationStrategy::Quadratic => {
quadratic_estimator::estimate_frequency(self.sample_rate, &fft_buffer)
}
EstimationStrategy::YIN => todo!(),
};
if let Some(estimate) = estimate {
self.estimate = estimate;
}
}
}

impl SimpleAudioProcessor for PitchDetectorProcessor {
type SampleType = f32;

fn s_prepare(&mut self, settings: AudioProcessorSettings) {
self.fft.s_prepare(settings);
self.sample_rate = settings.sample_rate();
}

fn s_process(&mut self, sample: Self::SampleType) -> Self::SampleType {
self.fft.s_process(sample);
if self.fft.has_changed() {
self.on_fft();
}
sample
}
}

#[cfg(test)]
mod test {
use audio_processor_testing_helpers::charts::draw_vec_chart;
use audio_processor_testing_helpers::relative_path;

use audio_processor_file::AudioFileProcessor;
use audio_processor_traits::{
AudioBuffer, AudioProcessorSettings, OwnedAudioBuffer, SimpleAudioProcessor, VecAudioBuffer,
};

use crate::pitch_detection::{EstimationStrategy, PitchDetectorProcessor};

#[test]
fn test_max_estimator() {
let strategy = EstimationStrategy::MaxValue;
let strategy_name = "MaxValueEstimator";
draw_estimation_strategy(strategy, strategy_name);
}

#[test]
fn test_quadratic_estimator() {
let strategy = EstimationStrategy::Quadratic;
let strategy_name = "QuadraticEstimator";
draw_estimation_strategy(strategy, strategy_name);
}

fn draw_estimation_strategy(strategy: EstimationStrategy, strategy_name: &str) {
wisual_logger::init_from_env();
let input_file_path = relative_path!("../../../../input-files/C3-loop.mp3");

let settings = AudioProcessorSettings::default();
let mut input = AudioFileProcessor::from_path(
audio_garbage_collector::handle(),
settings,
&input_file_path,
)
.unwrap();
input.prepare(settings);

let mut pitch_detector = PitchDetectorProcessor::from(strategy);
pitch_detector.s_prepare(settings);

let mut results = vec![];
let mut buffer = VecAudioBuffer::new();
buffer.resize(1, settings.block_size(), 0.0);
let num_chunks = (input.num_samples() / 8) / settings.block_size();
// results.push(0.0);
for _chunk in 0..num_chunks {
for sample in buffer.slice_mut() {
*sample = 0.0;
}

input.process(&mut buffer);
for frame in buffer.frames_mut() {
let sample = frame[0];
pitch_detector.s_process(sample);
results.push(pitch_detector.estimate);
}
}
// results.push(0.0);

let results: Vec<f32> = results.iter().skip(10000).cloned().collect();
let output_path = relative_path!("src/pitch_detection/mod.rs");
draw_vec_chart(&output_path, strategy_name, results);
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use super::utils;
use rustfft::num_complex::Complex;

/// Similar to `max_value_estimator`, this is also not accurate, but better.
///
/// Estimates the frequency by finding the maximum FFT bin, then performing quadratic interpolation
/// by fitting a second-order polynomial with the 3 points around the maximum bin.
pub fn estimate_frequency(sample_rate: f32, fft_buffer: &[Complex<f32>]) -> Option<f32> {
let bin_count = fft_buffer.len() / 2;
let max_bin = utils::maximum_index(fft_buffer.iter().map(|r| r.re).take(bin_count))?;

let y2 = fft_buffer[max_bin].re;
let y1 = if max_bin == 0 {
y2
} else {
fft_buffer[max_bin - 1].re
};
let y3 = if max_bin == bin_count - 1 {
y2
} else {
fft_buffer[max_bin + 1].re
};
let delta = (y3 - y1) / (2.0 * (2.0 * y2 - y1 - y3));
let location = max_bin as f32 + delta;

Some(utils::frequency_from_location(
sample_rate,
location,
bin_count,
))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use std::cmp::Ordering;

/// Given a bin, estimate its frequency
pub fn frequency_from_location(sample_rate: f32, location: f32, bin_count: usize) -> f32 {
let ratio: f32 = location / bin_count as f32;
sample_rate * ratio
}

pub fn maximum_index(iterator: impl ExactSizeIterator<Item = f32>) -> Option<usize> {
iterator
.enumerate()
.max_by(|(_, f1): &(usize, f32), (_, f2): &(usize, f32)| {
f1.abs().partial_cmp(&f2.abs()).unwrap_or(Ordering::Equal)
})
.map(|(i, _f)| i)
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn test_maximum_index_when_exists() {
let iterator: Vec<f32> = vec![10.0, 30.0, 20.0, 5.0];
let index = maximum_index(iterator.iter().cloned()).unwrap();
assert_eq!(index, 1);
}

#[test]
fn test_maximum_index_when_does_not_exist() {
let iterator: Vec<f32> = vec![];
let index = maximum_index(iterator.iter().cloned());
assert_eq!(index, None);
}
}
Loading