-
Notifications
You must be signed in to change notification settings - Fork 536
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1413 from xaviliz/add-new-algo-audio2pitch
Add new algo audio2pitch
- Loading branch information
Showing
8 changed files
with
501 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#include "audio2pitch.h" | ||
#include "essentiamath.h" | ||
|
||
using namespace essentia; | ||
using namespace standard; | ||
|
||
// Static metadata of the Audio2Pitch algorithm: its name, its category and its
// human-readable documentation string (wrapped in the DOC macro).
const char* Audio2Pitch::name = "Audio2Pitch"; | ||
const char* Audio2Pitch::category = "Pitch"; | ||
const char* Audio2Pitch::description = DOC("This algorithm computes pitch with various pitch algorithms, specifically targeted for real-time pitch detection on audio signals. The algorithm internally uses pitch estimation with PitchYin (pitchyin) and PitchYinFFT (pitchyinfft)."); | ||
|
||
// Tells whether a frame qualifies as voiced.
//
// Both conditions have to hold:
//   - pitchConfidence reaches _pitchConfidenceThreshold, and
//   - loudness reaches _loudnessThresholdGain (the dB threshold converted
//     with db2amp() in configure()).
bool Audio2Pitch::isAboveThresholds(Real pitchConfidence, Real loudness) {
  // Written as a negated >= so that a NaN confidence is rejected.
  if (!(pitchConfidence >= _pitchConfidenceThreshold)) {
    return false;
  }
  return loudness >= _loudnessThresholdGain;
}
|
||
void Audio2Pitch::configure() { | ||
|
||
_sampleRate = parameter("sampleRate").toReal(); | ||
_frameSize = parameter("frameSize").toInt(); | ||
_minFrequency = parameter("minFrequency").toReal(); | ||
_maxFrequency = parameter("maxFrequency").toReal(); | ||
_pitchAlgorithmName = parameter("pitchAlgorithm").toString(); | ||
_tolerance = parameter("tolerance").toReal(); | ||
_pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal(); | ||
_loudnessThreshold = parameter("loudnessThreshold").toReal(); | ||
_loudnessThresholdGain = db2amp(_loudnessThreshold); | ||
|
||
if (_maxFrequency > _sampleRate * 0.5) { | ||
throw EssentiaException("Audio2Pitch: Max frequency cannot be higher than Nyquist frequency"); | ||
} | ||
if (_maxFrequency <= _minFrequency) { | ||
throw EssentiaException("Audio2Pitch: Max frequency cannot be lower or equal than the minimum frequency"); | ||
} | ||
|
||
if (_pitchAlgorithmName != "pitchyinfft" && _pitchAlgorithmName != "pitchyin") { | ||
throw EssentiaException("Audio2Pitch: Bad 'pitchAlgorithm' =", _pitchAlgorithmName); | ||
} | ||
|
||
if (_pitchAlgorithmName == "pitchyinfft") { | ||
_windowing = AlgorithmFactory::create("Windowing"); | ||
_spectrum = AlgorithmFactory::create("Spectrum"); | ||
_pitchAlgorithm = AlgorithmFactory::create("PitchYinFFT"); | ||
|
||
_windowing->configure("type", "hann", | ||
"size", _frameSize); | ||
_spectrum->configure("size", _frameSize); | ||
} | ||
else { | ||
_pitchAlgorithm = AlgorithmFactory::create("PitchYin"); | ||
} | ||
|
||
_loudnessAlgorithm = AlgorithmFactory::create("RMS"); | ||
|
||
// switch between pyin and pyin_fft to propagate the weighting parameter | ||
if (_pitchAlgorithmName == "pitchyin") { | ||
_pitchAlgorithm->configure(INHERIT("frameSize"), | ||
INHERIT("maxFrequency"), | ||
INHERIT("minFrequency"), | ||
INHERIT("sampleRate"), | ||
INHERIT("tolerance")); | ||
} | ||
else { | ||
_pitchAlgorithm->configure(INHERIT("frameSize"), | ||
INHERIT("maxFrequency"), | ||
INHERIT("minFrequency"), | ||
INHERIT("sampleRate"), | ||
INHERIT("weighting"), | ||
INHERIT("tolerance")); | ||
} | ||
} | ||
|
||
void Audio2Pitch::compute() { | ||
const std::vector<Real>& frame = _frame.get(); | ||
Real& pitch = _pitch.get(); | ||
Real& pitchConfidence = _pitchConfidence.get(); | ||
Real& loudness = _loudness.get(); | ||
int& voiced = _voiced.get(); | ||
|
||
if (frame.empty()) { | ||
throw EssentiaException("Audio2Pitch: cannot compute the pitch of an empty frame"); | ||
} | ||
|
||
if (frame.size() == 1) { | ||
throw EssentiaException("Audio2Pitch: cannot compute the pitch of a frame of size 1"); | ||
} | ||
|
||
_loudnessAlgorithm->input("array").set(frame); | ||
_loudnessAlgorithm->output("rms").set(loudness); | ||
_loudnessAlgorithm->compute(); | ||
|
||
std::vector<Real> windowedFrame, spectrum; | ||
if (_pitchAlgorithmName == "pitchyinfft") { | ||
_windowing->input("frame").set(frame); | ||
_windowing->output("frame").set(windowedFrame); | ||
_windowing->compute(); | ||
_spectrum->input("frame").set(windowedFrame); | ||
_spectrum->output("spectrum").set(spectrum); | ||
_spectrum->compute(); | ||
_pitchAlgorithm->input("spectrum").set(spectrum); | ||
} | ||
else if (_pitchAlgorithmName == "pitchyin") { | ||
_pitchAlgorithm->input("signal").set(frame); | ||
} | ||
|
||
_pitchAlgorithm->output("pitch").set(pitch); | ||
_pitchAlgorithm->output("pitchConfidence").set(pitchConfidence); | ||
_pitchAlgorithm->compute(); | ||
|
||
// define voiced by thresholding | ||
voiced = 0; // initially assumes an unvoiced frame | ||
if (isAboveThresholds(pitchConfidence, loudness)) { | ||
voiced = 1; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#ifndef ESSENTIA_AUDIO2PITCH_H | ||
#define ESSENTIA_AUDIO2PITCH_H | ||
|
||
#include "algorithmfactory.h" | ||
|
||
namespace essentia { | ||
namespace standard { | ||
|
||
class Audio2Pitch : public Algorithm { | ||
|
||
protected: | ||
Input<std::vector<Real>> _frame; | ||
Output<Real> _pitch; | ||
Output<Real> _pitchConfidence; | ||
Output<Real> _loudness; | ||
Output<int> _voiced; | ||
|
||
Algorithm* _pitchAlgorithm; | ||
Algorithm* _loudnessAlgorithm; | ||
// auxiliary algorithms for FFT-based pitch | ||
Algorithm* _windowing; | ||
Algorithm* _spectrum; | ||
|
||
Real _sampleRate; | ||
int _frameSize; | ||
Real _minFrequency; | ||
Real _maxFrequency; | ||
std::string _pitchAlgorithmName; | ||
Real _tolerance; | ||
Real _pitchConfidenceThreshold; | ||
Real _loudnessThreshold; | ||
Real _loudnessThresholdGain; | ||
|
||
bool isAboveThresholds(Real pitchConfidence, Real loudness); | ||
|
||
public: | ||
Audio2Pitch() { | ||
declareInput(_frame, "frame", "the input frame to analyse"); | ||
declareOutput(_pitch, "pitch", "detected pitch in Hz"); | ||
declareOutput(_pitchConfidence, "pitchConfidence", "confidence of detected pitch (from 0.0 to 1.0)"); | ||
declareOutput(_loudness, "loudness", "detected loudness in decibels"); | ||
declareOutput(_voiced, "voiced", "voiced frame categorization, 1 for voiced and 0 for unvoiced frame"); | ||
} | ||
|
||
~Audio2Pitch() { | ||
if (_pitchAlgorithm) delete _pitchAlgorithm; | ||
if (_loudnessAlgorithm) delete _loudnessAlgorithm; | ||
if (_windowing) delete _windowing; | ||
if (_spectrum) delete _spectrum; | ||
} | ||
|
||
void declareParameters() { | ||
declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100); | ||
declareParameter("frameSize", "size of input frame in samples", "[1,inf)", 1024); | ||
declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0); | ||
declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0); | ||
declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pitchyin,pitchyinfft}", "pitchyinfft"); | ||
declareParameter("weighting", "string to assign a weighting function", "{custom,A,B,C,D,Z}", "custom"); | ||
declareParameter("tolerance", "sets tolerance for peak detection on pitch algorithm", "[0,1]", 1.0); | ||
declareParameter("pitchConfidenceThreshold", "level of pitch confidence above/below which note ON/OFF start to be considered", "[0,1]", 0.25); | ||
declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0); | ||
} | ||
|
||
void configure(); | ||
void compute(); | ||
|
||
static const char* name; | ||
static const char* category; | ||
static const char* description; | ||
}; | ||
|
||
} // namespace standard | ||
} // namespace essentia | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
Oops, something went wrong.