Skip to content

Commit

Permalink
Add first implementation and unit tests of Audio2Midi algo
Browse files Browse the repository at this point in the history
  • Loading branch information
xaviliz committed Aug 30, 2024
1 parent 5d69a81 commit 1a6063a
Show file tree
Hide file tree
Showing 3 changed files with 553 additions and 0 deletions.
143 changes: 143 additions & 0 deletions src/algorithms/tonal/audio2midi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#include "audio2midi.h"

using namespace std;
using namespace essentia;
using namespace standard;

// Definitions of the static metadata strings declared in audio2midi.h:
// the algorithm's name, its category and a one-line description.
const char *Audio2Midi::name = "Audio2Midi";
const char *Audio2Midi::category = "Pitch";
const char *Audio2Midi::description = DOC("Wrapper around Audio2Pitch and Pitch2Midi for real time application");

/**
 * Reads the declared parameters, derives the analysis frame size from the
 * sample rate and configures the four inner algorithms (LowPass, FrameBuffer,
 * Audio2Pitch and Pitch2Midi).
 *
 * Frame size selection: 2048 samples at 16000 Hz, 4096 samples at 24000 Hz and
 * _fixedFrameSize for every other sample rate.
 *
 * @throws EssentiaException if the selected frame size is larger than half the
 *         sample rate.
 */
void Audio2Midi::configure()
{
  // Cache the user-facing parameters. frameSize, pitchAlgorithm and
  // loudnessAlgorithm are not read here because declareParameters() does not
  // declare them; the frame size is derived below and the pitch algorithm
  // keeps its in-class default.
  _sampleRate = parameter("sampleRate").toReal();
  _hopSize = parameter("hopSize").toInt();
  _minFrequency = parameter("minFrequency").toReal();
  _maxFrequency = parameter("maxFrequency").toReal();
  _tuningFrequency = parameter("tuningFrequency").toInt();
  _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
  _loudnessThreshold = parameter("loudnessThreshold").toReal();
  _transposition = parameter("transpositionAmount").toInt();
  _minOcurrenceRate = parameter("minOcurrenceRate").toReal();
  _midiBufferDuration = parameter("midiBufferDuration").toReal();
  _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
  _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
  _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
  _applyTimeCompensation = parameter("applyTimeCompensation").toBool();

  // Pick the analysis frame size from the (truncated) sample rate. Only the
  // two low rates get a dedicated size; everything else uses the fixed one.
  switch (static_cast<int>(_sampleRate)) {
    case 16000:
      _frameSize = 2048;
      break;
    case 24000:
      _frameSize = 4096;
      break;
    default:
      _frameSize = _fixedFrameSize;
      break;
  }

  // Reject configurations whose frame size (in samples) exceeds half the
  // sample rate. The message is prefixed with this algorithm's own name so a
  // failure can be traced back to Audio2Midi.
  if (_frameSize > _sampleRate * 0.5) {
    throw EssentiaException("Audio2Midi: Frame size cannot be higher than Nyquist frequency");
  }

  // Low-pass filter applied to every incoming frame, fixed 1000 Hz cutoff.
  _lowpass->configure(INHERIT("sampleRate"),
                      "cutoffFrequency", 1000);

  // Buffer that hands frames of _frameSize samples to the pitch estimator.
  _framebuffer->configure("bufferSize", _frameSize);

  // Pitch / loudness estimation on the buffered frame.
  _audio2pitch->configure(INHERIT("sampleRate"),
                          "frameSize", _frameSize,
                          "pitchAlgorithm", _pitchAlgorithm,
                          "minFrequency", _minFrequency,
                          "maxFrequency", _maxFrequency,
                          INHERIT("pitchConfidenceThreshold"),
                          INHERIT("loudnessThreshold"));

  // Conversion of the pitch estimates into MIDI note messages.
  _pitch2midi->configure(INHERIT("sampleRate"),
                         INHERIT("hopSize"),
                         INHERIT("minOcurrenceRate"),
                         INHERIT("applyTimeCompensation"),
                         "minOnsetCheckPeriod", _minOnsetCheckPeriod,
                         "minOffsetCheckPeriod", _minOffsetCheckPeriod,
                         "minNoteChangePeriod", _minNoteChangePeriod,
                         "midiBufferDuration", _midiBufferDuration,
                         "minFrequency", _minFrequency,
                         "tuningFrequency", _tuningFrequency,
                         "transpositionAmount", _transposition);
}

void Audio2Midi::compute()
{
// get ref to input
const std::vector<Real> &frame = _frame.get();
Real& pitch = _pitch.get();
Real& loudness = _loudness.get();
vector<string>& messageType = _messageType.get();
vector<Real>& midiNoteNumber = _midiNoteNumber.get();
vector<Real>& timeCompensation = _timeCompensation.get();

_lowpass->input("signal").set(frame);
_lowpass->output("signal").set(lpFrame);

_framebuffer->input("frame").set(lpFrame);
_framebuffer->output("frame").set(analysisFrame);

_audio2pitch->input("frame").set(analysisFrame);
_audio2pitch->output("pitch").set(pitch);
_audio2pitch->output("pitchConfidence").set(pitchConfidence);
_audio2pitch->output("loudness").set(loudness);
_audio2pitch->output("voiced").set(voiced);

_pitch2midi->input("pitch").set(pitch);
_pitch2midi->input("voiced").set(voiced);
_pitch2midi->output("midiNoteNumber").set(midiNoteNumber);
_pitch2midi->output("timeCompensation").set(timeCompensation);
_pitch2midi->output("messageType").set(messageType);

// E_INFO("\nsax2midi: algorithm inputs and outputs set");
_lowpass->compute();
// E_INFO("sax2midi: lp compute");
_framebuffer->compute();
// E_INFO("sax2midi: framebuffer compute");
// std::cout << "frame: \n" << frame << "\nanalysisFrame: \n" << analysisFrame << "\n";
_audio2pitch->compute();
// E_INFO("sax2midi: a2p compute");
_pitch2midi->compute();
// E_INFO("sax2midi: p2m compute");

// TODO: assign outputs

// set outputs
// get pitchMessage from log_message_formatter
/*pitchMessage = _formatter->pitch_loudness(midiNoteNumber, pitch, pitchConfidence, loudness);
switch (messageType)
{
case 0:
noteOffMessage = _formatter->note_off(midiNoteNumber, offsetTimeCompensation);
break;
case 1:
noteOnMessage = _formatter->note_on(midiNoteNumber, pitch, pitchConfidence, onsetTimeCompensation);
break;
case 2:
noteOffMessage = _formatter->note_off(previousMidiNoteNumber, offsetTimeCompensation);
noteOnMessage = _formatter->note_on(midiNoteNumber, pitch, pitchConfidence, onsetTimeCompensation);
break;
default:
noteOnMessage = "";
noteOffMessage = "";
break;
}*/

// E_INFO("sax2midi compute is done");
}
104 changes: 104 additions & 0 deletions src/algorithms/tonal/audio2midi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#ifndef ESSENTIA_AUDIO2MIDI_H
#define ESSENTIA_AUDIO2MIDI_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

// Standard-mode algorithm that turns audio frames into MIDI note messages.
// It owns four inner algorithms, created in the constructor and chained in
// compute() (see audio2midi.cpp): LowPass -> FrameBuffer -> Audio2Pitch ->
// Pitch2Midi.
class Audio2Midi : public Algorithm {
protected:
// Declared input and outputs; they are registered by name in the constructor.
Input<std::vector<Real>> _frame;
Output<Real> _pitch;
Output<Real> _loudness;
Output<std::vector<std::string> > _messageType;
Output<std::vector<Real> > _midiNoteNumber;
Output<std::vector<Real> > _timeCompensation;

// Inner algorithms, created in the constructor and deleted in the destructor.
Algorithm* _lowpass;
Algorithm* _framebuffer;
Algorithm* _audio2pitch;
Algorithm* _pitch2midi;

// Cached configuration, filled in by configure().
Real _sampleRate;
// Analysis frame size; chosen by configure() from the sample rate.
int _frameSize;
// Frame size used by configure() for every sample rate other than 16000 and 24000.
int _fixedFrameSize = 8192;
int _hopSize;
// Forwarded to Audio2Pitch as "pitchAlgorithm"; not exposed as a parameter.
std::string _pitchAlgorithm = "pitchyinfft";
// Not forwarded to any inner algorithm by configure() in audio2midi.cpp.
std::string _loudnessAlgorithm = "rms";
Real _minFrequency;
Real _maxFrequency;
int _tuningFrequency;
Real _pitchConfidenceThreshold, _loudnessThreshold, _minOcurrenceRate;
Real _midiBufferDuration;
Real _minNoteChangePeriod;
Real _minOnsetCheckPeriod;
Real _minOffsetCheckPeriod;

bool _applyTimeCompensation;
int _transposition;

// Containers
// compute() uses lpFrame, analysisFrame, pitchConfidence and voiced to pass
// intermediate results between the inner algorithms. The other containers
// below are not referenced by compute() in audio2midi.cpp, which writes those
// results straight into the declared outputs through local references.
std::vector<Real> lpFrame, analysisFrame;
Real pitch, pitchConfidence, loudness;
std::vector<Real> midiNoteNumber, timeCompensation;
std::vector<std::string> messageType;
Real onsetTimeCompensation, offsetTimeCompensation;

int voiced;

public:
// Registers the input/outputs and creates the four inner algorithms through
// AlgorithmFactory.
Audio2Midi() {
declareInput(_frame, "frame", "the input frame to analyse");
declareOutput(_pitch, "pitch", "pitch given in Hz");
declareOutput(_loudness, "loudness", "detected loudness in decibels");
declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");

_lowpass = AlgorithmFactory::create("LowPass");
_framebuffer = AlgorithmFactory::create("FrameBuffer");
_audio2pitch = AlgorithmFactory::create("Audio2Pitch");
_pitch2midi = AlgorithmFactory::create("Pitch2Midi");
}

// Releases the inner algorithms created in the constructor.
~Audio2Midi() {
delete _lowpass;
delete _framebuffer;
delete _audio2pitch;
delete _pitch2midi;
}

// Declares every configurable parameter with its description, accepted range
// and default value. The pitchAlgorithm / loudnessAlgorithm parameters are
// currently disabled (commented out below).
void declareParameters() {
// TODO: revise parameter description
declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
// declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft");
// declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms");
declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);
declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
declareParameter("minOcurrenceRate", "rate of predominant pitch ocurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // default is 0.05 s, i.e. 50 ms
declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
}

// Defined in audio2midi.cpp.
void configure();
void compute();

// Static metadata strings; defined in audio2midi.cpp.
static const char* name;
static const char* category;
static const char* description;
};


} // namespace standard
} // namespace essentia

#endif
Loading

0 comments on commit 1a6063a

Please sign in to comment.