Add first implementation and unit tests of Audio2Midi algo

MTG · Aug 30, 2024 · 1a6063a · 1a6063a
1 parent 5d69a81
commit 1a6063a
Show file tree

Hide file tree

Showing 3 changed files with 553 additions and 0 deletions.
diff --git a/src/algorithms/tonal/audio2midi.cpp b/src/algorithms/tonal/audio2midi.cpp
@@ -0,0 +1,143 @@
+#include "audio2midi.h"
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+// Static metadata strings of the Audio2Midi algorithm.
+const char* Audio2Midi::name = "Audio2Midi";
+const char* Audio2Midi::category = "Pitch";
+const char* Audio2Midi::description =
+  DOC("Wrapper around Audio2Pitch and Pitch2Midi for real time application");
+
+/**
+ * Reads the parameters, derives the analysis frame size from the sample rate
+ * and configures the inner LowPass, FrameBuffer, Audio2Pitch and Pitch2Midi
+ * algorithms.
+ * @throws EssentiaException if the frame size exceeds half the sample rate.
+ */
+void Audio2Midi::configure()
+{
+  _sampleRate = parameter("sampleRate").toReal();
+  _hopSize = parameter("hopSize").toInt();
+  _minFrequency = parameter("minFrequency").toReal();
+  _maxFrequency = parameter("maxFrequency").toReal();
+  _tuningFrequency = parameter("tuningFrequency").toInt();
+  _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
+  _loudnessThreshold = parameter("loudnessThreshold").toReal();
+  _transposition = parameter("transpositionAmount").toInt();
+  _minOcurrenceRate = parameter("minOcurrenceRate").toReal();
+  _midiBufferDuration = parameter("midiBufferDuration").toReal();
+  _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
+  _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
+  _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
+  _applyTimeCompensation = parameter("applyTimeCompensation").toBool();
+
+  // The frame size is not a parameter: 16 kHz and 24 kHz get a shorter
+  // analysis frame, every other sample rate uses _fixedFrameSize.
+  switch (static_cast<int>(_sampleRate)) {
+    case 16000:
+      _frameSize = 2048;
+      break;
+    case 24000:
+      _frameSize = 4096;
+      break;
+    default:
+      _frameSize = _fixedFrameSize;
+      break;
+  }
+
+  // NOTE(review): this compares a length in samples with half the sample
+  // rate in Hz -- confirm that this is the intended limit.
+  if (_frameSize > _sampleRate * 0.5)
+  {
+    throw EssentiaException("Audio2Midi: Frame size cannot be higher than Nyquist frequency");
+  }
+
+  _lowpass->configure(INHERIT("sampleRate"),
+                      "cutoffFrequency", 1000);
+  _framebuffer->configure("bufferSize", _frameSize);
+  _audio2pitch->configure(INHERIT("sampleRate"),
+                          "frameSize", _frameSize,
+                          "pitchAlgorithm", _pitchAlgorithm,
+                          "minFrequency", _minFrequency,
+                          "maxFrequency", _maxFrequency,
+                          INHERIT("pitchConfidenceThreshold"),
+                          INHERIT("loudnessThreshold"));
+
+  _pitch2midi->configure(INHERIT("sampleRate"),
+                       INHERIT("hopSize"),
+                       INHERIT("minOcurrenceRate"),
+                       INHERIT("applyTimeCompensation"),
+                       "minOnsetCheckPeriod", _minOnsetCheckPeriod,
+                       "minOffsetCheckPeriod", _minOffsetCheckPeriod,
+                       "minNoteChangePeriod", _minNoteChangePeriod,
+                       "midiBufferDuration", _midiBufferDuration,
+                       "minFrequency", _minFrequency,
+                       "tuningFrequency", _tuningFrequency,
+                       "transpositionAmount", _transposition);
+}
+
+// Runs one input frame through the LowPass -> FrameBuffer -> Audio2Pitch ->
+// Pitch2Midi chain. Pitch, loudness and the MIDI vectors are written directly
+// into this algorithm's outputs; pitchConfidence and voiced stay in members.
+void Audio2Midi::compute()
+{
+  // Resolve the input and the outputs. The output locals carry an "Out"
+  // suffix so they do not hide the identically named class members.
+  const std::vector<Real>& frameIn = _frame.get();
+  Real& pitchOut = _pitch.get();
+  Real& loudnessOut = _loudness.get();
+  vector<string>& messageTypeOut = _messageType.get();
+  vector<Real>& midiNoteNumberOut = _midiNoteNumber.get();
+  vector<Real>& timeCompensationOut = _timeCompensation.get();
+
+  // Stage 1: low-pass filter, input frame -> lpFrame.
+  _lowpass->input("signal").set(frameIn);
+  _lowpass->output("signal").set(lpFrame);
+
+  // Stage 2: frame buffer, lpFrame -> analysisFrame.
+  _framebuffer->input("frame").set(lpFrame);
+  _framebuffer->output("frame").set(analysisFrame);
+
+  // Stage 3: pitch analysis of analysisFrame.
+  _audio2pitch->input("frame").set(analysisFrame);
+  _audio2pitch->output("pitch").set(pitchOut);
+  _audio2pitch->output("pitchConfidence").set(pitchConfidence);
+  _audio2pitch->output("loudness").set(loudnessOut);
+  _audio2pitch->output("voiced").set(voiced);
+
+  // Stage 4: pitch and voiced flag -> MIDI note data.
+  _pitch2midi->input("pitch").set(pitchOut);
+  _pitch2midi->input("voiced").set(voiced);
+  _pitch2midi->output("midiNoteNumber").set(midiNoteNumberOut);
+  _pitch2midi->output("timeCompensation").set(timeCompensationOut);
+  _pitch2midi->output("messageType").set(messageTypeOut);
+
+  // Everything is wired; execute the stages in signal-flow order. The outputs
+  // need no further assignment because the stages write into the bound
+  // references.
+  Algorithm* const stages[] = { _lowpass, _framebuffer, _audio2pitch, _pitch2midi };
+  for (Algorithm* stage : stages) {
+    stage->compute();
+  }
+
+  // TODO: building formatted note-on / note-off messages from messageType is
+  // not done here.
+}
diff --git a/src/algorithms/tonal/audio2midi.h b/src/algorithms/tonal/audio2midi.h
@@ -0,0 +1,104 @@
+#ifndef ESSENTIA_AUDIO2MIDI_H
+#define ESSENTIA_AUDIO2MIDI_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+  class Audio2Midi : public Algorithm { // composes LowPass, FrameBuffer, Audio2Pitch and Pitch2Midi (created in the constructor)
+    protected:
+      Input<std::vector<Real>> _frame; // "frame" input port
+      Output<Real> _pitch; // "pitch" output port
+      Output<Real> _loudness; // "loudness" output port
+      Output<std::vector<std::string> > _messageType; // "messageType" output port
+      Output<std::vector<Real> > _midiNoteNumber; // "midiNoteNumber" output port
+      Output<std::vector<Real> > _timeCompensation; // "timeCompensation" output port
+
+      Algorithm* _lowpass; // inner algorithms: created in the constructor, deleted in the destructor
+      Algorithm* _framebuffer;
+      Algorithm* _audio2pitch;
+      Algorithm* _pitch2midi;
+
+      Real _sampleRate;
+      int _frameSize; // assigned in configure() from the sample rate, not a parameter
+      int _fixedFrameSize = 8192; // frame size configure() uses when the sample rate has no dedicated value
+      int _hopSize;
+      std::string _pitchAlgorithm = "pitchyinfft"; // fixed value passed to Audio2Pitch; its parameter declaration below is commented out
+      std::string _loudnessAlgorithm = "rms"; // parameter declaration below is commented out; not read in configure() or compute()
+      Real _minFrequency;
+      Real _maxFrequency;
+      int _tuningFrequency;
+      Real _pitchConfidenceThreshold, _loudnessThreshold, _minOcurrenceRate;
+      Real _midiBufferDuration;
+      Real _minNoteChangePeriod;
+      Real _minOnsetCheckPeriod;
+      Real _minOffsetCheckPeriod;
+
+      bool _applyTimeCompensation;
+      int _transposition;
+
+      // Containers: intermediate storage bound to the inner algorithms in compute()
+      std::vector<Real> lpFrame, analysisFrame; // LowPass output and FrameBuffer output
+      Real pitch, pitchConfidence, loudness; // compute() binds only pitchConfidence; its locals named pitch/loudness hide the other two
+      std::vector<Real> midiNoteNumber, timeCompensation; // hidden by same-named locals in compute()
+      std::vector<std::string> messageType; // hidden by a same-named local in compute()
+      Real onsetTimeCompensation, offsetTimeCompensation; // only referenced by commented-out code in compute()
+
+      int voiced; // receives Audio2Pitch's "voiced" output and is fed to Pitch2Midi's "voiced" input
+
+    public:
+      Audio2Midi() { // declares the I/O ports and instantiates the four inner algorithms
+        declareInput(_frame, "frame", "the input frame to analyse");
+        declareOutput(_pitch, "pitch", "pitch given in Hz");
+        declareOutput(_loudness, "loudness", "detected loudness in decibels");
+        declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
+        declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
+        declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");
+
+        _lowpass = AlgorithmFactory::create("LowPass");
+        _framebuffer = AlgorithmFactory::create("FrameBuffer");
+        _audio2pitch = AlgorithmFactory::create("Audio2Pitch");
+        _pitch2midi = AlgorithmFactory::create("Pitch2Midi");
+      }
+
+      ~Audio2Midi() { // releases the inner algorithms created in the constructor
+        delete _lowpass;
+        delete _framebuffer;
+        delete _audio2pitch;
+        delete _pitch2midi;
+      }
+
+      void declareParameters() { // arguments: name, description, accepted range, default value
+        // TODO: revise parameter description
+        declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
+        declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
+        // declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft");
+        // declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms");
+        declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
+        declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
+        declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);
+        declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
+        declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
+        declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
+        declareParameter("minOcurrenceRate", "rate of predominant pitch ocurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
+        declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // default is 0.05 s, i.e. 50 ms
+        declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
+        declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
+        declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
+        declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
+      }
+
+      void configure(); // defined in audio2midi.cpp
+      void compute(); // defined in audio2midi.cpp
+
+      static const char* name;
+      static const char* category;
+      static const char* description;
+  };
+
+
+} // namespace standard
+} // namespace essentia
+
+#endif