diff --git a/src/algorithms/tonal/audio2midi.cpp b/src/algorithms/tonal/audio2midi.cpp
new file mode 100644
index 000000000..5248185d9
--- /dev/null
+++ b/src/algorithms/tonal/audio2midi.cpp
@@ -0,0 +1,143 @@
+#include "audio2midi.h"
+
+using namespace std;
+using namespace essentia;
+using namespace standard;
+
+const char* Audio2Midi::name = "Audio2Midi";  // algorithm name
+const char* Audio2Midi::category = "Pitch";  // algorithm category
+const char* Audio2Midi::description = DOC("Wrapper around Audio2Pitch and Pitch2Midi for real time application");
+
+/**
+ * Reads the user parameters and configures the four inner algorithms
+ * (LowPass, FrameBuffer, Audio2Pitch and Pitch2Midi).
+ * The analysis frame size is not a user parameter: it is derived from the
+ * sample rate.
+ * @throws EssentiaException if the derived frame size exceeds half of the
+ *         sample rate.
+ */
+void Audio2Midi::configure()
+{
+  _sampleRate = parameter("sampleRate").toReal();
+  _hopSize = parameter("hopSize").toInt();
+  _minFrequency = parameter("minFrequency").toReal();
+  _maxFrequency = parameter("maxFrequency").toReal();
+  _tuningFrequency = parameter("tuningFrequency").toInt();
+  _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
+  _loudnessThreshold = parameter("loudnessThreshold").toReal();
+  _transposition = parameter("transpositionAmount").toInt();
+  _minOccurrenceRate = parameter("minOccurrenceRate").toReal();
+  _midiBufferDuration = parameter("midiBufferDuration").toReal();
+  _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
+  _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
+  _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
+  _applyTimeCompensation = parameter("applyTimeCompensation").toBool();
+
+  // 16 kHz and 24 kHz use a shorter analysis frame; every other sample rate
+  // (44100 and 48000 included) uses the fixed default frame size.
+  switch (static_cast<int>(_sampleRate)) {
+    case 16000:
+      _frameSize = 2048;
+      break;
+    case 24000:
+      _frameSize = 4096;
+      break;
+    default:
+      _frameSize = _fixedFrameSize;
+      break;
+  }
+
+  if (_frameSize > _sampleRate * 0.5)
+  {
+    throw EssentiaException("Audio2Midi: Frame size cannot be higher than Nyquist frequency");
+  }
+
+  _lowpass->configure(INHERIT("sampleRate"),
+                      "cutoffFrequency", 1000);  // fixed cutoff, not a user parameter
+  _framebuffer->configure("bufferSize", _frameSize);
+  _audio2pitch->configure(INHERIT("sampleRate"),
+                          "frameSize", _frameSize,
+                          "pitchAlgorithm", _pitchAlgorithm,
+                          "minFrequency", _minFrequency,
+                          "maxFrequency", _maxFrequency,
+                          INHERIT("pitchConfidenceThreshold"),
+                          INHERIT("loudnessThreshold"));
+
+  _pitch2midi->configure(INHERIT("sampleRate"),
+                         INHERIT("hopSize"),
+                         INHERIT("minOccurrenceRate"),
+                         INHERIT("applyTimeCompensation"),
+                         "minOnsetCheckPeriod", _minOnsetCheckPeriod,
+                         "minOffsetCheckPeriod", _minOffsetCheckPeriod,
+                         "minNoteChangePeriod", _minNoteChangePeriod,
+                         "midiBufferDuration", _midiBufferDuration,
+                         "minFrequency", _minFrequency,
+                         "tuningFrequency", _tuningFrequency,
+                         "transpositionAmount", _transposition);
+}
+
+/**
+ * Processes one input frame through the inner chain
+ * LowPass -> FrameBuffer -> Audio2Pitch -> Pitch2Midi.
+ *
+ * The inner algorithms write directly into this algorithm's outputs:
+ *  - "pitch" and "loudness" are produced by Audio2Pitch;
+ *  - "messageType", "midiNoteNumber" and "timeCompensation" are produced by
+ *    Pitch2Midi.
+ * The voiced flag is only forwarded from Audio2Pitch to Pitch2Midi and the
+ * pitch confidence is computed but not exposed as an output.
+ *
+ * @throws EssentiaException if the input frame is empty.
+ */
+void Audio2Midi::compute()
+{
+  const std::vector<Real>& frame = _frame.get();
+
+  // Fail early with an error that names this algorithm instead of relying on
+  // whichever inner algorithm happens to reject an empty signal first.
+  if (frame.empty())
+  {
+    throw EssentiaException("Audio2Midi: cannot compute on an empty frame");
+  }
+
+  // References to the output slots. The *Out suffix keeps them from shadowing
+  // the data members that carry the same base names.
+  Real& pitchOut = _pitch.get();
+  Real& loudnessOut = _loudness.get();
+  vector<string>& messageTypeOut = _messageType.get();
+  vector<Real>& midiNoteNumberOut = _midiNoteNumber.get();
+  vector<Real>& timeCompensationOut = _timeCompensation.get();
+
+  // The bindings below are set on every call, using the references fetched
+  // above.
+
+  // Stage 1: low-pass filter the incoming frame (the cutoff is set in
+  // configure()).
+  _lowpass->input("signal").set(frame);
+  _lowpass->output("signal").set(lpFrame);
+
+  // Stage 2: FrameBuffer turns the filtered frame into the analysis frame
+  // (its bufferSize is the analysis frame size chosen in configure()).
+  _framebuffer->input("frame").set(lpFrame);
+  _framebuffer->output("frame").set(analysisFrame);
+
+  // Stage 3: pitch, confidence, loudness and voicing of the analysis frame.
+  _audio2pitch->input("frame").set(analysisFrame);
+  _audio2pitch->output("pitch").set(pitchOut);
+  _audio2pitch->output("pitchConfidence").set(pitchConfidence);
+  _audio2pitch->output("loudness").set(loudnessOut);
+  _audio2pitch->output("voiced").set(voiced);
+
+  // Stage 4: convert the pitch and voiced flag into MIDI note messages.
+  _pitch2midi->input("pitch").set(pitchOut);
+  _pitch2midi->input("voiced").set(voiced);
+  _pitch2midi->output("midiNoteNumber").set(midiNoteNumberOut);
+  _pitch2midi->output("timeCompensation").set(timeCompensationOut);
+  _pitch2midi->output("messageType").set(messageTypeOut);
+
+  // Run the stages in order; each one consumes what the previous one wrote.
+  _lowpass->compute();
+  _framebuffer->compute();
+  _audio2pitch->compute();
+  _pitch2midi->compute();
+}
diff --git a/src/algorithms/tonal/audio2midi.h b/src/algorithms/tonal/audio2midi.h
new file mode 100644
index 000000000..5779ff372
--- /dev/null
+++ b/src/algorithms/tonal/audio2midi.h
@@ -0,0 +1,103 @@
+#ifndef ESSENTIA_AUDIO2MIDI_H
+#define ESSENTIA_AUDIO2MIDI_H
+
+#include "algorithmfactory.h"
+
+namespace essentia {
+namespace standard {
+
+  class Audio2Midi : public Algorithm {  // frame-wise wrapper chaining LowPass, FrameBuffer, Audio2Pitch and Pitch2Midi
+    protected:
+      Input<std::vector<Real>> _frame;
+      Output<Real> _pitch;
+      Output<Real> _loudness;
+      Output<std::vector<std::string> > _messageType;
+      Output<std::vector<Real> > _midiNoteNumber;
+      Output<std::vector<Real> > _timeCompensation;
+      // inner algorithms: created in the constructor, deleted in the destructor
+      Algorithm* _lowpass;
+      Algorithm* _framebuffer;
+      Algorithm* _audio2pitch;
+      Algorithm* _pitch2midi;
+      // values cached by configure(); members with an initializer are fixed defaults, not user parameters
+      Real _sampleRate;
+      int _frameSize;  // analysis frame size, derived from the sample rate in configure()
+      int _fixedFrameSize = 8192;  // frame size for every sample rate other than 16000 and 24000
+      int _hopSize;
+      std::string _pitchAlgorithm = "pitchyinfft";  // forwarded to Audio2Pitch in configure()
+      std::string _loudnessAlgorithm = "rms";  // not read by configure() or compute()
+      Real _minFrequency;
+      Real _maxFrequency;
+      int _tuningFrequency;
+      Real _pitchConfidenceThreshold, _loudnessThreshold, _minOccurrenceRate;
+      Real _midiBufferDuration;
+      Real _minNoteChangePeriod;
+      Real _minOnsetCheckPeriod;
+      Real _minOffsetCheckPeriod;
+
+      bool _applyTimeCompensation;
+      int _transposition;
+
+      // Working storage; only part of it is bound to the inner algorithms in compute()
+      std::vector<Real> lpFrame, analysisFrame;  // LowPass output and FrameBuffer output
+      Real pitch, pitchConfidence, loudness;  // only pitchConfidence is bound in compute(); pitch and loudness go straight to the outputs
+      std::vector<Real> midiNoteNumber, timeCompensation;  // not used by compute(), which binds the outputs directly
+      std::vector<std::string> messageType;  // not used by compute(), which binds the output directly
+      Real onsetTimeCompensation, offsetTimeCompensation;  // not referenced by any active code in audio2midi.cpp
+      
+      int voiced;  // voiced flag handed from Audio2Pitch to Pitch2Midi in compute()
+      
+    public:
+      Audio2Midi() {  // declares the input/outputs and instantiates the four inner algorithms
+        declareInput(_frame, "frame", "the input frame to analyse");
+        declareOutput(_pitch, "pitch", "pitch given in Hz");
+        declareOutput(_loudness, "loudness", "detected loudness in decibels");
+        declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
+        declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
+        declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");
+          
+        _lowpass = AlgorithmFactory::create("LowPass");
+        _framebuffer = AlgorithmFactory::create("FrameBuffer");
+        _audio2pitch = AlgorithmFactory::create("Audio2Pitch");
+        _pitch2midi = AlgorithmFactory::create("Pitch2Midi");
+      }
+
+      ~Audio2Midi() {  // deletes the inner algorithms created in the constructor
+        delete _lowpass;
+        delete _framebuffer;
+        delete _audio2pitch;
+        delete _pitch2midi;
+      }
+
+      void declareParameters() {  // user parameters; configure() forwards most of them to the inner algorithms
+        declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
+        declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
+        // declareParameter("pitchAlgorithm", "pitch algorithm to use", "{pyin,pyin_fft}", "pyin_fft");
+        // declareParameter("loudnessAlgorithm", "loudness algorithm to use", "{loudness,rms}", "rms");
+        declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
+        declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
+        declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);  // NOTE(review): 440 Hz is commonly labelled A4 - confirm the "A3" wording
+        declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
+        declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
+        declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
+        declareParameter("minOccurrenceRate", "rate of predominant pitch occurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
+        declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // 50 ms
+        declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
+        declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
+        declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
+        declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
+      }
+
+      void configure();  // reads the parameters and configures the inner algorithms
+      void compute();  // runs the inner algorithms on one input frame
+
+      static const char* name;
+      static const char* category;
+      static const char* description;
+  };
+
+
+} // namespace standard
+} // namespace essentia
+
+#endif
diff --git a/src/algorithms/tonal/pitch2midi.cpp b/src/algorithms/tonal/pitch2midi.cpp
index d40baeb9d..87bc7b7b9 100644
--- a/src/algorithms/tonal/pitch2midi.cpp
+++ b/src/algorithms/tonal/pitch2midi.cpp
@@ -14,7 +14,7 @@ void Pitch2Midi::configure()
   _sampleRate = parameter("sampleRate").toReal();
   _hopSize = parameter("hopSize").toInt();
   _minFrequency = parameter("minFrequency").toReal();
-  _minOcurrenceRate = parameter("minOcurrenceRate").toReal();
+  _minOccurrenceRate = parameter("minOccurrenceRate").toReal();
   _bufferDuration = parameter("midiBufferDuration").toReal();
   _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
   _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
@@ -33,8 +33,8 @@ void Pitch2Midi::configure()
   _offsetCheckCounter = 0;
   _onsetCheckCounter = 0;
     
-  _minOcurrenceRatePeriod = _minOcurrenceRate * _bufferDuration;
-  _minOcurrenceRateThreshold = _minOcurrenceRatePeriod / _frameTime;
+  _minOccurrenceRatePeriod = _minOccurrenceRate * _bufferDuration;
+  _minOccurrenceRateThreshold = _minOccurrenceRatePeriod / _frameTime;
 
   // estimate buffer capacity
   int c = static_cast<int>( round( _sampleRate / float(_hopSize) * _bufferDuration ) );
@@ -151,7 +151,6 @@ void Pitch2Midi::compute()
         _noteOff = true;
         updateDnote();
         setOutputs(dnote, 0.0, _minNoteChangePeriod);
-        //E_INFO("offset(unvoiced frame)");
         _unvoicedFrameCounter = 0;
         _offsetCheckCounter = 0;
         _onsetCheckCounter = 0;
@@ -220,8 +219,8 @@ void Pitch2Midi::compute()
   if (!hasCoherence() && _NOTED_ON) {
     if (_maxVoted[0] != 0.0) {
       _onsetCheckCounter++;
-      // combines checker with minOcurrenceRate
-      if ((_onsetCheckCounter > _minOcurrenceRateThreshold)){
+      // combines checker with minOccurrenceRate
+      if ((_onsetCheckCounter > _minOccurrenceRateThreshold)){
         _NOTED_ON = true;
         if (note != _maxVoted[0]){  // avoid note slicing effect
             _noteOff = true;
@@ -229,18 +228,18 @@ void Pitch2Midi::compute()
             updateDnote();
             note = _maxVoted[0];
         }
-        //E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOcurrenceRateThreshold);
+        //E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOccurrenceRateThreshold);
         _offsetCheckCounter = 0;
         _onsetCheckCounter = 0;
       }
     }
     // output the max-voted midi note to avoid unestable midi note numbers
-    setOutputs(_maxVoted[0], _minOcurrenceRatePeriod, _minOcurrenceRatePeriod);
+    setOutputs(_maxVoted[0], _minOccurrenceRatePeriod, _minOccurrenceRatePeriod);
     return;
   }
 
   if (!hasCoherence() && !_NOTED_ON) {
-    if (_maxVoted[1] > _minOcurrenceRate) {
+    if (_maxVoted[1] > _minOccurrenceRate) {
       _onsetCheckCounter++;
 
       if (_onsetCheckCounter > _minOnsetCheckThreshold) {
diff --git a/src/algorithms/tonal/pitch2midi.h b/src/algorithms/tonal/pitch2midi.h
index aa3b4a45b..23c291b1d 100644
--- a/src/algorithms/tonal/pitch2midi.h
+++ b/src/algorithms/tonal/pitch2midi.h
@@ -26,7 +26,7 @@ namespace standard {
       Real _sampleRate;
       int _hopSize;
       Real _minFrequency;
-      Real _minOcurrenceRate;
+      Real _minOccurrenceRate;
       Real _minOnsetCheckPeriod;
       Real _minOffsetCheckPeriod;
       Real _minNoteChangePeriod;
@@ -66,8 +66,8 @@ namespace standard {
       int _onsetCheckCounter;
       
       Real _frameTime;
-      Real _minOcurrenceRateThreshold;
-      Real _minOcurrenceRatePeriod;
+      Real _minOccurrenceRateThreshold;
+      Real _minOccurrenceRatePeriod;
 
       // former Pitch2Midi outputs, now interal vars
       Real _midiNoteNumberTransposed;
@@ -89,7 +89,7 @@ namespace standard {
         declareParameter("sampleRate", "Audio sample rate", "[8000,inf)", 44100);
         declareParameter("hopSize", "Pitch Detection analysis hop size in samples, equivalent to I/O buffer size", "[1,inf)", 128);
         declareParameter("minFrequency", "minimum detectable frequency", "[20,20000]", 60.0);
-        declareParameter("minOcurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
+        declareParameter("minOccurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
         declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in the note toggle detection algorithm", "[0.005,0.5]", 0.015); // 15ms
         declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (s)", "(0,1]", 0.030);
         declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (s)", "(0,1]", 0.075);
diff --git a/test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy b/test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy
new file mode 100644
index 000000000..b7f3e48b9
Binary files /dev/null and b/test/src/unittests/tonal/audio2midi/359500__mtg__sax-tenor-e-major.npy differ
diff --git a/test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy b/test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy
new file mode 100644
index 000000000..6b0842423
Binary files /dev/null and b/test/src/unittests/tonal/audio2midi/359628__mtg__sax-tenor-d-minor.npy differ
diff --git a/test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy b/test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy
new file mode 100644
index 000000000..30bff5573
Binary files /dev/null and b/test/src/unittests/tonal/audio2midi/387517__deleted_user_7267864__saxophone-going-up.npy differ
diff --git a/test/src/unittests/tonal/test_audio2midi.py b/test/src/unittests/tonal/test_audio2midi.py
new file mode 100644
index 000000000..c6120cc83
--- /dev/null
+++ b/test/src/unittests/tonal/test_audio2midi.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2006-2024  Music Technology Group - Universitat Pompeu Fabra
+#
+# This file is part of Essentia
+#
+# Essentia is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation (FSF), either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the Affero GNU General Public License
+# version 3 along with this program. If not, see http://www.gnu.org/licenses/
+
+
+from essentia_test import *
+from numpy import mean, array, float32, square
+from pathlib import Path
+
+
+class TestAudio2Midi(TestCase):
+    def testEmpty(self):  # an empty input frame must make compute() fail
+        self.assertComputeFails(Audio2Midi(), [])
+
+    def testZero(self):
+        pitch, loudness, message_type, midi_note, time_compensation = Audio2Midi()(
+            zeros(32)
+        )
+        self.assertEqual(pitch, 0)
+        self.assertEqual(loudness, 0.0)
+        self.assertEqual(message_type, [])
+        self.assertEqual(midi_note.tolist(), array([], dtype=float32).tolist())
+        self.assertEqual(time_compensation.tolist(), array([], dtype=float32).tolist())
+
+    def assessNoteList(
+        self,
+        reference_path: str,
+        estimated: list,
+        n_notes_tolerance: int = 0,
+        onset_tolerance: float = 0.01,
+        offset_tolerance: float = 0.01,
+        midi_note_tolerance: int = 0,
+    ):
+        # read the expected notes file manually annotated
+        expected_notes = numpy.load(join(filedir(), reference_path))
+        print("Expected notes:")
+        print(expected_notes)
+
+        print("\ndiffs")
+        print(array(estimated) - expected_notes[:, 1:])
+
+        # estimate the number of notes for expected and detected
+        n_detected_notes = len(estimated)
+        n_expected_notes = len(expected_notes)
+
+        # estimate the onset error for each note and estimate the mean
+        onset_mse = mean(
+            [square(note[1] - estimated[int(note[0])][0]) for note in expected_notes]
+        )
+
+        # estimate the onset error for each note and estimate the mean
+        offset_mse = mean(
+            [square(note[2] - estimated[int(note[0])][1]) for note in expected_notes]
+        )
+
+        # estimate the midi note error for each note and estimate the mean
+        midi_note_mse = mean(
+            [square(note[-1] - estimated[int(note[0])][-1]) for note in expected_notes]
+        )
+
+        # assert outputs
+        self.assertAlmostEqual(n_detected_notes, n_expected_notes, n_notes_tolerance)
+        self.assertAlmostEqual(onset_mse, 0, onset_tolerance)
+        self.assertAlmostEqual(offset_mse, 0, offset_tolerance)
+        self.assertAlmostEqual(midi_note_mse, midi_note_mse, midi_note_tolerance)
+
+    def testARealCaseWithEMajorScale(self):
+        frame_size = 8192
+        sample_rate = 48000
+        hop_size = 64
+        loudness_threshold = -40
+        pitch_confidence_threshold = 0.25
+        min_frequency = 103.83
+        max_frequency = 659.26
+        midi_buffer_duration = 0.05
+        min_note_change_period = 0.03
+        n_notes_tolerance = 0
+        onset_tolerance = 0.01
+        midi_note_tolerance = 0
+
+        stem = "359500__mtg__sax-tenor-e-major"
+        audio_path = Path("recorded") / f"{stem}.wav"
+        reference_path = Path("audio2midi") / f"{stem}.npy"
+
+        self.runARealCase(
+            audio_path=audio_path,
+            reference_path=reference_path,
+            sample_rate=sample_rate,
+            frame_size=frame_size,
+            hop_size=hop_size,
+            pitch_confidence_threshold=pitch_confidence_threshold,
+            loudness_threshold=loudness_threshold,
+            midi_buffer_duration=midi_buffer_duration,
+            min_note_change_period=min_note_change_period,
+            max_frequency=max_frequency,
+            min_frequency=min_frequency,
+            n_notes_tolerance=n_notes_tolerance,
+            onset_tolerance=onset_tolerance,
+            midi_note_tolerance=midi_note_tolerance,
+        )
+
+    def testARealCaseWithDMinorScale(self):
+        frame_size = 8192
+        sample_rate = 48000
+        hop_size = 64
+        loudness_threshold = -40
+        pitch_confidence_threshold = 0.25
+        min_frequency = 103.83
+        max_frequency = 659.26
+        midi_buffer_duration = 0.05
+        min_note_change_period = 0.03
+        n_notes_tolerance = 0
+        onset_tolerance = 0.01
+        midi_note_tolerance = 0
+
+        stem = "359628__mtg__sax-tenor-d-minor"
+        audio_path = Path("recorded") / f"{stem}.wav"
+        reference_path = Path("audio2midi") / f"{stem}.npy"
+
+        self.runARealCase(
+            audio_path=audio_path,
+            reference_path=reference_path,
+            sample_rate=sample_rate,
+            frame_size=frame_size,
+            hop_size=hop_size,
+            pitch_confidence_threshold=pitch_confidence_threshold,
+            loudness_threshold=loudness_threshold,
+            midi_buffer_duration=midi_buffer_duration,
+            min_note_change_period=min_note_change_period,
+            max_frequency=max_frequency,
+            min_frequency=min_frequency,
+            n_notes_tolerance=n_notes_tolerance,
+            onset_tolerance=onset_tolerance,
+            midi_note_tolerance=midi_note_tolerance,
+        )
+
+    def testSeparatedNotes(self):
+        frame_size = 8192
+        sample_rate = 44100
+        hop_size = 16
+        loudness_threshold = -50
+        pitch_confidence_threshold = 0.8
+        min_frequency = 103.83
+        max_frequency = 659.26
+        midi_buffer_duration = 0.05
+        min_note_change_period = 0.02
+        min_onset_period = 0.015
+        min_offset_period = 0.015
+        min_occurrence_rate = 0.05
+        n_notes_tolerance = 0
+        onset_tolerance = 0.01
+        midi_note_tolerance = 0
+
+        stem = "387517__deleted_user_7267864__saxophone-going-up"
+        audio_path = Path("recorded") / f"{stem}.wav"
+        reference_path = Path("audio2midi") / f"{stem}.npy"
+
+        self.runARealCase(
+            audio_path=audio_path,
+            reference_path=reference_path,
+            sample_rate=sample_rate,
+            frame_size=frame_size,
+            hop_size=hop_size,
+            pitch_confidence_threshold=pitch_confidence_threshold,
+            loudness_threshold=loudness_threshold,
+            max_frequency=max_frequency,
+            min_frequency=min_frequency,
+            midi_buffer_duration=midi_buffer_duration,
+            min_note_change_period=min_note_change_period,
+            min_onset_period=min_onset_period,
+            min_offset_period=min_offset_period,
+            min_occurrence_rate=min_occurrence_rate,
+            n_notes_tolerance=n_notes_tolerance,
+            onset_tolerance=onset_tolerance,
+            midi_note_tolerance=midi_note_tolerance,
+        )
+
+    def runARealCase(
+        self,
+        audio_path: str,
+        reference_path: str,
+        sample_rate: int,
+        frame_size: int,
+        hop_size: int,
+        pitch_confidence_threshold: float,
+        loudness_threshold: float,
+        max_frequency: float,
+        min_frequency: float,
+        midi_buffer_duration: float,
+        min_note_change_period: float,
+        min_onset_period: float = 0.075,
+        min_offset_period: float = 0.2,
+        min_occurrence_rate: float = 0.5,
+        n_notes_tolerance: int = 0,
+        onset_tolerance: float = 0.01,
+        offset_tolerance: float = 0.05,
+        midi_note_tolerance: int = 0,
+    ):
+        filename = join(testdata.audio_dir, audio_path)
+        if sys.platform == "darwin":
+            import soundfile as sf
+
+            audio, _ = sf.read(filename, dtype="float32")
+            if audio.ndim > 1:
+                audio = audio[:, 0]
+        else:
+            audio = MonoLoader(filename=filename, sampleRate=sample_rate)()
+        frames = FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size)
+        step_time = hop_size / sample_rate
+
+        # initialize audio2midi instance
+        a2m = Audio2Midi(
+            sampleRate=sample_rate,
+            hopSize=hop_size,
+            midiBufferDuration=midi_buffer_duration,
+            minNoteChangePeriod=min_note_change_period,
+            minOnsetCheckPeriod=min_onset_period,
+            minOffsetCheckPeriod=min_offset_period,
+            pitchConfidenceThreshold=pitch_confidence_threshold,
+            loudnessThreshold=loudness_threshold,
+            maxFrequency=max_frequency,
+            minFrequency=min_frequency,
+            minOccurrenceRate=min_occurrence_rate,
+        )
+        print(a2m.parameterNames())
+
+        # define estimate bin and some counters
+        nte_list = []  # note toggle event list
+        n = 0
+        time_stamp = 0
+        n_notes = 0
+
+        # simulates real-time process
+        for frame in frames:
+            # _pitch, _, _, _voiced = pitchDetect(frame)
+            _pitch, _, message, midi_note, time_compensation = a2m(frame)
+            time_stamp += step_time
+            # print(n, time_stamp, message, midi_note, time_compensation)
+            if message:
+                nte_list.append(
+                    [
+                        n_notes,
+                        time_stamp - time_compensation[1],
+                        time_stamp - time_compensation[0],
+                        int(midi_note[1]),
+                        message,
+                    ]
+                )
+                print(
+                    f"[{n_notes}][{n}]:{(time_stamp-time_compensation[1]):.3f}, {midi2note(int(midi_note[1]))}({int(midi_note[1])})~{_pitch:.2f}Hz, {message}"  # , {time_compensation}, {midi_note}, {message}
+                )
+                if "note_on" in message:
+                    n_notes += 1
+            n += 1
+
+        print(f"nte_list: {nte_list}")
+        # from the nte_list extracts the note list using note_off messages
+        note_list = self.ntes_to_notes(nte_list)
+        print(f"note_list: {note_list}")
+
+        self.assessNoteList(
+            reference_path,
+            note_list,
+            n_notes_tolerance=n_notes_tolerance,
+            onset_tolerance=onset_tolerance,
+            offset_tolerance=offset_tolerance,
+            midi_note_tolerance=midi_note_tolerance,
+        )
+
+    def ntes_to_notes(self, nte_list: list):
+        note_list = list()
+        for n, nte_message in enumerate(nte_list):
+            if "note_on" in nte_message[4]:
+                # extract time stamp
+                start_time = nte_message[1]
+
+                # in some cases the compensation might generate negative values
+                if start_time < 0:
+                    start_time = 0
+
+                # to get the note offset it is need to get time stamps in the next message (note-off)
+                if n + 1 < len(nte_list):  # when a note off message is provided
+                    # define timestamp for offset
+                    end_time = nte_list[n + 1][1]
+                else:  # there is a non-closed note at the end
+                    # define timestamp for offset
+                    end_time = nte_list[-1][1]
+                note = int(nte_message[3])
+                # define annotation in a list
+                note_list.append([float(start_time), float(end_time), note])
+        return note_list
+
+
+suite = allTests(TestAudio2Midi)  # suite object consumed by the runner below
+
+if __name__ == "__main__":  # run the suite verbosely when the file is executed directly
+    TextTestRunner(verbosity=2).run(suite)
diff --git a/test/src/unittests/tonal/test_audio2pitch.py b/test/src/unittests/tonal/test_audio2pitch.py
index c1e6ea9ba..6c984464c 100644
--- a/test/src/unittests/tonal/test_audio2pitch.py
+++ b/test/src/unittests/tonal/test_audio2pitch.py
@@ -19,7 +19,7 @@
 
 
 from essentia_test import *
-from numpy import sin, pi, mean, random, sqrt
+from numpy import sin, pi, mean, random
 
 
 class TestAudio2Pitch(TestCase):