Skip to content

Commit

Permalink
Update and fix some essentia examples in python using ffmpeg.
Browse files Browse the repository at this point in the history
  • Loading branch information
xaviliz committed May 13, 2024
1 parent 69490c1 commit e005d63
Show file tree
Hide file tree
Showing 9 changed files with 375 additions and 1,151 deletions.
69 changes: 37 additions & 32 deletions src/examples/python/example_pitch_predominantmelody.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import sys, csv
from essentia import *
from essentia.standard import *
import sys
import essentia.standard as es
from pylab import *
from numpy import *

Expand All @@ -26,55 +25,61 @@
try:
filename = sys.argv[1]
except:
print("usage: %s <input-audiofile>" % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile>")
sys.exit()




# We will use a composite algorithm PredominantMelody, which combines a number of
# required steps for us. Let's declare and configure it first:
# We will use a composite algorithm PredominantMelody, which combines a number of
# required steps for us. Let's declare and configure it first:
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True # read the algorithm's reference for more details
run_predominant_melody = PitchMelodia(guessUnvoiced=guessUnvoiced,
frameSize=frameSize,
hopSize=hopSize);
guessUnvoiced = True # read the algorithm's reference for more details
run_predominant_melody = es.PitchMelodia(
guessUnvoiced=guessUnvoiced, frameSize=frameSize, hopSize=hopSize
)

# Load audio file, apply equal loudness filter, and compute predominant melody
audio = MonoLoader(filename = filename, sampleRate=sampleRate)()
audio = EqualLoudness()(audio)
audio = es.MonoLoader(filename=filename, sampleRate=sampleRate)()
audio = es.EqualLoudness()(audio)
pitch, confidence = run_predominant_melody(audio)


n_frames = len(pitch)
print("number of frames: %d" % n_frames)
print(f"number of frames: {n_frames}")

# Visualize output pitch values
fig = plt.figure()
plot(range(n_frames), pitch, 'b')
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), pitch, "b")
ax.set_xlabel("Time (s)")
ax.set_ylabel("Pitch (Hz)")
ax.set_xlim([0, n_frames - 1])

n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
xtick_lbls = [
i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
]
xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Pitch (Hz)')

suptitle("Predominant melody pitch")
tight_layout()
show()

# Visualize output pitch confidence
fig = plt.figure()
plot(range(n_frames), confidence, 'b')
n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), confidence, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Confidence")
ax.set_xlim([0, n_frames - 1])

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Confidence')
suptitle("Predominant melody pitch confidence")

suptitle("Predominant melody pitch confidence")
tight_layout()
show()
113 changes: 67 additions & 46 deletions src/examples/python/example_pitch_predominantmelody_by_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import sys, csv
import sys
from essentia import *
from essentia.standard import *
from pylab import *
Expand All @@ -24,36 +24,40 @@
try:
filename = sys.argv[1]
except:
print("usage: %s <input-audiofile>" % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile>")
sys.exit()



# In this example we will extract predominant melody given an audio file by
# running a chain of algorithms.

# First, create our algorithms:
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True

run_windowing = Windowing(type='hann', zeroPadding=3*frameSize) # Hann window with x4 zero padding
run_windowing = Windowing(
type="hann", zeroPadding=3 * frameSize
) # Hann window with x4 zero padding
run_spectrum = Spectrum(size=frameSize * 4)
run_spectral_peaks = SpectralPeaks(minFrequency=1,
maxFrequency=20000,
maxPeaks=100,
sampleRate=sampleRate,
magnitudeThreshold=0,
orderBy="magnitude")
run_spectral_peaks = SpectralPeaks(
minFrequency=1,
maxFrequency=20000,
maxPeaks=100,
sampleRate=sampleRate,
magnitudeThreshold=0,
orderBy="magnitude",
)
run_pitch_salience_function = PitchSalienceFunction()
run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks()
run_pitch_contours = PitchContours(hopSize=hopSize)
run_pitch_contours_melody = PitchContoursMelody(guessUnvoiced=guessUnvoiced,
hopSize=hopSize)
run_pitch_contours_melody = PitchContoursMelody(
guessUnvoiced=guessUnvoiced, hopSize=hopSize
)

# ... and create a Pool
pool = Pool();
pool = Pool()

# Now we are ready to start processing.
# 1. Load audio and pass it through the equal-loudness filter
Expand All @@ -66,54 +70,71 @@
frame = run_windowing(frame)
spectrum = run_spectrum(frame)
peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)

salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
salience_peaks_bins, salience_peaks_saliences = run_pitch_salience_function_peaks(salience)

pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
pool.add('allframes_salience_peaks_saliences', salience_peaks_saliences)
(
salience_peaks_bins,
salience_peaks_saliences,
) = run_pitch_salience_function_peaks(salience)

pool.add("allframes_salience_peaks_bins", salience_peaks_bins)
pool.add("allframes_salience_peaks_saliences", salience_peaks_saliences)

# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
# tracking and melody detection algorithms:
contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
pool['allframes_salience_peaks_bins'],
pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins,
contours_saliences,
contours_start_times,
duration)
(
contours_bins,
contours_saliences,
contours_start_times,
duration,
) = run_pitch_contours(
pool["allframes_salience_peaks_bins"],
pool["allframes_salience_peaks_saliences"],
)
pitch, confidence = run_pitch_contours_melody(
contours_bins, contours_saliences, contours_start_times, duration
)

# NOTE that we can avoid the majority of intermediate steps by using a composite algorithm
# PredominantMelody (see extractor_predominant_melody.py). This script will be useful
# PredominantMelody (see extractor_predominant_melody.py). This script will be useful
# if you want to get access to pitch salience function and pitch contours.

n_frames = len(pitch)
print("number of frames: %d" % n_frames)
print(f"number of frames: {n_frames}")

# visualize output pitch
fig = plt.figure()
plot(range(n_frames), pitch, 'b')
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), pitch, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Pitch (Hz)")
ax.set_xlim([0, n_frames - 1])

n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
xtick_lbls = [
i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
]
xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Pitch (Hz)')

suptitle("Predominant melody pitch")
tight_layout()
show()

# visualize output pitch confidence
fig = plt.figure()
plot(range(n_frames), confidence, 'b')
n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), confidence, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Confidence")
ax.set_xlim([0, n_frames - 1])

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Confidence')
suptitle("Predominant melody pitch confidence")

suptitle("Predominant melody pitch confidence")
tight_layout()
show()
18 changes: 9 additions & 9 deletions src/examples/python/example_rhythm_beattrackermultifeature.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,25 @@
input_filename = sys.argv[1]
output_filename = sys.argv[2]
except:
print('usage: %s <input-audiofile> <output-audiofile>' % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile> <output-audiofile>")
sys.exit()

# don't forget, we can actually instantiate and call an algorithm on the same line!
print('Loading audio file...')
audio = MonoLoader(filename = input_filename)()
print("Loading audio file...")
audio = MonoLoader(filename=input_filename)()

# compute beat positions
print('Computing beat positions...')
print("Computing beat positions...")
bt = BeatTrackerMultiFeature()
beats, _ = bt(audio)
print(beats)
print(f"beats: {beats}")

# mark them on the audio, which we'll write back to disk
# we use beeps instead of white noise to mark them, as it's more distinctive
print('Writing audio files to disk with beats marked...')
print("Writing audio files to disk with beats marked...")

marker = AudioOnsetsMarker(onsets = beats, type = 'beep')
marker = AudioOnsetsMarker(onsets=beats, type="beep")
marked_audio = marker(audio)
MonoWriter(filename = output_filename)(marked_audio)
MonoWriter(filename=output_filename)(marked_audio)

print('All done!')
print("All done!")
34 changes: 17 additions & 17 deletions src/examples/python/example_rhythm_bpmhistogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,45 @@


try:
audiofile = sys.argv[1]
audiofile = sys.argv[1]
except:
print ("usage: %s <audiofile>" % sys.argv[0])
sys.exit()
print(f"usage: {sys.argv[0]} <audiofile>")
sys.exit()

pool = essentia.Pool()

loader = MonoLoader(filename = audiofile)
loader = MonoLoader(filename=audiofile)
bt = RhythmExtractor2013()
bpm_histogram = BpmHistogramDescriptors()
centroid = Centroid(range=250) # BPM histogram output size is 250
centroid = Centroid(range=250) # BPM histogram output size is 250

loader.audio >> bt.signal
bt.bpm >> (pool, 'bpm')
bt.bpm >> (pool, "bpm")
bt.ticks >> None
bt.confidence >> None
bt.estimates >> None
bt.bpmIntervals >> bpm_histogram.bpmIntervals
bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
bpm_histogram.firstPeakBPM >> (pool, "bpm_first_peak")
bpm_histogram.firstPeakWeight >> None
bpm_histogram.firstPeakSpread >> None
bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
bpm_histogram.secondPeakBPM >> (pool, "bpm_second_peak")
bpm_histogram.secondPeakWeight >> None
bpm_histogram.secondPeakSpread >> None
bpm_histogram.histogram >> (pool, 'bpm_histogram')
bpm_histogram.histogram >> (pool, "bpm_histogram")
bpm_histogram.histogram >> centroid.array
centroid.centroid >> (pool, 'bpm_centroid')
centroid.centroid >> (pool, "bpm_centroid")

essentia.run(loader)
print("BPM: %0.1f" % pool['bpm'])
print("Most prominent peak: %0.1f BPM" % pool['bpm_first_peak'][0])
print("Centroid: %0.1f" % pool['bpm_centroid'][0])
print(f"BPM: {pool['bpm']:.1f}")
print(f"Most prominent peak: {pool['bpm_first_peak'][0]:.1f} BPM")
print(f"Centroid: {pool['bpm_centroid'][0]:.1f}")

histogram = pool['bpm_histogram'][0]
histogram = pool["bpm_histogram"][0]

fig, ax = plt.subplots()
ax.bar(range(len(histogram)), histogram, width=1)
ax.set_xlabel('BPM')
ax.set_ylabel('Frequency')
ax.set_xlabel("BPM")
ax.set_ylabel("Frequency")
ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
plt.show()
plt.show()
Loading

0 comments on commit e005d63

Please sign in to comment.