Skip to content

Commit

Permalink
Update and fix some essentia examples in python using ffmpeg.
Browse files Browse the repository at this point in the history
  • Loading branch information
xaviliz committed May 13, 2024
1 parent 69490c1 commit e005d63
Show file tree
Hide file tree
Showing 9 changed files with 375 additions and 1,151 deletions.
69 changes: 37 additions & 32 deletions src/examples/python/example_pitch_predominantmelody.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import sys, csv
from essentia import *
from essentia.standard import *
import sys
import essentia.standard as es
from pylab import *
from numpy import *

Expand All @@ -26,55 +25,61 @@
try:
filename = sys.argv[1]
except:
print("usage: %s <input-audiofile>" % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile>")
sys.exit()




# We will use a composite algorithm PredominantMelody, which combines a number of
# required steps for us. Let's declare and configure it first:
# We will use a composite algorithm PredominantMelody, which combines a number of
# required steps for us. Let's declare and configure it first:
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True # read the algorithm's reference for more details
run_predominant_melody = PitchMelodia(guessUnvoiced=guessUnvoiced,
frameSize=frameSize,
hopSize=hopSize);
guessUnvoiced = True # read the algorithm's reference for more details
run_predominant_melody = es.PitchMelodia(
guessUnvoiced=guessUnvoiced, frameSize=frameSize, hopSize=hopSize
)

# Load audio file, apply equal loudness filter, and compute predominant melody
audio = MonoLoader(filename = filename, sampleRate=sampleRate)()
audio = EqualLoudness()(audio)
audio = es.MonoLoader(filename=filename, sampleRate=sampleRate)()
audio = es.EqualLoudness()(audio)
pitch, confidence = run_predominant_melody(audio)


n_frames = len(pitch)
print("number of frames: %d" % n_frames)
print(f"number of frames: {n_frames}")

# Visualize output pitch values
fig = plt.figure()
plot(range(n_frames), pitch, 'b')
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), pitch, "b")
ax.set_xlabel("Time (s)")
ax.set_ylabel("Pitch (Hz)")
ax.set_xlim([0, n_frames - 1])

n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
xtick_lbls = [
i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
]
xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Pitch (Hz)')

suptitle("Predominant melody pitch")
tight_layout()
show()

# Visualize output pitch confidence
fig = plt.figure()
plot(range(n_frames), confidence, 'b')
n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), confidence, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Confidence")
ax.set_xlim([0, n_frames - 1])

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Confidence')
suptitle("Predominant melody pitch confidence")

suptitle("Predominant melody pitch confidence")
tight_layout()
show()
113 changes: 67 additions & 46 deletions src/examples/python/example_pitch_predominantmelody_by_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# You should have received a copy of the Affero GNU General Public License
# version 3 along with this program. If not, see http://www.gnu.org/licenses/

import sys, csv
import sys
from essentia import *
from essentia.standard import *
from pylab import *
Expand All @@ -24,36 +24,40 @@
try:
filename = sys.argv[1]
except:
print("usage: %s <input-audiofile>" % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile>")
sys.exit()



# In this example we will extract predominant melody given an audio file by
# running a chain of algorithms.

# First, create our algorithms:
hopSize = 128
frameSize = 2048
sampleRate = 44100
guessUnvoiced = True

run_windowing = Windowing(type='hann', zeroPadding=3*frameSize) # Hann window with x4 zero padding
run_windowing = Windowing(
type="hann", zeroPadding=3 * frameSize
) # Hann window with x4 zero padding
run_spectrum = Spectrum(size=frameSize * 4)
run_spectral_peaks = SpectralPeaks(minFrequency=1,
maxFrequency=20000,
maxPeaks=100,
sampleRate=sampleRate,
magnitudeThreshold=0,
orderBy="magnitude")
run_spectral_peaks = SpectralPeaks(
minFrequency=1,
maxFrequency=20000,
maxPeaks=100,
sampleRate=sampleRate,
magnitudeThreshold=0,
orderBy="magnitude",
)
run_pitch_salience_function = PitchSalienceFunction()
run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks()
run_pitch_contours = PitchContours(hopSize=hopSize)
run_pitch_contours_melody = PitchContoursMelody(guessUnvoiced=guessUnvoiced,
hopSize=hopSize)
run_pitch_contours_melody = PitchContoursMelody(
guessUnvoiced=guessUnvoiced, hopSize=hopSize
)

# ... and create a Pool
pool = Pool();
pool = Pool()

# Now we are ready to start processing.
# 1. Load audio and pass it through the equal-loudness filter
Expand All @@ -66,54 +70,71 @@
frame = run_windowing(frame)
spectrum = run_spectrum(frame)
peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)

salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
salience_peaks_bins, salience_peaks_saliences = run_pitch_salience_function_peaks(salience)

pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
pool.add('allframes_salience_peaks_saliences', salience_peaks_saliences)
(
salience_peaks_bins,
salience_peaks_saliences,
) = run_pitch_salience_function_peaks(salience)

pool.add("allframes_salience_peaks_bins", salience_peaks_bins)
pool.add("allframes_salience_peaks_saliences", salience_peaks_saliences)

# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
# tracking and melody detection algorithms:
contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
pool['allframes_salience_peaks_bins'],
pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins,
contours_saliences,
contours_start_times,
duration)
(
contours_bins,
contours_saliences,
contours_start_times,
duration,
) = run_pitch_contours(
pool["allframes_salience_peaks_bins"],
pool["allframes_salience_peaks_saliences"],
)
pitch, confidence = run_pitch_contours_melody(
contours_bins, contours_saliences, contours_start_times, duration
)

# NOTE that we can avoid the majority of intermediate steps by using a composite algorithm
# PredominantMelody (see extractor_predominant_melody.py). This script will be useful
# PredominantMelody (see extractor_predominant_melody.py). This script will be useful
# if you want to get access to pitch salience function and pitch contours.

n_frames = len(pitch)
print("number of frames: %d" % n_frames)
print(f"number of frames: {n_frames}")

# visualize output pitch
fig = plt.figure()
plot(range(n_frames), pitch, 'b')
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), pitch, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Pitch (Hz)")
ax.set_xlim([0, n_frames - 1])

n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
xtick_lbls = [
i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
]
xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Pitch (Hz)')

suptitle("Predominant melody pitch")
tight_layout()
show()

# visualize output pitch confidence
fig = plt.figure()
plot(range(n_frames), confidence, 'b')
n_ticks = 10
xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
fig, ax = plt.subplots(1, figsize=(10, 4))
ax.plot(range(n_frames), confidence, "b")

ax.set_xlabel("Time (s)")
ax.set_ylabel("Confidence")
ax.set_xlim([0, n_frames - 1])

plt.sca(ax)
plt.xticks(xtick_locs, xtick_lbls)
ax = fig.add_subplot(111)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Confidence')
suptitle("Predominant melody pitch confidence")

suptitle("Predominant melody pitch confidence")
tight_layout()
show()
18 changes: 9 additions & 9 deletions src/examples/python/example_rhythm_beattrackermultifeature.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,25 @@
input_filename = sys.argv[1]
output_filename = sys.argv[2]
except:
print('usage: %s <input-audiofile> <output-audiofile>' % sys.argv[0])
print(f"usage: {sys.argv[0]} <input-audiofile> <output-audiofile>")
sys.exit()

# don't forget, we can actually instantiate and call an algorithm on the same line!
print('Loading audio file...')
audio = MonoLoader(filename = input_filename)()
print("Loading audio file...")
audio = MonoLoader(filename=input_filename)()

# compute beat positions
print('Computing beat positions...')
print("Computing beat positions...")
bt = BeatTrackerMultiFeature()
beats, _ = bt(audio)
print(beats)
print(f"beats: {beats}")

# mark them on the audio, which we'll write back to disk
# we use beeps instead of white noise to mark them, as it's more distinctive
print('Writing audio files to disk with beats marked...')
print("Writing audio files to disk with beats marked...")

marker = AudioOnsetsMarker(onsets = beats, type = 'beep')
marker = AudioOnsetsMarker(onsets=beats, type="beep")
marked_audio = marker(audio)
MonoWriter(filename = output_filename)(marked_audio)
MonoWriter(filename=output_filename)(marked_audio)

print('All done!')
print("All done!")
34 changes: 17 additions & 17 deletions src/examples/python/example_rhythm_bpmhistogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,45 @@


try:
audiofile = sys.argv[1]
audiofile = sys.argv[1]
except:
print ("usage: %s <audiofile>" % sys.argv[0])
sys.exit()
print(f"usage: {sys.argv[0]} <audiofile>")
sys.exit()

pool = essentia.Pool()

loader = MonoLoader(filename = audiofile)
loader = MonoLoader(filename=audiofile)
bt = RhythmExtractor2013()
bpm_histogram = BpmHistogramDescriptors()
centroid = Centroid(range=250) # BPM histogram output size is 250
centroid = Centroid(range=250) # BPM histogram output size is 250

loader.audio >> bt.signal
bt.bpm >> (pool, 'bpm')
bt.bpm >> (pool, "bpm")
bt.ticks >> None
bt.confidence >> None
bt.estimates >> None
bt.bpmIntervals >> bpm_histogram.bpmIntervals
bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
bpm_histogram.firstPeakBPM >> (pool, "bpm_first_peak")
bpm_histogram.firstPeakWeight >> None
bpm_histogram.firstPeakSpread >> None
bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
bpm_histogram.secondPeakBPM >> (pool, "bpm_second_peak")
bpm_histogram.secondPeakWeight >> None
bpm_histogram.secondPeakSpread >> None
bpm_histogram.histogram >> (pool, 'bpm_histogram')
bpm_histogram.histogram >> (pool, "bpm_histogram")
bpm_histogram.histogram >> centroid.array
centroid.centroid >> (pool, 'bpm_centroid')
centroid.centroid >> (pool, "bpm_centroid")

essentia.run(loader)
print("BPM: %0.1f" % pool['bpm'])
print("Most prominent peak: %0.1f BPM" % pool['bpm_first_peak'][0])
print("Centroid: %0.1f" % pool['bpm_centroid'][0])
print(f"BPM: {pool['bpm']:.1f}")
print(f"Most prominent peak: {pool['bpm_first_peak'][0]:.1f} BPM")
print(f"Centroid: {pool['bpm_centroid'][0]:.1f}")

histogram = pool['bpm_histogram'][0]
histogram = pool["bpm_histogram"][0]

fig, ax = plt.subplots()
ax.bar(range(len(histogram)), histogram, width=1)
ax.set_xlabel('BPM')
ax.set_ylabel('Frequency')
ax.set_xlabel("BPM")
ax.set_ylabel("Frequency")
ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
plt.show()
plt.show()
Loading

0 comments on commit e005d63

Please sign in to comment.