Python examples #1414

Open · wants to merge 2 commits into master
69 changes: 37 additions & 32 deletions src/examples/python/example_pitch_predominantmelody.py
@@ -15,9 +15,8 @@
 # You should have received a copy of the Affero GNU General Public License
 # version 3 along with this program. If not, see http://www.gnu.org/licenses/

-import sys, csv
-from essentia import *
-from essentia.standard import *
+import sys
+import essentia.standard as es
 from pylab import *
 from numpy import *

@@ -26,55 +25,61 @@
 try:
     filename = sys.argv[1]
 except:
-    print("usage: %s <input-audiofile>" % sys.argv[0])
+    print(f"usage: {sys.argv[0]} <input-audiofile>")
     sys.exit()


-# We will use a composite algorithm PredominantMelody, which combines a number of
-# required steps for us. Let's declare and configure it first:
+# We will use a composite algorithm PredominantMelody, which combines a number of
+# required steps for us. Let's declare and configure it first:
 hopSize = 128
 frameSize = 2048
 sampleRate = 44100
-guessUnvoiced = True # read the algorithm's reference for more details
-run_predominant_melody = PitchMelodia(guessUnvoiced=guessUnvoiced,
-                                      frameSize=frameSize,
-                                      hopSize=hopSize);
+guessUnvoiced = True  # read the algorithm's reference for more details
+run_predominant_melody = es.PitchMelodia(
+    guessUnvoiced=guessUnvoiced, frameSize=frameSize, hopSize=hopSize
+)

 # Load audio file, apply equal loudness filter, and compute predominant melody
-audio = MonoLoader(filename = filename, sampleRate=sampleRate)()
-audio = EqualLoudness()(audio)
+audio = es.MonoLoader(filename=filename, sampleRate=sampleRate)()
+audio = es.EqualLoudness()(audio)
 pitch, confidence = run_predominant_melody(audio)

 n_frames = len(pitch)
-print("number of frames: %d" % n_frames)
+print(f"number of frames: {n_frames}")

 # Visualize output pitch values
-fig = plt.figure()
-plot(range(n_frames), pitch, 'b')
+fig, ax = plt.subplots(1, figsize=(10, 4))
+ax.plot(range(n_frames), pitch, "b")
+ax.set_xlabel("Time (s)")
+ax.set_ylabel("Pitch (Hz)")
+ax.set_xlim([0, n_frames - 1])

 n_ticks = 10
 xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
-xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
-xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
+xtick_lbls = [
+    i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
+]
+xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

+plt.sca(ax)
 plt.xticks(xtick_locs, xtick_lbls)
-ax = fig.add_subplot(111)
-ax.set_xlabel('Time (s)')
-ax.set_ylabel('Pitch (Hz)')

 suptitle("Predominant melody pitch")
 tight_layout()
 show()

 # Visualize output pitch confidence
-fig = plt.figure()
-plot(range(n_frames), confidence, 'b')
-n_ticks = 10
-xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
-xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
-xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
+fig, ax = plt.subplots(1, figsize=(10, 4))
+ax.plot(range(n_frames), confidence, "b")
+
+ax.set_xlabel("Time (s)")
+ax.set_ylabel("Confidence")
+ax.set_xlim([0, n_frames - 1])

+plt.sca(ax)
 plt.xticks(xtick_locs, xtick_lbls)
-ax = fig.add_subplot(111)
-ax.set_xlabel('Time (s)')
-ax.set_ylabel('Confidence')
-suptitle("Predominant melody pitch confidence")

+suptitle("Predominant melody pitch confidence")
 tight_layout()
 show()
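Note on the tick-label math above: a frame index is converted to seconds as frame * hopSize / sampleRate. A minimal sketch of that conversion as a standalone helper (the frame_to_seconds name is hypothetical, not part of this PR):

def frame_to_seconds(frame_index, hop_size=128, sample_rate=44100):
    # each analysis frame advances the signal by hop_size samples
    return frame_index * hop_size / sample_rate

# e.g. frame_to_seconds(3445) -> 3445 * 128 / 44100, roughly 10.0 s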
113 changes: 67 additions & 46 deletions src/examples/python/example_pitch_predominantmelody_by_steps.py
@@ -15,7 +15,7 @@
 # You should have received a copy of the Affero GNU General Public License
 # version 3 along with this program. If not, see http://www.gnu.org/licenses/

-import sys, csv
+import sys
 from essentia import *
 from essentia.standard import *
 from pylab import *
@@ -24,36 +24,40 @@
 try:
     filename = sys.argv[1]
 except:
-    print("usage: %s <input-audiofile>" % sys.argv[0])
+    print(f"usage: {sys.argv[0]} <input-audiofile>")
     sys.exit()


 # In this example we will extract predominant melody given an audio file by
 # running a chain of algorithms.

 # First, create our algorithms:
 hopSize = 128
 frameSize = 2048
 sampleRate = 44100
 guessUnvoiced = True

-run_windowing = Windowing(type='hann', zeroPadding=3*frameSize) # Hann window with x4 zero padding
+run_windowing = Windowing(
+    type="hann", zeroPadding=3 * frameSize
+)  # Hann window with x4 zero padding
 run_spectrum = Spectrum(size=frameSize * 4)
-run_spectral_peaks = SpectralPeaks(minFrequency=1,
-                                   maxFrequency=20000,
-                                   maxPeaks=100,
-                                   sampleRate=sampleRate,
-                                   magnitudeThreshold=0,
-                                   orderBy="magnitude")
+run_spectral_peaks = SpectralPeaks(
+    minFrequency=1,
+    maxFrequency=20000,
+    maxPeaks=100,
+    sampleRate=sampleRate,
+    magnitudeThreshold=0,
+    orderBy="magnitude",
+)
 run_pitch_salience_function = PitchSalienceFunction()
 run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks()
 run_pitch_contours = PitchContours(hopSize=hopSize)
-run_pitch_contours_melody = PitchContoursMelody(guessUnvoiced=guessUnvoiced,
-                                                hopSize=hopSize)
+run_pitch_contours_melody = PitchContoursMelody(
+    guessUnvoiced=guessUnvoiced, hopSize=hopSize
+)

 # ... and create a Pool
-pool = Pool();
+pool = Pool()

 # Now we are ready to start processing.
 # 1. Load audio and pass it through the equal-loudness filter
@@ -66,54 +70,71 @@
     frame = run_windowing(frame)
     spectrum = run_spectrum(frame)
     peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
+
     salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
-    salience_peaks_bins, salience_peaks_saliences = run_pitch_salience_function_peaks(salience)
-
-    pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
-    pool.add('allframes_salience_peaks_saliences', salience_peaks_saliences)
+    (
+        salience_peaks_bins,
+        salience_peaks_saliences,
+    ) = run_pitch_salience_function_peaks(salience)
+
+    pool.add("allframes_salience_peaks_bins", salience_peaks_bins)
+    pool.add("allframes_salience_peaks_saliences", salience_peaks_saliences)

-# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
+# 3. Now, as we have gathered the required per-frame data, we can feed it to the contour
 # tracking and melody detection algorithms:
-contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
-    pool['allframes_salience_peaks_bins'],
-    pool['allframes_salience_peaks_saliences'])
-pitch, confidence = run_pitch_contours_melody(contours_bins,
-                                              contours_saliences,
-                                              contours_start_times,
-                                              duration)
+(
+    contours_bins,
+    contours_saliences,
+    contours_start_times,
+    duration,
+) = run_pitch_contours(
+    pool["allframes_salience_peaks_bins"],
+    pool["allframes_salience_peaks_saliences"],
+)
+pitch, confidence = run_pitch_contours_melody(
+    contours_bins, contours_saliences, contours_start_times, duration
+)

 # NOTE that we can avoid the majority of intermediate steps by using a composite algorithm
-# PredominantMelody (see extractor_predominant_melody.py). This script will be usefull
+# PredominantMelody (see extractor_predominant_melody.py). This script will be useful
 # if you want to get access to pitch salience function and pitch contours.

 n_frames = len(pitch)
-print("number of frames: %d" % n_frames)
+print(f"number of frames: {n_frames}")

 # visualize output pitch
-fig = plt.figure()
-plot(range(n_frames), pitch, 'b')
+fig, ax = plt.subplots(1, figsize=(10, 4))
+ax.plot(range(n_frames), pitch, "b")
+
+ax.set_xlabel("Time (s)")
+ax.set_ylabel("Pitch (Hz)")
+ax.set_xlim([0, n_frames - 1])

 n_ticks = 10
 xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
-xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
-xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
+xtick_lbls = [
+    i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)
+]
+xtick_lbls = [f"{round(x, 2):.2f}" for x in xtick_lbls]

+plt.sca(ax)
 plt.xticks(xtick_locs, xtick_lbls)
-ax = fig.add_subplot(111)
-ax.set_xlabel('Time (s)')
-ax.set_ylabel('Pitch (Hz)')

 suptitle("Predominant melody pitch")
 tight_layout()
 show()

 # visualize output pitch confidence
-fig = plt.figure()
-plot(range(n_frames), confidence, 'b')
-n_ticks = 10
-xtick_locs = [i * (n_frames / 10.0) for i in range(n_ticks)]
-xtick_lbls = [i * (n_frames / 10.0) * hopSize / sampleRate for i in range(n_ticks)]
-xtick_lbls = ["%.2f" % round(x,2) for x in xtick_lbls]
+fig, ax = plt.subplots(1, figsize=(10, 4))
+ax.plot(range(n_frames), confidence, "b")
+
+ax.set_xlabel("Time (s)")
+ax.set_ylabel("Confidence")
+ax.set_xlim([0, n_frames - 1])

+plt.sca(ax)
 plt.xticks(xtick_locs, xtick_lbls)
-ax = fig.add_subplot(111)
-ax.set_xlabel('Time (s)')
-ax.set_ylabel('Confidence')
-suptitle("Predominant melody pitch confidence")

+suptitle("Predominant melody pitch confidence")
 tight_layout()
 show()
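As the NOTE in this file points out, the whole manual chain can be collapsed into one composite call when the intermediate salience and contour data are not needed. A minimal sketch mirroring the previous example, assuming the composite and the manual chain use compatible defaults:

import essentia.standard as es

audio = es.MonoLoader(filename=filename, sampleRate=sampleRate)()
audio = es.EqualLoudness()(audio)
# PitchMelodia bundles windowing, spectral peak picking, pitch salience,
# contour tracking, and melody decoding into a single algorithm
pitch, confidence = es.PitchMelodia(
    guessUnvoiced=guessUnvoiced, frameSize=frameSize, hopSize=hopSize
)(audio)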
18 changes: 9 additions & 9 deletions src/examples/python/example_rhythm_beattrackermultifeature.py
@@ -31,25 +31,25 @@
     input_filename = sys.argv[1]
     output_filename = sys.argv[2]
 except:
-    print('usage: %s <input-audiofile> <output-audiofile>' % sys.argv[0])
+    print(f"usage: {sys.argv[0]} <input-audiofile> <output-audiofile>")
     sys.exit()

 # don't forget, we can actually instantiate and call an algorithm on the same line!
-print('Loading audio file...')
-audio = MonoLoader(filename = input_filename)()
+print("Loading audio file...")
+audio = MonoLoader(filename=input_filename)()

 # compute beat positions
-print('Computing beat positions...')
+print("Computing beat positions...")
 bt = BeatTrackerMultiFeature()
 beats, _ = bt(audio)
-print(beats)
+print(f"beats: {beats}")

 # mark them on the audio, which we'll write back to disk
 # we use beeps instead of white noise to mark them, as it's more distinctive
-print('Writing audio files to disk with beats marked...')
+print("Writing audio files to disk with beats marked...")

-marker = AudioOnsetsMarker(onsets = beats, type = 'beep')
+marker = AudioOnsetsMarker(onsets=beats, type="beep")
 marked_audio = marker(audio)
-MonoWriter(filename = output_filename)(marked_audio)
+MonoWriter(filename=output_filename)(marked_audio)

-print('All done!')
+print("All done!")
34 changes: 17 additions & 17 deletions src/examples/python/example_rhythm_bpmhistogram.py
@@ -4,45 +4,45 @@


 try:
-  audiofile = sys.argv[1]
+    audiofile = sys.argv[1]
 except:
-  print ("usage: %s <audiofile>" % sys.argv[0])
-  sys.exit()
+    print(f"usage: {sys.argv[0]} <audiofile>")
+    sys.exit()

 pool = essentia.Pool()

-loader = MonoLoader(filename = audiofile)
+loader = MonoLoader(filename=audiofile)
 bt = RhythmExtractor2013()
 bpm_histogram = BpmHistogramDescriptors()
-centroid = Centroid(range=250) # BPM histogram output size is 250
+centroid = Centroid(range=250)  # BPM histogram output size is 250

 loader.audio >> bt.signal
-bt.bpm >> (pool, 'bpm')
+bt.bpm >> (pool, "bpm")
 bt.ticks >> None
 bt.confidence >> None
 bt.estimates >> None
 bt.bpmIntervals >> bpm_histogram.bpmIntervals
-bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
+bpm_histogram.firstPeakBPM >> (pool, "bpm_first_peak")
 bpm_histogram.firstPeakWeight >> None
 bpm_histogram.firstPeakSpread >> None
-bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
+bpm_histogram.secondPeakBPM >> (pool, "bpm_second_peak")
 bpm_histogram.secondPeakWeight >> None
 bpm_histogram.secondPeakSpread >> None
-bpm_histogram.histogram >> (pool, 'bpm_histogram')
+bpm_histogram.histogram >> (pool, "bpm_histogram")
 bpm_histogram.histogram >> centroid.array
-centroid.centroid >> (pool, 'bpm_centroid')
+centroid.centroid >> (pool, "bpm_centroid")

 essentia.run(loader)
-print("BPM: %0.1f" % pool['bpm'])
-print("Most prominent peak: %0.1f BPM" % pool['bpm_first_peak'][0])
-print("Centroid: %0.1f" % pool['bpm_centroid'][0])
+print(f"BPM: {pool['bpm']:.1f}")
+print(f"Most prominent peak: {pool['bpm_first_peak'][0]:.1f} BPM")
+print(f"Centroid: {pool['bpm_centroid'][0]:.1f}")

-histogram = pool['bpm_histogram'][0]
+histogram = pool["bpm_histogram"][0]

 fig, ax = plt.subplots()
 ax.bar(range(len(histogram)), histogram, width=1)
-ax.set_xlabel('BPM')
-ax.set_ylabel('Frequency')
+ax.set_xlabel("BPM")
+ax.set_ylabel("Frequency")
 ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
 ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
-plt.show()
+plt.show()
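Since BpmHistogramDescriptors produces a 250-bin histogram with one bin per BPM, the Centroid(range=250) stage yields a value directly in BPM units. A minimal NumPy sketch of the same summary statistic, assuming Centroid maps bin indices linearly onto [0, range] (the histogram_centroid helper is illustrative, not part of this PR):

import numpy as np

def histogram_centroid(histogram, bpm_range=250):
    # weighted mean of bin indices, rescaled onto the BPM axis
    bins = np.arange(len(histogram))
    return bpm_range * np.sum(bins * histogram) / (np.sum(histogram) * (len(histogram) - 1))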