import os
from os.path import join, getsize
import struct
from pydub import AudioSegment
import essentia.standard
import re
#wide graphs by default -- NOTE(review): `rcParams` is not imported above;
#this assumes a pylab-style session (e.g. IPython %pylab) put it in scope
rcParams['figure.figsize'] = (16, 4) #wide graphs by default
The following functions are used to detect the 'correct' offset in the autocorrelation output. 'get_candidate_peaks' will return the peaks sorted by height; we will assume the second-largest peak represents the lag equal to the period of the beat.
def get_local_maxima(start_idx, end_idx, data, peaks):
    """Scan right from `start_idx` (assumed to sit on a local maximum) and
    append every subsequent local maximum of `data` to `peaks` as a
    (value, index) tuple.  Scanning stops once `end_idx` is reached.

    Fixes vs the original:
      * the recursive call was spelled `get_local_maxes`, a NameError;
        the scan is now a plain loop, which also avoids recursion limits
      * a flat plateau no longer causes infinite recursion -- it is
        skipped after its first sample is recorded as the peak
    """
    i = start_idx
    while True:
        # descend into the valley to the right of the current maximum
        while data[i + 1] < data[i]:
            i += 1
            if i >= end_idx:
                return
        # climb to the next local maximum
        while data[i + 1] > data[i]:
            i += 1
            if i >= end_idx:
                return
        peaks.append((data[i], i))
        # a flat plateau (data[i+1] == data[i]) would stall both loops
        # above; skip to its far edge so the scan keeps making progress
        while data[i + 1] == data[i]:
            i += 1
            if i >= end_idx:
                return
def get_candidate_peaks(data):
    """Return candidate peaks of `data` as (value, index) tuples, ordered
    tallest first.  The global maximum is always included, so a length-1
    result means no further local maxima were found to its right."""
    top = argmax(data)
    candidates = [(data[top], top)]
    get_local_maxima(top, len(data) - 1, data, candidates)
    return sorted(candidates, reverse=True)
def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute a windowed RMS envelope of `input_sig`.

    Parameters:
      input_sig -- 1-D numpy array of samples
      win_size  -- analysis window length, in samples
      hop       -- hop size in samples; defaults to win_size // 2
                   (fixed: the original referenced an undefined `winsize`)
      sr        -- sample rate, used to convert window centres to seconds

    Returns (times, rms): window-centre times in seconds and a list with
    one RMS value per window (the final window may be shorter).
    """
    if not hop:
        hop = win_size // 2
    rms = []
    window_start = arange(0, len(input_sig), hop)
    for start in window_start:
        w = input_sig[start: start + win_size].astype(float)
        rms.append(sqrt(mean(w ** 2)))
    times = (window_start + win_size / 2) / float(sr)
    return times, rms
Testing:
albums = [#'/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
'/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells']#, '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#using the same default bpm limits as essentia's RhythmExtractor2013
min_bpm = 40.0
max_bpm = 208.0
test = True
with open('tempos.txt', 'wb') as f:
for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
if 'CVS' in dirs:
dirs.remove('CVS') # don't visit CVS directories
if root in albums:
print 'album: {0}\n'.format(root)
for name in files:
if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')) and test == True:
#try:
#load the file
filename = join(root, name)
filetype = name.split('.')[-1]
song = AudioSegment.from_file(filename, filetype)
max_amp = audio.max_possible_amplitude
sr = song.frame_rate
channels = song.channels
total_length = int(song.frame_count())
#let's just examine a 30sec clip in the middle of the song
start = total_length/2
end = start + 30 * sr
clip_length = end - start
data = []
#pydub doesn't provide an easy way to downmix. There's a method for it in the source code but I can't
#figure out how to access it
for i in range(clip_length):
frame = song.get_frame(start + i)
sample_l, sample_r = struct.unpack("hh", frame)
sample_mono = (sample_l / 2) + (sample_r / 2)
data.append(sample_mono)
plot(asarray(data))
figure()
times, rms = windowed_rms(asarray(data), 256, 128, sr)
title('{} RMS from t={}s to t={}s'.format(name, start/float(sr), end/float(sr)))
plot(times, rms)
figure()
max_lags = 500
lags, cc, lines, line = acorr(rms, maxlags=max_lags)
grid();
peaks = get_candidate_peaks(cc)
if len(peaks) == 1:
print 'auto-correlation failed to find peaks'
continue
valid_bpm = False
for peak in peaks[1:]: #try to find a lag which translates to a reasonable bpm
offset = peak[1] - max_lags
bpm = 60.0/times[offset]
if bpm >= min_bpm and bpm <= max_bpm:
valid_bpm = True
break
if valid_bpm == False: #fall back to 'best guess'
bpm = 60.0/times[peaks[1][1] - max_lags]
gcf()
#ylim(0, 1.5)
vlines(offset, 0.0, 1.0, lw='4', colors='y')
#plot(cc);
print 'Estimated BPM: {}'.format(bpm)
test = False
#f.write('{0}, '.format(tempo))
#print '{0},'.format(tempo),
#except (RuntimeError, TypeError, NameError):
#print 'failed while processing file {}'.format(name)
#continue
print
#print extensions.keys()
#print durations
album: /Users/ogc/Music/iTunes/The White Stripes/White Blood Cells Estimated BPM: 79.8141891892
Now we'll use this technique to gather tempo estimations from several albums
albums = dict()
directories = ['/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
'/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells', '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#using the same default bpm limits as essentia's RhythmExtractor2013
min_bpm = 40.0
max_bpm = 208.0
with open('cc_tempos.txt', 'wb') as f:
for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
if 'CVS' in dirs:
dirs.remove('CVS') # don't visit CVS directories
if root in directories:
album = root.split('/')[-1]
print 'album: {0}\n'.format(album)
if album not in albums:
albums[album] = dict()
for name in files:
if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')):
try:
#load the file
filename = join(root, name)
filetype = name.split('.')[-1]
song = AudioSegment.from_file(filename, filetype)
max_amp = audio.max_possible_amplitude
sr = song.frame_rate
channels = song.channels
total_length = int(song.frame_count())
#let's examine the middle third of the song (t = 33% to t = 66%)
start = total_length/3
end = start * 2
clip_length = end - start
data = []
#pydub doesn't provide an easy way to downmix. There's a method for it in the source code but I can't
#figure out how to access it
for i in range(clip_length):
frame = song.get_frame(start + i)
sample_l, sample_r = struct.unpack("hh", frame) #thanks to Joseph for this
sample_mono = (sample_l / 2) + (sample_r / 2)
data.append(sample_mono)
times, rms = windowed_rms(asarray(data), 256, 128, sr)
max_lags = 500
lags, cc, lines, line = acorr(rms, maxlags=max_lags);
peaks = get_candidate_peaks(cc)
if len(peaks) == 1:
print 'auto-correlation failed to find peaks'
continue
valid_bpm = False
for peak in peaks[1:]: #try to find a lag which translates to a reasonable bpm
offset = peak[1] - max_lags
bpm = 60.0/times[offset]
if bpm >= min_bpm and bpm <= max_bpm:
valid_bpm = True
break
if valid_bpm == False: #fall back to 'best guess'
print 'failed to find tempo within bounds'
bpm = 60.0/times[peaks[1][1] - max_lags]
print 'Estimated BPM for {}: {:.2f}'.format(name, bpm)
f.write('{}, {}\n'.format(name, bpm))
if name not in albums[album]:
albums[album][name] = []
albums[album][name].append(bpm)
except (RuntimeError, TypeError, NameError):
print 'failed while processing file {}'.format(name)
continue
print
album: Richard D. James Album Estimated BPM for 01 4.mp3: 206.42 Estimated BPM for 02 Cornish Acid.mp3: 138.89 Estimated BPM for 03 Peek 824545201.mp3: 113.07 Estimated BPM for 04 Fingerbib.mp3: 65.79 Estimated BPM for 05 Carn Marth.mp3: 165.44 Estimated BPM for 06 To Cure a Weakling Child.mp3: 163.04 Estimated BPM for 07 Goon Gumpas.mp3: 195.65 Estimated BPM for 08 Yellow Calx.mp3: 165.44 Estimated BPM for 09 Girl_Boy Song.mp3: 84.27 Estimated BPM for 10 Logan Rock Witch.mp3: 182.93 Estimated BPM for 11 Milk Man.mp3: 164.23 Estimated BPM for 12 Inkey$.mp3: 159.57 Estimated BPM for 13 Girl_Boy Song (£18 Snare Rush mix).mp3: 84.59 Estimated BPM for 14 Beetles.mp3: 91.84 Estimated BPM for 15 Girl_Boy Song (Redruth mix).mp3: 127.12 album: ripples Estimated BPM for 01 people.wav: 79.81 Estimated BPM for 02 fallingElevatorBlues.wav: 138.74 Estimated BPM for 03 controlledBurn.wav: 82.69 Estimated BPM for 04 ceaseToBe.wav: 109.38 Estimated BPM for 05 untitled.wav: 79.81 Estimated BPM for 06 iteration.wav: 206.72 Estimated BPM for 07 smilingFaces.wav: 178.21 Estimated BPM for 08 buoyancy.wav: 206.72 Estimated BPM for 09 richSoil.wav: 152.00 Estimated BPM for 10 oldStandby.wav: 206.72 Estimated BPM for 11 wiseChild.wav: 154.27 Estimated BPM for 12 beanstalk.wav: 134.23 Estimated BPM for 13 ripples.wav: 178.21 album: The Dead Texan Estimated BPM for 1-01 The 6 Million Dollar Sandwich.m4a: 181.33 Estimated BPM for 1-02 The Dead Texan - Glen's Goo.m4a: 195.02 Estimated BPM for 1-03 The Dead Texan - A Chronicle of Early Failures, Part One.m4a: 170.84 Estimated BPM for 1-04 The Dead Texan - A Chronicle of Early Failures, Part Two.m4a: 206.72 Estimated BPM for 1-05 The Dead Texan - Taco Me Manque.m4a: 204.67 Estimated BPM for 1-06 Aegina Airlines.m4a: 202.67 Estimated BPM for 1-07 The Dead Texan - When I See Scissors, I Cannot Help but Think of You.m4a: 198.77 Estimated BPM for 1-08 The Dead Texan - Girth Rides a (Horse)+.m4a: 204.67 Estimated BPM for 1-09 The Dead Texan - La 
Ballade d'Alain Georges.m4a: 168.06 Estimated BPM for 1-10 The Dead Texan - Beatrice, Part Two.m4a: 204.67 Estimated BPM for 1-11 The Dead Texan - The Struggle.m4a: 124.53 album: White Blood Cells Estimated BPM for 01 Dead Leaves and the Dirty Ground.mp3: 80.12 Estimated BPM for 02 Hotel Yorba.mp3: 193.20 Estimated BPM for 03 I'm Finding It Harder to Be a Gentleman.mp3: 79.81 Estimated BPM for 04 Fell in Love With a Girl.mp3: 186.23 Estimated BPM for 05 Expecting.mp3: 90.67 Estimated BPM for 06 Little Room.mp3: 173.71 Estimated BPM for 07 The Union Forever.mp3: 129.20 Estimated BPM for 08 The Same Boy You've Always Known.mp3: 80.75 Estimated BPM for 09 We're Going to Be Friends.mp3: 200.70 Estimated BPM for 10 Offend in Every Way.mp3: 178.21 Estimated BPM for 11 I Think I Smell a Rat.mp3: 148.72 Estimated BPM for 12 Aluminum.mp3: 179.76 Estimated BPM for 13 I Can't Wait.mp3: 168.06 Estimated BPM for 14 Now Mary.mp3: 129.20 Estimated BPM for 15 I Can Learn.mp3: 204.67 Estimated BPM for 16 This Protector.mp3: 206.72
The raw auto-correlation technique above is not very robust on its own, but it does work reasonably well for some songs. The number of analysis windows plays a large part in how well it works, I got better results using a 256 sample window with a 128 sample hopsize than I did with a 4096 sample window and 2048 sample hop. I also found that I got much more accurate results when I analyzed a 30sec clip as opposed to a 10sec clip. When I analyzed the full songs I started to see weird results again, so I settled on analyzing the middle third of each song, which seemed to be a good way to avoid bias from the beginning & end, when tempo might be ramping up/down. I analyzed my own recordings so that I would be able to compare it with ground-truth, and a few of the bpm estimates were actually pretty accurate. When analyzing the 'Dead Texan' tracks, which are pretty ambient, the algorithm had kind of a hard time, it was usually much faster than what I tried to tap out while listening, which makes me think maybe it was getting a 'double-time' estimate, or maybe some other factor related to the time signature.
Now let's compare some of these results to Essentia's 'RhythmExtractor2013' algorithm.
#Repeat the tempo survey with Essentia's RhythmExtractor2013 for comparison.
#Results append to the same `albums` dict built above, giving each song a
#two-element list: [auto-correlation bpm, essentia bpm]; they are also
#logged to essentia_tempos.txt.
directories = ['/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
               '/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells', '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#NOTE(review): min_bpm/max_bpm are set here but never used in this cell;
#RhythmExtractor2013 applies its own tempo bounds internally
min_bpm = 50.0
max_bpm = 300.0
with open('essentia_tempos.txt', 'wb') as f:
    for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
        if 'CVS' in dirs:
            dirs.remove('CVS') # don't visit CVS directories
        if root in directories:
            album = root.split('/')[-1]
            print 'album: {0}\n'.format(album)
            if album not in albums:
                albums[album] = dict()
            for name in files:
                if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')):
                    #try:
                    #load the file
                    file_name = join(root, name)
                    loader = essentia.standard.MonoLoader(filename = file_name)
                    song = loader()
                    #presumably MonoLoader outputs 44.1 kHz mono -- TODO confirm;
                    #`sr` and `total_length` are not used below
                    sr = 44100
                    total_length = len(song)
                    extractor = essentia.standard.RhythmExtractor2013()
                    bpm, ticks, confidence, estimates, bpmIntervals = extractor(song)
                    print 'Estimated BPM for {}: {:.2f}'.format(name, bpm)
                    f.write('{}, {}\n'.format(name, bpm))
                    if name not in albums[album]:
                        albums[album][name] = []
                    albums[album][name].append(bpm)
                    #except (RuntimeError, TypeError, NameError):
                    # print 'failed while processing file {}'.format(name)
                    # continue
            print
album: Richard D. James Album Estimated BPM for 01 4.mp3: 165.89 Estimated BPM for 02 Cornish Acid.mp3: 141.86 Estimated BPM for 03 Peek 824545201.mp3: 172.27 Estimated BPM for 04 Fingerbib.mp3: 131.95 Estimated BPM for 05 Carn Marth.mp3: 111.30 Estimated BPM for 06 To Cure a Weakling Child.mp3: 162.96 Estimated BPM for 07 Goon Gumpas.mp3: 134.98 Estimated BPM for 08 Yellow Calx.mp3: 111.29 Estimated BPM for 09 Girl_Boy Song.mp3: 166.27 Estimated BPM for 10 Logan Rock Witch.mp3: 132.07 Estimated BPM for 11 Milk Man.mp3: 163.70 Estimated BPM for 12 Inkey$.mp3: 172.27 Estimated BPM for 13 Girl_Boy Song (£18 Snare Rush mix).mp3: 172.27 Estimated BPM for 14 Beetles.mp3: 91.81 Estimated BPM for 15 Girl_Boy Song (Redruth mix).mp3: 79.74 album: ripples Estimated BPM for 01 people.wav: 120.00 Estimated BPM for 02 fallingElevatorBlues.wav: 140.06 Estimated BPM for 03 controlledBurn.wav: 123.83 Estimated BPM for 04 ceaseToBe.wav: 109.40 Estimated BPM for 05 untitled.wav: 162.14 Estimated BPM for 06 iteration.wav: 139.98 Estimated BPM for 07 smilingFaces.wav: 90.07 Estimated BPM for 08 buoyancy.wav: 124.92 Estimated BPM for 09 richSoil.wav: 114.68 Estimated BPM for 10 oldStandby.wav: 109.95 Estimated BPM for 11 wiseChild.wav: 152.93 Estimated BPM for 12 beanstalk.wav: 135.14 Estimated BPM for 13 ripples.wav: 117.20 album: The Dead Texan Estimated BPM for 1-01 The 6 Million Dollar Sandwich.m4a: 84.12 Estimated BPM for 1-02 The Dead Texan - Glen's Goo.m4a: 129.63 Estimated BPM for 1-03 The Dead Texan - A Chronicle of Early Failures, Part One.m4a: 105.38 Estimated BPM for 1-04 The Dead Texan - A Chronicle of Early Failures, Part Two.m4a: 90.45 Estimated BPM for 1-05 The Dead Texan - Taco Me Manque.m4a: 90.14 Estimated BPM for 1-06 Aegina Airlines.m4a: 114.71 Estimated BPM for 1-07 The Dead Texan - When I See Scissors, I Cannot Help but Think of You.m4a: 129.08 Estimated BPM for 1-08 The Dead Texan - Girth Rides a (Horse)+.m4a: 132.77 Estimated BPM for 1-09 The Dead Texan - La 
Ballade d'Alain Georges.m4a: 86.21 Estimated BPM for 1-10 The Dead Texan - Beatrice, Part Two.m4a: 143.95 Estimated BPM for 1-11 The Dead Texan - The Struggle.m4a: 136.23 album: White Blood Cells Estimated BPM for 01 Dead Leaves and the Dirty Ground.mp3: 79.91 Estimated BPM for 02 Hotel Yorba.mp3: 97.84 Estimated BPM for 03 I'm Finding It Harder to Be a Gentleman.mp3: 78.91 Estimated BPM for 04 Fell in Love With a Girl.mp3: 95.96 Estimated BPM for 05 Expecting.mp3: 90.30 Estimated BPM for 06 Little Room.mp3: 172.27 Estimated BPM for 07 The Union Forever.mp3: 100.35 Estimated BPM for 08 The Same Boy You've Always Known.mp3: 80.27 Estimated BPM for 09 We're Going to Be Friends.mp3: 97.53 Estimated BPM for 10 Offend in Every Way.mp3: 91.14 Estimated BPM for 11 I Think I Smell a Rat.mp3: 152.06 Estimated BPM for 12 Aluminum.mp3: 164.05 Estimated BPM for 13 I Can't Wait.mp3: 172.27 Estimated BPM for 14 Now Mary.mp3: 139.38 Estimated BPM for 15 I Can Learn.mp3: 132.93 Estimated BPM for 16 This Protector.mp3: 90.06
I also listened to approximately the same section of each song and used TapNTune (free tap tempo metronome/tuner software) to estimate the BPMs myself. In cases where the tempo varied over the course of the clip, I used my guess that was closest to what the other algorithms predicted (if the two algorithms differed significantly, I recorded an average of my tap-tempo guesses). I noticed that when there were tempo changes, the algorithms seemed to choose tempos closer to the 'loudest' section's tempo. Here are some plots to compare the results:
#the keys of `albums` double as the album names for the plots below
album_names = albums.keys()
print album_names
['The Dead Texan', 'Richard D. James Album', 'White Blood Cells', 'ripples']
#The Dead Texan: scatter plot of the three tempo estimates per song, then a
#plot of each algorithm's absolute difference from the human estimate.
cc_tempos = []
essentia_tempos = []
artist_name = 'The Dead Texan'
album_name = 'The Dead Texan'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos or
#with the separately-sorted x-axis labels
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [90.0, 130.0, 105.0, 90.0, 74.0, 75.0, 65.0, 70.0, 80.0, 70.0, 65.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
#set up x axis labels; derive them directly in song_names order (no extra
#re-sort) so they stay aligned with the plotted points
xnames = [re.sub('^(\d-)*', '', song_name).split('.')[0] for song_name in song_names]
xticks(ind, [re.sub('^(\d\s*)+(The Dead Texan - )*', '', x) for x in xnames])
gca().set_axisbelow(True)
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
#FIX: corrected "Algotrithm" typo in the title
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
xticks(ind, [re.sub('^(\d\s*)+(The Dead Texan - )*', '', x) for x in xnames])
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#Richard D. James Album: same pair of plots as above
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'Aphex Twin'
album_name = 'Richard D. James Album'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [170.0, 140.0, 170.0, 130.0, 170.0, 166.0, 136.0, 166.0, 166.0, 130.0, 166.0, 170.0, 170.0, 90.0, 160.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#White Blood Cells: scatter plot of the three tempo estimates per song
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'The White Stripes'
album_name = 'White Blood Cells'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [80.0, 192.0, 78.0, 196.0, 90.0, 170.0, 111.0, 80.0, 97.0, 90.0, 144.0, 60.0, 85.0, 132.0, 60.0, 180.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
[80.0, 192.0, 78.0, 196.0, 90.0, 170.0, 111.0, 80.0, 97.0, 90.0, 144.0, 60.0, 85.0, 132.0, 60.0, 180.0] 16 16 16
#White Blood Cells: absolute 'error' of each algorithm vs the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original; the unused `xnames`/`width` leftovers were removed
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#ripples: same pair of plots; here the human tempos are ground truth
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'fononaut'
album_name = 'ripples'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#in this case I had ground truth, it's my music
human_tempos = [120.0, 140.0, 125.0, 110.0, 160.0, 140.0, 90.0, 125.0, 115.0, 110.0, 115.0, 135.0, 90.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the (ground-truth) tempo
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
Essentia's RhythmExtractor2013 algorithm generally yields much better results. I noticed that both algorithms produced some results where the estimate was off by roughly a factor of two, which makes sense. I'm not sure what to do to correct that without introducing some other metrics to help decide between candidate tempos.