import os
from os.path import join, getsize
import struct
from pydub import AudioSegment
import essentia.standard
import re
#wide graphs by default -- NOTE(review): `rcParams` is not imported above;
#this assumes a pylab-style session (e.g. IPython %pylab) put it in scope
rcParams['figure.figsize'] = (16, 4) #wide graphs by default
The following functions are used to detect the 'correct' offset in the autocorrelation output. 'get_candidate_peaks' will return the peaks sorted by height; we will assume the second-largest peak represents the lag equal to the period of the beat.
def get_local_maxima(start_idx, end_idx, data, peaks):
    """Scan right from `start_idx` (assumed to sit on a local maximum) and
    append every subsequent local maximum of `data` to `peaks` as a
    (value, index) tuple.  Scanning stops once `end_idx` is reached.

    Fixes vs the original:
      * the recursive call was spelled `get_local_maxes`, a NameError;
        the scan is now a plain loop, which also avoids recursion limits
      * a flat plateau no longer causes infinite recursion -- it is
        skipped after its first sample is recorded as the peak
    """
    i = start_idx
    while True:
        # descend into the valley to the right of the current maximum
        while data[i + 1] < data[i]:
            i += 1
            if i >= end_idx:
                return
        # climb to the next local maximum
        while data[i + 1] > data[i]:
            i += 1
            if i >= end_idx:
                return
        peaks.append((data[i], i))
        # a flat plateau (data[i+1] == data[i]) would stall both loops
        # above; skip to its far edge so the scan keeps making progress
        while data[i + 1] == data[i]:
            i += 1
            if i >= end_idx:
                return
def get_candidate_peaks(data):
    """Return candidate peaks of `data` as (value, index) tuples, ordered
    tallest first.  The global maximum is always included, so a length-1
    result means no further local maxima were found to its right."""
    top = argmax(data)
    candidates = [(data[top], top)]
    get_local_maxima(top, len(data) - 1, data, candidates)
    return sorted(candidates, reverse=True)
def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute a windowed RMS envelope of `input_sig`.

    Parameters:
      input_sig -- 1-D numpy array of samples
      win_size  -- analysis window length, in samples
      hop       -- hop size in samples; defaults to win_size // 2
                   (fixed: the original referenced an undefined `winsize`)
      sr        -- sample rate, used to convert window centres to seconds

    Returns (times, rms): window-centre times in seconds and a list with
    one RMS value per window (the final window may be shorter).
    """
    if not hop:
        hop = win_size // 2
    rms = []
    window_start = arange(0, len(input_sig), hop)
    for start in window_start:
        w = input_sig[start: start + win_size].astype(float)
        rms.append(sqrt(mean(w ** 2)))
    times = (window_start + win_size / 2) / float(sr)
    return times, rms
Testing:
albums = [#'/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
'/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells']#, '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#using the same default bpm limits as essentia's RhythmExtractor2013
min_bpm = 40.0
max_bpm = 208.0
test = True
with open('tempos.txt', 'wb') as f:
for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
if 'CVS' in dirs:
dirs.remove('CVS') # don't visit CVS directories
if root in albums:
print 'album: {0}\n'.format(root)
for name in files:
if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')) and test == True:
#try:
#load the file
filename = join(root, name)
filetype = name.split('.')[-1]
song = AudioSegment.from_file(filename, filetype)
max_amp = audio.max_possible_amplitude
sr = song.frame_rate
channels = song.channels
total_length = int(song.frame_count())
#let's just examine a 30sec clip in the middle of the song
start = total_length/2
end = start + 30 * sr
clip_length = end - start
data = []
#pydub doesn't provide an easy way to downmix. There's a method for it in the source code but I can't
#figure out how to access it
for i in range(clip_length):
frame = song.get_frame(start + i)
sample_l, sample_r = struct.unpack("hh", frame)
sample_mono = (sample_l / 2) + (sample_r / 2)
data.append(sample_mono)
plot(asarray(data))
figure()
times, rms = windowed_rms(asarray(data), 256, 128, sr)
title('{} RMS from t={}s to t={}s'.format(name, start/float(sr), end/float(sr)))
plot(times, rms)
figure()
max_lags = 500
lags, cc, lines, line = acorr(rms, maxlags=max_lags)
grid();
peaks = get_candidate_peaks(cc)
if len(peaks) == 1:
print 'auto-correlation failed to find peaks'
continue
valid_bpm = False
for peak in peaks[1:]: #try to find a lag which translates to a reasonable bpm
offset = peak[1] - max_lags
bpm = 60.0/times[offset]
if bpm >= min_bpm and bpm <= max_bpm:
valid_bpm = True
break
if valid_bpm == False: #fall back to 'best guess'
bpm = 60.0/times[peaks[1][1] - max_lags]
gcf()
#ylim(0, 1.5)
vlines(offset, 0.0, 1.0, lw='4', colors='y')
#plot(cc);
print 'Estimated BPM: {}'.format(bpm)
test = False
#f.write('{0}, '.format(tempo))
#print '{0},'.format(tempo),
#except (RuntimeError, TypeError, NameError):
#print 'failed while processing file {}'.format(name)
#continue
print
#print extensions.keys()
#print durations
album: /Users/ogc/Music/iTunes/The White Stripes/White Blood Cells Estimated BPM: 79.8141891892
Now we'll use this technique to gather tempo estimations from several albums
albums = dict()
directories = ['/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
'/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells', '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#using the same default bpm limits as essentia's RhythmExtractor2013
min_bpm = 40.0
max_bpm = 208.0
with open('cc_tempos.txt', 'wb') as f:
for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
if 'CVS' in dirs:
dirs.remove('CVS') # don't visit CVS directories
if root in directories:
album = root.split('/')[-1]
print 'album: {0}\n'.format(album)
if album not in albums:
albums[album] = dict()
for name in files:
if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')):
try:
#load the file
filename = join(root, name)
filetype = name.split('.')[-1]
song = AudioSegment.from_file(filename, filetype)
max_amp = audio.max_possible_amplitude
sr = song.frame_rate
channels = song.channels
total_length = int(song.frame_count())
#let's examine the middle third of the song (t = 33% to t = 66%)
start = total_length/3
end = start * 2
clip_length = end - start
data = []
#pydub doesn't provide an easy way to downmix. There's a method for it in the source code but I can't
#figure out how to access it
for i in range(clip_length):
frame = song.get_frame(start + i)
sample_l, sample_r = struct.unpack("hh", frame) #thanks to Joseph for this
sample_mono = (sample_l / 2) + (sample_r / 2)
data.append(sample_mono)
times, rms = windowed_rms(asarray(data), 256, 128, sr)
max_lags = 500
lags, cc, lines, line = acorr(rms, maxlags=max_lags);
peaks = get_candidate_peaks(cc)
if len(peaks) == 1:
print 'auto-correlation failed to find peaks'
continue
valid_bpm = False
for peak in peaks[1:]: #try to find a lag which translates to a reasonable bpm
offset = peak[1] - max_lags
bpm = 60.0/times[offset]
if bpm >= min_bpm and bpm <= max_bpm:
valid_bpm = True
break
if valid_bpm == False: #fall back to 'best guess'
print 'failed to find tempo within bounds'
bpm = 60.0/times[peaks[1][1] - max_lags]
print 'Estimated BPM for {}: {:.2f}'.format(name, bpm)
f.write('{}, {}\n'.format(name, bpm))
if name not in albums[album]:
albums[album][name] = []
albums[album][name].append(bpm)
except (RuntimeError, TypeError, NameError):
print 'failed while processing file {}'.format(name)
continue
print
album: Richard D. James Album Estimated BPM for 01 4.mp3: 206.42 Estimated BPM for 02 Cornish Acid.mp3: 138.89 Estimated BPM for 03 Peek 824545201.mp3: 113.07 Estimated BPM for 04 Fingerbib.mp3: 65.79 Estimated BPM for 05 Carn Marth.mp3: 165.44 Estimated BPM for 06 To Cure a Weakling Child.mp3: 163.04 Estimated BPM for 07 Goon Gumpas.mp3: 195.65 Estimated BPM for 08 Yellow Calx.mp3: 165.44 Estimated BPM for 09 Girl_Boy Song.mp3: 84.27 Estimated BPM for 10 Logan Rock Witch.mp3: 182.93 Estimated BPM for 11 Milk Man.mp3: 164.23 Estimated BPM for 12 Inkey$.mp3: 159.57 Estimated BPM for 13 Girl_Boy Song (£18 Snare Rush mix).mp3: 84.59 Estimated BPM for 14 Beetles.mp3: 91.84 Estimated BPM for 15 Girl_Boy Song (Redruth mix).mp3: 127.12 album: ripples Estimated BPM for 01 people.wav: 79.81 Estimated BPM for 02 fallingElevatorBlues.wav: 138.74 Estimated BPM for 03 controlledBurn.wav: 82.69 Estimated BPM for 04 ceaseToBe.wav: 109.38 Estimated BPM for 05 untitled.wav: 79.81 Estimated BPM for 06 iteration.wav: 206.72 Estimated BPM for 07 smilingFaces.wav: 178.21 Estimated BPM for 08 buoyancy.wav: 206.72 Estimated BPM for 09 richSoil.wav: 152.00 Estimated BPM for 10 oldStandby.wav: 206.72 Estimated BPM for 11 wiseChild.wav: 154.27 Estimated BPM for 12 beanstalk.wav: 134.23 Estimated BPM for 13 ripples.wav: 178.21 album: The Dead Texan Estimated BPM for 1-01 The 6 Million Dollar Sandwich.m4a: 181.33 Estimated BPM for 1-02 The Dead Texan - Glen's Goo.m4a: 195.02 Estimated BPM for 1-03 The Dead Texan - A Chronicle of Early Failures, Part One.m4a: 170.84 Estimated BPM for 1-04 The Dead Texan - A Chronicle of Early Failures, Part Two.m4a: 206.72 Estimated BPM for 1-05 The Dead Texan - Taco Me Manque.m4a: 204.67 Estimated BPM for 1-06 Aegina Airlines.m4a: 202.67 Estimated BPM for 1-07 The Dead Texan - When I See Scissors, I Cannot Help but Think of You.m4a: 198.77 Estimated BPM for 1-08 The Dead Texan - Girth Rides a (Horse)+.m4a: 204.67 Estimated BPM for 1-09 The Dead Texan - La 
Ballade d'Alain Georges.m4a: 168.06 Estimated BPM for 1-10 The Dead Texan - Beatrice, Part Two.m4a: 204.67 Estimated BPM for 1-11 The Dead Texan - The Struggle.m4a: 124.53 album: White Blood Cells Estimated BPM for 01 Dead Leaves and the Dirty Ground.mp3: 80.12 Estimated BPM for 02 Hotel Yorba.mp3: 193.20 Estimated BPM for 03 I'm Finding It Harder to Be a Gentleman.mp3: 79.81 Estimated BPM for 04 Fell in Love With a Girl.mp3: 186.23 Estimated BPM for 05 Expecting.mp3: 90.67 Estimated BPM for 06 Little Room.mp3: 173.71 Estimated BPM for 07 The Union Forever.mp3: 129.20 Estimated BPM for 08 The Same Boy You've Always Known.mp3: 80.75 Estimated BPM for 09 We're Going to Be Friends.mp3: 200.70 Estimated BPM for 10 Offend in Every Way.mp3: 178.21 Estimated BPM for 11 I Think I Smell a Rat.mp3: 148.72 Estimated BPM for 12 Aluminum.mp3: 179.76 Estimated BPM for 13 I Can't Wait.mp3: 168.06 Estimated BPM for 14 Now Mary.mp3: 129.20 Estimated BPM for 15 I Can Learn.mp3: 204.67 Estimated BPM for 16 This Protector.mp3: 206.72
The raw auto-correlation technique above is not very robust on its own, but it does work reasonably well for some songs. The number of analysis windows plays a large part in how well it works, I got better results using a 256 sample window with a 128 sample hopsize than I did with a 4096 sample window and 2048 sample hop. I also found that I got much more accurate results when I analyzed a 30sec clip as opposed to a 10sec clip. When I analyzed the full songs I started to see weird results again, so I settled on analyzing the middle third of each song, which seemed to be a good way to avoid bias from the beginning & end, when tempo might be ramping up/down. I analyzed my own recordings so that I would be able to compare it with ground-truth, and a few of the bpm estimates were actually pretty accurate. When analyzing the 'Dead Texan' tracks, which are pretty ambient, the algorithm had kind of a hard time, it was usually much faster than what I tried to tap out while listening, which makes me think maybe it was getting a 'double-time' estimate, or maybe some other factor related to the time signature.
Now let's compare some of these results to Essentia's 'RhythmExtractor2013' algorithm.
#Repeat the tempo survey with Essentia's RhythmExtractor2013 for comparison.
#Results append to the same `albums` dict built above, giving each song a
#two-element list: [auto-correlation bpm, essentia bpm]; they are also
#logged to essentia_tempos.txt.
directories = ['/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album', '/Users/ogc/Music/iTunes/fononaut/ripples',
               '/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells', '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan']
#NOTE(review): min_bpm/max_bpm are set here but never used in this cell;
#RhythmExtractor2013 applies its own tempo bounds internally
min_bpm = 50.0
max_bpm = 300.0
with open('essentia_tempos.txt', 'wb') as f:
    for root, dirs, files in os.walk('/Users/ogc/Music/iTunes'):
        if 'CVS' in dirs:
            dirs.remove('CVS') # don't visit CVS directories
        if root in directories:
            album = root.split('/')[-1]
            print 'album: {0}\n'.format(album)
            if album not in albums:
                albums[album] = dict()
            for name in files:
                if name.endswith(('.wav', '.aif', 'aiff', '.flac', '.mp3', '.m4a')):
                    #try:
                    #load the file
                    file_name = join(root, name)
                    loader = essentia.standard.MonoLoader(filename = file_name)
                    song = loader()
                    #presumably MonoLoader outputs 44.1 kHz mono -- TODO confirm;
                    #`sr` and `total_length` are not used below
                    sr = 44100
                    total_length = len(song)
                    extractor = essentia.standard.RhythmExtractor2013()
                    bpm, ticks, confidence, estimates, bpmIntervals = extractor(song)
                    print 'Estimated BPM for {}: {:.2f}'.format(name, bpm)
                    f.write('{}, {}\n'.format(name, bpm))
                    if name not in albums[album]:
                        albums[album][name] = []
                    albums[album][name].append(bpm)
                    #except (RuntimeError, TypeError, NameError):
                    # print 'failed while processing file {}'.format(name)
                    # continue
            print
album: Richard D. James Album Estimated BPM for 01 4.mp3: 165.89 Estimated BPM for 02 Cornish Acid.mp3: 141.86 Estimated BPM for 03 Peek 824545201.mp3: 172.27 Estimated BPM for 04 Fingerbib.mp3: 131.95 Estimated BPM for 05 Carn Marth.mp3: 111.30 Estimated BPM for 06 To Cure a Weakling Child.mp3: 162.96 Estimated BPM for 07 Goon Gumpas.mp3: 134.98 Estimated BPM for 08 Yellow Calx.mp3: 111.29 Estimated BPM for 09 Girl_Boy Song.mp3: 166.27 Estimated BPM for 10 Logan Rock Witch.mp3: 132.07 Estimated BPM for 11 Milk Man.mp3: 163.70 Estimated BPM for 12 Inkey$.mp3: 172.27 Estimated BPM for 13 Girl_Boy Song (£18 Snare Rush mix).mp3: 172.27 Estimated BPM for 14 Beetles.mp3: 91.81 Estimated BPM for 15 Girl_Boy Song (Redruth mix).mp3: 79.74 album: ripples Estimated BPM for 01 people.wav: 120.00 Estimated BPM for 02 fallingElevatorBlues.wav: 140.06 Estimated BPM for 03 controlledBurn.wav: 123.83 Estimated BPM for 04 ceaseToBe.wav: 109.40 Estimated BPM for 05 untitled.wav: 162.14 Estimated BPM for 06 iteration.wav: 139.98 Estimated BPM for 07 smilingFaces.wav: 90.07 Estimated BPM for 08 buoyancy.wav: 124.92 Estimated BPM for 09 richSoil.wav: 114.68 Estimated BPM for 10 oldStandby.wav: 109.95 Estimated BPM for 11 wiseChild.wav: 152.93 Estimated BPM for 12 beanstalk.wav: 135.14 Estimated BPM for 13 ripples.wav: 117.20 album: The Dead Texan Estimated BPM for 1-01 The 6 Million Dollar Sandwich.m4a: 84.12 Estimated BPM for 1-02 The Dead Texan - Glen's Goo.m4a: 129.63 Estimated BPM for 1-03 The Dead Texan - A Chronicle of Early Failures, Part One.m4a: 105.38 Estimated BPM for 1-04 The Dead Texan - A Chronicle of Early Failures, Part Two.m4a: 90.45 Estimated BPM for 1-05 The Dead Texan - Taco Me Manque.m4a: 90.14 Estimated BPM for 1-06 Aegina Airlines.m4a: 114.71 Estimated BPM for 1-07 The Dead Texan - When I See Scissors, I Cannot Help but Think of You.m4a: 129.08 Estimated BPM for 1-08 The Dead Texan - Girth Rides a (Horse)+.m4a: 132.77 Estimated BPM for 1-09 The Dead Texan - La 
Ballade d'Alain Georges.m4a: 86.21 Estimated BPM for 1-10 The Dead Texan - Beatrice, Part Two.m4a: 143.95 Estimated BPM for 1-11 The Dead Texan - The Struggle.m4a: 136.23 album: White Blood Cells Estimated BPM for 01 Dead Leaves and the Dirty Ground.mp3: 79.91 Estimated BPM for 02 Hotel Yorba.mp3: 97.84 Estimated BPM for 03 I'm Finding It Harder to Be a Gentleman.mp3: 78.91 Estimated BPM for 04 Fell in Love With a Girl.mp3: 95.96 Estimated BPM for 05 Expecting.mp3: 90.30 Estimated BPM for 06 Little Room.mp3: 172.27 Estimated BPM for 07 The Union Forever.mp3: 100.35 Estimated BPM for 08 The Same Boy You've Always Known.mp3: 80.27 Estimated BPM for 09 We're Going to Be Friends.mp3: 97.53 Estimated BPM for 10 Offend in Every Way.mp3: 91.14 Estimated BPM for 11 I Think I Smell a Rat.mp3: 152.06 Estimated BPM for 12 Aluminum.mp3: 164.05 Estimated BPM for 13 I Can't Wait.mp3: 172.27 Estimated BPM for 14 Now Mary.mp3: 139.38 Estimated BPM for 15 I Can Learn.mp3: 132.93 Estimated BPM for 16 This Protector.mp3: 90.06
I also listened to approximately the same section of each song and used TapNTune (free tap tempo metronome/tuner software) to estimate the BPMs myself. In cases where the tempo varied over the course of the clip, I used my guess that was closest to what the other algorithms predicted (if the two algorithms differed significantly, I recorded an average of my tap-tempo guesses). I noticed that when there were tempo changes, the algorithms seemed to choose tempos closer to the 'loudest' section's tempo. Here are some plots to compare the results:
#the keys of `albums` double as the album names for the plots below
album_names = albums.keys()
print album_names
['The Dead Texan', 'Richard D. James Album', 'White Blood Cells', 'ripples']
#The Dead Texan: scatter plot of the three tempo estimates per song, then a
#plot of each algorithm's absolute difference from the human estimate.
cc_tempos = []
essentia_tempos = []
artist_name = 'The Dead Texan'
album_name = 'The Dead Texan'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos or
#with the separately-sorted x-axis labels
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [90.0, 130.0, 105.0, 90.0, 74.0, 75.0, 65.0, 70.0, 80.0, 70.0, 65.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
#set up x axis labels; derive them directly in song_names order (no extra
#re-sort) so they stay aligned with the plotted points
xnames = [re.sub('^(\d-)*', '', song_name).split('.')[0] for song_name in song_names]
xticks(ind, [re.sub('^(\d\s*)+(The Dead Texan - )*', '', x) for x in xnames])
gca().set_axisbelow(True)
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
#FIX: corrected "Algotrithm" typo in the title
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
xticks(ind, [re.sub('^(\d\s*)+(The Dead Texan - )*', '', x) for x in xnames])
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#Richard D. James Album: same pair of plots as above
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'Aphex Twin'
album_name = 'Richard D. James Album'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [170.0, 140.0, 170.0, 130.0, 170.0, 166.0, 136.0, 166.0, 166.0, 130.0, 166.0, 170.0, 170.0, 90.0, 160.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#White Blood Cells: scatter plot of the three tempo estimates per song
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'The White Stripes'
album_name = 'White Blood Cells'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#these are my own tempo guesses, estimated using tap tempo software (TapNTune)
human_tempos = [80.0, 192.0, 78.0, 196.0, 90.0, 170.0, 111.0, 80.0, 97.0, 90.0, 144.0, 60.0, 85.0, 132.0, 60.0, 180.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
[80.0, 192.0, 78.0, 196.0, 90.0, 170.0, 111.0, 80.0, 97.0, 90.0, 144.0, 60.0, 85.0, 132.0, 60.0, 180.0] 16 16 16
#White Blood Cells: absolute 'error' of each algorithm vs the human estimate
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original; the unused `xnames`/`width` leftovers were removed
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#ripples: same pair of plots; here the human tempos are ground truth
#(FIX: the original comment still said "starting with The Dead Texan")
cc_tempos = []
essentia_tempos = []
artist_name = 'fononaut'
album_name = 'ripples'
album = albums[album_name]
#FIX: iterate songs in sorted (track) order -- dict key order is arbitrary,
#so the tempo lists did not line up with the hand-ordered human_tempos
song_names = sorted(album.keys())
for song in song_names:
    cc_tempos.append(album[song][0])
    essentia_tempos.append(album[song][1])
#in this case I had ground truth, it's my music
human_tempos = [120.0, 140.0, 125.0, 110.0, 160.0, 140.0, 90.0, 125.0, 115.0, 110.0, 115.0, 135.0, 90.0]
ind = arange(len(song_names))
#create scatter plot
pyplot.scatter(ind, human_tempos, marker='o', s=120, color='k', label='Human Estimation BPMs')
pyplot.scatter(ind, cc_tempos, marker='D', s=60, color='m', label='Auto-correlation BPMs')
pyplot.scatter(ind, essentia_tempos, marker='s', color='y', s=60, label='RhythmExtractor2013 BPMs')
gca().set_axisbelow(True)
#NOTE(review): the x-axis song-label xticks call stays disabled as in the
#original -- it reportedly prevented the plot from showing; cause unknown
grid()
#moving legend so it doesn't obscure the plot
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Tempo Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm to Human Tempo Estimation\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
#absolute 'error' of each algorithm relative to the (ground-truth) tempo
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, cc_tempos)], color='m', lw=3, label="Auto-correlation 'Error'")
pyplot.plot(ind, [abs(x - y) for (x, y) in zip(human_tempos, essentia_tempos)], color='y', lw=3, label="RhythmExtractor2013 'Error'")
gca().set_axisbelow(True)
grid()
legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
xticks(rotation='vertical')
title("Difference From BPM Human Estimation: Comparison of Auto-correlation and Essentia's RhythmExtractor2013 Algorithm\nArtist: {} | Album: {}".format(artist_name, album_name))
show()
Essentia's RhythmExtractor2013 algorithm generally yields much better results. I noticed that both algorithms produced some results where the estimate was off by roughly a factor of two, which makes sense. I'm not sure what to do to correct that without introducing some other metrics to help decide between candidate tempos.