"""Compare auto-correlation tempo estimates with Essentia's RhythmExtractor2013.

The script walks a fixed music library and, for a fixed set of album
directories:

1. plots the waveform, windowed RMS and RMS auto-correlation of one track
   (a quick visual sanity check),
2. estimates a BPM for every track from the auto-correlation of its windowed
   RMS envelope and writes the results to ``cc_tempos.txt``,
3. estimates a BPM for every track with Essentia's ``RhythmExtractor2013`` and
   writes the results to ``essentia_tempos.txt``,
4. plots both estimates against hand-tapped tempos, album by album.

The code runs unchanged on Python 2.7 and Python 3.
"""
import os
import re
import struct
from os.path import getsize, join

import numpy as np

# pydub, essentia and matplotlib are imported inside the functions that use
# them, so the numeric helpers below stay importable (and testable) on machines
# where those packages are not installed.

LIBRARY_ROOT = '/Users/ogc/Music/iTunes'

# Albums analysed by both estimators.
ALBUM_DIRECTORIES = [
    '/Users/ogc/Music/iTunes/Aphex Twin/Richard D. James Album',
    '/Users/ogc/Music/iTunes/fononaut/ripples',
    '/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells',
    '/Users/ogc/Music/iTunes/The Dead Texan/The Dead Texan',
]

# Albums searched for the single track plotted by the demo pass.
DEMO_DIRECTORIES = [
    '/Users/ogc/Music/iTunes/fononaut/ripples',
    '/Users/ogc/Music/iTunes/The White Stripes/White Blood Cells',
]

AUDIO_EXTENSIONS = ('.wav', '.aif', '.aiff', '.flac', '.mp3', '.m4a')

# using the same default bpm limits as essentia's RhythmExtractor2013
MIN_BPM = 40.0
MAX_BPM = 208.0

WIN_SIZE = 256   # RMS window length, in samples
HOP = 128        # RMS hop size, in samples
MAX_LAGS = 500   # largest auto-correlation lag examined, in RMS frames

# (artist, album, reference tempos, label the x axis with track names).
# The reference tempos were estimated with tap tempo software (TapNTune),
# except for 'ripples', where they are ground truth (the author's own music).
# NOTE(review): the reference lists are presumably in track order; they are
# matched against the tracks sorted by file name -- confirm.
# Track labels stay disabled for three albums because the original code had
# them commented out ("preventing my plot from showing"); cause unknown.
ALBUM_PLOTS = (
    ('The Dead Texan', 'The Dead Texan',
     [90.0, 130.0, 105.0, 90.0, 74.0, 75.0, 65.0, 70.0, 80.0, 70.0, 65.0],
     True),
    ('Aphex Twin', 'Richard D. James Album',
     [170.0, 140.0, 170.0, 130.0, 170.0, 166.0, 136.0, 166.0, 166.0, 130.0,
      166.0, 170.0, 170.0, 90.0, 160.0],
     False),
    ('The White Stripes', 'White Blood Cells',
     [80.0, 192.0, 78.0, 196.0, 90.0, 170.0, 111.0, 80.0, 97.0, 90.0, 144.0,
      60.0, 85.0, 132.0, 60.0, 180.0],
     False),
    ('fononaut', 'ripples',
     [120.0, 140.0, 125.0, 110.0, 160.0, 140.0, 90.0, 125.0, 115.0, 110.0,
      115.0, 135.0, 90.0],
     False),
)

_DISC_PREFIX = re.compile(r'^(\d-)*')


def get_local_maxima(start_idx, end_idx, data, peaks):
    """Append every local maximum of ``data`` after ``start_idx`` to ``peaks``.

    Starting from ``start_idx`` (expected to be a local maximum) the scan
    walks downhill to the next local minimum, then uphill to the next local
    maximum, records it as ``(value, index)`` and repeats.  A rise that is
    still in progress at ``end_idx`` is not recorded.

    Args:
        start_idx: index to start scanning from.
        end_idx: last index that may be inspected (clamped to the data).
        data: indexable sequence of comparable values.
        peaks: list that receives the ``(value, index)`` tuples (mutated).
    """
    end_idx = min(end_idx, len(data) - 1)
    i = start_idx
    while i < end_idx:
        # Walk down to the next local minimum.  Flat stretches are treated as
        # part of the descent so a plateau can never stall the scan.
        while i < end_idx and data[i + 1] <= data[i]:
            i += 1
        if i >= end_idx:
            return
        # Walk up to the next local maximum.
        while i < end_idx and data[i + 1] > data[i]:
            i += 1
        if i >= end_idx:
            return
        peaks.append((data[i], i))


def get_candidate_peaks(data):
    """Return the global maximum and all later local maxima of ``data``.

    Args:
        data: sequence of values (for tempo estimation, an auto-correlation).

    Returns:
        A list of ``(value, index)`` tuples sorted by value, largest first.
        The list is empty when ``data`` is empty.
    """
    if len(data) == 0:
        return []
    max_idx = int(np.argmax(data))
    peaks = [(data[max_idx], max_idx)]
    get_local_maxima(max_idx, len(data) - 1, data, peaks)
    peaks.sort(reverse=True)
    return peaks


def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute the RMS of ``input_sig`` over sliding windows.

    Args:
        input_sig: 1-D sequence of samples.
        win_size: window length in samples.
        hop: distance between window starts in samples; defaults to half the
            window when omitted (or zero).
        sr: sample rate used to convert window positions to seconds.

    Returns:
        ``(times, rms)``: ``times`` is an array holding the centre of each
        window in seconds, ``rms`` a list with one RMS value per window.  The
        final windows may be shorter than ``win_size``.
    """
    if not hop:
        hop = max(1, win_size // 2)
    sig = np.asarray(input_sig, dtype=float)
    window_start = np.arange(0, len(sig), hop)
    rms = [float(np.sqrt(np.mean(sig[start:start + win_size] ** 2)))
           for start in window_start]
    times = (window_start + win_size / 2.0) / float(sr)
    return times, rms


def _autocorrelation(signal, max_lags):
    """Return the auto-correlation of ``signal`` for lags ``0..max_lags``.

    Values are divided by the zero-lag energy, so index 0 is 1.0 for any
    non-silent signal.  Only the requested lags are computed.  ``max_lags`` is
    clamped to ``len(signal) - 1``; an empty array is returned when fewer than
    two samples are available.
    """
    x = np.asarray(signal, dtype=float)
    n_lags = min(int(max_lags), len(x) - 1)
    if n_lags < 1:
        return np.zeros(0)
    energy = float(np.dot(x, x))
    if energy == 0.0:
        return np.zeros(n_lags + 1)
    raw = [np.dot(x[:len(x) - lag], x[lag:]) for lag in range(n_lags + 1)]
    return np.array(raw) / energy


def estimate_bpm_from_rms(rms, frame_period, max_lags=MAX_LAGS,
                          min_bpm=MIN_BPM, max_bpm=MAX_BPM):
    """Estimate a tempo from the auto-correlation of an RMS envelope.

    The strongest auto-correlation peak whose lag maps to a tempo inside
    ``[min_bpm, max_bpm]`` wins; when no peak qualifies, the strongest peak
    overall is used as a best guess.

    Args:
        rms: RMS envelope, one value per analysis frame.
        frame_period: seconds between consecutive frames (``hop / sr``).
        max_lags: largest lag, in frames, to consider.
        min_bpm: lowest acceptable tempo.
        max_bpm: highest acceptable tempo.

    Returns:
        ``(bpm, lag, in_bounds)`` where ``lag`` is the chosen peak in frames
        and ``in_bounds`` tells whether ``bpm`` lies inside the limits, or
        ``None`` when the auto-correlation has no peak besides lag zero.
    """
    cc = _autocorrelation(rms, max_lags)
    # The zero-lag peak is always the global maximum and carries no tempo.
    candidates = [peak for peak in get_candidate_peaks(cc) if peak[1] > 0]
    if not candidates:
        return None
    for _, lag in candidates:
        bpm = 60.0 / (lag * frame_period)
        if min_bpm <= bpm <= max_bpm:
            return bpm, lag, True
    # fall back to 'best guess'
    lag = candidates[0][1]
    return 60.0 / (lag * frame_period), lag, False


def _load_mono_samples(filename):
    """Decode ``filename`` with pydub and return ``(mono_samples, sample_rate)``.

    The downmix is delegated to pydub, so the sample width and channel count
    of the file no longer need to be hard-coded.
    """
    from pydub import AudioSegment

    filetype = filename.split('.')[-1]
    song = AudioSegment.from_file(filename, filetype)
    mono = song.set_channels(1)
    samples = np.array(mono.get_array_of_samples(), dtype=float)
    return samples, song.frame_rate


def _iter_album_dirs(library_root, directories):
    """Yield ``(album_dir, audio_file_names)`` for each wanted album directory."""
    for root, dirs, files in os.walk(library_root):
        if 'CVS' in dirs:
            dirs.remove('CVS')  # don't visit CVS directories
        if root in directories:
            yield root, sorted(name for name in files
                               if name.endswith(AUDIO_EXTENSIONS))


def demo_single_track(library_root=LIBRARY_ROOT, directories=DEMO_DIRECTORIES):
    """Plot waveform, RMS and auto-correlation for the first usable track.

    A 30 second clip starting at the middle of the track is examined.  The
    function stops after the first track for which a tempo could be estimated.
    """
    import matplotlib.pyplot as plt

    for album_dir, names in _iter_album_dirs(library_root, directories):
        print('album: {0}\n'.format(album_dir))
        for name in names:
            samples, sr = _load_mono_samples(join(album_dir, name))
            # let's just examine a 30sec clip in the middle of the song
            start = len(samples) // 2
            end = min(start + 30 * sr, len(samples))
            clip = samples[start:end]

            plt.plot(clip)
            plt.figure()
            times, rms = windowed_rms(clip, WIN_SIZE, HOP, sr)
            plt.title('{} RMS from t={}s to t={}s'.format(
                name, start / float(sr), end / float(sr)))
            plt.plot(times, rms)

            result = estimate_bpm_from_rms(rms, HOP / float(sr))
            if result is None:
                print('auto-correlation failed to find peaks')
                continue
            bpm, lag, _ = result

            plt.figure()
            plt.acorr(rms, maxlags=min(MAX_LAGS, len(rms) - 1))
            plt.grid()
            # mark the lag that produced the estimate
            plt.vlines(lag, 0.0, 1.0, lw=4, colors='y')
            print('Estimated BPM: {}'.format(bpm))
            plt.show()
            return
        print('')


def estimate_album_tempos_autocorr(albums, library_root=LIBRARY_ROOT,
                                   directories=ALBUM_DIRECTORIES,
                                   out_path='cc_tempos.txt'):
    """Estimate every track's tempo by auto-correlation.

    Results are written to ``out_path`` as ``name, bpm`` lines and stored in
    ``albums[album][track]['cc']``.  Tracks that cannot be decoded or analysed
    are reported and skipped.
    """
    from pydub.exceptions import CouldntDecodeError

    load_errors = (IOError, OSError, RuntimeError, TypeError, ValueError,
                   CouldntDecodeError)
    with open(out_path, 'w') as f:
        for album_dir, names in _iter_album_dirs(library_root, directories):
            album = os.path.basename(album_dir)
            print('album: {0}\n'.format(album))
            tracks = albums.setdefault(album, {})
            for name in names:
                try:
                    samples, sr = _load_mono_samples(join(album_dir, name))
                except load_errors as err:
                    print('failed while processing file {}'.format(name))
                    print('  reason: {!r}'.format(err))
                    continue
                # let's examine the middle third of the song
                # (t = 33% to t = 66%)
                start = len(samples) // 3
                clip = samples[start:2 * start]
                _, rms = windowed_rms(clip, WIN_SIZE, HOP, sr)
                result = estimate_bpm_from_rms(rms, HOP / float(sr))
                if result is None:
                    print('auto-correlation failed to find peaks')
                    continue
                bpm, _, in_bounds = result
                if not in_bounds:
                    print('failed to find tempo within bounds')
                print('Estimated BPM for {}: {:.2f}'.format(name, bpm))
                f.write('{}, {}\n'.format(name, bpm))
                tracks.setdefault(name, {})['cc'] = bpm
            print('')


def estimate_album_tempos_essentia(albums, library_root=LIBRARY_ROOT,
                                   directories=ALBUM_DIRECTORIES,
                                   out_path='essentia_tempos.txt'):
    """Estimate every track's tempo with Essentia's RhythmExtractor2013.

    Results are written to ``out_path`` as ``name, bpm`` lines and stored in
    ``albums[album][track]['essentia']``.  The extractor runs with its default
    tempo limits.
    """
    import essentia.standard

    with open(out_path, 'w') as f:
        for album_dir, names in _iter_album_dirs(library_root, directories):
            album = os.path.basename(album_dir)
            print('album: {0}\n'.format(album))
            tracks = albums.setdefault(album, {})
            for name in names:
                file_name = join(album_dir, name)
                try:
                    song = essentia.standard.MonoLoader(filename=file_name)()
                    extractor = essentia.standard.RhythmExtractor2013()
                    bpm = extractor(song)[0]
                except (RuntimeError, TypeError) as err:
                    # Same exception types as the handler the original script
                    # had commented out for this pass.
                    print('failed while processing file {}'.format(name))
                    print('  reason: {!r}'.format(err))
                    continue
                print('Estimated BPM for {}: {:.2f}'.format(name, bpm))
                f.write('{}, {}\n'.format(name, bpm))
                tracks.setdefault(name, {})['essentia'] = bpm
            print('')


def _track_stem(song_name):
    """Return ``song_name`` without disc prefix ('1-') and file extension."""
    return _DISC_PREFIX.sub('', song_name).split('.')[0]


def _finish_plot(plt, ind, tick_labels, title_text):
    """Apply the shared axis decoration and display the current figure."""
    if tick_labels is not None:
        plt.xticks(ind, tick_labels)
    plt.gca().set_axisbelow(True)
    plt.grid()
    # moving legend so it doesn't obscure the plot
    plt.legend(loc='upper right', bbox_to_anchor=(1.25, 1.0))
    plt.xticks(rotation='vertical')
    plt.title(title_text)
    plt.show()


def plot_album_comparison(albums, artist_name, album_name, human_tempos,
                          label_tracks=False):
    """Plot both estimators against the reference tempos of one album.

    Two figures are shown: a scatter plot of the three tempo series and a line
    plot of each estimator's absolute difference from the reference.  Tracks
    are ordered by file name so that values and labels line up; an estimate
    that is missing for a track is plotted as a gap.

    Args:
        albums: ``{album: {track: {'cc': bpm, 'essentia': bpm}}}``.
        artist_name: artist shown in the plot titles.
        album_name: key into ``albums``; also shown in the titles.
        human_tempos: reference tempo per track.
        label_tracks: label the x axis with cleaned-up track names.
    """
    import matplotlib.pyplot as plt

    album = albums.get(album_name)
    if not album:
        print('no tempo estimates for album {}'.format(album_name))
        return
    song_names = sorted(album, key=_track_stem)
    if len(song_names) != len(human_tempos):
        print('skipping {}: {} tracks analysed but {} reference tempos'.format(
            album_name, len(song_names), len(human_tempos)))
        return

    nan = float('nan')
    human = np.asarray(human_tempos, dtype=float)
    cc_tempos = np.array([album[song].get('cc', nan) for song in song_names],
                         dtype=float)
    essentia_tempos = np.array(
        [album[song].get('essentia', nan) for song in song_names], dtype=float)
    ind = np.arange(len(song_names))

    tick_labels = None
    if label_tracks:
        # strip the track number and a leading "<album> - " from each name
        label_prefix = re.compile(
            r'^(\d\s*)+(' + re.escape(album_name) + r' - )*')
        tick_labels = [label_prefix.sub('', _track_stem(song))
                       for song in song_names]

    # create scatter plot
    plt.scatter(ind, human, marker='o', s=120, color='k',
                label='Human Estimation BPMs')
    plt.scatter(ind, cc_tempos, marker='D', s=60, color='m',
                label='Auto-correlation BPMs')
    plt.scatter(ind, essentia_tempos, marker='s', color='y', s=60,
                label='RhythmExtractor2013 BPMs')
    _finish_plot(
        plt, ind, tick_labels,
        "Tempo Estimation: Comparison of Auto-correlation and Essentia's "
        "RhythmExtractor2013 Algotrithm to Human Tempo Estimation\n"
        "Artist: {} | Album: {}".format(artist_name, album_name))

    plt.plot(ind, np.abs(human - cc_tempos), color='m', lw=3,
             label="Auto-correlation 'Error'")
    plt.plot(ind, np.abs(human - essentia_tempos), color='y', lw=3,
             label="RhythmExtractor2013 'Error'")
    _finish_plot(
        plt, ind, tick_labels,
        "Difference From BPM Human Estimation: Comparison of Auto-correlation "
        "and Essentia's RhythmExtractor2013 Algotrithm\n"
        "Artist: {} | Album: {}".format(artist_name, album_name))


def main():
    """Run the demo, both estimation passes and the per-album plots."""
    import matplotlib.pyplot as plt

    plt.rcParams['figure.figsize'] = (16, 4)  # wide graphs by default

    demo_single_track()

    albums = {}
    estimate_album_tempos_autocorr(albums)
    estimate_album_tempos_essentia(albums)
    print(sorted(albums))

    # we'll plot them by album, starting with The Dead Texan
    for artist_name, album_name, human_tempos, label_tracks in ALBUM_PLOTS:
        plot_album_comparison(albums, artist_name, album_name, human_tempos,
                              label_tracks)


if __name__ == '__main__':
    main()