# Tempo estimation experiments: windowed RMS envelope -> autocorrelation ->
# beat period, for three recordings.
#
# NOTE(review): names such as rcParams, arange, sqrt, mean, plot, acorr, grid,
# argmax, fft, imshow, colorbar, clf, ylim, array, real and log10 are used
# without being imported here -- presumably this was run in an IPython
# session started with %pylab; confirm before running it as a plain script.
from scipy.io import wavfile

import essentia.standard

rcParams['figure.figsize'] = (16, 4)  # wide graphs by default

loader = essentia.standard.MonoLoader(filename='sources/Stevie Wonder - Superstition.mp3')
superstition = loader()
superstition_sr = 44100


def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute the RMS of successive windows of a signal.

    Parameters
    ----------
    input_sig : array-like supporting slicing and ``.astype``
        Signal to analyse.
    win_size : int
        Window length in samples.
    hop : int, optional
        Distance in samples between window starts. A falsy value
        (``None`` or 0) selects half of ``win_size``.
    sr : float, optional
        Sample rate used to convert window centres to time units.

    Returns
    -------
    times : array
        Centre of every window, i.e. ``(start + win_size // 2) / sr``.
    rms : list
        One RMS value per window.

    Windows start at every multiple of ``hop`` below ``len(input_sig)``, so
    the trailing windows are shorter than ``win_size``.
    """
    if not hop:
        # The original referenced the undefined name ``winsize`` here, which
        # raised NameError whenever ``hop`` was omitted. Floor division keeps
        # the hop an integer on Python 3 as well.
        hop = win_size // 2
    window_start = arange(0, len(input_sig), hop)
    rms = []
    for start in window_start:
        w = input_sig[start: start + win_size].astype(float)
        rms.append(sqrt(mean(w ** 2)))
    # ``//`` reproduces the integer half-window the Python 2 ``/`` produced.
    times = (window_start + win_size // 2) / float(sr)
    return times, rms


# --- Superstition: first 441000 samples (10 s at 44100 Hz) ---
times, super_rms = windowed_rms(superstition[:441000], 4096, 512, 44100)
plot(times, super_rms)

lags, cc, lines, line = acorr(super_rms, maxlags=600)
grid()

# The searches below skip the first 625 entries of cc -- presumably to step
# past the zero-lag peak at index maxlags (600); confirm against acorr's
# lag layout.
argmax(cc[625:]) + 625
lags[argmax(cc[625:]) + 625]

44100.0/53
times[53]
1.0/times[53]

# NOTE(review): times[k] is a window *centre*, so it includes a constant
# half-window offset on top of k * hop / sr. Using it as the beat period
# looks biased -- confirm whether lag * hop / sr was intended.
bpm = 60.0/times[53]
print(bpm)

cepstrum = real(fft.rfft(log10(cc[600:])))
plot(cepstrum)

cepstrum = real(fft.rfft(log10(cc[600:]), n=4096))
plot(cepstrum)

plot(cepstrum)
ylim((-120, 120))
grid()

argmax(cepstrum[50:]) + 50
4096/77

bpm = 60.0/times[53]
print(bpm)


def windowed_acorr(input_sig, win_size, hop=None, sr=1.0, maxlags=None):
    """Compute the autocorrelation of successive full windows of a signal.

    Parameters
    ----------
    input_sig : sequence supporting slicing
        Signal to analyse.
    win_size : int
        Window length in samples.
    hop : int, optional
        Distance in samples between window starts. A falsy value selects
        half of ``win_size``.
    sr : float, optional
        Sample rate used to convert window centres to time units.
    maxlags : int, optional
        Passed through to ``acorr``. A falsy value selects a quarter of
        ``win_size``.

    Returns
    -------
    times : array
        Centre of every window, i.e. ``(start + win_size // 2) / sr``.
    lags : array
        Lag axis returned by the last ``acorr`` call.
    acorrfs : list
        One autocorrelation result per window.

    Only window starts below ``len(input_sig) - win_size`` are used.
    ``acorr`` is a plotting call, so the current figure is cleared before
    returning. If no window fits, ``lags`` is never assigned and the return
    statement raises.
    """
    # Floor division keeps both defaults integral on Python 3 while matching
    # what Python 2's ``/`` produced for ints.
    if not hop:
        hop = win_size // 2
    if not maxlags:
        maxlags = win_size // 4
    window_start = arange(0, len(input_sig) - win_size, hop)
    acorrfs = []
    for start in window_start:
        w = input_sig[start: start + win_size]
        lags, acorr_inst, lines, line = acorr(w, maxlags=maxlags)
        acorrfs.append(acorr_inst)
    times = (window_start + win_size // 2) / float(sr)
    clf()  # discard everything acorr drew while looping
    return times, lags, acorrfs


# --- Superstition: whole track ---
times, super_rms = windowed_rms(superstition, 4096, 512, 44100)
plot(times, super_rms)

lags, cc, lines, line = acorr(super_rms, maxlags=600, usevlines=False)

argmax(cc[610:]) + 610
lags[argmax(cc[610:]) + 610]

offset = 610
bpm = 60.0/times[lags[argmax(cc[offset:]) + offset]]
print(bpm)

win_time = 10.0  # seconds
rms_win_freq = 44100.0/512  # RMS frames per second
win_size = int(win_time * rms_win_freq)
win_size

# NOTE(review): sr=44100 is passed although super_rms advances at
# rms_win_freq frames per second; times_rms is not used below, so this only
# matters if it is read later -- confirm.
times_rms, lags_rms, acorrs = windowed_acorr(super_rms, win_size, int(win_size/2), sr=44100, maxlags=600)

imshow(array(acorrs).T, aspect='auto')
colorbar()

imshow(array(acorrs).T**2, aspect='auto')
colorbar()

acorrs_sub = array(acorrs)[:, :580]
imshow(acorrs_sub.T, aspect='auto')
acorrs_sub.shape
plot(argmax(array(acorrs)[:, :580], axis=1), color='w', lw=3)

offset = 30
maxlags = 600
acorrs_sub = array(acorrs)[:, :maxlags-offset]
imshow(acorrs_sub.T, aspect='auto')
acorrs_sub.shape
plot(argmax(array(acorrs)[:, :maxlags-offset], axis=1), color='w', lw=3)

plot(array(acorrs)[25])
ylim((0.8, 1))

# Per-window index of the strongest autocorrelation value among the first
# maxlags - offset entries of each result.
tempobpm = argmax(array(acorrs)[:, :maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])

plot(60.0/times[abs(lags_rms[tempobpm])], 'x-')
ylim((80, 100))

# --- Led Zeppelin - Rock And Roll ---
loader = essentia.standard.MonoLoader(filename='sources/Led Zeppelin - Rock And Roll.mp3')
rockroll = loader()
rockroll_sr = 44100

rms_hop = 512
rms_winsize = 4096
times, rock_rms = windowed_rms(rockroll, rms_winsize, rms_hop, rockroll_sr)
plot(times[:2500], rock_rms[:2500])

maxlags = 600
lags, cc, lines, line = acorr(rock_rms, maxlags=maxlags)
ylim((0.9, 1.0))

offset = 30
bpm = 60.0/times[lags[argmax(cc[maxlags + offset:]) + maxlags + offset]]
print(bpm)

win_time = 10.0  # seconds
rms_win_freq = float(rockroll_sr)/rms_hop
win_size = int(win_time * rms_win_freq)
times_rms, lags_rms, acorrs = windowed_acorr(rock_rms, win_size, int(win_size/2), sr=44100, maxlags=maxlags)

offset = 30
acorrs_sub = array(acorrs)[:, :maxlags]
imshow(acorrs_sub.T, aspect='auto', interpolation='nearest')
acorrs_sub.shape
plot(argmax(array(acorrs)[:, :maxlags-offset], axis=1), color='w', lw=3)

tempobpm = argmax(array(acorrs)[:, :maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])

# --- Isaac Hayes - Out Of The Ghetto ---
# The rockroll / rock_rms names are reused for this track.
loader = essentia.standard.MonoLoader(filename='sources/Isaac Hayes - Out Of The Ghetto.mp3')
rockroll = loader()
rockroll_sr = 44100

rms_hop = 512
rms_winsize = 4096
times, rock_rms = windowed_rms(rockroll, rms_winsize, rms_hop, rockroll_sr)
plot(times[:2500], rock_rms[:2500])

maxlags = 200
lags, cc, lines, line = acorr(rock_rms, maxlags=maxlags)
ylim((0.7, 1.0))

offset = 30
bpm = 60.0/times[lags[argmax(cc[maxlags + offset:]) + maxlags + offset]]
print(bpm)

win_time = 10.0  # seconds
rms_win_freq = float(rockroll_sr)/rms_hop
win_size = int(win_time * rms_win_freq)
times_rms, lags_rms, acorrs = windowed_acorr(rock_rms, win_size, int(win_size/2), sr=44100, maxlags=maxlags)

offset = 30
acorrs_sub = array(acorrs)[:, :maxlags]
imshow(acorrs_sub.T, aspect='auto', interpolation='nearest')
acorrs_sub.shape
plot(argmax(array(acorrs)[:, :maxlags-offset], axis=1), color='w', lw=3)

tempobpm = argmax(array(acorrs)[:, :maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])
ylim((110, 125))