from scipy.io import wavfile
# NOTE(review): rcParams is used without a visible import; presumably the
# notebook runs in pylab mode, which would also supply arange, plot, acorr,
# etc. used further down -- confirm.
rcParams['figure.figsize'] = (16, 4) #wide graphs by default
import essentia.standard
# Build an Essentia MonoLoader for the track and run it to obtain the signal.
loader = essentia.standard.MonoLoader(filename = 'sources/Stevie Wonder - Superstition.mp3')
superstition = loader()
# Sample rate assumed for the decoded signal -- TODO confirm it matches the
# loader's output rate. The visible code below passes the literal 44100
# rather than this variable.
superstition_sr = 44100
def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute the RMS level of a signal over sliding windows.

    Parameters
    ----------
    input_sig : numpy array
        One-dimensional signal. It must support slicing and ``.astype``.
    win_size : int
        Window length in samples.
    hop : int, optional
        Number of samples between consecutive window starts. When omitted
        (or falsy) it defaults to half the window length.
    sr : float, optional
        Sample rate used to convert sample positions into time. The default
        of 1.0 leaves the times expressed in samples.

    Returns
    -------
    times : numpy array
        ``(start + win_size // 2) / sr`` for every window start.
    rms : list of float
        One RMS value per window. Window starts run up to the end of the
        signal, so the last windows may hold fewer than ``win_size`` samples.
    """
    if not hop:
        # The original read the undefined name `winsize` here, so calling the
        # function without `hop` raised NameError. Floor division keeps the
        # hop an integer (usable as a slice step) on every Python version,
        # and max() avoids a zero step when win_size is 1.
        hop = max(1, win_size // 2)
    window_start = arange(0, len(input_sig), hop)
    # Convert to float before squaring so integer signals cannot overflow.
    rms = [sqrt(mean(input_sig[start:start + win_size].astype(float) ** 2))
           for start in window_start]
    # Floor division reproduces the integer offset the Python 2 original
    # computed with `win_size/2`.
    times = (window_start + win_size // 2) / float(sr)
    return times, rms
# RMS envelope of the first 441000 samples (10 s at 44100 Hz), using
# 4096-sample windows advanced by 512 samples.
times, super_rms = windowed_rms(superstition[:441000], 4096, 512, 44100)
plot(times, super_rms)
[<matplotlib.lines.Line2D at 0x4b2f910>]
# Autocorrelation of the envelope for lags -600..600, measured in RMS
# frames. cc has 1201 entries and index 600 corresponds to lag 0.
lags, cc, lines, line = acorr(super_rms, maxlags=600)
grid();
# Strongest peak at a lag of 25 frames or more; starting the search at
# index 625 skips the zero-lag maximum and the lags right next to it.
argmax(cc[625:]) + 625
653
lags[argmax(cc[625:]) + 625]
53
44100.0/53
832.0754716981132
# NOTE(review): times[k] is a window timestamp, (k*512 + 2048)/44100, so
# times[53] is four hops longer than the lag duration 53*512/44100 s.
# The BPM derived from it below is therefore biased low -- confirm whether
# lag * hop / sr was intended.
times[53]
0.66176870748299321
1.0/times[53]
1.5111019736842104
bpm = 60.0/times[53]
print bpm
90.6661184211
# Second estimate of the same period: real part of the FFT of the log of
# the non-negative-lag half of the autocorrelation (named "cepstrum" here).
cepstrum = real(fft.rfft(log10(cc[600:])))
plot(cepstrum)
[<matplotlib.lines.Line2D at 0x530dc50>]
# Same transform computed with n=4096, so the 601 input values are
# zero-padded and the frequency bins are spaced more finely.
cepstrum = real(fft.rfft(log10(cc[600:]), n=4096))
plot(cepstrum)
[<matplotlib.lines.Line2D at 0x563a950>]
plot(cepstrum)
ylim((-120, 120))
grid()
# Largest bin from 50 upwards; the first 50 bins are left out of the search.
argmax(cepstrum[50:]) + 50
77
# FFT length divided by the peak bin gives the period in lag frames
# (Python 2 integer division); it agrees with the lag of 53 found above.
4096/77
53
bpm = 60.0/times[53]
print bpm
90.6661184211
def windowed_acorr(input_sig, win_size, hop=None, sr=1.0, maxlags=None):
    """Compute the autocorrelation of a signal over sliding windows.

    Each window is passed to ``acorr`` (presumably matplotlib/pylab's
    ``acorr``, as used elsewhere in this file -- confirm). The current
    figure is cleared with ``clf()`` before returning.

    Parameters
    ----------
    input_sig : sequence
        One-dimensional signal; only ``len()`` and slicing are used.
    win_size : int
        Window length in samples.
    hop : int, optional
        Number of samples between consecutive window starts. When omitted
        (or falsy) it defaults to half the window length.
    sr : float, optional
        Sample rate used to convert sample positions into time.
    maxlags : int, optional
        Largest lag passed to ``acorr``. When omitted (or falsy) it defaults
        to a quarter of the window length.

    Returns
    -------
    times : numpy array
        ``(start + win_size // 2) / sr`` for every window start.
    lags : numpy array
        Lag axis shared by all windows.
    acorrfs : list
        One autocorrelation result per window. Only full-length windows are
        used: starts are strictly below ``len(input_sig) - win_size``.
    """
    if not hop:
        # Floor division keeps the default an integer on every Python
        # version, so it stays usable as a slice step.
        hop = win_size // 2
    if not maxlags:
        maxlags = win_size // 4
    window_start = arange(0, len(input_sig) - win_size, hop)
    # Define the lag axis up front. The original only bound `lags` inside
    # the loop, so a signal too short to yield any window raised
    # UnboundLocalError at the return statement.
    lags = arange(-maxlags, maxlags + 1)
    acorrfs = []
    for start in window_start:
        segment = input_sig[start:start + win_size]
        lags, acorr_inst, lines, line = acorr(segment, maxlags=maxlags)
        acorrfs.append(acorr_inst)
    # Floor division reproduces the integer offset the Python 2 original
    # computed with `win_size/2`.
    times = (window_start + win_size // 2) / float(sr)
    clf()
    return times, lags, acorrfs
# RMS envelope of the whole track (4096-sample windows, 512-sample hop).
times, super_rms = windowed_rms(superstition, 4096, 512, 44100)
plot(times,super_rms)
[<matplotlib.lines.Line2D at 0x5deb410>]
# Autocorrelation of the envelope for lags -600..600; index 600 of cc is
# lag 0, so searching from index 610 skips lags 0-9.
lags, cc, lines, line = acorr(super_rms, maxlags=600, usevlines=False)
argmax(cc[610:]) + 610
651
lags[argmax(cc[610:]) + 610]
51
offset = 610
# NOTE(review): times[lag] is a window timestamp that includes the
# half-window offset (2048 samples), so it is longer than lag*512/44100
# and the resulting BPM is biased low -- confirm intent.
bpm = 60.0/times[lags[argmax(cc[offset:]) + offset]]
print bpm
93.9630681818
# Length, in RMS frames, of a 10-second analysis window over the envelope.
win_time = 10.0 # seconds
rms_win_freq = 44100.0/512
win_size = int (win_time * rms_win_freq)
win_size
861
# Autocorrelation of the envelope in half-overlapping 10-second windows.
# NOTE(review): super_rms is sampled at rms_win_freq (44100/512 frames per
# second) but sr=44100 is passed, so times_rms is not in seconds. It is not
# used in the visible code below.
times_rms, lags_rms, acorrs = windowed_acorr(super_rms, win_size, int(win_size/2), sr=44100, maxlags=600)
# Image of the result: one column per window, one row per lag.
imshow(array(acorrs).T, aspect='auto')
colorbar()
<matplotlib.colorbar.Colorbar instance at 0x550a4d0>
imshow(array(acorrs).T**2, aspect='auto')
colorbar()
<matplotlib.colorbar.Colorbar instance at 0x92618c0>
# Keep the first 580 of the 1201 lag columns (lags -600..-21), which
# leaves out lag 0 and the 20 negative lags closest to it.
acorrs_sub = array(acorrs)[:,:580]
imshow(acorrs_sub.T, aspect='auto')
acorrs_sub.shape
# Overlay the column index of the strongest remaining lag for each window.
plot(argmax(array(acorrs)[:,:580], axis=1), color='w', lw=3)
[<matplotlib.lines.Line2D at 0xaa5edd0>]
# Same selection written in terms of maxlags and an offset of 30 lags
# (columns 0..569, i.e. lags -600..-31).
offset = 30
maxlags = 600
acorrs_sub = array(acorrs)[:,:maxlags-offset]
imshow(acorrs_sub.T, aspect='auto')
acorrs_sub.shape
plot(argmax(array(acorrs)[:,:maxlags-offset], axis=1), color='w', lw=3)
[<matplotlib.lines.Line2D at 0x8d85ed0>]
# Autocorrelation of the window at index 25, zoomed in on the peaks.
plot(array(acorrs)[25])
ylim((0.8, 1))
(0.8, 1)
# Despite its name, tempobpm holds column indices into the lag axis, one
# per window. lags_rms maps them to (negative) lags and abs() gives the
# lag magnitude, which is then looked up in times as above.
tempobpm = argmax(array(acorrs)[:,:maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])
[<matplotlib.lines.Line2D at 0x53da690>]
plot(60.0/times[abs(lags_rms[tempobpm])], 'x-')
ylim((80, 100))
(80, 100)
# Repeat the analysis on a second track.
loader = essentia.standard.MonoLoader(filename = 'sources/Led Zeppelin - Rock And Roll.mp3')
rockroll = loader()
# Sample rate assumed for the decoded signal -- TODO confirm.
rockroll_sr = 44100
# RMS envelope parameters, in samples.
rms_hop = 512
rms_winsize = 4096
times, rock_rms = windowed_rms(rockroll, rms_winsize, rms_hop, rockroll_sr)
plot(times[:2500], rock_rms[:2500])
[<matplotlib.lines.Line2D at 0xc213090>]
# Autocorrelation of the whole envelope; index maxlags of cc is lag 0.
maxlags = 600
lags, cc, lines, line = acorr(rock_rms, maxlags=maxlags)
ylim((0.9, 1.0))
(0.9, 1.0)
# Strongest peak at a positive lag of at least `offset` frames.
# NOTE(review): times[lag] includes the half-window offset of the RMS
# frames, so the BPM is biased low -- confirm intent.
offset = 30
bpm = 60.0/times[lags[argmax(cc[maxlags + offset:]) + maxlags + offset]]
print bpm
151.999080882
# Autocorrelation of the envelope in half-overlapping 10-second windows.
win_time = 10.0 # seconds
rms_win_freq = float(rockroll_sr)/rms_hop
win_size = int (win_time * rms_win_freq)
# NOTE(review): sr=44100 is passed although rock_rms is sampled at
# rms_win_freq, so times_rms is not in seconds. It is not used in the
# visible code below.
times_rms, lags_rms, acorrs = windowed_acorr(rock_rms, win_size, int(win_size/2), sr=44100, maxlags=maxlags)
offset = 30
# The image shows the first maxlags columns (lags -600..-1), while the
# peak search below only covers the first maxlags-offset columns.
acorrs_sub = array(acorrs)[:,:maxlags]
imshow(acorrs_sub.T, aspect='auto', interpolation='nearest')
acorrs_sub.shape
plot(argmax(array(acorrs)[:,:maxlags-offset], axis=1), color='w', lw=3)
[<matplotlib.lines.Line2D at 0xc36ef90>]
# Per-window column index of the strongest lag, converted to BPM through
# the lag magnitude and the times lookup.
tempobpm = argmax(array(acorrs)[:,:maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])
[<matplotlib.lines.Line2D at 0x841a1d0>]
# Repeat the analysis on a third track. The variable names rockroll and
# rock_rms are reused and now hold the Isaac Hayes track.
loader = essentia.standard.MonoLoader(filename = 'sources/Isaac Hayes - Out Of The Ghetto.mp3')
rockroll = loader()
# Sample rate assumed for the decoded signal -- TODO confirm.
rockroll_sr = 44100
# RMS envelope parameters, in samples.
rms_hop = 512
rms_winsize = 4096
times, rock_rms = windowed_rms(rockroll, rms_winsize, rms_hop, rockroll_sr)
plot(times[:2500], rock_rms[:2500])
[<matplotlib.lines.Line2D at 0x8421c50>]
# Autocorrelation of the whole envelope, this time limited to 200 lags;
# index maxlags of cc is lag 0.
maxlags = 200
lags, cc, lines, line = acorr(rock_rms, maxlags=maxlags)
ylim((0.7, 1.0))
(0.7, 1.0)
# Strongest peak at a positive lag of at least `offset` frames.
# NOTE(review): times[lag] includes the half-window offset of the RMS
# frames, so the BPM is biased low -- confirm intent.
offset = 30
bpm = 60.0/times[lags[argmax(cc[maxlags + offset:]) + maxlags + offset]]
print bpm
61.5234375
# Autocorrelation of the envelope in half-overlapping 10-second windows.
win_time = 10.0 # seconds
rms_win_freq = float(rockroll_sr)/rms_hop
win_size = int (win_time * rms_win_freq)
# NOTE(review): sr=44100 is passed although rock_rms is sampled at
# rms_win_freq, so times_rms is not in seconds. It is not used in the
# visible code below.
times_rms, lags_rms, acorrs = windowed_acorr(rock_rms, win_size, int(win_size/2), sr=44100, maxlags=maxlags)
offset = 30
# The image shows the first maxlags columns (lags -200..-1), while the
# peak search below only covers the first maxlags-offset columns.
acorrs_sub = array(acorrs)[:,:maxlags]
imshow(acorrs_sub.T, aspect='auto', interpolation='nearest')
acorrs_sub.shape
plot(argmax(array(acorrs)[:,:maxlags-offset], axis=1), color='w', lw=3)
[<matplotlib.lines.Line2D at 0xdf7d990>]
# Per-window column index of the strongest lag, converted to BPM through
# the lag magnitude and the times lookup.
tempobpm = argmax(array(acorrs)[:,:maxlags-offset], axis=1)
plot(60.0/times[abs(lags_rms[tempobpm])])
# NOTE(review): the plotted range 110-125 is roughly twice the whole-track
# estimate printed above (61.5); possibly a half/double-tempo ambiguity
# between the two peak searches -- confirm.
ylim((110, 125))
(110, 125)
How can we give better resolution to the BPM detector?
By Andrés Cabrera mantaraya36@gmail.com
For course MAT 240E at UCSB
This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/