%pylab inline
import matplotlib as mpl
from matplotlib.colors import LogNorm
# Default size for every figure created below: 16 wide by 4 high.
mpl.rcParams['figure.figsize'] = (16,4)
Populating the interactive namespace from numpy and matplotlib
from essentia.streaming import *
# Analysis parameters shared by the cells below.
sr = 44100        # sample rate passed to the loader
frameSize = 4096  # samples per analysis frame
# Floor division keeps hopSize an integer on Python 2 and Python 3 alike
# (half a frame, i.e. 50% overlap between consecutive frames).
hopSize = frameSize // 2

# Algorithms that make up the streaming network.
loader = MonoLoader(filename = './Music/iTunes/iTunes Media/Music/Ravi Shankar/Master Of Sitar/01 Raag Alahya Bilawal (Early Morning Raag).mp3', sampleRate=sr)
frameCutter = FrameCutter(frameSize = frameSize, hopSize = hopSize)
w = Windowing(type = 'hann')
spec = Spectrum()
spectralPeaks = SpectralPeaks()
onsetDetector = OnsetRate()

# The loader's audio feeds two branches: the framed spectral chain and the
# onset detector, which takes the audio signal directly.
loader.audio >> frameCutter.signal
loader.audio >> onsetDetector.signal
frameCutter.frame >> w.frame >> spec.frame
spec.spectrum >> spectralPeaks.spectrum
<essentia.streaming._StreamConnector instance at 0x109f1da70>
# Single pool that stores every output of the network under a descriptor name.
pool = essentia.Pool()

# (algorithm output, descriptor name) pairs, connected in this order.
poolOutputs = [
    (onsetDetector.onsetTimes, 'onsetTimes'),
    (onsetDetector.onsetRate, 'onsetRate'),
    (spec.spectrum, 'lowlevel.spectrum'),
    (spectralPeaks.frequencies, 'spectralPeaks.frequencies'),
    (spectralPeaks.magnitudes, 'spectralPeaks.magnitudes'),
]
for source, descriptor in poolOutputs:
    source >> (pool, descriptor)

# Reset the network starting at the loader, then run it.
essentia.reset(loader)
essentia.run(loader)
def _secondsToFrames(seconds):
    """Convert a time in seconds to a fractional frame position.

    Uses the module-level sr and hopSize; the result is a float.
    """
    return (float(seconds) / 60) / hopSize * sr * 60


# Hand-annotated section boundaries, in seconds.
alapGroundTruth = 42                # a priori knowledge: alap spans 0-42 secs
jhalaGroundTruth = (15 * 60) + 22   # 15:22
jhalaGroundTruth2 = (18 * 60) + 48  # 18:48

# The same boundaries expressed in frame units.
alapGroundTruthScaled = _secondsToFrames(alapGroundTruth)
jhalaGroundTruthScaled = _secondsToFrames(jhalaGroundTruth)
jhalaGroundTruth2Scaled = _secondsToFrames(jhalaGroundTruth2)

alapGroundTruthScaled, alapGroundTruth
(904.3945312499999, 42)
# Analysed length in minutes, next to the number of spectrum frames.
frameCount = len(pool['lowlevel.spectrum'])
(float(frameCount * hopSize) / sr) / 60, frameCount
(21.880937263794408, 28270)
# Value stored at index 58 of the detected onset times
# (presumably inspected to compare against alapGroundTruth = 42 — confirm).
pool['onsetTimes'][58]
42.202267
# Start of the tabla excerpt in seconds (14:38); the excerpt lasts 12 s.
# Assigned here because the look-ups below read it, and the only other
# assignment in the notebook comes in a later cell.
tablaSounds = (14*60)+ 38

# Each look-up keeps the first match, as a one-element index array.
# First detected onset after the alap boundary.
alapGroundTruthOnset = argwhere(pool['onsetTimes'] > alapGroundTruth)[0]
# First onset after the tabla excerpt starts, and first onset after it ends.
tablaGroundTruthOnset = argwhere(tablaSounds < pool['onsetTimes'])[0]
tablaGroundTruthEndOnset = argwhere(pool['onsetTimes'] > tablaSounds + 12)[0]
tablaGroundTruthOnset, tablaGroundTruthEndOnset
(array([3155]), array([3212]))
tablaSounds = (14*60)+ 38 #14:38 - 14:50

# The onset indices are one-element arrays; slice bounds must be plain
# integers, so take the single element out explicitly.
alapEndIndex = int(alapGroundTruthOnset[0])
tablaStartIndex = int(tablaGroundTruthOnset[0])
tablaEndIndex = int(tablaGroundTruthEndOnset[0])

# Onset times (seconds) converted to fractional frame positions.
# Sitar: every onset before the alap boundary.
sitarOnsetScaled = [
    (float(onset)/60)/hopSize * sr* 60
    for onset in pool['onsetTimes'][:alapEndIndex]
]
# Tabla: the onsets that fall inside the tabla excerpt.
tablaOnsetScaled = [
    (float(onset)/60)/hopSize * sr* 60
    for onset in pool['onsetTimes'][tablaStartIndex:tablaEndIndex]
]
# Spectrogram of the alap section over all frequency bins.
# The pooled spectrum is indexed [frame, bin], so the frame limit goes on the
# first axis; it is cast to int because slice bounds must be integers.
imshow(pool['lowlevel.spectrum'][:int(alapGroundTruthScaled), :].T, aspect = 'auto', norm = LogNorm())
<matplotlib.image.AxesImage at 0x135f98410>
# Spectrogram of the alap section, limited to the first 1000 frequency bins.
# The frame limit is fractional, so cast it to int before slicing.
imshow(pool['lowlevel.spectrum'][:int(alapGroundTruthScaled), :1000].T, aspect = 'auto', norm = LogNorm())
<matplotlib.image.AxesImage at 0x4deae7cd0>
# Element counts of two slices of the transposed spectrogram.
# The second bound is a fractional frame position, so it is cast to int.
# NOTE(review): after .T the first axis appears to be frequency bins, so both
# slices limit bins rather than time — confirm that this is intended.
spectrumT = pool['lowlevel.spectrum'].T
spectrumT[:alapGroundTruth].size, spectrumT[:int(sitarOnsetScaled[-1])].size
(1187340, 21683090)
# Full spectrogram with the third sitar onset marked by a vertical line.
imshow(pool['lowlevel.spectrum'].T, aspect = 'auto', interpolation = 'nearest', norm = LogNorm())
# Alternative: mark every sitar onset instead of a single one.
#for everyOnset in sitarOnsetScaled:
#    vlines(everyOnset, 0, 500, color='b', alpha=0.5)
# alpha has to be a number, not a string such as '0.8'.
vlines(sitarOnsetScaled[2], 0,2000, color='b', alpha = 0.8)
# Alternative: zoom in around the marked onset.
#xlim(sitarOnsetScaled[2]-5, sitarOnsetScaled[2]+ 50)
<matplotlib.collections.LineCollection at 0x12a795650>
# Spectrogram from the end of the alap up to the eighth tabla onset, first
# 1000 frequency bins. Both bounds are fractional frame positions, so they
# are cast to int before slicing.
imshow(pool['lowlevel.spectrum'][int(alapGroundTruthScaled): int(tablaOnsetScaled[7]), :1000].T, aspect = 'auto', norm = LogNorm())
<matplotlib.image.AxesImage at 0x11b3dcb90>
# Spectrogram of the tabla excerpt (first to last tabla onset, first 1000
# frequency bins) with every tabla onset marked.
# Onset positions are fractional frames; slice bounds must be integers.
tablaFirstFrame = int(tablaOnsetScaled[0])
tablaLastFrame = int(tablaOnsetScaled[-1])
imshow(pool['lowlevel.spectrum'][tablaFirstFrame: tablaLastFrame, :1000].T, aspect = 'auto', norm = LogNorm())
for everyOnset in tablaOnsetScaled:
    # Shift by the first onset so x = 0 lines up with the start of the image.
    # alpha has to be a number, not a string such as '0.9'.
    vlines(everyOnset - tablaOnsetScaled[0], 0, 1000, color='k', alpha = 0.9)
width = 40  # not read in this cell; kept in case later cells rely on it


def _plotOnsetPanel(axis, title, onsetFrame, before=5, after=50, maxBin=1000):
    """Draw the spectrogram around one onset and mark the onset frame.

    axis       -- matplotlib axes to draw into
    title      -- title of the panel
    onsetFrame -- onset position in (possibly fractional) frame units
    before     -- number of frames shown ahead of the onset
    after      -- number of frames shown from the onset onwards
    maxBin     -- number of lowest frequency bins shown

    Returns the line collection created for the onset marker.
    """
    onsetIndex = int(onsetFrame)  # slice bounds must be integers
    # Clamp so an onset near the start cannot produce a negative bound,
    # which would silently index from the end of the recording.
    firstFrame = max(onsetIndex - before, 0)
    segment = pool['lowlevel.spectrum'][firstFrame: onsetIndex + after, :maxBin]
    axis.set_title(title)
    axis.imshow(segment.T, aspect = 'auto', norm = LogNorm())
    # The marker sits where the onset falls inside the displayed segment;
    # alpha has to be a number, not a string.
    return axis.vlines(onsetIndex - firstFrame, 0, maxBin, color='k', alpha = 0.8)


# Onsets picked for the comparison; the same positions are used in both lists.
onsetPicks = [2, 10, 19, 31]

# One figure with a sitar row and a tabla row, so the shared title and the
# layout adjustment apply to both sets of panels and no handle is overwritten.
fig, (sitarAxes, tablaAxes) = plt.subplots(2, 4, figsize=(16, 8))
ax1, ax2, ax3, ax4 = sitarAxes
bx1, bx2, bx3, bx4 = tablaAxes
fig.suptitle('Spectrum difference between Sitar vs Tabla onset', fontsize= 20)
fig.subplots_adjust(top=0.85, hspace=0.35)

for panel, (axis, pick) in enumerate(zip(sitarAxes, onsetPicks)):
    _plotOnsetPanel(axis, 'sitar onset %d' % panel, sitarOnsetScaled[pick])

tablaMarkers = [
    _plotOnsetPanel(axis, 'tabla onset %d' % panel, tablaOnsetScaled[pick])
    for panel, (axis, pick) in enumerate(zip(tablaAxes, onsetPicks))
]
# As before, im1..im4 end up bound to the marker lines of the tabla panels.
im1, im2, im3, im4 = tablaMarkers