from essentia.standard import *
cd ~/Desktop/Music/
/Users/adj/Desktop/Music
# Sample rate assumed by every later step (specgram's Fs, the RMS time axis,
# the one-second frame offset). Pass it to the loader explicitly so the decoded
# signal is guaranteed to match `sr` rather than relying on the loader default.
sr = 44100
loader = MonoLoader(filename = '03 Waldstein Sonata, I..m4a', sampleRate = sr)
# Calling the loader instance decodes the file and returns the mono samples.
waldstein = loader()
plot(waldstein)
[<matplotlib.lines.Line2D at 0x10f625850>]
#Let's try to use some self-similarity matrices on the spectrum of the Waldstein.
# Spectrogram of the whole recording; Pxx has one row per frequency bin and
# one column per analysis frame.
Pxx, freqs, times, im = specgram(waldstein, NFFT=2048, Fs=sr, window=window_hanning, noverlap=1024)
# Pxx is power, so take the square root -- once, instead of on every loop pass.
mag = sqrt(Pxx)
# Euclidean norm of every row of `mag`.
row_norms = sqrt((mag * mag).sum(axis=1))
# Cosine of the angle between every pair of rows v, w, as described by Foote:
# dot(v, w) / (|v| * |w|). If v and w are very similar (i.e., close to parallel),
# the cosine attains its maximum value of 1. A single matrix product computes
# all pairs at once and replaces the nested Python loops.
# NOTE: the vectors compared here are the ROWS of Pxx (frequency bins), so the
# resulting matrix is bin-by-bin, not frame-by-frame.
ssm = dot(mag, mag.T) / (row_norms[:, None] * row_norms[None, :])
Pxx.shape
(1025, 24547)
# Take the side length from Pxx (one row per compared vector) instead of
# hard-coding 1025, so this still works if NFFT is changed.
ssm = array(ssm).reshape(len(Pxx), len(Pxx)) #we transform the ssm from a 1-dimensional list into a square matrix
ssm.shape
(1025, 1025)
# Show the similarity matrix as a grayscale image.
imshow(ssm, cmap='gray')
# Make the current figure taller so the matrix is easier to read.
gcf().set_figheight(10)
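#This is a pretty interesting result---if I am interpreting it correctly, it shows that there are two very distinctive themes in the piece
#that differ from each other. Perhaps this corresponds to the contrasting A and B themes of the sonata?
#We can also see the overall ABA structure of the sonata form: Exposition-->Development-->Recapitulation!
#Caveat: the computation above compares the rows of Pxx, so both axes of this 1025 x 1025 matrix index frequency bins, not time frames.
#Reading off sections of the piece would require a time-by-time matrix, i.e. comparing the columns of Pxx instead.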
#We can also try a similar technique on the rms, as in class. In this case we will use the correlation coefficients rather than
#the 'cosine'. First we need an rms function.
def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Compute the RMS level of a signal over sliding windows.

    Parameters
    ----------
    input_sig : array
        One-dimensional signal; must support slicing and ``astype``.
    win_size : int
        Window length in samples.
    hop : int, optional
        Number of samples between successive window starts. Defaults to
        half the window size when omitted (or falsy).
    sr : float, optional
        Sample rate used to convert window centers to seconds.

    Returns
    -------
    times : array
        Center of each window, in seconds (``(start + win_size / 2) / sr``).
    rms : list of float
        Root-mean-square value of each window.
    """
    if not hop:
        # The original referenced the undefined name `winsize` here. Integer
        # division keeps the hop usable as a slice index on Python 3.
        hop = win_size // 2
    # Window starts stop before len(input_sig) - win_size, so every window
    # is a full win_size samples long.
    window_start = arange(0, len(input_sig) - win_size, hop)
    rms = [
        sqrt(mean(input_sig[start:start + win_size].astype(float) ** 2))
        for start in window_start
    ]
    # Float division so the center time is not truncated to a whole sample.
    times = (window_start + win_size / 2.0) / float(sr)
    return times, rms
# RMS envelope of the whole recording: 4096-sample windows, advanced 512
# samples at a time. Note that `times` is rebound here (it previously held
# the specgram time axis).
times, super_rms = windowed_rms(waldstein, win_size=4096, hop=512, sr=sr)
plot(times, super_rms)
[<matplotlib.lines.Line2D at 0x134955c90>]
#To speed up computations, we can safely downsample without losing too much relevant musical information.
from scipy.signal import decimate
# Reduce the RMS envelope by a factor of 3 to shrink the later computation.
rms_dec = decimate(super_rms, q=3)
plot(rms_dec)
[<matplotlib.lines.Line2D at 0x1349a2450>]
#For this version of the ssm, we use a windowing technique, as encouraged by Foote and demonstrated in class.
ss_win_size = 200
ss_hop = 10
in_sig = rms_dec
win_start = arange(0, len(in_sig)- ss_win_size, ss_hop)
# Stack the overlapping windows into a 2-D array, one window per row.
windows = array([in_sig[start: start + ss_win_size] for start in win_start])
# corrcoef on a 2-D array treats each row as a variable and returns the full
# matrix of pairwise Pearson correlation coefficients (1 when two windows are
# highly similar). One call replaces a separate corrcoef call for every pair
# of windows. The reshape keeps the result 2-D even with a single window.
ssm = corrcoef(windows).reshape(len(win_start), len(win_start))
imshow(ssm, cmap='gray')
gcf().set_figheight(10)
#The usual checkerboard pattern shows the many similarities.
#Now let's try an ssm for the MFCCs.
# Essentia algorithm instances; the MFCC extraction loop further down reuses
# `mfcc`, `spectrum` and `w`.
mfcc = MFCC()
spectrum = Spectrum()
w = Windowing(type = 'hann')
# Quick check on a single 1024-sample frame starting `sr` samples
# (one second at the assumed rate) into the recording.
frame = waldstein[sr : sr + 1024]
# Window the frame, then take its spectrum.
spec = spectrum(w(frame))
plot(spec)
[<matplotlib.lines.Line2D at 0x134d63dd0>]
# Window each 2048-sample frame (hop 1024), take its spectrum and keep only
# the coefficient vector, which is the second value `mfcc` returns.
mfccs = array([
    mfcc(spectrum(w(frame)))[1]
    for frame in FrameGenerator(waldstein, frameSize = 2048, hopSize = 1024)
]).T #This transposes the MFCCs so that each row is one coefficient over time.
mfccs.shape
(13, 24550)
#The first dimension corresponds to the 13 MFCC coefficients (not the mel bands, which mfcc returns separately). Now let's make the ssm.
# Cosine similarity between every pair of rows of `mfccs`:
# dot(v, w) / (|v| * |w|). Computed as one matrix product, which also avoids
# using `w` as a loop variable (that overwrote the Windowing instance `w`
# and broke re-running the MFCC extraction afterwards).
coeff_norms = sqrt((mfccs * mfccs).sum(axis=1))
# The result is square with one row/column per row of `mfccs`, so no
# hard-coded 13 x 13 reshape is needed.
# NOTE: the rows compared are coefficient trajectories, not time frames.
ssm = dot(mfccs, mfccs.T) / (coeff_norms[:, None] * coeff_norms[None, :])
imshow(ssm, cmap='gray')
gcf().set_figheight(10)
#The few narrowish bands of black seem the most distinctive, although I am not sure in general how to interpret this result.
#I tried doing the analysis on the transpose, where there are 24550 columns, but it took too long.