import os

from essentia.standard import *

# NOTE: like the original notebook, this script relies on the pylab namespace
# (plot, figure, specgram, imshow, gcf, sqrt, dot, mean, arange, linalg,
# window_hanning, ...) already being in scope, e.g. `ipython --pylab` or `%pylab`.

# The notebook used the IPython shortcut `cd ~/Desktop/Music/`, which is not
# valid Python; os.chdir is the plain-Python equivalent.
os.chdir(os.path.expanduser('~/Desktop/Music/'))

loader = MonoLoader(filename='03 Waldstein Sonata, I..m4a')
waldstein = loader()
sr = 44100
plot(waldstein)

# Let's try to use some self-similarity matrices on the spectrum of the Waldstein.
figure()  # new figure so the spectrogram is not drawn over the waveform
Pxx, freqs, times, im = specgram(waldstein, NFFT=2048, Fs=sr,
                                 window=window_hanning, noverlap=1024)

# Cosine similarity, as described by Foote: for two vectors v and w,
# dot(v, w) / (|v| * |w|) is the cosine of the angle between them. If v and w
# are very similar (i.e., close to parallel), the cosine attains its maximum
# value of 1.
#
# Scaling every row to unit length once and taking a single matrix product
# yields the same values as looping over all row pairs, without recomputing
# sqrt(Pxx) and both norms on every iteration.
#
# NOTE(review): the rows of Pxx are frequency bins (NFFT/2 + 1 = 1025 of them),
# so this matrix compares frequency bins with each other, not time frames.
# Confirm that this is the intended axis before reading musical form from it.
magnitude = sqrt(Pxx)  # we take the square root since Pxx is power
unit_rows = magnitude / linalg.norm(magnitude, axis=1, keepdims=True)
print(Pxx.shape)
# The product is already a square (rows x rows) matrix, so no hard-coded
# reshape(1025, 1025) is needed.
ssm = dot(unit_rows, unit_rows.T)
print(ssm.shape)
figure()
imshow(ssm, cmap='gray')
gcf().set_figheight(10)

# This is a pretty interesting result---if I am interpreting it correctly, it
# shows that there are two very distinctive themes in the piece that differ
# from each other. Perhaps this corresponds to the contrasting A and B themes
# of the sonata? We can also see the overall ABA structure of the sonata form:
# Exposition-->Development-->Recapitulation!

# We can also try a similar technique on the rms, as in class. In this case we
# will use the correlation coefficients rather than the 'cosine'. First we need
# an rms function.


def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Return the RMS level of `input_sig` over sliding windows.

    Parameters:
        input_sig: 1-D numpy array of samples (it is sliced and `.astype` is
            called on the slices).
        win_size: window length in samples.
        hop: distance in samples between window starts; when omitted (or 0)
            it defaults to half the window.
        sr: sample rate used to convert window centres to seconds.

    Returns:
        (times, rms): `times` is an array with the centre of every window in
        seconds, `rms` is a list with the RMS value of every window.
    """
    if not hop:
        # Fixed: the original read the undefined name `winsize` (NameError).
        # Floor division keeps the hop an integer so slice indices stay valid.
        hop = win_size // 2
    window_start = arange(0, len(input_sig) - win_size, hop)
    rms = [
        sqrt(mean(input_sig[start:start + win_size].astype(float) ** 2))
        for start in window_start
    ]
    times = (window_start + win_size / 2.0) / float(sr)
    return times, rms


times, super_rms = windowed_rms(waldstein, 4096, 512, sr)
figure()
plot(times, super_rms)

# To speed up computations, we can safely downsample without losing too much
# relevant musical information.
from scipy.signal import decimate

# NOTE: like the rest of the notebook, this section relies on the pylab
# namespace (plot, figure, imshow, gcf, array, arange, corrcoef, atleast_2d,
# dot, linalg, ...) and on `waldstein`, `sr` and `super_rms` computed earlier.

rms_dec = decimate(super_rms, 3)
figure()
plot(rms_dec)

# For this version of the ssm, we use a windowing technique, as encouraged by
# Foote and demonstrated in class.
ss_win_size = 200
ss_hop = 10
in_sig = rms_dec
win_start = arange(0, len(in_sig) - ss_win_size, ss_hop)

# One window per row. corrcoef on a 2-D array treats each row as a variable and
# returns the full matrix of Pearson correlation coefficients between rows,
# i.e. the same values as calling corrcoef(wini, winj)[0, 1] for every pair.
# The coefficient is 1 when two windows are highly similar.
windows = array([in_sig[start:start + ss_win_size] for start in win_start])
# atleast_2d keeps the result a matrix even if there is only a single window.
ssm = atleast_2d(corrcoef(windows))
figure()
imshow(ssm, cmap='gray')
gcf().set_figheight(10)

# The usual checkerboard pattern shows the many similarities.

# Now let's try an ssm for the MFCCs.
mfcc = MFCC()
spectrum = Spectrum()
w = Windowing(type='hann')

# Spectrum of a single 1024-sample frame taken one second into the piece.
frame = waldstein[sr: sr + 1024]
spec = spectrum(w(frame))
figure()
plot(spec)

mfccs = []
for frame in FrameGenerator(waldstein, frameSize=2048, hopSize=1024):
    mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
    mfccs.append(mfcc_coeffs)

# Transpose so that each row holds one MFCC coefficient across all frames.
mfccs = array(mfccs).T
# The first dimension corresponds to the MFCC coefficients (13 in the original
# run), the second to the frames.
print(mfccs.shape)

# Now let's make the ssm: cosine similarity between every pair of rows.
# Normalising the rows once and taking one matrix product gives the same values
# as the pairwise loop. It also avoids hard-coding the 13 x 13 shape and no
# longer rebinds `w`, which must stay the Windowing instance used above.
#
# NOTE(review): because the rows are coefficients, this compares the 13
# coefficient trajectories with each other rather than time frames. A
# frame-by-frame matrix (the transpose) would have one row per frame, about
# 24550 here, and was too slow and large to compute in the original notebook.
unit_rows = mfccs / linalg.norm(mfccs, axis=1, keepdims=True)
ssm = dot(unit_rows, unit_rows.T)
figure()
imshow(ssm, cmap='gray')
gcf().set_figheight(10)

# The few narrowish bands of black seem the most distinctive, although I am not
# sure in general how to interpret this result.
# I tried doing the analysis on the transpose, where there are 24550 columns,
# but it took too long.