%matplotlib inline cd /Users/dpwe/projects/dtw/dp_python import numpy as np import matplotlib.pyplot as plt import librosa # soundfile access library - see https://github.com/bmcfee/librosa import dpcore # the current library # Mirror matlab example from http://www.ee.columbia.edu/ln/rosa/matlab/dtw/ # Two different speakers saying "Cottage cheese with chives is delicious" d1, sr = librosa.load('sm1_cln.wav', sr=None) d2, sr = librosa.load('sm2_cln.wav', sr=None) # Calculate their short-time Fourier transforms D1C = librosa.stft(d1, n_fft=512, hop_length=128) D2C = librosa.stft(d2, n_fft=512, hop_length=128) # We'll use the magnitudes to calculate similarity (ignore phase) D1 = np.abs(D1C) D2 = np.abs(D2C) # Plot the spectrograms. You see a similar sequence of sounds, but different timing details plt.subplot(2,1,1) librosa.display.specshow(20*np.log10(D1), sr=sr, hop_length=128, y_axis="linear", x_axis="time", cmap='jet') plt.subplot(2,1,2) librosa.display.specshow(20*np.log10(D2), sr=sr, hop_length=128, y_axis="linear", x_axis="time", cmap='jet') # Cosine similarity matrix (slow one-liner) # Each cell SM[i,j] is the cosine similarity between D1[:,i] and D2[:,j] # i.e. the inner product of the two unit-normalized feature vectors SM = np.array([[np.sum(a*b)/np.sqrt(np.sum(a**2)*np.sum(b**2)) for b in D2.T] for a in D1.T]) # Find the minimum-cost path. We use 1-SM so that cosine similarity == 1 -> cost = 0 # penalty is the additional cost incurred by non-diagonal steps (promotes diagonality) localcost = np.array(1.0-SM, order='C', dtype=float) p, q, C, phi = dpcore.dp(localcost, penalty=0.1) # p and q are vectors giving the row and column indices along the best path # C returns the full minimum-cost matrix, and phi is the full traceback matrix # Plot the best path on top of local similarity matrix fig, ax = plt.subplots() ax.imshow(SM, interpolation='none', cmap='binary') ax.hold(True) ax.plot(q,p,'-r') ax.hold(False) ax.axis([0, np.size(D2, axis=1), 0, np.size(D1, axis=1)]) # We can use the best-aligned columns to plot aligned spectrograms # Notice how the features in the two spectrograms are now lined up horizontally plt.subplot(2,1,1) librosa.display.specshow(20*np.log10(D1[:, p]), sr=sr, hop_length=128, y_axis="linear", x_axis="time", cmap='jet') plt.subplot(2,1,2) librosa.display.specshow(20*np.log10(D2[:, q]), sr=sr, hop_length=128, y_axis="linear", x_axis="time", cmap='jet') # We can compare the timings of the C external implementation # to the pure Python version of the core DP routine # with the use_extension argument: %timeit C, phi = dpcore.dpcore(localcost, 0.1, use_extension=True) %timeit C, phi = dpcore.dpcore(localcost, 0.1, use_extension=False) # Just to check, they do give the same result (within numerical precision) Cext, phiext = dpcore.dpcore(localcost, 0.1, use_extension=True) Cpyt, phipyt = dpcore.dpcore(localcost, 0.1, use_extension=False) print np.max(np.abs(Cext-Cpyt))