# %matplotlib inline   # IPython notebook magic -- invalid plain-Python syntax,
#                      # so it is kept as a comment (BUG FIX: file didn't parse).

# Resynthesize a recorded note from its spectrogram:
#   WAV -> spectrogram -> per-bin amplitude envelopes interpolated up to audio
#   rate -> bank of random-phase sine oscillators -> summed, normalized WAV.

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from matplotlib import mlab
import numpy as np
# from scikits import audiolab  # BUG FIX: Python-2-only package; its only use
#                               # is the commented-out playback call below, so
#                               # the (crashing) import is disabled too.
import scipy.io.wavfile as wavfile
from scipy import interpolate

plt.rcParams['figure.figsize'] = (15, 10)

# Read the input recording.
# NOTE(review): the variable is named "violin" but the file is flute.wav --
# presumably a leftover name from an earlier experiment; confirm which
# instrument is actually intended.
sampling_rate, violin = wavfile.read("flute.wav")

# What does the signal look like?  (Bare expressions below are notebook-style
# inspections; they have no effect when run as a script.)
violin.shape

# What does it sound like?
# audiolab.play(violin, fs=sampling_rate)
plt.plot(violin)

# Spectrogram of the recording: Pxx is a (num_freq_bins, num_time_windows)
# power array, freqs the bin center frequencies, bins the window center times.
Pxx, freqs, bins, im = plt.specgram(violin, Fs=sampling_rate)
# plt.savefig('violin_spectrogram.png', bbox_inches='tight', pad_inches=0)
Pxx.shape, freqs.shape, bins.shape

# --- earlier experiment: reload the spectrogram from a saved PNG (disabled) ---
# original_spectrogram = mpimg.imread('original_violin_spectrogram.png')
# plt.imshow(original_spectrogram)
# original_spectrogram.shape          # (H, W, 3): RGB channels
# lum_spectrogram = np.apply_along_axis(lum, 2, original_spectrogram)
# plt.imshow(lum_spectrogram)
# plt.plot(lum_spectrogram[:, 0][::-1])
# upside_down = np.flipud(lum_spectrogram)

num_rows, num_cols = Pxx.shape
# BUG FIX: integer division.  Under Python 3, "/" yields a float here, which
# breaks np.zeros(...) shape arguments and slice-index arithmetic downstream.
SAMPLES_PER_WINDOW = len(violin) // num_cols
SAMPLING_RATE = 44100
num_samples = num_cols * SAMPLES_PER_WINDOW
num_rows, num_cols, SAMPLES_PER_WINDOW, num_samples, len(violin)


def linear_luminance(rgb):
    """Return the linear luminance of an (R, G, B) triple.

    Uses the Rec. 709 weights from the Wikipedia grayscale-conversion
    formula, so an RGB spectrogram pixel collapses to one amplitude value.
    """
    return 0.2126 * rgb[0] + 0.7152 * rgb[1] + 0.0722 * rgb[2]


def luminosity(spectrogram):
    """Collapse an (H, W, 3) RGB image to a single (H, W) luminance plane."""
    return np.apply_along_axis(linear_luminance, 2, spectrogram)


def frequency(normalized_row_index):
    """Map a normalized row index in [0, 1] to a frequency in Hz.

    The exponential mapping spans roughly 20 Hz - 20 kHz (the range of human
    hearing) while keeping most of the index range below 1 kHz, where the
    high-amplitude partials live.
    """
    return 120 * np.exp(5.125 * normalized_row_index) - 100


def time(num_samples, sampling_rate=SAMPLING_RATE):
    """Return the sample times in seconds for num_samples at sampling_rate."""
    return (1.0 / sampling_rate) * np.arange(num_samples)


def step(x, y, nx=326):
    """Zero-order-hold upsampling: repeat each y value for nx output samples.

    Returns (X, Y) where X is len(x)*nx evenly spaced points over x's range
    and Y holds each y[i] constant across its nx-sample window.
    """
    X = np.linspace(x.min(), x.max(), len(x) * nx)
    Y = np.zeros(len(x) * nx)
    # BUG FIX: xrange is Python 2 only -- use range.
    for i in range(len(x)):
        for j in range(nx):
            Y[i * nx + j] = y[i]
    return X, Y


def lerp(x, y, nx=326, nBlend=50):
    """Like step(), but cross-fade nBlend samples on each side of every
    interior step boundary with linear interpolation, removing the clicks a
    hard step would cause.
    """
    X = np.linspace(x.min(), x.max(), len(x) * nx)
    Y = np.zeros(len(x) * nx)
    # Start from the zero-order-hold expansion...
    for i in range(len(x)):
        for j in range(nx):
            Y[i * nx + j] = y[i]
    # ...then smooth each interior boundary (BUG FIX: xrange -> range; the
    # original "if i == 0: continue" is folded into the range start).
    for i in range(1, len(x) - 1):
        X_temp = X[i * nx - nBlend:i * nx + nBlend + 1]
        x_temp = (X[i * nx - nBlend], X[i * nx + nBlend])
        y_temp = y[i - 1:i + 1]
        Y[i * nx - nBlend:i * nx + nBlend + 1] = np.interp(X_temp, x_temp, y_temp)
    return X, Y


# Oscillator-bank parameters.  np.matrix is deprecated; plain ndarrays with
# broadcasting build the same (num_rows, num_samples) grid that the old
# (num_rows, 1) @ (1, num_samples) matrix product produced.
t = np.linspace(bins[0], bins[-1], num_samples)   # (num_samples,) times
f = freqs[:, np.newaxis]                          # (num_rows, 1) frequencies
phi = np.random.rand(num_rows, 1) * 2 * np.pi     # random phase per bin
f.shape, t.shape, phi.shape

# Amplitude envelopes: sqrt of power gives magnitude.  (The earlier
# "a = Pxx" assignment was dead code and is dropped.)
a = np.sqrt(Pxx)

# Interpolate each bin's envelope from num_cols spectrogram windows up to
# num_samples audio-rate points.  PCHIP (monotone cubic) avoids the
# overshoot/ringing a plain cubic spline would add.
interp_a = np.zeros((num_rows, num_samples))
dense_times = np.linspace(bins[0], bins[-1], num_samples)
for i in range(len(a)):
    interpolation = interpolate.PchipInterpolator(bins, a[i, :])
    interp_a[i, :] = interpolation(dense_times)

# Compare an interpolated envelope against its coarse original.
plt.subplot(211)
plt.plot(interp_a[0, :])
plt.subplot(212)
plt.plot(a[0, :])

interp_a.shape, f.shape, t.shape, phi.shape

# One sinusoid per frequency bin, scaled by its interpolated envelope;
# f * t broadcasts to (num_rows, num_samples).
oscillators = interp_a * np.sin(2 * np.pi * f * t + phi)
oscillators.shape

# Mix all bins down to one channel and normalize to [-1, 1].
signal = oscillators.sum(axis=0)
signal = signal / np.amax(np.absolute(signal))

# Compare attack transients...
plt.subplot(2, 1, 1)
plt.plot(signal[0:400])
plt.subplot(2, 1, 2)
plt.plot(violin[0:400])

# ...and the full waveforms.
plt.subplot(2, 1, 1)
plt.plot(signal)
plt.subplot(2, 1, 2)
plt.plot(violin)

# NOTE(review): signal is float64 in [-1, 1]; scipy writes that as an IEEE
# float WAV, which some players reject.  Cast to float32 or scale to int16
# if broader compatibility is needed.
wavfile.write("new_signal.wav", sampling_rate, signal)
wavfile.write("original_flute.wav", sampling_rate, violin)