%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from matplotlib import mlab
import numpy as np
from scikits import audiolab
import scipy.io.wavfile as wavfile
from scipy import interpolate
plt.rcParams['figure.figsize'] = (15, 10)
# Read the source recording.  NOTE(review): the file loaded is flute.wav
# even though the variable is named "violin" — confirm which is intended.
sampling_rate, violin = wavfile.read("flute.wav")
# Inspect the raw sample array: 1-D, so this is a mono recording.
violin.shape
(65738,)
# What does violin sound like?  (playback disabled; needs scikits.audiolab)
# audiolab.play(violin, fs = sampling_rate)
plt.plot(violin)
# [<matplotlib.lines.Line2D at 0x1032c0290>]  (pasted notebook output; commented out — not valid Python)
# Compute and plot a spectrogram.  Pxx is the power per
# (frequency row, time bin); freqs and bins are the matching axis values.
Pxx, freqs, bins, im = plt.specgram(violin, Fs=sampling_rate)
# plt.savefig('violin_spectrogram.png', bbox_inches='tight', pad_inches=0)
Pxx.shape, freqs.shape, bins.shape
((129, 512), (129,), (512,))
# Now let's load up the original .png and see if we can recreate the sound
# original_spectrogram = mpimg.imread('original_violin_spectrogram.png')
# plt.imshow(original_spectrogram)
# There are 3 values corresponding to RGB color
# original_spectrogram.shape
# Let's convert to a single luminosity, so our amplitude is just one number
# rather than an (R, G, B) tuple
# Using the formula given in http://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
# def lum(a):
# # R is a[0], G is a[1], B is a[2]
# return 0.2126*a[0] + 0.7152*a[1] + 0.0722*a[2]
# lum_spectrogram = np.apply_along_axis(lum, 2, original_spectrogram)
# plt.imshow(lum_spectrogram)
# plt.plot(lum_spectrogram[:,0][::-1])
num_rows, num_cols = Pxx.shape
# Samples per spectrogram time bin.  Floor division keeps this an int under
# Python 3 as well — plain / would produce a float there and break the
# integer sizing used below.
SAMPLES_PER_WINDOW = len(violin) // num_cols
SAMPLING_RATE = 44100
# Reconstruction length: a whole number of bins, so it can be slightly
# shorter than the original recording (65536 vs 65738 samples here).
num_samples = num_cols * SAMPLES_PER_WINDOW
num_rows, num_cols, SAMPLES_PER_WINDOW, num_samples, len(violin)
# notebook output: (129, 512, 128, 65536, 65738)
def linear_luminance(rgb):
    """Collapse an (R, G, B) triple into a single luminance value.

    Uses the ITU-R BT.709 weights; rgb[0], rgb[1], rgb[2] are taken as
    red, green, and blue respectively (any further channels are ignored).
    """
    weights = (0.2126, 0.7152, 0.0722)
    return weights[0]*rgb[0] + weights[1]*rgb[1] + weights[2]*rgb[2]
def luminosity(spectrogram):
    """Convert an (H, W, C) RGB(A) image array to an (H, W) luminance array.

    Applies the BT.709 grayscale weights to the first three channels of the
    last axis (an alpha channel, if present, is ignored) — the same result as
    np.apply_along_axis(linear_luminance, 2, spectrogram), but as one
    vectorized NumPy expression instead of a Python-level call per pixel.
    """
    return (0.2126 * spectrogram[..., 0]
            + 0.7152 * spectrogram[..., 1]
            + 0.0722 * spectrogram[..., 2])
def frequency(normalized_row_index):
    """Map a normalized row index (row / total rows, in [0, 1]) to Hz.

    An exponential mapping chosen so the output runs from 20 Hz at index 0
    up toward 20 kHz at index 1 (the range of human hearing), while keeping
    most of the high-amplitude content below 1 kHz.
    """
    scale, growth, offset = 120, 5.125, 100
    return scale * np.exp(growth * normalized_row_index) - offset
def time(num_samples, sampling_rate=SAMPLING_RATE):
    """Return the time, in seconds, of each of num_samples samples.

    Sample i occurs at i / sampling_rate seconds; sampling_rate defaults
    to the module-level SAMPLING_RATE constant.
    """
    period = 1.0 / sampling_rate
    return np.arange(num_samples) * period
def step(x, y, nx=326):
    """Upsample y to a zero-order-hold (step) signal over the span of x.

    Returns (X, Y): X is len(x)*nx points evenly spaced over
    [x.min(), x.max()]; Y holds each y[i] repeated nx times, as float64.

    The original O(len(x)*nx) nested Python loops are replaced with a
    single np.repeat call (which also drops the Python-2-only xrange).
    """
    total = len(x) * nx
    X = np.linspace(x.min(), x.max(), total)
    # Cast to float to match the original's np.zeros(float64) fill buffer.
    Y = np.repeat(np.asarray(y, dtype=float), nx)
    return X, Y
def lerp(x, y, nx=326, nBlend=50):
    """Upsample y as a step signal, then linearly blend interior step edges.

    Returns (X, Y): X is len(x)*nx points evenly spaced over
    [x.min(), x.max()]; Y holds each y[i] for nx samples, except that around
    each interior boundary (between y[i-1] and y[i], i >= 1) the step is
    replaced by a linear ramp (np.interp) across 2*nBlend+1 samples.

    Fixes: the zero-order-hold fill is vectorized with np.repeat and the
    Python-2-only xrange becomes range; the blending pass is unchanged.
    """
    total = len(x) * nx
    X = np.linspace(x.min(), x.max(), total)
    # Step fill: y[i] repeated nx times (float, matching the np.zeros buffer).
    Y = np.repeat(np.asarray(y, dtype=float), nx)
    # Blend each interior boundary; the original skipped i == 0, so start at 1.
    for i in range(1, len(x) - 1):
        lo, hi = i * nx - nBlend, i * nx + nBlend
        X_window = X[lo:hi + 1]
        edges = (X[lo], X[hi])
        Y[lo:hi + 1] = np.interp(X_window, edges, y[i - 1:i + 1])
    return X, Y
# Time axis for the oscillator bank: num_samples points spanning the
# spectrogram's time bins, as a (1, num_samples) row matrix.
t = np.matrix(np.linspace(bins[0], bins[-1], num_samples))
# t = np.matrix(bins)
# Frequency of each spectrogram row, as a (num_rows, 1) column matrix.
f = np.matrix(freqs).transpose()
# Random initial phase per oscillator, uniform in [0, 2*pi).
phi = np.matrix(np.random.rand(num_rows)*2*np.pi)
phi = phi.transpose()
# phi = 0
f.shape, t.shape, phi.shape
((129, 1), (1, 65536), (129, 1))
# Earlier experiment: per-row lerp upsampling (kept for reference).
# a = np.zeros((num_rows, num_samples))
# for i in range(num_rows):
#     X, Y = lerp(np.arange(num_cols), Pxx[i,:], nx=SAMPLES_PER_WINDOW, nBlend=SAMPLES_PER_WINDOW/2)
#     a[i] = Y
# Amplitude per oscillator: sqrt of the spectral power, since Pxx is power
# and the oscillator bank below multiplies amplitudes.
# (The dead `a = Pxx`, which was immediately overwritten, is removed.)
a = np.sqrt(Pxx)
# Upsample each row's amplitude envelope from num_cols time bins to
# num_samples sample times with a shape-preserving (PCHIP) interpolant.
interp_a = np.zeros((num_rows, num_samples))
sample_times = np.linspace(bins[0], bins[-1], num_samples)  # loop-invariant: hoisted
for i in range(len(a)):
    interpolation = interpolate.PchipInterpolator(bins, a[i, :])
    interp_a[i, :] = interpolation(sample_times)
# Abandoned second interpolation pass along the frequency axis
# (note it indexed with i inside a loop over j — buggy, left disabled):
# interp_f = np.linspace(freqs[0], freqs[-1], num_rows*10)
# interp_a2 = np.zeros((num_rows*10, num_samples))
# for j in xrange(num_samples):
#     interpolation = interpolate.PchipInterpolator(freqs, interp_a[:,i])
#     interp_a2[:,i] = interpolation(interp_f)
# Compare the interpolated envelope (top) with the raw one (bottom).
plt.subplot(211)
plt.plot(interp_a[0, :])
plt.subplot(212)
plt.plot(a[0, :])
# [<matplotlib.lines.Line2D at 0x1033f6110>]  (pasted notebook output; commented out — not valid Python)
# (earlier PNG round-trip experiment, kept disabled for reference)
# upside_down = np.flipud(lum_spectrogram)
# plt.subplot(2, 1, 1)
# plt.imshow(lum_spectrogram)
# plt.subplot(2, 1, 2)
# plt.imshow(upside_down)
# Sanity-check operand shapes before forming the oscillator bank.
interp_a.shape, f.shape, t.shape, phi.shape
((129, 65536), (129, 1), (1, 65536), (129, 1))
# Oscillator bank: one sinusoid per spectrogram row.  f*t is a matrix
# product of a (num_rows, 1) column by a (1, num_samples) row — an outer
# product — and phi broadcasts across each row; np.multiply then applies
# each row's interpolated amplitude envelope elementwise.
oscillators = np.multiply(interp_a, np.sin(2*np.pi*f*t+phi))
oscillators.shape
(129, 65536)
# Sum all oscillators into a single (1, num_samples) matrix...
signal = oscillators.sum(axis=0)
signal.shape
(1, 65536)
# ...then flatten to a 1-D ndarray and normalize the peak amplitude to 1.
signal = np.squeeze(np.asarray(signal))
signal = signal / np.amax(np.absolute(signal))
# Compare the first 400 samples of the reconstruction (top) against the
# original recording (bottom).
plt.subplot(2, 1, 1)
plt.plot(signal[0:400])
plt.subplot(2, 1, 2)
plt.plot(violin[0:400])
# [<matplotlib.lines.Line2D at 0x105d28450>]  (pasted notebook output; commented out — not valid Python)
# Same comparison over the full length of both signals.
plt.subplot(2, 1, 1)
plt.plot(signal)
plt.subplot(2, 1, 2)
plt.plot(violin)
# [<matplotlib.lines.Line2D at 0x129c17b10>]  (pasted notebook output; commented out — not valid Python)
# Write the reconstruction (float64, normalized to [-1, 1]) and a copy of
# the source recording for listening comparison.
# NOTE(review): the source really is flute.wav despite the "violin" naming.
wavfile.write("new_signal.wav", sampling_rate, signal)
wavfile.write("original_flute.wav", sampling_rate, violin)