In [33]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.io.wavfile
import IPython

def setup_graph(title='', x_label='', y_label='', fig_size=None):
    fig = plt.figure()
    if fig_size != None:
        fig.set_size_inches(fig_size[0], fig_size[1])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

Seeing sound!

In [34]:
# NOTE: This is only works with 1 channel (mono).  To record a mono audio sample,
# you can use this command: rec -r 44100 -c 1 -b 16 test.wav
(sample_rate, input_signal) = scipy.io.wavfile.read("audio_files/vowel_ah.wav")
time_array = np.arange(0, len(input_signal)/sample_rate, 1/sample_rate)
In [35]:
setup_graph(title='Ah vowel sound', x_label='time (in seconds)', y_label='amplitude', fig_size=(14,7))
_ = plt.plot(time_array[0:4000], input_signal[0:4000])
In [36]:
#IPython.display.Audio("audio_files/vowel_ah.wav")
In [37]:
fft_out = np.fft.rfft(input_signal)
fft_mag = [np.sqrt(i.real**2 + i.imag**2)/len(fft_out) for i in fft_out]
num_samples = len(input_signal)
rfreqs = [(i*1.0/num_samples)*sample_rate for i in range(num_samples//2+1)]

setup_graph(title='FFT of Ah Vowel (first 5000)', x_label='FFT Bins', y_label='magnitude', fig_size=(14,7))
_ = plt.plot(rfreqs[0:5000], fft_mag[0:5000])

Few notes about this

  • Ratio of harmonics = timbre = This is what makes different people's voices sound different (and different from violins)
    • Even if they are singing the same note, and the same volume
  • Possible application: synthesizing new sounds (with different harmonic profiles)
    • Example: If you changed the ratio of the harmonics, you could make your voice sound like something else (Darth Vader?)

Spectrogram (FFT over time)

Axes

  • x-axis: time
  • y-axis: frequency
  • z-axis (color): strength of each frequency

See the Harmonics!

In [38]:
(doremi_sample_rate, doremi) = scipy.io.wavfile.read("audio_files/do-re-mi.wav")
In [39]:
setup_graph(title='Spectrogram of diatonic scale (44100Hz sample rate)', x_label='time (in seconds)', y_label='frequency', fig_size=(14,8))
_ = plt.specgram(doremi, Fs=doremi_sample_rate)
In [40]:
doremi_8000hz = [doremi[i] for i in range(0, len(doremi), 44100//8000)]
setup_graph(title='Spectrogram (8000Hz sample rate)', x_label='time (in seconds)', y_label='frequency', fig_size=(14,7))
_ = plt.specgram(doremi_8000hz, Fs=8000)