%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.io.wavfile
def setup_graph(title='', x_label='', y_label='', fig_size=None):
    """Create a new figure containing one titled, labelled subplot.

    The figure is created with ``plt.figure()``, so later ``plt.plot`` /
    ``plt.specgram`` calls draw onto it.

    Args:
        title: Text placed above the axes.
        x_label: Label for the horizontal axis.
        y_label: Label for the vertical axis.
        fig_size: Optional ``(width, height)`` pair in inches. When omitted,
            the figure keeps matplotlib's default size.
    """
    fig = plt.figure()
    # Identity check: `!= None` is evaluated element-wise for array-like
    # sizes, which would make the `if` ambiguous and raise.
    if fig_size is not None:
        fig.set_size_inches(fig_size[0], fig_size[1])
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
# Procedure:
# Build a constant test signal, f(t) = 5, sampled uniformly for `total_time`
# seconds, and plot it.
sample_rate = 100  # in samples per second
total_time = 10  # in seconds

# endpoint=False keeps the spacing at exactly 1 / sample_rate; including the
# endpoint would spread the same number of points over one interval fewer.
t = np.linspace(0, total_time, total_time * sample_rate, endpoint=False)

# Plain Python list with one constant sample per time point.
original = [5] * len(t)

setup_graph(title='f(x) = 5 function', x_label='time (in seconds)', y_label='amplitude')
_ = plt.plot(t, original)
# Analysis-window parameters shared by the windowing demo below.
window_size = 100  # 100 points (which is 1 second in this case)
hop_size = window_size // 2  # consecutive windows overlap by half their length

# NumPy's Hamming window; the `scipy.hamming` alias is not available in
# current SciPy releases.
window = np.hamming(window_size)
def flatten(lst):
    """Concatenate the items of every sub-iterable in *lst* into one list.

    Only one level of nesting is removed; the order of the items is kept.
    """
    flat = []
    for group in lst:
        flat.extend(group)
    return flat
# Time-axis slice covered by each window position (one start every hop_size
# samples; the range stop is exclusive).
window_times = [
    t[start:start + window_size]
    for start in range(0, len(original) - window_size, hop_size)
]

# Pair every time slice with the window curve, then flatten the pairs into
# the alternating x, y, x, y, ... argument list passed to plt.plot.
window_graphs = [[time_slice, window] for time_slice in window_times]
flattened_window_graphs = flatten(window_graphs)

setup_graph(title='Hamming windows', x_label='time (in seconds)', y_label='amplitude', fig_size=(14,5))
_ = plt.plot(*flattened_window_graphs)
# Multiply each half-overlapping segment of the signal by the window, then
# add the windowed segments back together at their original offsets.
window_starts = range(0, len(original) - window_size, hop_size)
windowed = [window * original[i:i + window_size] for i in window_starts]

# np.zeros replaces the `scipy.zeros` alias, which is not available in
# current SciPy releases.
convoluted = np.zeros(total_time * sample_rate)
for i, segment in zip(window_starts, windowed):
    convoluted[i:i + window_size] += segment

setup_graph(title='Resynthesized windowed parts (vs original)', x_label='time (in seconds)', y_label='amplitude', fig_size=(14,5))
_ = plt.plot(t, original, t, convoluted)
def stft(input_data, sample_rate, window_size, hop_size):
    """Compute a short-time Fourier transform using a Hamming window.

    Args:
        input_data: One-dimensional sequence of samples.
        sample_rate: Not used by the computation; kept in the signature for
            existing callers.
        window_size: Number of samples in each analysis frame.
        hop_size: Number of samples between the starts of successive frames.

    Returns:
        A complex array with one row per frame and ``window_size`` columns.
    """
    # NumPy equivalents are used because `scipy.hamming` / `scipy.array` are
    # not available in current SciPy, and `scipy.fft` is a module there, not
    # a callable.
    window = np.hamming(window_size)
    # The range stop is exclusive, so a frame that would end exactly at
    # len(input_data) is not produced; istft walks the same range.
    frame_starts = range(0, len(input_data) - window_size, hop_size)
    return np.array([
        np.fft.fft(window * input_data[i:i + window_size])
        for i in frame_starts
    ])
def istft(input_data, sample_rate, window_size, hop_size, total_time):
    """Rebuild a time-domain signal from STFT frames by overlap-add.

    Each frame is inverse-transformed and its real part is added into the
    output at that frame's start offset. No window compensation is applied.

    Args:
        input_data: Indexable collection of frequency-domain frames; frame
            ``n`` is placed at offset ``n * hop_size``.
        sample_rate: Samples per second of the output signal.
        window_size: Number of samples in each frame.
        hop_size: Number of samples between the starts of successive frames.
        total_time: Duration of the output in seconds.

    Returns:
        A real array of ``round(total_time * sample_rate)`` samples.
    """
    # np.zeros needs an integer length; rounding lets a float duration such
    # as 2.5 s work as well as an int one.
    output = np.zeros(int(round(total_time * sample_rate)))
    for n, i in enumerate(range(0, len(output) - window_size, hop_size)):
        # NumPy equivalents of the `scipy.real` / `scipy.ifft` aliases, which
        # are not available in current SciPy releases.
        output[i:i + window_size] += np.real(np.fft.ifft(input_data[n]))
    return output
# Load the recording; wavfile.read returns (sample rate, sample data).
(doremi_sample_rate, doremi) = scipy.io.wavfile.read("audio_files/do-re-mi.wav")

# Reduce the rate to roughly 8 kHz by keeping every `decimation`-th sample.
# The step comes from the file's real sample rate rather than an assumed
# 44100, and is at least 1 so a low-rate file does not give a zero step.
# NOTE(review): plain sample dropping, no anti-aliasing filter is applied.
decimation = max(1, doremi_sample_rate // 8000)
doremi_8000hz = doremi[::decimation]

# Integer division means the resulting rate is usually not exactly 8000 Hz
# (44100 // 8000 == 5 gives 8820 Hz). Pass the true rate to specgram so the
# time and frequency axes are scaled correctly.
doremi_effective_rate = doremi_sample_rate / decimation

# One spectrogram per FFT window size, to compare the time/frequency
# resolution trade-off.
for nfft in (256, 512, 1024, 2048, 8000):
    setup_graph(title='Spectrogram (window size = %d)' % nfft, x_label='time (in seconds)', y_label='frequency', fig_size=(14,7))
    _ = plt.specgram(doremi_8000hz, Fs=doremi_effective_rate, NFFT=nfft)

# Cleanup to reduce notebook size
del doremi, doremi_8000hz, _