%pylab inline rcParams['figure.figsize'] = (10, 4) #wide graphs by default from __future__ import print_function from __future__ import division from scipy.io import wavfile sr, e = wavfile.read('e.wav') sr from IPython.display import Audio Audio(e, rate=sr) plot(e) start = 4000 fourier_trans = rfft(e[start:start + 2048]* hanning(2048), n=4096) mag_spectrum_e = abs(fourier_trans) plot(mag_spectrum_e) log_mag_spec_e = log(mag_spectrum_e) plot(log_mag_spec_e[:1000]) freqs = linspace(0, sr/2, 2049) plot(freqs, log_mag_spec_e) xlabel('Frequency (Hz)') ylabel('Log amplitude') xlim((0, 4000)) grid() sr, a = wavfile.read('a.wav') plot(a) Audio(a, rate=sr) start = 4000 fourier_trans = rfft(a[start:start + 2048]* hanning(2048), n=4096) mag_spectrum_a = abs(fourier_trans) log_mag_spec_a = log(mag_spectrum_a) freqs = linspace(0, sr/2, 2049) plot(freqs, log_mag_spec_a) xlabel('Frequency (Hz)') ylabel('Log amplitude') xlim((0, 4000)) grid() sr, o = wavfile.read('o.wav') plot(o) Audio(o, rate=sr) start = 4000 fourier_trans = rfft(o[start:start + 2048]* hanning(2048), n=4096) mag_spectrum_o = abs(fourier_trans) log_mag_spec_o = log(mag_spectrum_o) plot(log_mag_spec_o[:1000]) figure() plot(mag_spectrum_o[:1000]) plot(log_mag_spec_a[:1000], alpha=0.5) plot(log_mag_spec_e[:1000], alpha=0.5) plot(log_mag_spec_o[:1000], alpha=0.5) legend(['a','e','o']) cepstrum_a = ifft(log_mag_spec_a) plot(cepstrum_a[1:]) plot(cepstrum_a[:]) pulse = list(r_[1, zeros(50)])*40 plot(pulse) plot(abs(rfft(pulse))) filtered = abs(rfft(pulse)) * cos(linspace(0, 0.5*pi, len(rfft(pulse)), endpoint=False)) plot(filtered) plot(abs(rfft(filtered))) n0 = argmax(cepstrum_a[1:])+ 1 n0 cepstrum_filter_a = abs(fft.fft(cepstrum_a[:n0 - 1], n=2048)) plot(cepstrum_filter_a[:1000]) grid() plot(cepstrum_filter_a, 'green', lw=3) twinx() plot(log_mag_spec_a) xlim((0,1000)) source_coeffs = r_[zeros(n0),cepstrum_a[n0:]] cepstrum_source_a = fft.fft(source_coeffs, n=2048) plot(abs(cepstrum_source_a)[:1025]) source_spec = np.e ** (abs(cepstrum_source_a)[:1025]) plot(source_spec) source = fft.irfft(source_spec) plot(source) source_cycled = list(source * 500) * 50 plot(source_cycled) Audio(source_cycled, rate=sr*2) cepstrum_e = ifft(log_mag_spec_e) n0 = argmax(cepstrum_e[1:])+ 1 cepstrum_filter_e = abs(fft.fft(cepstrum_e[:n0 - 1], n=2048)) plot(abs(cepstrum_filter_e), 'green', lw=3) twinx() plot(log_mag_spec_e) xlim((0,1000)) cepstrum_o = ifft(log_mag_spec_o) n0 = argmax(cepstrum_o[10:1024])+ 10 # had to do some minor manual adjustments here! cepstrum_filter_o = abs(fft.fft(cepstrum_o[:n0 - 1], n=2048)) plot(cepstrum_filter_o, 'green', lw=3) twinx() plot(log_mag_spec_o) xlim((0,1000)) freqs = linspace(0, sr/2, 2048) plot(freqs, cepstrum_filter_a) plot(freqs, cepstrum_filter_e) plot(freqs, cepstrum_filter_o) legend(['a','e','o']) xlabel('Frequency (Hz)') grid() xlim((0, 4000)) title('Cepstra extracted filter for vowels'); num_coeffs = [10, 15, 30, 50] for n in num_coeffs: cepstrum_filter = abs(fft.fft(cepstrum_a[:n], n=512)) plot(abs(cepstrum_filter)[:250]) legend(num_coeffs) grid() title('Different number of coefficients for Cepstral filter ("a")'); num_coeffs = [10, 15, 30,50] for n in num_coeffs: cepstrum_filter = abs(fft.fft(cepstrum_e[:n], n=512)) plot(abs(cepstrum_filter)[:250]) legend(num_coeffs) grid() title('Different number of coefficients for Cepstral filter ("e")'); num_coeffs = [10, 15, 30,50] for n in num_coeffs: cepstrum_filter = abs(fft.fft(cepstrum_o[:n], n=512)) plot(abs(cepstrum_filter)[:250]) legend(num_coeffs) grid() title('Different number of coefficients for Cepstral filter ("o")'); N = 1024 k = 0 phs = linspace(k * 0.5*pi/N, (k * pi *(N-0.5))/N, N) plot(cos(phs)) k = 1 phs = linspace(k * 0.5*pi/N, (k * pi *(N-0.5))/N, N) plot(cos(phs)) k = 2 phs = linspace(k * 0.5*pi/N, (k * pi *(N-0.5))/N, N) plot(cos(phs)) k = 1023 phs = linspace(k * 0.5*pi/N, (k * pi *(N-0.5))/N, N) plot(cos(phs)) phs = linspace(k * 0.5*pi/N, (k * pi *(N-0.5))/N, N) plot(cos(phs)[0:100]) from scipy.fftpack import dct cepstrum_dct_o = dct(log_mag_spec_o) n0 = argmax(cepstrum_dct_o[10:1024])+ 10 # had to do some minor manual adjustments here! cepstrum_dct_o = abs(fft.fft(cepstrum_dct_o[:n0 - 1], n=4096)) plot(cepstrum_dct_o, 'green', lw=3) twinx() plot((cepstrum_filter_o), 'r') twinx() plot(log_mag_spec_o) xlim((0,1000)) plot(cepstrum_a[1:200]) len(cepstrum_a) plot(cepstrum_e[1:200]) plot(cepstrum_o[1:200]) argmax(cepstrum_a[50:100])+ 50 argmax(cepstrum_e[50:100])+ 50 argmax(cepstrum_o[50:100])+ 50 # Cepstrum was oversampled f_a = sr /(2.0*(argmax(cepstrum_a[50:100])+ 50)) f_a f_e = sr /(2.0 * (argmax(cepstrum_e[50:100])+ 50)) f_e f_o = sr /(2.0 * (argmax(cepstrum_o[50:100])+ 50)) f_o freqs = linspace(0, sr/2, 2049) plot(freqs, log_mag_spec_a) xlim((0, 500)) vlines(f_a, 0, 14) grid() freqs = linspace(0, sr/2, 2049) plot(freqs, log_mag_spec_e) xlim((0, 500)) vlines(f_e, 0, 14) grid() freqs = linspace(0, sr/2, 2049) plot(freqs, log_mag_spec_o) xlim((0, 500)) vlines(f_o, 0, 16) grid() noise = 5000.0 * (random.random(2048) - 0.5) fourier_trans = rfft(noise * hanning(2048), n=4096) mag_spectrum_noise = abs(fourier_trans) log_mag_spec_noise = log(mag_spectrum_noise) plot(log_mag_spec_noise) cepstrum_noise = ifft(log_mag_spec_noise) plot(abs(cepstrum_noise[1:])) sinsig = 2500.0 * ((sin(linspace(0, 20*2*pi,2048))) + (sin(linspace(0, 40*2*pi,2048)))) fourier_trans = rfft(sinsig * hanning(2048), n=4096) mag_spectrum_sinsig = abs(fourier_trans) log_mag_spec_sinsig = log(mag_spectrum_sinsig) plot(log_mag_spec_sinsig) cepstrum_sinsig = ifft(log_mag_spec_sinsig) plot(abs(cepstrum_sinsig[1:])) Pxx, freqs, bins, im = specgram(e, 2048, 16000, noverlap=1024, pad_to=8192) Pxx.shape plot(Pxx[:,0]) maxima = argwhere((Pxx[:-2,0] < Pxx[1:-1,0]) & (Pxx[2:,0] < Pxx[1:-1,0])) + 1 plot(Pxx[:,0]) plot(maxima, Pxx[maxima, 0], 'o') peaks = [index for index in maxima if Pxx[index, 0] > 1000] peaks plot(Pxx[:,0]) plot(peaks, Pxx[peaks, 0], 'o') peak_list = [] for spec in Pxx.T: maxima = argwhere((spec[:-2] < spec[1:-1]) & (spec[2:] < spec[1:-1])) + 1 peaks = [(freqs[index][0], spec[index][0]) for index in maxima if spec[index] > 1000] peak_list.append(peaks) A = (4,5) A[0] = 2 peak_list[0] array(peak_list[0]) array(peak_list[0])[0] array(peak_list[0])[0, 0] array(peak_list[0])[:,0] array(peak_list[0])[:,1] # amplitudes for i, peaks in enumerate(peak_list): freqs = array(peaks)[:,0] plot(ones(len(freqs))*i, freqs, 'o') specgram(e, 2048, 16000, noverlap=1024, pad_to=8192) for i, peaks in enumerate(peak_list): freqs = array(peaks)[:,0] plot(ones(len(freqs))*bins[i], freqs, 'o') specgram(e, 2048, 16000, noverlap=1024, pad_to=8192, interpolation='nearest') for i, peaks in enumerate(peak_list): freqs = array(peaks)[:,0] plot(ones(len(freqs))*bins[i], freqs, 'o') ylim((0, 1000)) specgram(e, 2048, 16000, noverlap=1024, pad_to=8192, interpolation='nearest') for i, peaks in enumerate(peak_list): freqs = array(peaks)[:,0] plot(ones(len(freqs))*bins[i], freqs, 'o') ylim((1500, 3000)) tracks = [[r_[freq, amp, bins[0]]] for freq, amp in peak_list.pop(0)] tracks tracks_ = array(tracks) tracks_, tracks_.shape new_peaks = peak_list.pop(0) new_peaks f = new_peaks[0][0] f tracks[:] last_bps_freq = [] last_bps_time = [] for bps in tracks: last_bps_freq.append(bps[-1][0]) # get last breakpoint for all tracks last_bps_time.append(bps[-1][2]) # get last breakpoint for all tracks print(last_bps_freq) tracks[:] previous_frame_time = bins[0] previous_frame_time active_tracks = argwhere(last_bps_time == previous_frame_time) active_tracks = array(active_tracks) active_tracks prev_freqs = array(tracks)[active_tracks,-1,0] prev_freqs dists = abs(prev_freqs - f) dists argmin(dists) active_tracks[argmin(dists)] best_matches = [] for peak in new_peaks: f = peak[0] dists = abs(prev_freqs - f) best_matches.append([active_tracks[argmin(dists)], dists.min()]) best_matches = array(best_matches) best_matches argwhere(best_matches[:, 0] == 0) argwhere(best_matches[:, 0] == 3) best_matches[argwhere(best_matches[:, 0] == 3)] best_matches[argwhere(best_matches[:, 0] == 3)][:,:,1] best_next = argmin(best_matches[argwhere(best_matches[:, 0] == 3)][:,:,1]) best_next best_next += argwhere(best_matches[:, 0] == 3)[0] best_next dist_th = 500 # Set maximum distance allowed for connection if best_matches[best_next,1] < dist_th: print("Match!") Pxx, freqs, bins, im = specgram(e, 2048, 16000, noverlap=1024, pad_to=8192) peak_list = [] for spec in Pxx.T: maxima = argwhere((spec[:-2] < spec[1:-1]) & (spec[2:] < spec[1:-1])) + 1 peaks = [(freqs[index], spec[index]) for index in maxima if spec[index] > 1000] peak_list.append(peaks) tracks = [[r_[freq, amp, bins[0]]] for freq, amp in peak_list.pop(0)] #inital tracks from initial peaks tracks = array(tracks) new_peaks = peak_list.pop(0) last_bps = tracks[:,-1,:] last_bps[:,2] previous_frame_time = 0.064 active_tracks = argwhere(last_bps[:,2] == previous_frame_time) active_tracks = array(active_tracks) prev_freqs = tracks[active_tracks,-1,0] best_matches = [] for peak in new_peaks: f = peak[0] dists = abs(prev_freqs - f) best_matches.append([active_tracks[argmin(dists)][0], dists.min()]) best_matches = array(best_matches) connections = dict() for i in set(best_matches[:, 0]): best_next = argmin(best_matches[argwhere(best_matches[:, 0] == i)][:,:,1]) best_next += argwhere(best_matches[:, 0] == i)[0] connections[int(i)] = best_next[0] connections