# Pitch-detection experiments, part 1: zero-crossing rate, RMS gating, note
# quantization and windowed autocorrelation.
#
# NOTE: this code expects the pylab namespace (numpy + matplotlib.pyplot names
# such as `plot`, `arange`, `rcParams`) to be available already, as in a
# notebook started with `%pylab inline`.

rcParams['figure.figsize'] = (16, 4)  # wide graphs by default

import essentia.standard

# Load the three test recordings as mono signals.  The sample rates are set by
# hand to 44100 (presumably MonoLoader's output rate -- confirm against the
# essentia documentation).
loader = essentia.standard.MonoLoader(filename = 'sources/180451__iluppai__alto-saxophone-solo.wav')
sax = loader()
sax_sr = 44100
plot(sax);

loader = essentia.standard.MonoLoader(filename = 'sources/109193__juskiddink__leq-acappella.wav')
voice = loader()
voice_sr = 44100

loader = essentia.standard.MonoLoader(filename = 'sources/32804__johnwally__solo-man.wav')
guitar = loader()
guitar_sr = 44100


def windowed_zcr(sig_in, winsize, hop, sr = 1.0):
    """Return the zero-crossing rate of `sig_in` over sliding windows.

    For every window of `winsize` samples (advanced by `hop` samples) the
    number of sign changes between neighbouring samples is counted and scaled
    by `sr / winsize`, i.e. expressed as crossings per unit of time.

    Returns a tuple `(times, zcr)` where `times` holds the window centres
    divided by `sr` and `zcr` the rate for each window.
    """
    n_samples = len(sig_in)
    win_start = arange(0, n_samples - winsize, hop)
    zcr = zeros((len(win_start)))
    for i, start in enumerate(win_start):
        frame = sig_in[start: start + winsize].astype(float)
        # A negative product of two consecutive samples marks a sign change.
        zcr[i] = (sr/float(winsize)) * sum(frame[:-1]*frame[1:] < 0)
    times = win_start + winsize/2
    return times/float(sr), zcr


# Waveform (left axis) against windowed ZCR (right axis) for the first 44100
# samples of each recording.
plot(linspace(0, 1, 44100), sax[:44100])
ylim((-1,1))
twinx()
plot(*windowed_zcr(sax[:44100], 1024, 128, sax_sr), color='k', lw=2);
ylabel('frequency (Hz)')
grid()

plot(linspace(0, 1, 44100), voice[:44100])
ylim((-1,1))
twinx()
plot(*windowed_zcr(voice[:44100], 1024, 128, voice_sr), color='k', lw=2);
ylabel('frequency (Hz)')
grid()

plot(linspace(0, 1, 44100), guitar[:44100])
ylim((-1,1))
twinx()
plot(*windowed_zcr(guitar[:44100], 1024, 128, guitar_sr), color='k', lw=2);
ylabel('frequency (Hz)')
grid()

# Zoom into a short stretch of the guitar recording.
plot(linspace(0, 1, 44100), guitar[:44100])
ylim((-0.2,0.2))
hlines(0, 0.55, 0.6)
twinx()
plot(*windowed_zcr(guitar[:44100], 1024, 128, guitar_sr), color='k', marker='o',lw=2);
ylabel('frequency (Hz)')
grid()
xlim((0.55, 0.6))

# Zoom into the start of the sax recording.
# NOTE(review): the zero line is drawn over 0.8-0.85 while the x-limits are
# 0.0-0.05, so it falls outside the visible range -- confirm which was meant.
plot(linspace(0, 1, 44100), sax[:44100])
ylim((-0.02,0.02))
hlines(0, 0.8, 0.85)
twinx()
plot(*windowed_zcr(sax[:44100], 1024, 128, sax_sr), color='k', marker='o',lw=2);
ylabel('frequency (Hz)')
grid()
xlim((0.0, 0.05))


def windowed_rms(input_sig, win_size, hop=None, sr=1.0):
    """Return the RMS level of `input_sig` over sliding windows.

    `hop` defaults to half of `win_size` when omitted (or 0).

    Returns a tuple `(times, rms)` where `times` holds the window centres
    divided by `sr` and `rms` is a list with one value per window.
    """
    if not hop:
        # The original referenced the undefined name `winsize` here, raising
        # NameError whenever `hop` was omitted.  Floor division keeps the hop
        # an integer so it stays usable as a slice offset.
        hop = win_size // 2
    window_start = arange(0, len(input_sig) - win_size, hop)
    rms = []
    for start in window_start:
        w = input_sig[start: start+win_size].astype(float)
        rms.append(sqrt(mean(w**2)))
    times = (window_start + win_size/2)/float(sr)
    return times, rms


rms = windowed_rms(sax[:44100], 1024, 128, sax_sr)
zcr = windowed_zcr(sax[:44100], 1024, 128, sax_sr)
plot(*rms)

# Gate the ZCR curve: keep it only where the RMS level exceeds 0.02.
rms_th = where(array(rms[1]) > 0.02, 1.0, 0.0)
plot(zcr[0], zcr[1]*rms_th)


def midi2Hz(midinote, tuning=440.0):
    """Convert a MIDI note number to Hz, with note 69 mapped to `tuning`."""
    return tuning * (2**((midinote - 69)/12.0))


midi2Hz(69), midi2Hz(60), midi2Hz(24)

num_freqs = 8*12 # eight octaves from C0
quant_freqs = [midi2Hz(i + 24) for i in range(num_freqs)]
array(quant_freqs)


def quantize_freq(freq_list, quant_freqs, quant_offset=24):
    """Map each frequency to the index of the nearest entry of `quant_freqs`.

    For every value the first grid entry strictly greater than it is located;
    the result is that index or the one below it, whichever grid entry is
    closer.  Values with no greater grid entry, or below the first grid entry,
    map to index 0.  `quant_offset` is added to every index (24 matches the
    MIDI note used for the first entry of the module-level `quant_freqs`).

    Returns an array shaped like `freq_list`.
    """
    # Work on arrays: the original compared a plain list against a scalar and
    # indexed the list with a one-element ndarray, which only works with some
    # input types / NumPy versions.
    freqs = asarray(freq_list)
    grid = asarray(quant_freqs, dtype=float)
    quantized = zeros_like(freqs)
    for i, freq in enumerate(freqs):
        above = argwhere(grid > freq)
        if above.size == 0 or above[0, 0] == 0:
            quantized[i] = 0
            continue
        upper = int(above[0, 0])
        if grid[upper] - freq > freq - grid[upper - 1]:
            quantized[i] = upper - 1
        else:
            quantized[i] = upper
    return quantized + quant_offset


rms_th = where(array(rms[1]) > 0.02, 1.0, 0.0)
quantized = quantize_freq(zcr[1], quant_freqs)
plot(zcr[0], quantized*rms_th)

plot(zcr[0], quantized*rms_th)
#ylim((50, 75))
grid()

44100.0/512


def windowed_acorr(input_sig, win_size, hop=None, sr=1.0, maxlags=None):
    """Return the autocorrelation of `input_sig` over sliding windows.

    `hop` defaults to half and `maxlags` to a quarter of `win_size`.  Each
    window is passed to pylab's `acorr`, which draws into the current figure;
    the figure is cleared again before returning.

    Returns `(times, lags, acorrfs)`: the window centres divided by `sr`, the
    lag axis reported by `acorr`, and a list with one autocorrelation array
    per window.
    """
    # Floor division keeps both defaults integral (true division would hand
    # floats to the slicing below and to `acorr`).
    if not hop:
        hop = win_size // 2
    if not maxlags:
        maxlags = win_size // 4
    window_start = arange(0, len(input_sig) - win_size, hop)
    # Same lag axis `acorr` reports; keeps `lags` defined when the signal is
    # too short to yield a single window.
    lags = arange(-maxlags, maxlags + 1)
    acorrfs = []
    for start in window_start:
        w = input_sig[start: start+win_size]
        lags, acorr_inst, lines, line = acorr(w, maxlags=maxlags)
        acorrfs.append(acorr_inst)
    times = (window_start + win_size/2)/float(sr)
    clf()  # discard what the acorr calls drew
    return times, lags, acorrfs


times, lags, acorrfs = windowed_acorr(sax[:44100], 2048, 512, sax_sr)
imshow(array(acorrfs).T, aspect='auto')
yticks(linspace(0, 1024, 5), linspace(-512, 512, 5).astype(int));

plot(sax[:44100])

array(acorrfs).shape

# Shortest period (in samples) considered, from the highest frequency of
# interest.
fmax = 4000.0
L = 44100.0/fmax
L

# Strongest autocorrelation peak among the negative lags, excluding the L
# lags next to zero lag (column 512).  int() keeps the slice bound integral
# regardless of what round() returns.
apeaks = argmax(array(acorrfs)[:,:512 - int(round(L))], axis=1)
imshow(array(acorrfs).T, aspect='auto')
plot(apeaks, color='w', lw=3)
# Pitch-detection experiments, part 2: pitch from autocorrelation peaks.

# Axis limits/ticks for the autocorrelation image drawn just before this
# point: show only the negative-lag half.
xlim((0, 83))
ylim((0, 520))
yticks(linspace(512, 0, 5), linspace(0, -512, 5).astype(int));

plot(lags, acorrfs[30])
grid()

# Lag span used for the sax autocorrelation computed earlier (windowed_acorr
# defaults to a quarter of its 2048-sample window).  Column `sax_maxlags` of
# each autocorrelation is zero lag, so `sax_maxlags - apeaks` is the detected
# period in samples.
sax_maxlags = 2048 // 4
plot(44100.0/(sax_maxlags - apeaks))

times, sax_rms = windowed_rms(sax[:44100], 2048, 512, sax_sr)
plot(times, sax_rms)

# Zero the frequency estimate wherever the RMS level is 0.02 or lower.
sax_rms_th = where(array(sax_rms) > 0.02, 1.0, 0.0)
sax_freqs = sax_rms_th * 44100.0/(sax_maxlags - apeaks)
plot(sax_freqs)

quantized = quantize_freq(sax_freqs, quant_freqs)
plot(sax_freqs)
plot(440 * 2**((quantized - 69)/12.0))

plot(sax_freqs)
plot(440 * 2**((quantized - 69)/12.0))
ylim((150, 200))
legend(['measured', 'quantized'], loc='best')
ylabel('Freq (Hz)')
grid()

quantized[30], quantized[60]

plot(quantized, 'g')
grid()
ylim((50, 60))

# --- Guitar -----------------------------------------------------------------
win_size = 2048
hop = 512
# The original indexed the lag axis with `hop`, which only worked because hop
# happens to equal the lag span (win_size/4 = 512).  Name the lag span and
# pass it explicitly so the two stay independent.
maxlags = win_size // 4
times, lags, acorrfs = windowed_acorr(guitar[:44100], win_size, hop, guitar_sr, maxlags)
fmax = 3000.0
L = 44100.0/fmax
# int() keeps the slice bound integral regardless of what round() returns.
apeaks = argmax(array(acorrfs)[:,:maxlags - int(round(L))], axis=1)
times, rms = windowed_rms(guitar[:44100], win_size, hop, guitar_sr)
rms_th = where(array(rms) > 0.02, 1.0, 0.0)
detected_freqs = rms_th * 44100.0/(maxlags - apeaks)
quantized = quantize_freq(detected_freqs, quant_freqs)
imshow(array(acorrfs).T, aspect='auto')
plot(apeaks, color='w', lw=3)
xlim((0, 83))
ylim((0, 520))
yticks(linspace(512, 0, 5), linspace(0, -512, 5).astype(int));

plot(detected_freqs)
plot(440 * 2**((quantized - 69)/12.0))
ylim((250, 450))
legend(['measured', 'quantized'], loc='best')
grid()

plot(quantized)
ylim((55, 70))
grid()

# --- Voice ------------------------------------------------------------------
win_size = 2048
hop = 512
maxlags = win_size // 4
times, lags, acorrfs = windowed_acorr(voice[:44100], win_size, hop, voice_sr, maxlags)
fmax = 3000.0
L = 44100.0/fmax
apeaks = argmax(array(acorrfs)[:,:maxlags - int(round(L))], axis=1)
times, rms = windowed_rms(voice[:44100], win_size, hop, voice_sr)
rms_th = where(array(rms) > 0.02, 1.0, 0.0)
detected_freqs = rms_th * 44100.0/(maxlags - apeaks)
quantized = quantize_freq(detected_freqs, quant_freqs)
imshow(array(acorrfs).T, aspect='auto')
plot(apeaks, color='w', lw=3)
xlim((0, 83))
ylim((0, 520))
yticks(linspace(512, 0, 5), linspace(0, -512, 5).astype(int));
# Pitch-detection experiments, part 3: spectrogram peaks, AMDF and cepstrum.

# Measured vs. quantized pitch for the voice analysis computed just before
# this point.
plot(detected_freqs)
plot(440 * 2**((quantized - 69)/12.0))
ylim((150, 450))
legend(['measured', 'quantized'], loc='best')
grid()

plot(quantized)
ylim((50, 65))
grid()

# --- Spectrogram peak picking -----------------------------------------------
# specgram returns (spectrum, freqs, t, im); the original unpacked the second
# and third values under swapped names.
Pxx, freqs, times, line = specgram(sax[:44100], NFFT=2048, noverlap=512+1024, Fs=sax_sr)
peaks = argmax(Pxx, axis=0)
peaks.shape

plot(peaks)

Pxx, freqs, times, line = specgram(sax[:44100], NFFT=4096, noverlap=2048, Fs=sax_sr)
peaks = argmax(Pxx, axis=0)
# Bin index -> Hz for a 4096-point FFT.
plot(sax_sr* peaks/4096.0)
grid()

# Bin indices ordered by increasing power in every frame; the last rows are
# the strongest bins.
peaks = argsort(Pxx, axis=0)
peaks.shape

plot(peaks[-4:].T);

hist(peaks[-4:].flat, bins=100);

plot(sax_sr* peaks[-4:].T/4096.0);

plot(sax_sr* peaks[-6:].T/4096.0);

plot(sax_sr* peaks[-6:].T/4096.0);
ylim((150, 300))
grid()

midi2Hz(53), midi2Hz(55)

plot(sax_sr* peaks[-6:].T/4096.0);
ylim((150, 200))
grid()
hlines((midi2Hz(53), midi2Hz(55)), 0, 20)


def windowed_amdf(input_sig, win_size, hop=None, sr=1.0, maxlags=None, k = 1):
    """Return the average magnitude difference function over sliding windows.

    For every window of `win_size` samples (advanced by `hop`) and every lag
    in `range(maxlags)`, the sum of `abs(window - lagged_window)**k` is
    computed.  `hop` defaults to half and `maxlags` to a quarter of
    `win_size`.

    Returns `(times, amdfs)`: the window centres divided by `sr`, and a list
    holding one list of `maxlags` values per window.
    """
    # Floor division keeps the defaults integral; `range(maxlags)` below
    # rejects a float.
    if not hop:
        hop = win_size // 2
    if not maxlags:
        maxlags = win_size // 4
    # Stop early enough that the most-lagged window still fits in the signal.
    window_start = arange(0, len(input_sig) - win_size - maxlags, hop)
    amdfs = []
    for start in window_start:
        w = input_sig[start: start+win_size].astype(float)
        per_lag = []
        for lag in range(maxlags):
            wm = input_sig[start + lag: start+win_size + lag].astype(float)
            per_lag.append(sum(abs(w - wm)**k))
        amdfs.append(per_lag)
    times = (window_start + win_size/2)/float(sr)
    return times, amdfs


# --- AMDF: sax --------------------------------------------------------------
times, amdfs = windowed_amdf(sax[:44100], win_size=2048, hop=1024, sr=44100, maxlags=1024, k=1)
imshow(array(amdfs).T, aspect='auto')
colorbar()

plot(amdfs[20][:])

# Deepest AMDF minimum per window, ignoring lags below 70 samples.
minima_at = argmin(array(amdfs)[:,70:], axis=1) + 70
plot(minima_at)

# float() avoids integer division truncating the frequencies when both
# operands are integers (Python 2 semantics).
freqs = float(sax_sr)/minima_at
plot(freqs)

plot(freqs)
ylim((50, 250))
grid()

plot(quantize_freq(freqs, quant_freqs))
ylim((30, 60))
grid()

# --- AMDF: voice ------------------------------------------------------------
times, amdfs = windowed_amdf(voice[:44100], win_size=2048, hop=1024, sr=44100, maxlags=1024, k=1)
imshow(array(amdfs).T, aspect='auto')
colorbar()
minima_at = argmin(array(amdfs)[:,20:], axis=1) + 20
freqs = float(voice_sr)/minima_at
plot(quantize_freq(freqs, quant_freqs))
ylim((50, 65))
grid()

# --- AMDF: guitar -----------------------------------------------------------
times, amdfs = windowed_amdf(guitar[:44100], win_size=2048, hop=1024, sr=44100, maxlags=1024, k=1)
imshow(array(amdfs).T, aspect='auto')
colorbar()
minima_at = argmin(array(amdfs)[:,20:], axis=1) + 20
freqs = float(guitar_sr)/minima_at
plot(quantize_freq(freqs, quant_freqs))
ylim((55, 70))
grid()

# --- Cepstrum: sax ----------------------------------------------------------
Pxx, freqs, times, im = specgram(sax[:44100], NFFT=2048, noverlap=1024, Fs=sax_sr, scale_by_freq=False)
imshow(log10(Pxx), aspect='auto')

# Squared real part of the FFT (along the frequency axis) of the log spectrum.
log_spec = log10(Pxx)[:]
cepstrum = real(fft.rfft(log_spec, axis=0))**2
imshow(cepstrum[100:,:], aspect='auto')

plot(cepstrum[50:,15])
plot(cepstrum[50:,32])
plot(cepstrum[50:,5])

plot(log10(Pxx[:,15]))
plot(log10(Pxx[:,35]))
xlim((0, 100))

# Strongest cepstral bin per frame, ignoring the first 50 bins.
maxima = argmax(cepstrum[50:], axis=0) + 50
plot(maxima)

plot(maxima)
ylim((100, 150))
grid()

maxima[15], maxima[30]

22050.0/maxima[15], 22050.0/maxima[30]

# Cepstral bin -> Hz, using 22050 (half of the 44100 sample rate).
freqs = 22050.0/maxima
plot(freqs)
grid()

plot(quantize_freq(freqs, quant_freqs))
grid()

# --- Cepstrum: voice --------------------------------------------------------
Pxx, freqs, times, im = specgram(voice[:44100], NFFT=2048, noverlap=1024, Fs=voice_sr, scale_by_freq=False)
log_spec = log10(Pxx)
cepstrum = real(fft.rfft(log_spec, axis=0))**2
maxima = argmax(cepstrum[50:], axis=0) + 50
freqs = 22050.0/maxima
clf()  # drop the spectrogram image drawn by specgram
plot(freqs)
plot(quantize_freq(freqs, quant_freqs))

# --- Cepstrum: guitar -------------------------------------------------------
Pxx, freqs, times, im = specgram(guitar[:44100], NFFT=2048, noverlap=1024, Fs=guitar_sr, scale_by_freq=False)
log_spec = log10(Pxx)
cepstrum = real(fft.rfft(log_spec, axis=0))**2
maxima = argmax(cepstrum[50:], axis=0) + 50
freqs = 22050.0/maxima
clf()
plot(freqs)
grid()
plot(quantize_freq(freqs, quant_freqs))
grid()