from scipy.io import wavfile sr, src = wavfile.read('sources/THX.wav') sr2, src2 = wavfile.read('sources/passport.wav') # Downmix src_mono = sum(src.astype(float)/src.shape[1], axis=1).astype(src.dtype) # Normalize abs_max = max(abs(src_mono.min().astype(float)), abs(src_mono.max().astype(float))) src_mono_norm = src_mono.astype(float) / abs_max #Decimate #from scipy.signal import decimate #src_mono_norm_dec = decimate(src_mono_norm, 4) #sr = sr/4.0 rcParams['figure.figsize'] = (16, 4) #wide graphs by default def windowed_rms(input_sig, win_sizes = [512, 1024, 2048, 4096], hop=None): rms_windows = [] for win_size in win_sizes: if not hop: hop = win_size/2 window_start = arange(0, len(input_sig) - win_size, hop) rms = [] for start in window_start: w = input_sig[start: start+win_size].astype(float) rms_inst = sqrt(mean(w**2)) rms.append(rms_inst) rms_windows.append(rms) return rms_windows, win_sizes L = 0.1 win_len = sr * L w_rms, win_lens = windowed_rms(src_mono_norm, win_sizes=[win_len]) sample_dur_secs = len(src_mono_norm)/float(sr) time = linspace(0, sample_dur_secs, len(w_rms[0])) tc = sum(w_rms[0]*time)/sum(w_rms[0]) print tc, "Temporal Centroid" len(w_rms[0]) plot(linspace(0, sample_dur_secs, len(src_mono_norm)), src_mono_norm, alpha=0.5) vlines(tc, -1, 1, lw=4) grid() L = 0.1 win_len = sr * L w_rms, win_lens = windowed_rms(src2, win_sizes=[win_len]) sample_dur_secs = len(src2)/float(sr2) time = linspace(0, sample_dur_secs, len(w_rms[0])) tc = sum(w_rms[0]*time)/sum(w_rms[0]) print tc, "Temporal Centroid" plot(linspace(0, sample_dur_secs, len(src2)), src2, alpha=0.5) vlines(tc, -40000, 40000, lw=4) grid() print "%.2f%% Temporal Centroid"%(100* tc/sample_dur_secs) absmax = max(src2.max(), abs(src2.min())) th = absmax * 0.4 eff_dur = argwhere(abs(src2) > th) len(eff_dur), len(src2) print "%.2f%% effective duration"%(len(eff_dur)/ float( len(src2)) * 100) absmax = max(w_rms[0]) th = absmax * 0.4 eff_dur = argwhere(w_rms[0] > th) len(eff_dur), len(w_rms[0]) print "%.2f%% effective duration"%(len(eff_dur)/ float( len(w_rms[0])) * 100) absmax = max(src2.max(), abs(src2.min())) db_th = -60 th = absmax * pow(10, db_th/20.0) eff_dur = argwhere(src2 > th) print "%.2f%% effective duration"%(len(eff_dur)/ float( len(src2)) * 100) in_sig = src2.astype(float) zc = argwhere((in_sig[:-1]*in_sig[1:]) < 0) zc = r_[zc, argwhere(in_sig == 0)] plot(zc, zeros_like(zc), 'o') plot(in_sig) plot(zc, zeros_like(zc), 'o') grid() xlim(50000, 50400); ylim((-10000, 10000)); in_sig_sr = sr2 in_sig_dur = len(in_sig)/float(in_sig_sr) zcr = len(zc)/in_sig_dur print zcr, 'Zero Crossing Rate' def windowed_zcr(sig_in, winsize, hop, sr = 1.0): l = len(sig_in) win_start = arange(0, l - winsize, hop) zcr = zeros((len(win_start))) for i, start in enumerate(win_start): sl = sig_in[start: start + winsize].astype(float) zcr[i] = (sr/float(winsize)) * sum(sl[:-1]*sl[1:] < 0) times = win_start + winsize/2 return times/float(sr), zcr times, zcr = windowed_zcr(src2, 4410, 2205, sr2) plot(times, zcr) in_sig = src_mono_norm in_sig_sr = sr in_sig_dur = len(in_sig)/float(in_sig_sr) zcr = len(zc)/in_sig_dur print zcr, 'Zero Crossing Rate' times, zcr2 = windowed_zcr(in_sig, 4410, 2205, in_sig_sr) plot(times, zcr2) hlines(zcr, 0, times.max(), color='r') hlines(zcr2.mean(), 0, times.max(), color='g') grid() N = 1000 sr = 44100 P = N/float(sr) f = 1.0/P f ccf = [] N = 5000 for i in range(N): cc = sum(src_mono_norm[:-N]*src_mono_norm[i:-N+i]) ccf.append(cc) plot(ccf) grid() ccf = [] N = 1000 for i in range(N): cc = sum(src2[:-N]*src2[i:-N+i]) ccf.append(cc) plot(ccf) acorr(src_mono_norm, maxlags = 10) specgram(src_mono_norm); Pxx, freqs, times, im = specgram(src_mono_norm, NFFT=2048, Fs=sr, window=window_hanning, noverlap=1024); Pxx.shape freqs.shape times.shape im plot(Pxx[:, 99]) title('The 100th spectral frame') times[99] plot(freqs,Pxx[:, 99]) title('The 100th spectral frame') semilogx(freqs,Pxx[:, 99]) title('The 100th spectral frame') plot(freqs,Pxx[:, 199]) title('The 200th spectral frame') loglog(freqs,Pxx[:, 199]) title('The 200th spectral frame') X = sqrt(Pxx) sc = sum(X[:,99]*freqs)/sum(X[:,99]) print sc plot(freqs,X[:, 99]) title('The 100th spectral frame') vlines([sc], X[:,99].min(), X[:,99].max(), lw=3) text(sc*1.1, X[:,99].max() * 0.9, 'sc = ' + str(sc)) sc = sum(X[:,199]*freqs)/sum(X[:,199]) plot(freqs,X[:, 199]) title('The 200th spectral frame') vlines([sc], X[:,199].min(), X[:,199].max(), lw=3) text(sc*1.1, X[:,199].max() * 0.9, 'sc = ' + str(sc)) centroid = [] for spec in X.T: sc = sum(spec*freqs)/sum(spec) centroid.append(sc) plot(times, centroid) win_len = 0.1 * sr w_rms, win_lens = windowed_rms(src_mono_norm, win_sizes=[win_len]) from scipy.interpolate import interp1d ifunc = interp1d(linspace(0, len(src_mono_norm)/float(sr), len(w_rms[0])), w_rms[0]) interp_rms = ifunc(times) plot(times, array(centroid) * interp_rms) max(centroid), interp_rms.max() plot(interp_rms) plot(src_mono_norm) ss = var(X[:,99]) print ss spread = [] for spec in X.T: ss = var(spec) spread.append(ss) plot(spread) p1 = plot(spread) twinx() p2 = plot(centroid, 'g') legend((p1 + p2), ('spread', 'centroid'), loc='best') grid() spread = [] for spec in X.T: ss = var(spec) spread.append(ss) plot(spread) from scipy.signal import butter, lfilter noise = random.random(10000) - 0.5 b, a = butter(8, [0.2, 0.4], btype='band') b2, a2 = butter(8, [0.4, 0.6], btype='band') b3, a3 = butter(8, [0.2, 0.6], btype='band') filt_noise = r_[lfilter(b,a, noise), lfilter(b2,a2, noise), lfilter(b3,a3, noise)] Pxx, freqs,times, im = specgram(filt_noise); X = sqrt(Pxx) centroid = [] for spec in X.T: sc = sum(spec*freqs)/sum(spec) centroid.append(sc) spread = [] for spec in X.T: ss = var(spec) spread.append(ss) p1 = plot(spread) twinx() p2 = plot(centroid, 'g') legend((p1 + p2), ('spread', 'centroid'), loc='best') grid() from scipy.stats import moment skewness = [] for spec in X.T: ss = moment(spec, moment=3) skewness.append(ss) plot(skewness) Pxx, freqs, times, im = specgram(src_mono_norm, NFFT=2048, Fs=sr, window=window_hanning, noverlap=1024); X = sqrt(Pxx) skewness = [] for spec in X.T: ss = moment(spec, moment=3) skewness.append(ss) clf() plot(skewness) Pxx, freqs, times, im = specgram(src2, NFFT=2048, Fs=sr, window=window_hanning, noverlap=1024); X = sqrt(Pxx) skewness = [] for spec in X.T: ss = moment(spec, moment=3) skewness.append(ss) clf() plot(skewness) Pxx, freqs,times, im = specgram(filt_noise); kurtosis = [] for spec in X.T: sk = moment(spec, moment=4) kurtosis.append(sk) plot(kurtosis) X = sqrt(Pxx) centroid = [] for spec in X.T: sc = sum(spec*freqs)/sum(spec) centroid.append(sc) spread = [] kurtosis = [] for spec in X.T: ss = var(spec) spread.append(ss) sk = moment(spec, moment=4) kurtosis.append(sk) p1 = plot(spread) twinx() p2 = plot(centroid, 'g') twinx() p3 = plot(kurtosis, 'r') legend((p1 + p2 + p3), ('spread', 'centroid', 'kurtosis'), loc='best') grid() Pxx3, freqs3,times3, im3 = specgram(filt_noise); X = sqrt(Pxx3) centroid = [] for spec in X.T: sc = sum(spec*freqs3)/sum(spec) centroid.append(sc) spread = [] kurtosis = [] for spec in X.T: ss = var(spec) spread.append(ss) sk = moment(spec, moment=4) kurtosis.append(sk) clf() p1 = plot(spread) twinx() p2 = plot(centroid, 'g') twinx() p3 = plot(kurtosis, 'r') legend((p1 + p2 + p3), ('spread', 'centroid', 'kurtosis'), loc='upper left') grid() Pxx3, freqs3,times3, im3 = specgram(filt_noise, NFFT=1024, noverlap=64); X = sqrt(Pxx3) centroid = [] for spec in X.T: sc = sum(spec*freqs3)/sum(spec) centroid.append(sc) spread = [] kurtosis = [] for spec in X.T: ss = var(spec) spread.append(ss) sk = moment(spec, moment=4) kurtosis.append(sk) clf() p1 = plot(spread) twinx() p2 = plot(centroid, 'g') twinx() p3 = plot(kurtosis, 'r') legend((p1 + p2 + p3), ('spread', 'centroid', 'kurtosis'), loc='upper left') grid() Pxx3, freqs3,times3, im3 = specgram(filt_noise, NFFT=64, noverlap=0); X = sqrt(Pxx3) centroid = [] for spec in X.T: sc = sum(spec*freqs3)/sum(spec) centroid.append(sc) spread = [] skewness = [] kurtosis = [] for spec in X.T: ss = var(spec) spread.append(ss) se = moment(spec, moment=3) skewness.append(se) sk = moment(spec, moment=4) kurtosis.append(sk) clf() p1 = plot(spread) twinx() p2 = plot(centroid, 'g') twinx() p3 = plot(skewness, 'r') twinx() p4 = plot(kurtosis, 'k') legend((p1 + p2 + p3 + p4), ('spread', 'centroid', 'skewness', 'kurtosis'), loc='upper left') grid() Pxx3, freqs3,times3, im3 = specgram(filt_noise); X = sqrt(Pxx3) sum(X, axis=0).shape X = sqrt(Pxx3) entropy = [] for spec in X.T: bands = spec / sum(spec, axis=0) entropy.append(- sum(bands*log2(bands))) plot(entropy) X = sqrt(Pxx3) entropy = [] for spec in X.T: bands = spec / sum(spec, axis=0) entropy.append(- sum(bands*log2(bands))) plot(entropy) Pxx, freqs,times, im = specgram(src_mono_norm); Pxx2, freqs2,times2, im2 = specgram(src2); clf() X = sqrt(Pxx) entropy = [] for spec in X.T: bands = spec / sum(spec, axis=0) entropy.append(- sum(bands*log2(bands))) plot(entropy) figure() X = sqrt(Pxx2) entropy = [] for spec in X.T: bands = spec / sum(spec, axis=0) entropy.append(- sum(bands*log2(bands))) plot(entropy) cutoff=0.85 rolloff = [] X = sqrt(Pxx) for spec in X.T: where_greater = find(cumsum(spec) >= cutoff*sum(spec)) rf = where_greater[0]/float(len(spec)) rolloff.append(rf) plot(rolloff) cutoff=0.85 rolloff = [] X = sqrt(Pxx2) for spec in X.T: where_greater = find(cumsum(spec) >= cutoff*sum(spec)) rf = where_greater[0]/float(len(spec)) rolloff.append(rf) plot(rolloff) cutoff=0.85 rolloff = [] X = sqrt(Pxx3) for spec in X.T: where_greater = find(cumsum(spec) >= cutoff*sum(spec)) rf = where_greater[0]/float(len(spec)) rolloff.append(rf) plot(rolloff) Pxx3, freqs3,times3, im = specgram(filt_noise, NFFT=2048, noverlap=1024); clf() cutoff=0.85 rolloff = [] X = sqrt(Pxx3) for spec in X.T: where_greater = find(cumsum(spec) >= cutoff*sum(spec)) rf = where_greater[0]/float(len(spec)) rolloff.append(rf) plot(rolloff) Pxx3, freqs3,times3, im = specgram(filt_noise); X_mag = sqrt(Pxx3) X1 = X_mag[:,:-1]/sum(X_mag[:,:-1]) X2 = X_mag[:, 1:]/sum(X_mag[:, 1:]) spec_diff = X2 - X1 flux = sum(spec_diff**2, axis=0) twinx() plot(linspace(0, times3.max(), len(flux)), flux, lw=4, color='w') Pxx3, freqs3,times3, im = specgram(src_mono_norm, NFFT=2048, noverlap=1024); X_mag = sqrt(Pxx3) X1 = X_mag[:,:-1]/sum(X_mag[:,:-1]) X2 = X_mag[:, 1:]/sum(X_mag[:, 1:]) spec_diff = X2 - X1 flux = sum(spec_diff**2, axis=0) twinx() plot(linspace(0, times3.max(), len(flux)), flux, lw=4, color='w') Pxx3, freqs3,times3, im = specgram(src2); X_mag = sqrt(Pxx3) X1 = X_mag[:,:-1]/sum(X_mag[:,:-1]) X2 = X_mag[:, 1:]/sum(X_mag[:, 1:]) spec_diff = X2 - X1 flux = sum(spec_diff**2, axis=0) twinx() plot(linspace(0, times3.max(), len(flux)), flux, lw=4, color='w') def halfwaverect(in_sig): out_sig = (in_sig + abs(in_sig))/2.0 return out_sig Pxx3, freqs3,times3, im = specgram(filt_noise); X = sqrt(Pxx3) spec_diff = X[:,:-1] - X[:, 1:] flux = sum(halfwaverect(spec_diff), axis=0) twinx() plot(linspace(0, times3.max(), len(flux)), flux, lw=4, color='w') Pxx, freqs,times, im = specgram(src_mono_norm, NFFT=2048, noverlap=1024); X = sqrt(Pxx) spec_diff = X[:,:-1] - X[:, 1:] flux = sum(halfwaverect(spec_diff), axis=0) twinx() plot(linspace(0, times.max(), len(flux)), flux, color='w') Pxx2, freqs2,times2, im = specgram(src2); X = sqrt(Pxx2) spec_diff = X[:,:-1] - X[:, 1:] flux = sum(halfwaverect(spec_diff), axis=0) twinx() plot(linspace(0, times2.max(), len(flux)), flux, color='w') Pxx, freqs,times, im = specgram(src_mono_norm, NFFT=1024, noverlap=512); mag_spec = log10(Pxx) # The 10 factor is optional, but useful to make it dB scale def lin2mel(flin): return 1127 * log( 1 + (flin/700.0)) def mel2lin(fmel): return 700.0*(exp(fmel/1127) -1) frequencies = linspace(20, 5000, 1000) plot(linspace(20, 5000, 1000), lin2mel(frequencies)) grid() frequencies = linspace(20, 5000, 1000) plot(linspace(20, 5000, 1000), mel2lin(frequencies)) grid() linear_bands = linspace(200, 1000, 13, endpoint=True) print linear_bands print diff(linear_bands)[0] fc40 = 6400.0 numLogFilt = 27 logStep = exp(log(fc40/1000.0)/numLogFilt) print logStep log_bands = [1000 * logStep**i for i in range(numLogFilt + 2)] upper_limit = log_bands[-1] log_bands = log_bands[1:-1] print upper_limit print log_bands bands = r_[linear_bands, log_bands] len(bands) plot(bands) filter_edges_ = r_[200.0 - diff(linear_bands)[0], bands ,upper_limit] filter_edges = [(filter_edges_[i], filter_edges_[i+2]) for i in range(len(filter_edges_) - 2)] filter_edges for band, edges in zip(bands, filter_edges): print band, edges mf_bands = zeros(len(bands)) freqs_hz = freqs * sr /2.0 # if frequencies are normalized between 0-1 band_scalings = [] for band, edges in zip(bands, filter_edges): band_scaling = [] norm_factor = 2.0/(edges[1] - edges[0]) for f in freqs_hz: if f >= edges[0] and f <= edges[1]: if f < band: band_scaling.append(norm_factor * (f - edges[0])/(band - edges[0])) else: band_scaling.append(norm_factor * (edges[1] - f)/(edges[1] - band)) else: band_scaling.append(0.0) band_scalings.append(band_scaling) for s in band_scalings: plot(s) for s in band_scalings: plot(s) xlim((0, 100)) for s in band_scalings: plot(s) xlim((0, 25)) mag_spec.shape count = 0 for s in mag_spec: plot(s) count += 1 print count mf_bands = zeros((len(bands), mag_spec.shape[1])) for i, spec in enumerate(mag_spec.T): for j, s in enumerate(band_scalings): mel_band = sum(spec * s) mf_bands[j, i] = mel_band imshow(mf_bands, aspect='auto') plot(mf_bands[:, 1500]) # Slow version (but explicit...) def DCT(in_sig): assert ndim(in_sig) == 1 N = len(in_sig) dct = [] for k in range(N): phs = linspace(0.5 * k * pi/N, ((N - 1) + 0.5) * pi * k/N, N, endpoint=True) bin_value = sum(in_sig*cos(phs)) dct.append(bin_value) return dct plot(DCT(mf_bands[:, 1500])) mfcc = [] for s in mf_bands.T: mfcc.append(DCT(s)[:14]) imshow(array(mfcc).T, aspect='auto', interpolation='nearest') colorbar() from scipy.fftpack import dct mfcc = [] for s in mf_bands.T: mfcc.append(dct(s)[:14]) imshow(array(mfcc).T, aspect='auto', interpolation='nearest') colorbar() imshow(array(mfcc)[:,1:13].T, aspect='auto', interpolation='nearest') from essentia.standard import * w = Windowing(type = 'hann') spectrum = Spectrum() # FFT() would return the complex FFT, here we just want the magnitude spectrum mfcc = MFCC() mfccs = [] audio = essentia.array(src_mono_norm) for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512): mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame))) mfccs.append(mfcc_coeffs) mfccs = essentia.array(mfccs).T imshow(mfccs[1:,:], aspect = 'auto', interpolation='nearest') mfccs = [] audio = essentia.array(src2) for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512): mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame))) mfccs.append(mfcc_coeffs) mfccs = essentia.array(mfccs).T imshow(mfccs[1:,:], aspect = 'auto', interpolation='nearest') mfccs = [] audio = essentia.array(filt_noise) for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512): mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame))) mfccs.append(mfcc_coeffs) mfccs = essentia.array(mfccs).T imshow(mfccs[1:,:], aspect = 'auto', interpolation='nearest')