# Embdedded player from: https://github.com/Carreau/posts
import StringIO
import base64
import struct
from IPython.core.display import HTML
def wavPlayer(data, rate):
""" will display html 5 player for compatible browser
The browser need to know how to play wav through html5.
there is no autoplay to prevent file playing when the browser opens
Adapted from SciPy.io.
"""
buffer = StringIO.StringIO()
buffer.write(b'RIFF')
buffer.write(b'\x00\x00\x00\x00')
buffer.write(b'WAVE')
buffer.write(b'fmt ')
if data.ndim == 1:
noc = 1
else:
noc = data.shape[1]
bits = data.dtype.itemsize * 8
sbytes = rate*(bits // 8)*noc
ba = noc * (bits // 8)
buffer.write(struct.pack('<ihHIIHH', 16, 1, noc, rate, sbytes, ba, bits))
# data chunk
buffer.write(b'data')
buffer.write(struct.pack('<i', data.nbytes))
if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
data = data.byteswap()
buffer.write(data.tostring())
size = buffer.tell()
buffer.seek(4)
buffer.write(struct.pack('<i', size-8))
val = buffer.getvalue()
src = """
<body>
<audio controls="controls" style="width:600px" >
<source controls src="data:audio/wav;base64,{base64}" type="audio/wav" />
Your browser does not support the audio element.
</audio>
</body>
""".format(base64=base64.encodestring(val))
display(HTML(src))
from scipy.io import wavfile
sr, src = wavfile.read('sources/THX.wav')
wavPlayer(src , sr)
src.shape, src.dtype
((1103616, 2), dtype('int16'))
A nifty trick for setting plot defaults:
rcParams['figure.figsize'] = (16, 4)
src_mono = sum(src/2, axis=1)
src_mono.shape
(1103616,)
src_mono = sum(src.astype(float), axis=1)/src.shape[1]
src_mono.shape
(1103616,)
src_mono = sum(src.astype(float)/src.shape[1], axis=1).astype(src.dtype)
src_mono.shape, src_mono.dtype
((1103616,), dtype('int16'))
plot(src_mono)
figure()
plot(src);
src.max(), src_mono.max(), src.min(), src_mono.min()
(32767, 32767, -32768, -32768)
abs_max = max(abs(src_mono.min()), abs(src_mono.max()))
print abs_max
32767
abs(src_mono.min())
-32768
-src_mono.min(),src_mono.min()
(-32768, -32768)
abs(src_mono.min().astype(int64))
32768
Longer but more robust:
abs_max = max(abs(src_mono.min().astype(float)), abs(src_mono.max().astype(float)))
print abs_max
32768.0
So, this file is already normalized...
iinfo(int16), iinfo(uint8), finfo(float32), finfo(float)
(iinfo(min=-32768, max=32767, dtype=int16), iinfo(min=0, max=255, dtype=uint8), finfo(resolution=1e-06, min=-3.4028235e+38, max=3.4028235e+38, dtype=float32), finfo(resolution=1e-15, min=-1.7976931348623157e+308, max=1.7976931348623157e+308, dtype=float64))
src_mono.dtype
dtype('int16')
iinfo(src_mono.dtype)
iinfo(min=-32768, max=32767, dtype=int16)
src_mono_norm = src_mono.astype(float) * -iinfo(src_mono.dtype).min / abs_max
src_mono_norm.dtype
But it could be useful to normalize from -1 to 1 in floating point format:
src_mono.dtype
src_mono_norm = src_mono.astype(float) / abs_max
plot(src_mono_norm)
[<matplotlib.lines.Line2D at 0x4578310>]
from scipy.signal import decimate
src_mono_norm_dec = decimate(src_mono_norm, 4)
plot(src_mono_norm_dec)
[<matplotlib.lines.Line2D at 0x47c18d0>]
src_mono_norm_dec.max()
1.063278585576124
len(src_mono_norm_dec)/float(len(src_mono_norm))
0.25
Decimation is useful to reduce by integer factors, but can't do anything in between.
from scipy.signal import resample
src_mono_norm_rsmpl = resample(src_mono_norm, len(src_mono_norm) * 1.5)
plot(src_mono_norm_rsmpl)
[<matplotlib.lines.Line2D at 0x95e1050>]
Resampling can increase or remove the number of samples at any factor.
There are always artifacts when resampling/decimating. Make sure you use tested and recommended algorithms.
src_mono_norm_dec.max(), src_mono_norm_dec.min()
(1.063278585576124, -1.0239748627672003)
src_mono_norm_dec.max() - src_mono_norm_dec.min()
2.0872534483433243
src_mono_norm_dec.mean()
-0.00027398996581580329
abs(src_mono_norm_dec).mean()
0.14948573258836526
dur = len(src_mono_norm_dec)/(sr*1.5)
print dur, 'seconds'
4.17088435374 seconds
N = float(len(src_mono_norm_dec))
ms = sum(src_mono_norm_dec**2)/N # or use the mean() function
rms = sqrt(ms)
print rms, 'RMS'
0.209113836394 RMS
from scipy.signal import argrelextrema
maxima = argrelextrema(src_mono_norm_dec, np.greater)
minima = argrelextrema(src_mono_norm_dec, np.less)
minima
(array([ 13, 16, 18, ..., 275897, 275899, 275902]),)
plot(src_mono_norm_dec)
plot(maxima[0], zeros_like(maxima[0]),'x')
plot(minima[0], zeros_like(minima[0]),'x')
xlim((50000, 50200))
ylim((-0.25, 0.25))
grid()
maxima = argrelextrema(abs(src_mono_norm_dec), np.greater)
plot(abs(src_mono_norm_dec))
plot(maxima[0], zeros_like(maxima[0]),'x')
xlim((50000, 50200))
ylim((-0.25, 0.25))
grid()
gcf().set_figwidth(16)
maxima = argrelextrema(abs(src_mono_norm_dec), np.greater)
plot(abs(src_mono_norm_dec))
plot(maxima[0], abs(src_mono_norm_dec[maxima[0]]),'o-')
xlim((50000, 50200))
ylim((0, 0.25))
grid()
gcf().set_figwidth(16)
plot(maxima[0], abs(src_mono_norm_dec[maxima[0]]),'g')
[<matplotlib.lines.Line2D at 0x7bf9090>]
plot(maxima[0], abs(src_mono_norm_dec[maxima[0]]),'g')
xlim((150000,160000))
(150000, 160000)
from scipy.interpolate import interp1d
fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]]))
plot(fi(linspace(0, len(src_mono_norm_dec), 1600)))
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-48-a11396f5ef0a> in <module>() 1 fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]])) ----> 2 plot(fi(linspace(0, len(src_mono_norm_dec), 1600))) /usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.pyc in __call__(self, x_new) 392 # The behavior is set by the bounds_error variable. 393 x_new = asarray(x_new) --> 394 out_of_bounds = self._check_bounds(x_new) 395 396 y_new = self._call(x_new) /usr/lib/python2.7/dist-packages/scipy/interpolate/interpolate.pyc in _check_bounds(self, x_new) 447 # !! Could provide more information about which values are out of bounds 448 if self.bounds_error and below_bounds.any(): --> 449 raise ValueError("A value in x_new is below the interpolation " 450 "range.") 451 if self.bounds_error and above_bounds.any(): ValueError: A value in x_new is below the interpolation range.
fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]]))
plot(fi(linspace(100, len(src_mono_norm_dec) - 100, 1600)))
[<matplotlib.lines.Line2D at 0x9601bd0>]
fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]]))
plot(fi(linspace(maxima[0][0], maxima[0][-1], 1600)))
[<matplotlib.lines.Line2D at 0x9292850>]
plot(src_mono_norm_dec)
fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]]))
interp_out = fi(linspace(maxima[0][0], maxima[0][-1], 100))
plot(linspace(0, len(src_mono_norm_dec), len(interp_out)), interp_out, lw=3, color='w')
[<matplotlib.lines.Line2D at 0x1588ea50>]
fi = interp1d(maxima[0], abs(src_mono_norm_dec[maxima[0]]))
peak_interp = fi( linspace(maxima[0][0], maxima[0][-1],
len(src_mono_norm_dec) - (maxima[0][0] - maxima[0][-1])))
plot(linspace(maxima[0][0], maxima[0][-1], len(peak_interp)), peak_interp, 'x')
plot(maxima[0], abs(src_mono_norm_dec[maxima[0]]),'o-')
xlim((50000, 50200))
ylim((0, 0.25))
grid()
peak_interp_dec = decimate(peak_interp,99)
plot(peak_interp_dec)
/usr/lib/python2.7/dist-packages/scipy/signal/filter_design.py:288: BadCoefficients: Badly conditioned filter coefficients (numerator): the results may be meaningless "results may be meaningless", BadCoefficients)
[<matplotlib.lines.Line2D at 0x93ebe10>]
plot(src_mono_norm_dec)
twinx()
plot(linspace(0, len(src_mono_norm_dec), len(peak_interp_dec)),peak_interp_dec, color='r', lw='3')
ylim((-0.0020, 0.0020))
xlim((50000, 65000))
(50000, 65000)
plot(src_mono_norm_dec)
twinx()
plot(linspace(0, len(src_mono_norm_dec), len(peak_interp_dec)),peak_interp_dec, color='r', lw='3')
ylim((-0.0020, 0.0020))
xlim((50000, 225000))
(50000, 225000)
# Moving average (smoothing filter)
w = ones(32)
plot(convolve(w/w.sum(),peak_interp_dec,mode='valid'))
figure()
plot(src_mono_norm_dec)
[<matplotlib.lines.Line2D at 0x1ce0e490>]
sr2, src2 = wavfile.read('sources/passport.wav')
wavPlayer(src2 , sr2)
maxima = argrelextrema(abs(src2), np.greater)
plot(abs(src2))
plot(maxima[0], abs(src2[maxima[0]]),'o-')
[<matplotlib.lines.Line2D at 0x940cc10>]
fi = interp1d(maxima[0], abs(src2[maxima[0]]))
peak_interp = fi(linspace(100, len(src2)-100, 1600))
plot(peak_interp)
[<matplotlib.lines.Line2D at 0x135352d0>]
peak_interp_dec = decimate(peak_interp,99)
plot(peak_interp_dec)
[<matplotlib.lines.Line2D at 0x1c5c9490>]
peak_interp_dec = decimate(peak_interp,5)
plot(peak_interp_dec)
[<matplotlib.lines.Line2D at 0x1f37d890>]
w = ones(5)
plot(convolve(w/w.sum(),peak_interp_dec,mode='valid'))
figure()
plot(src2)
[<matplotlib.lines.Line2D at 0x213ecb10>]
win_size = 2048
hop = 1024
window_start = arange(0, len(src_mono_norm), hop)
window_start
array([ 0, 1024, 2048, ..., 1100800, 1101824, 1102848])
rms = []
for start in window_start:
w = src_mono_norm[start: start+win_size].astype(float)
rms_inst = sqrt(mean(w**2))
rms.append(rms_inst)
plot(rms)
[<matplotlib.lines.Line2D at 0x19fdf550>]
win_sizes = [512, 1024, 2048, 4096]
rms_windows = []
for win_size in win_sizes:
hop = win_size/2
window_start = arange(0, len(src_mono_norm), hop)
rms = []
for start in window_start:
w = src_mono_norm[start: start+win_size].astype(float)
rms_inst = sqrt(mean(w**2))
rms.append(rms_inst)
rms_windows.append(rms)
for rms_plot in rms_windows:
plot(rms_plot)
for rms_plot in rms_windows:
plot(linspace(0, len(src_mono_norm_dec), len(rms_plot)), rms_plot)
plot(linspace(0, len(src_mono_norm_dec), len(rms_windows[-1])), rms_windows[-1], lw=3, color='w')
[<matplotlib.lines.Line2D at 0x213f1410>]
for rms_plot in rms_windows:
plot(linspace(0, len(src_mono_norm_dec), len(rms_plot)), rms_plot, 'x-')
xlim((100000, 105000))
ylim((0.1, 0.3));
legend(win_sizes)
<matplotlib.legend.Legend at 0x21416150>
def windowed_rms(input_sig, win_sizes = [512, 1024, 2048, 4096], hop=None):
rms_windows = []
for win_size in win_sizes:
if not hop:
hop = win_size/2
window_start = arange(0, len(input_sig) - win_size, hop)
rms = []
for start in window_start:
w = input_sig[start: start+win_size].astype(float)
rms_inst = sqrt(mean(w**2))
rms.append(rms_inst)
rms_windows.append(rms)
return rms_windows, win_sizes
w_rms, win_sizes = windowed_rms(src2)
for rms_plot in w_rms:
plot(linspace(0, len(src2), len(rms_plot)), rms_plot)
plot(linspace(0, len(src2), len(w_rms[-1])), w_rms[-1], lw=3, color='k')
[<matplotlib.lines.Line2D at 0x1f3857d0>]
Peeters has suggested using an RMS window of 100ms, which is somewhat equivalent to a 5kHz low-pass filter.
Peeters, G. (2004). A large set of audio features for sound description (similarity and classification) in the CUIDADO project (pp. 1–25). Retrieved from http://www.citeulike.org/group/1854/article/1562527
sr = 44100
L = 0.1
win_len = sr * L
print win_len
4410.0
w_rms, win_lens = windowed_rms(src2, win_sizes=[win_len, 10000])
for rms_plot in w_rms:
plot(linspace(0, len(src2), len(rms_plot)), rms_plot)
legend(win_lens)
gcf().set_figwidth(16)
By: Andrés Cabrera mantaraya36@gmail.com
For Course MAT 240E at UCSB
This ipython notebook is licensed under the CC-BY-NC-SA license: http://creativecommons.org/licenses/by-nc-sa/4.0/