import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from scipy.io import loadmat
import soundfile as sf
import tools
brir_clap, clap_fs = sf.read('data/brir_clap.wav')
clap_fs
44100
brir_clap.shape
(45998, 2)
len(brir_clap) / clap_fs # duration in seconds
1.0430385487528344
mat_contents = loadmat('data/brir_sweep.mat',
struct_as_record=False, squeeze_me=True)
mat_contents['__header__']
b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Tue May 07 11:52:13 2013'
data = mat_contents['data']
type(data)
scipy.io.matlab._mio5_params.mat_struct
data.SystemLatencySamples, data.SystemlatencyRemoved
(2167, 'YES')
sweep_fs = data.fs
assert sweep_fs == clap_fs
sweep_fs
44100
data.head_azimuth
0
brir_sweep = data.ir
brir_sweep.shape
(66150, 2)
len(brir_sweep) / sweep_fs # duration in seconds
1.5
speech, speech_fs = sf.read('data/xmas.wav')
speech.shape
(114400,)
assert speech_fs == clap_fs == sweep_fs
fs = speech_fs
def convolve_multichannel_ir(x, h, **kwargs):
    """Convolve mono signal *x* with a multichannel impulse response *h*.

    After the convolution, the result is normalized (via
    ``tools.normalize``) to the maximum amplitude of *x*.

    Parameters
    ----------
    x : array_like
        Mono input signal; singleton dimensions are squeezed away first.
    h : array_like
        Impulse response of shape ``(taps,)`` or ``(taps, channels)``.
        A 1D response is treated as a single channel.
    **kwargs
        Extra keyword arguments forwarded to
        ``scipy.signal.fftconvolve`` (defaults: ``mode='full'``,
        ``axes=0``).

    Returns
    -------
    numpy.ndarray
        The normalized convolution result of shape
        ``(samples, channels)``.

    Raises
    ------
    ValueError
        If *x* is not mono after squeezing.
    """
    x = np.squeeze(np.asarray(x))
    if x.ndim != 1:
        raise ValueError("x must be a mono signal")
    h = np.asarray(h)
    if h.ndim == 1:
        # Promote a 1D impulse response to a single-channel 2D array.
        h = h.reshape(-1, 1)
    # BUGFIX: **kwargs used to be accepted but silently ignored; forward
    # it to fftconvolve, keeping the previous settings as defaults.
    kwargs.setdefault('mode', 'full')
    kwargs.setdefault('axes', 0)
    y = signal.fftconvolve(x[:, np.newaxis], h, **kwargs)
    y = tools.normalize(y, np.max(np.abs(x)))
    return y
speech_clap = convolve_multichannel_ir(speech, brir_clap)
speech_clap.shape
(160397, 2)
speech_sweep = convolve_multichannel_ir(speech, brir_sweep)
sf.write('data/xmas_brir_clap.wav', speech_clap, fs)
sf.write('data/xmas_brir_sweep.wav', speech_sweep, fs)
hcomp, hcomp_fs = sf.read('data/THOMSON_HED415N_KEMAR_hcomp.wav')
assert hcomp_fs == fs
hcomp.shape
(512, 2)
def compensate(brir):
    """Return *brir* filtered with the headphone compensation filter.

    Side note: the compensation filter ``hcomp`` is looked up in the
    global scope — it is *not* passed in as an argument!
    """
    compensation_filter = hcomp  # module-level headphone filter (L59)
    return signal.fftconvolve(brir, compensation_filter,
                              mode='full', axes=0)
# Apply headphone compensation to both BRIR measurements, ...
brir_clap_hcomp = compensate(brir_clap)
brir_sweep_hcomp = compensate(brir_sweep)
# ... auralize the speech signal with the compensated BRIRs ...
speech_clap_hcomp = convolve_multichannel_ir(speech, brir_clap_hcomp)
speech_sweep_hcomp = convolve_multichannel_ir(speech, brir_sweep_hcomp)
# ... and store the results for listening.
sf.write('data/xmas_brir_clap_hcomp.wav', speech_clap_hcomp, fs)
sf.write('data/xmas_brir_sweep_hcomp.wav', speech_sweep_hcomp, fs)
data/xmas_brir_sweep_hcomp.wav
Note that the headphone compensation filter used here was designed for THOMSON HED415N headphones. If you use any other type of headphones, the result might not sound better — it might even sound worse!
# TODO: plot BRIRs
# TODO: estimate time-of-flight
# TODO: estimate SNR
# TODO: plot frequency response
def load_ir_from_mat(angle):
    """Return the impulse response for a head azimuth of *angle* degrees.

    Reads ``data/brir_sweep<angle>.mat`` (e.g. ``brir_sweep+40.mat``)
    and checks that its sampling rate matches the global ``fs`` and that
    the stored head azimuth agrees with *angle*.
    """
    mat_data = loadmat(f'data/brir_sweep{angle:+03d}.mat',
                       struct_as_record=False, squeeze_me=True)['data']
    assert mat_data.fs == fs
    assert np.isclose(np.rad2deg(mat_data.head_azimuth), angle)
    return mat_data.ir
# Loudspeaker azimuths (in degree, relative to the head) for which
# BRIR measurements are available on disk.
angles = -80, -40, 40, 80
# For each angle: load the BRIR, apply headphone compensation, and
# auralize the speech signal with the compensated response.
brirs = {angle: load_ir_from_mat(angle) for angle in angles}
brirs_hcomp = {angle: compensate(ir) for angle, ir in brirs.items()}
speech_hcomp = {angle: convolve_multichannel_ir(speech, ir)
                for angle, ir in brirs_hcomp.items()}
# TODO: plot impulse responses of 1 ear for all angles
# TODO: plot magnitude spectra
In the top right is the angle of the loudspeaker, relative to the head.
import io
from urllib.request import urlopen
url = 'https://zenodo.org/record/4459911/files/QU_KEMAR_anechoic_2m.mat'
file = io.BytesIO(urlopen(url).read())
# if you want to load a local file instead:
#file = "QU_KEMAR_anechoic_2m.mat"
irs = loadmat(file, struct_as_record=False, squeeze_me=True)['irs']
plt.imshow(irs.left)
<matplotlib.image.AxesImage at 0x1376dad90>
What?
plt.imshow(irs.left, aspect='auto')
<matplotlib.image.AxesImage at 0x1379c7820>
Better, but still: what?
Let's try to convert the values to decibels:
plt.imshow(20 * np.log10(np.abs(irs.left)), aspect='auto')
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x137a72f10>
OK, we're getting somewhere, but there are still things to do:
replace x- and y-indices with meaningful values
axis labels
use one of the perceptually optimized colormaps: viridis (default), plasma, inferno, magma, or cividis
def plot_irs(samples=None):
    """Plot the left-ear impulse responses for all azimuths as an image.

    Magnitudes are shown in dB (clipped to -120 ... 0 dB) with azimuth
    on the x-axis and time (in milliseconds, increasing downwards) on
    the y-axis.  Uses the global ``irs`` object (attributes ``left``,
    ``apparent_azimuth``, ``fs``) loaded above.

    Parameters
    ----------
    samples : int, optional
        Number of leading samples to show.  By default (``None``) all
        samples are shown.  NOTE: the value is used as a slice end, so a
        negative value like ``-1`` drops samples from the *end* instead
        of showing everything.
    """
    left = irs.left[:samples]  # slice once instead of twice
    # Map image pixels to azimuth (degree) and time (ms); the y-extent
    # is given top-to-bottom so time increases downwards.
    extent = [np.rad2deg(irs.apparent_azimuth[0]),
              np.rad2deg(irs.apparent_azimuth[-1]),
              len(left) * 1000 / irs.fs,
              0]
    plt.imshow(20 * np.log10(np.abs(left)), aspect='auto',
               vmax=0, vmin=-120,
               cmap='inferno', extent=extent)
    plt.xlabel("apparent source azimuth in degree")
    plt.ylabel("time in milliseconds")
    cbar = plt.colorbar()
    cbar.set_label('dB', rotation=90)
plot_irs(-1)
plot_irs(500)
plot_irs(200)
# TODO: waterfall diagrams?