Directly listening to a room impulse response (RIR) doesn't reveal much information (except probably for room acoustics experts). It's normally more helpful to use a bunch of dry recordings with different characteristics (speech, music, tonal, percussive, ...), convolve them with the given RIR and listen to the results.
import numpy as np
from scipy import signal
import soundfile as sf
import tools
# Load a dry speech recording and a hand-clap-measured room impulse response.
speech, fs = sf.read("data/xmas.wav")
rir, fs_rir = sf.read("data/rir_clap.wav")
# Convolving the two only makes sense if they share the same sampling rate.
assert fs == fs_rir
# FFT-based convolution: much faster than direct convolution for long signals.
speech_clap = signal.fftconvolve(speech, rir)
# normalize to the same maximum value as the original speech signal:
# (tools.normalize presumably rescales to the given peak value — verify in tools)
speech_clap = tools.normalize(speech_clap, np.max(np.abs(speech)))
sf.write("data/xmas_clap.wav", speech_clap, fs)
It doesn't sound exactly like the measured room, because the frequency response of the clapping device is not flat and its characteristics are part of the measured RIR and therefore also audible in the convolved signal.
import matplotlib.pyplot as plt
# Time axis in seconds for the impulse response.
t = np.arange(len(rir)) / fs
# Linear-amplitude view of the RIR.
plt.plot(t, rir)
plt.xlabel("t / s")
plt.figure()
# Same RIR on a logarithmic scale (tools.db presumably returns the level
# in dB, e.g. 20*log10(|x|) — TODO confirm against the tools module).
plt.plot(t, tools.db(rir))
plt.xlabel("t / s");
def plot_impulse_response(ir, fs=44100, db=True):
    """Plot an impulse response against time in milliseconds.

    Opens a new matplotlib figure; nothing is returned.

    Parameters
    ----------
    ir : array_like
        Impulse response samples.
    fs : int, optional
        Sampling frequency in Hertz.  Defaults to 44100.
    db : bool, optional
        If True (default), plot the level in dB (via ``tools.db``);
        otherwise plot the raw amplitude.
    """
    length = len(ir)
    # Milliseconds are a more convenient scale for typical RIR lengths.
    time = np.arange(length) / fs * 1000
    plt.figure()
    if db:
        plt.plot(time, tools.db(ir))
        plt.ylabel('Level / dB')
    else:
        plt.plot(time, ir)
        plt.ylabel('Amplitude')
    plt.xlabel('t / ms')
    plt.grid()
def energy_decay_curve(ir):
    """Normalized energy decay curve (EDC) of the impulse response.

    The EDC at sample n is the energy remaining in the tail,
    ``sum(ir[n:]**2)``, normalized by the total energy so the curve
    starts at 1.

    Parameters
    ----------
    ir : array_like
        Impulse response samples.

    Returns
    -------
    numpy.ndarray
        EDC with the same length as ``ir``; ``edc[0] == 1``.
    """
    ir = np.asarray(ir)
    # Reverse cumulative sum of the squared IR computes all tail
    # energies in one vectorized pass, replacing the original
    # sample-by-sample Python loop.
    edc = np.cumsum(ir[::-1]**2)[::-1]
    return edc / edc[0]
Notice that the energy decay curve can be interpreted as a convolution of $h^2(t)$ and a rectangular window. Therefore, it can be computed more efficiently using the fast convolution.
def fast_edc(ir):
    """Normalized energy decay curve via FFT-based convolution.

    Equivalent to ``energy_decay_curve``, but computes the running
    tail energies as a fast convolution of the squared impulse
    response with a rectangular window, which is faster for
    sufficiently long impulse responses.

    Parameters
    ----------
    ir : array_like
        Impulse response samples.

    Returns
    -------
    numpy.ndarray
        EDC with the same length as ``ir``; ``edc[0] == 1``.
    """
    n = len(ir)
    window = np.ones_like(ir)
    # The full convolution has length 2n-1; samples n-1 ... 2n-2 hold
    # the tail energies sum(ir[k:]**2) for k = 0 ... n-1.
    edc = signal.fftconvolve(np.asarray(ir)**2, window)[n - 1:]
    # Bug fix: normalize by the total energy (first sample) so the
    # curve starts at 1, consistent with energy_decay_curve.  The
    # original divided by edc[-1] (the last sample's energy).
    return edc / edc[0]
Of course, this will be more efficient than the direct computation only if the impulse response is sufficiently long.
import io
import soundfile as sf
import tools
import zipfile
# Download a measured concert-hall impulse response (Pori, Finland) and
# read the WAV straight out of the zip archive without unpacking to disk.
url = "http://legacy.spa.aalto.fi/projects/poririrs/wavs/omni.zip"
filename = "s1_r1_o.wav"
# tools.HttpFile presumably exposes the URL as a seekable file object
# for ZipFile — verify in the tools module.
zf = zipfile.ZipFile(tools.HttpFile(url))
pori, fs = sf.read(io.BytesIO(zf.read(filename)))
print(pori.shape, fs)
# you can also just download and unzip the file manually:
#pori, fs = sf.read(filename)
# Note: this rebinds fs to the Pori RIR's sampling rate (used below).
assert pori.shape[1] == 2 # stereo IR
# Down-mix the two channels to mono by summing them.
pori = pori.sum(axis=1)
fs
The sampling frequencies of the input signal and the impulse response have to match!
It's very easy to convert between sampling frequencies with SoX, e.g. like this:
sox xmas.wav -r 48000 xmas48k.wav
speech48k, fs48k = sf.read("data/xmas48k.wav")
assert fs48k == 48000
speech_pori = signal.fftconvolve(speech48k, pori)
speech_pori = tools.normalize(speech_pori, np.max(np.abs(speech)))
sf.write("data/xmas_pori.wav", speech_pori, fs)
t = np.arange(len(pori)) / fs
plt.figure()
plt.plot(t, pori)
plt.xlabel("t / s");
# TODO: use custom plotting function
plt.figure()
plt.plot(t, tools.db(pori))
plt.xlabel("t / s")
plt.ylabel("Level / dB")
plot_impulse_response(pori, fs=fs, db=True)