NMF Example¶

In [105]:

# import some libraries
import urllib
import scipy,sklearn
import librosa
import IPython.display
import matplotlib.pyplot as plt
import numpy
%matplotlib nbagg

Load the first file¶

In [91]:

filename = 'conga_groove.wav'
urllib.urlretrieve('http://audio.musicinformationretrieval.com/' + filename,
                  filename=filename)

Out[91]:

('conga_groove.wav', <httplib.HTTPMessage instance at 0x7f20fc78ccb0>)

In [92]:

x, fs = librosa.load(filename)

In [93]:

IPython.display.Audio(x,rate=fs)

Out[93]:

Load the second file¶

In [94]:

filename2 = '1_bar_funk_groove.mp3'
urllib.urlretrieve('http://audio.musicinformationretrieval.com/' + filename2,
                  filename=filename2)

Out[94]:

('1_bar_funk_groove.mp3', <httplib.HTTPMessage instance at 0x7f20e6813368>)

In [95]:

x2, fs = librosa.load(filename2)
x2 = x2[0:(4*len(x))]
x2[0] = 1

In [96]:

IPython.display.Audio(x2,rate=fs)

Out[96]:

Mix the two audio files. Since they are different lengths we will replicate the shorter one 4 times.¶

In [97]:

x4 = numpy.zeros(len(x)*4)
for i in range(4) :
    x4[i*len(x):(i+1)*len(x)] = 2*x
x3 = x4 + x2
x3[0] = 1
IPython.display.Audio(x3, rate=fs)

Out[97]:

Plot the spectrogram¶

In [106]:

S = librosa.stft(x3)
plt.figure()
logX = librosa.logamplitude(S)
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')

Out[106]:

<matplotlib.image.AxesImage at 0x7f20e5ea69d0>

Perform NMF to decompose the spectrogram V as W times H¶

In [99]:

X = numpy.absolute(S)
n_components = 16
W, H = librosa.decompose.decompose(X, n_components=n_components, sort=True)
print W.shape
print H.shape

(1025, 16)
(16, 752)

Plot the "basis" vectors i.e. columns of W¶

In [107]:

plt.figure()
logW = numpy.log10(W)
for n in range(n_components):
    plt.subplot(numpy.ceil(n_components/2.0), 2, n+1)
    plt.plot(logW[:,n])
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)

/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:2: RuntimeWarning: divide by zero encountered in log10
  from ipykernel import kernelapp as app

Plot the columns of H i.e. the temporal activations¶

In [101]:

plt.figure()
for n in range(n_components):
    plt.subplot(numpy.ceil(n_components/2.0), 2, n+1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)

Listen to individual components¶

In [102]:

reconstructed_signal = scipy.zeros(len(x4))
components = list()
for n in range(n_components):
    Y = scipy.outer(W[:,n], H[n])*numpy.exp(1j*numpy.angle(S))
    y = librosa.istft(Y)
    components.append(y)
    reconstructed_signal[:len(y)] += y
    IPython.display.display( IPython.display.Audio(y, rate=fs) )

Manually select a subset of the components that sound like the drum part and add them up¶

In [103]:

IPython.display.Audio(components[0]+components[1]+components[4]+components[3]+components[11]+components[13]+components[10]+components[9]+components[5],rate=fs)

Out[103]:

Manually select a subset of componenets that sound most like the guitar and add them up¶

In [104]:

IPython.display.Audio(components[2]+components[6]+0*components[7]+components[8]+components[12]+components[14]+components[15], rate=fs)

Out[104]: