#!/usr/bin/env python
# coding: utf-8

# # NMF Example

# In[105]:

# import some libraries
import urllib.request
import numpy
import librosa
import librosa.display
import IPython.display
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'nbagg')


# ## Load the first file

# In[91]:

filename = 'conga_groove.wav'
urllib.request.urlretrieve('http://audio.musicinformationretrieval.com/' + filename, filename=filename)


# In[92]:

x, fs = librosa.load(filename)


# In[93]:

IPython.display.Audio(x, rate=fs)


# ## Load the second file

# In[94]:

filename2 = '1_bar_funk_groove.mp3'
urllib.request.urlretrieve('http://audio.musicinformationretrieval.com/' + filename2, filename=filename2)


# In[95]:

x2, fs = librosa.load(filename2)
x2 = x2[0:(4*len(x))]   # trim the second clip to four conga-loop lengths
x2[0] = 1               # pin the peak at 1 so IPython.display.Audio normalizes playback consistently


# In[96]:

IPython.display.Audio(x2, rate=fs)


# ## Mix the two audio files. Since they are different lengths, we will replicate the shorter one four times.

# In[97]:

x4 = numpy.zeros(len(x)*4)
for i in range(4):
    x4[i*len(x):(i+1)*len(x)] = 2*x   # repeat the conga loop four times, boosted by 2x
x3 = x4 + x2
x3[0] = 1                             # pin the peak again for a consistent playback level
IPython.display.Audio(x3, rate=fs)


# ## Plot the spectrogram

# In[106]:

S = librosa.stft(x3)
plt.figure()
logX = librosa.amplitude_to_db(numpy.abs(S))
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')


# ## Perform NMF to decompose the magnitude spectrogram X as W times H

# In[99]:

X = numpy.absolute(S)
n_components = 16
W, H = librosa.decompose.decompose(X, n_components=n_components, sort=True)
print(W.shape)
print(H.shape)


# ## Plot the "basis" vectors, i.e. the columns of W

# In[107]:

plt.figure()
logW = numpy.log10(W + 1e-10)   # small floor avoids taking the log of zero
for n in range(n_components):
    plt.subplot(int(numpy.ceil(n_components/2.0)), 2, n+1)
    plt.plot(logW[:, n])
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)


# ## Plot the rows of H, i.e. the temporal activations

# In[101]:

plt.figure()
for n in range(n_components):
    plt.subplot(int(numpy.ceil(n_components/2.0)), 2, n+1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)


# ## Listen to individual components

# In[102]:

reconstructed_signal = numpy.zeros(len(x4))
components = []
for n in range(n_components):
    # Rank-1 magnitude estimate for component n, combined with the phase of the original mixture
    Y = numpy.outer(W[:, n], H[n]) * numpy.exp(1j*numpy.angle(S))
    y = librosa.istft(Y)
    components.append(y)
    reconstructed_signal[:len(y)] += y
    IPython.display.display(IPython.display.Audio(y, rate=fs))


# ## Manually select a subset of the components that sound like the drum part and add them up

# In[103]:

drum_indices = [0, 1, 3, 4, 5, 9, 10, 11, 13]
IPython.display.Audio(sum(components[i] for i in drum_indices), rate=fs)


# ## Manually select a subset of the components that sound most like the guitar and add them up

# In[104]:

guitar_indices = [2, 6, 8, 12, 14, 15]
IPython.display.Audio(sum(components[i] for i in guitar_indices), rate=fs)
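

# ## Optional: the same NMF step with scikit-learn directly
#
# A minimal sketch, assuming the magnitude spectrogram `X` and `n_components` from above are still in scope.
# `librosa.decompose.decompose` delegates to an `sklearn.decomposition.NMF` transformer by default, so the
# factors here should be comparable, up to initialization and component ordering.

# In[ ]:

import sklearn.decomposition

nmf = sklearn.decomposition.NMF(n_components=n_components, init='random', random_state=0, max_iter=500)
W_sk = nmf.fit_transform(X)   # shape: (n_frequency_bins, n_components)
H_sk = nmf.components_        # shape: (n_components, n_frames)
print(W_sk.shape, H_sk.shape)


# ## Optional: soft-mask reconstruction of a single component
#
# A minimal sketch of a common alternative to the outer-product reconstruction above: build a soft ratio mask
# from W and H and apply it to the original complex STFT `S`. The choice of component index 0 is arbitrary and
# only for illustration.

# In[ ]:

eps = numpy.finfo(float).eps
V_hat = W.dot(H) + eps                       # full NMF model of the magnitude spectrogram
mask0 = numpy.outer(W[:, 0], H[0]) / V_hat   # share of each time-frequency bin attributed to component 0
y0 = librosa.istft(mask0 * S)                # mask the complex STFT, then invert
IPython.display.Audio(y0, rate=fs)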