#!/usr/bin/env python
# coding: utf-8

# # NMF Example

# In[105]:

# import some libraries
import urllib.request
import numpy
import librosa
import librosa.display
import IPython.display
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'nbagg')


# ## Load the first file

# In[91]:

filename = 'conga_groove.wav'
urllib.request.urlretrieve('http://audio.musicinformationretrieval.com/' + filename, filename=filename)


# In[92]:

x, fs = librosa.load(filename)


# In[93]:

IPython.display.Audio(x, rate=fs)


# ## Load the second file

# In[94]:

filename2 = '1_bar_funk_groove.mp3'
urllib.request.urlretrieve('http://audio.musicinformationretrieval.com/' + filename2, filename=filename2)


# In[95]:

x2, fs = librosa.load(filename2)
x2 = x2[0:(4*len(x))]   # trim the second clip to four conga-loop lengths
x2[0] = 1               # pin the peak at 1 so IPython.display.Audio normalizes playback consistently


# In[96]:

IPython.display.Audio(x2, rate=fs)


# ## Mix the two audio files. Since they are different lengths, we will replicate the shorter one four times.

# In[97]:

x4 = numpy.zeros(len(x)*4)
for i in range(4):
    x4[i*len(x):(i+1)*len(x)] = 2*x   # repeat the conga loop four times, boosted by 2x
x3 = x4 + x2
x3[0] = 1                             # pin the peak again for a consistent playback level
IPython.display.Audio(x3, rate=fs)


# ## Plot the spectrogram

# In[106]:

S = librosa.stft(x3)
plt.figure()
logX = librosa.amplitude_to_db(numpy.abs(S))
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')


# ## Perform NMF to decompose the magnitude spectrogram X as W times H

# In[99]:

X = numpy.absolute(S)
n_components = 16
W, H = librosa.decompose.decompose(X, n_components=n_components, sort=True)
print(W.shape)
print(H.shape)


# ## Plot the "basis" vectors, i.e. the columns of W

# In[107]:

plt.figure()
logW = numpy.log10(W + 1e-10)   # small floor avoids taking the log of zero
for n in range(n_components):
    plt.subplot(int(numpy.ceil(n_components/2.0)), 2, n+1)
    plt.plot(logW[:, n])
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)


# ## Plot the rows of H, i.e. the temporal activations

# In[101]:

plt.figure()
for n in range(n_components):
    plt.subplot(int(numpy.ceil(n_components/2.0)), 2, n+1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)


# ## Listen to individual components

# In[102]:

reconstructed_signal = numpy.zeros(len(x4))
components = []
for n in range(n_components):
    # Rank-1 magnitude estimate for component n, combined with the phase of the original mixture
    Y = numpy.outer(W[:, n], H[n]) * numpy.exp(1j*numpy.angle(S))
    y = librosa.istft(Y)
    components.append(y)
    reconstructed_signal[:len(y)] += y
    IPython.display.display(IPython.display.Audio(y, rate=fs))


# ## Manually select a subset of the components that sound like the drum part and add them up

# In[103]:

drum_indices = [0, 1, 3, 4, 5, 9, 10, 11, 13]
IPython.display.Audio(sum(components[i] for i in drum_indices), rate=fs)


# ## Manually select a subset of the components that sound most like the guitar and add them up

# In[104]:

guitar_indices = [2, 6, 8, 12, 14, 15]
IPython.display.Audio(sum(components[i] for i in guitar_indices), rate=fs)
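

# ## Optional: the same NMF step with scikit-learn directly
#
# A minimal sketch, assuming the magnitude spectrogram `X` and `n_components` from above are still in scope.
# `librosa.decompose.decompose` delegates to an `sklearn.decomposition.NMF` transformer by default, so the
# factors here should be comparable, up to initialization and component ordering.

# In[ ]:

import sklearn.decomposition

nmf = sklearn.decomposition.NMF(n_components=n_components, init='random', random_state=0, max_iter=500)
W_sk = nmf.fit_transform(X)   # shape: (n_frequency_bins, n_components)
H_sk = nmf.components_        # shape: (n_components, n_frames)
print(W_sk.shape, H_sk.shape)


# ## Optional: soft-mask reconstruction of a single component
#
# A minimal sketch of a common alternative to the outer-product reconstruction above: build a soft ratio mask
# from W and H and apply it to the original complex STFT `S`. The choice of component index 0 is arbitrary and
# only for illustration.

# In[ ]:

eps = numpy.finfo(float).eps
V_hat = W.dot(H) + eps                       # full NMF model of the magnitude spectrogram
mask0 = numpy.outer(W[:, 0], H[0]) / V_hat   # share of each time-frequency bin attributed to component 0
y0 = librosa.istft(mask0 * S)                # mask the complex STFT, then invert
IPython.display.Audio(y0, rate=fs)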