# import some libraries
import urllib

import IPython.display
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy
import scipy
import sklearn

try:
    # Python 3 keeps urlretrieve in urllib.request.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 exposes it directly on urllib.
    from urllib import urlretrieve
%matplotlib nbagg
# Download the conga groove recording into the working directory.
# `urlretrieve` comes from the import block at the top of the file, which
# resolves it for both Python 2 and Python 3 (`urllib.urlretrieve` itself
# only exists on Python 2).
filename = 'conga_groove.wav'
urlretrieve('http://audio.musicinformationretrieval.com/' + filename,
            filename=filename)
# Notebook output: ('conga_groove.wav', <httplib.HTTPMessage instance at 0x7f20fc78ccb0>)
# Decode the downloaded conga file: librosa.load hands back the sample array
# (kept in `x`) and the sample rate (kept in `fs`), then play it inline.
x, fs = librosa.load(path=filename)
IPython.display.Audio(data=x, rate=fs)
# Download the one-bar funk groove into the working directory.
# `urlretrieve` comes from the import block at the top of the file, which
# resolves it for both Python 2 and Python 3 (`urllib.urlretrieve` itself
# only exists on Python 2).
filename2 = '1_bar_funk_groove.mp3'
urlretrieve('http://audio.musicinformationretrieval.com/' + filename2,
            filename=filename2)
# Notebook output: ('1_bar_funk_groove.mp3', <httplib.HTTPMessage instance at 0x7f20e6813368>)
# Decode the funk groove (this rebinds `fs`) and keep at most four
# conga-lengths of it.
x2, fs = librosa.load(path=filename2)
x2 = x2[:4 * len(x)]
# The first sample is forced to 1 before playback.
# NOTE(review): presumably this pins the peak value so the audio widget's
# normalisation does not rescale the clip -- confirm.
x2[0] = 1
IPython.display.Audio(data=x2, rate=fs)
# Repeat the conga loop four times at double amplitude.  numpy.tile replaces
# the slice-assignment loop; the cast keeps x4 in float64, the dtype the
# numpy.zeros buffer used before.
x4 = numpy.tile(2 * x, 4).astype(numpy.float64)

# Mix with the funk groove.  x2 is only guaranteed to be *at most*
# 4 * len(x) samples long, so add over the common length instead of failing
# on mismatched shapes when the funk clip is shorter.
n_mix = min(len(x4), len(x2))
x3 = x4[:n_mix] + x2[:n_mix]

# The first sample is forced to 1 before playback.
# NOTE(review): presumably this pins the peak value so the audio widget's
# normalisation does not rescale the clip -- confirm.
x3[0] = 1
IPython.display.Audio(x3, rate=fs)
# Short-time Fourier transform of the mix, drawn with a time x-axis and a
# log-frequency y-axis.
# librosa.display is imported explicitly in the import block at the top of
# the file; a bare `import librosa` does not guarantee that the display
# submodule is loaded.
S = librosa.stft(x3)
plt.figure()
# NOTE(review): S is the complex STFT, so logamplitude receives the raw
# transform rather than a power spectrogram -- confirm the resulting dB
# scaling is what is intended.
logX = librosa.logamplitude(S)
librosa.display.specshow(logX, sr=fs, x_axis='time', y_axis='log')
# Notebook output: <matplotlib.image.AxesImage at 0x7f20e5ea69d0>
# Factor the magnitude spectrogram into n_components parts: W holds one
# column per component and H one row per component (they are plotted that
# way further down).
X = numpy.absolute(S)
n_components = 16
W, H = librosa.decompose.decompose(X, n_components=n_components, sort=True)
# A parenthesised single-argument print is valid on both Python 2 and
# Python 3 and prints the same text on each.
print(W.shape)
print(H.shape)
# Notebook output:
#   (1025, 16)
#   (16, 752)
# Plot the base-10 log of every column of W, one subplot per component, in a
# two-column grid.
plt.figure()
# Floor W before taking the log: exact zeros would otherwise become -inf and
# trigger a "divide by zero" RuntimeWarning.  The floored values land at -10,
# well below the -2 lower y-limit used for the plots.
logW = numpy.log10(numpy.maximum(W, 1e-10))
# subplot expects integer grid dimensions; (n + 1) // 2 equals ceil(n / 2).
n_rows = (n_components + 1) // 2
for n in range(n_components):
    plt.subplot(n_rows, 2, n + 1)
    plt.plot(logW[:, n])
    plt.ylim(-2, logW.max())
    plt.xlim(0, W.shape[0])
    plt.ylabel('Component %d' % n)
# Notebook output (recorded from the original run):
#   /usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py:2: RuntimeWarning: divide by zero encountered in log10
#     from ipykernel import kernelapp as app
# Plot every row of H, one subplot per component, in a two-column grid with
# a shared y-range of [0, H.max()].
plt.figure()
# subplot expects integer grid dimensions; (n + 1) // 2 equals ceil(n / 2).
n_rows = (n_components + 1) // 2
for n in range(n_components):
    plt.subplot(n_rows, 2, n + 1)
    plt.plot(H[n])
    plt.ylim(0, H.max())
    plt.xlim(0, H.shape[1])
    plt.ylabel('Component %d' % n)
# Turn each NMF component back into audio: rebuild its magnitude as the outer
# product of column n of W and row n of H, attach the phase of the mixture
# STFT, invert, keep the result in `components`, add it into
# `reconstructed_signal`, and show a player for it.
# numpy.zeros / numpy.outer replace the scipy.zeros / scipy.outer aliases,
# which were only re-exports of the numpy functions.
reconstructed_signal = numpy.zeros(len(x4))
components = list()
# Unit-magnitude phase factor of the mixture; it does not depend on n, so it
# is computed once instead of on every iteration.
phase = numpy.exp(1j * numpy.angle(S))
for n in range(n_components):
    Y = numpy.outer(W[:, n], H[n]) * phase
    y = librosa.istft(Y)
    components.append(y)
    # Only the first len(y) samples are touched; the rest of the buffer
    # stays as initialised.
    reconstructed_signal[:len(y)] += y
    IPython.display.display(IPython.display.Audio(y, rate=fs))
def _sum_in_order(signals):
    """Add the given arrays left to right and return the running total."""
    total = signals[0]
    for extra in signals[1:]:
        total = total + extra
    return total


# First mix: the components at these indices, added in exactly this order.
first_mix_ids = [0, 1, 4, 3, 11, 13, 10, 9, 5]
IPython.display.Audio(_sum_in_order([components[i] for i in first_mix_ids]),
                      rate=fs)

# Second mix: the remaining components; component 7 stays in the sum but is
# scaled by zero.
second_mix_terms = [components[2], components[6], 0 * components[7],
                    components[8], components[12], components[14],
                    components[15]]
IPython.display.Audio(_sum_in_order(second_mix_terms), rate=fs)