%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
from __future__ import print_function
from __future__ import division
from scipy.io import wavfile
%matplotlib inline
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment. warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
# Read the source picture from disk into an array and preview it inline.
img = imread("scape.png") #load the image
imshow(img) #display it
<matplotlib.image.AxesImage at 0x107787b50>
# Inspect the array dimensions of the loaded image
# (presumably rows, columns, colour components -- confirm against the recorded output).
img.shape
(640, 1024, 3)
# Rough playing time in seconds if every pixel became one audio sample:
# total element count / 44100 (the rate later passed to Audio) / 3 (the size of
# the image's last axis, i.e. the colour components per pixel).
img.size/44100/3
14.860770975056688
# Collapse the picture into a 1-D "audio spectrum": one value per pixel, equal
# to the sum of its first three colour components, laid out row by row.
#
# The previous loop wrote row i at offset i*1280, which does not match the
# image width (img.shape[1]) and therefore left unwritten zero gaps between
# rows; it also sized the array with img.size, which counts every colour
# component instead of every pixel. Summing over the channel axis and
# flattening gives exactly img.shape[0] * img.shape[1] contiguous values.
# dtype=float keeps the sum from wrapping around for integer-typed images.
audspec = img[:, :, :3].sum(axis=2, dtype=float).ravel()

# Normalize so the largest value is 1; skipped when the peak is 0 to avoid 0/0.
peak = audspec.max()
if peak > 0:
    audspec = audspec / peak
plot(audspec)
[<matplotlib.lines.Line2D at 0x1047d6850>]
# Sanity check of irfft: feed it a spectrum with a single non-zero bin (index 8)
# and plot the resulting signal, scaled by the number of input bins.
testspec = zeros(256) #some test for irfft
testspec[8] = 1.0
test = irfft(testspec)
# NOTE(review): assuming irfft is numpy.fft.irfft (as provided by %pylab), the
# default output length is 2*(n-1) -- 510 samples for 256 bins -- not 2*n+1.
plot(test*testspec.size) #the output size is 2*(n-1), where n is the size of the input array
[<matplotlib.lines.Line2D at 0x1095e95d0>]
# How many 256-value segments the spectrum splits into.
audspec.size/256 # the irfft segment size is set to 256
7680.0
# Look at the first 256-value segment of the spectrum.
plot(audspec[:256])
[<matplotlib.lines.Line2D at 0x111761490>]
# Trial run on the first 256 spectrum values: shift the segment down so that
# its smallest value becomes zero, then inspect it.
testspec = audspec[:256] - audspec[:256].min()
plot(testspec)
[<matplotlib.lines.Line2D at 0x111873990>]
# Inverse real FFT of the shifted first segment, to see what signal it produces.
test = irfft(testspec) # irfft result for the first 256 elements
plot(test)
[<matplotlib.lines.Line2D at 0x111989210>]
# Keep samples 64..447 of the transformed segment, dropping both ends.
# NOTE(review): the upper bound is written relative to 512; if irfft is numpy's
# with its default length, `test` holds 510 samples for 256 input bins, so the
# trim is not symmetric -- confirm whether that matters.
test = test[64:512-64] #cut off the beginning and end of the signal
plot(test) #the middle part is nice for music
[<matplotlib.lines.Line2D at 0x111aa62d0>]
# Turn the whole spectrum into audio, one 256-bin segment at a time: shift each
# segment so its minimum is zero, inverse-FFT it, keep samples 64..447 of the
# result, and join the pieces after 256 leading zero samples.
#
# The previous version grew `aud` with concatenate() inside the loop, which
# re-copies the entire signal on every iteration (quadratic in the number of
# segments). Here every segment is transformed in one batched irfft call along
# axis 1 and the pieces are joined exactly once.
total = audspec.size // 256  # total number of complete spectrum segments
segments = audspec[:total * 256].reshape(total, 256)
segments = segments - segments.min(axis=1)[:, np.newaxis]  # per-segment shift to a zero minimum
audsegs = irfft(segments, axis=1)[:, 64:512-64]  # same trim as the single-segment experiment
aud = concatenate((np.zeros(256), audsegs.ravel()))
plot(aud)  # raw synthesized signal, before any rescaling
[<matplotlib.lines.Line2D at 0x105a0ea10>]
# Rectify the signal (absolute value) and apply a constant gain of 32768 * 256.
aud = (32768 * 256) * abs(aud)
plot(aud)  # plot the rescaled signal
[<matplotlib.lines.Line2D at 0x10787c0d0>]
from IPython.display import Audio
# Render an inline audio player for the generated signal at a 44100 Hz sample
# rate; the samples are divided by 32768 before being handed to the widget.
Audio(aud/32768,rate=44100) #display the final music