In [1]:

%pylab inline
rcParams['figure.figsize'] = (10, 4) #wide graphs by default
from __future__ import print_function
from __future__ import division
from scipy.io import wavfile
%matplotlib inline

Populating the interactive namespace from numpy and matplotlib

/usr/local/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:

img = imread("scape.png") #load and display the image
imshow(img)

Out[2]:

<matplotlib.image.AxesImage at 0x107787b50>

In [3]:

img.shape

Out[3]:

(640, 1024, 3)

In [4]:

img.size/44100/3

Out[4]:

14.860770975056688

In [5]:

audspec = zeros((img.size))  #set audiospectrum array, the size is equal to number of pixels, init to zero

In [6]:

#init the audiospec array, the values equal to sum of all tree color components in one pixel
for i in xrange(0,img.shape[0]):
    for j in xrange(0, img.shape[1]):
        audspec[j + i * 1280] = img[i,j,0] + img[i,j,1] + img[i,j,2]

In [7]:

audspec = audspec/audspec.max() #normalize to [0-1] range
plot(audspec)

Out[7]:

[<matplotlib.lines.Line2D at 0x1047d6850>]

In [8]:

testspec = zeros(256)  #some test for irfft
testspec[8] = 1.0
test = irfft(testspec)
plot(test*testspec.size) #the output size is (2*n+1), where n is the size of the input array

Out[8]:

[<matplotlib.lines.Line2D at 0x1095e95d0>]

In [9]:

audspec.size/256 # set the irfft segment size to 256

Out[9]:

7680.0

In [10]:

plot(audspec[:256])

Out[10]:

[<matplotlib.lines.Line2D at 0x111761490>]

In [11]:

testspec = audspec[:256]  #test for first [256] elements of audiospec
testspec = testspec - testspec.min()
plot(testspec)

Out[11]:

[<matplotlib.lines.Line2D at 0x111873990>]

In [12]:

test = irfft(testspec)  # see what we get for irfft for first 256 elements
plot(test)

Out[12]:

[<matplotlib.lines.Line2D at 0x111989210>]

In [13]:

test = test[64:512-64]    #cut off begin and end part of the music
plot(test)       #the middle part is nice for music

Out[13]:

[<matplotlib.lines.Line2D at 0x111aa62d0>]

In [14]:

total = audspec.size/256 #total numbe of spec segements
aud = np.zeros(256)  #audio pieces
#perform irfft on each segement, cut off the begin and end part of the transformed array, and concatenate them
for i in xrange(0, int(total)): 
    specseg = audspec[i*256:i*256 + 256]
    specseg = specseg - specseg.min()
    audseg = irfft(specseg)
    audseg = audseg[64:512-64]
    aud = concatenate((aud, audseg))
plot(aud) #get audio file, need to convert [0-1] range and multiply by 255

Out[14]:

[<matplotlib.lines.Line2D at 0x105a0ea10>]

In [15]:

aud = abs(aud) * 32768 * 256

In [16]:

plot(aud)  #plot the converted music

Out[16]:

[<matplotlib.lines.Line2D at 0x10787c0d0>]

In [17]:

from IPython.display import Audio
Audio(aud/32768,rate=44100)   #display the final musics

Out[17]:

In [ ]: