Description: This homework loads a picture and uses the pixel data as FFT bins in an STFT, then uses the inverse Fourier transform (IFT) to produce audio from it.
I used my thumbnail picture on the MAT website as the input image.
I used all 3 layers (color channels) of the picture as input data. Segmenting the image pixels into windows of the right size for the IFT is the main complication; after several tries, I chose a window length of 2000.
I kept some raw debug code in place, such as print and plot calls, so you can see some of my intermediate progress.
The code is as follows:
%pylab inline
from __future__ import print_function
from __future__ import division
from scipy.io import wavfile
from IPython.display import Audio
import numpy as np
import IPython
from IPython.display import HTML
from base64 import b64encode
Populating the interactive namespace from numpy and matplotlib
# Reference clip: read the wav file kept from the previous homework into
# `signal` (whatever wavfile.read returns for it) and embed a player for the
# same file so it can be compared by ear with the audio generated below.
previous_output = 'outsig2.wav'
signal = wavfile.read(previous_output)
IPython.display.Audio(previous_output)
This is the audio output from the last homework, for comparison.
# Load the source picture into `img` and render it inline.
# The trailing semicolon keeps the notebook from echoing imshow's return value.
image_path = 'test.jpeg'
img = imread(image_path)
imshow(img);
This is the image I use as input.
# Probe signals: slice index 0, 1 and 2 along the first axis of the image,
# keeping only entry 0 of the last axis, then plot the magnitude of the real
# FFT of the first slice to get a feel for the data.
sig1, sig2, sig3 = (img[row, :, 0] for row in range(3))
first_slice_magnitude = abs(fft.rfft(sig1))
plot(first_slice_magnitude)
[<matplotlib.lines.Line2D at 0x10b28a5d0>]
# Flatten the picture into one long list of pixel values: for each of the
# three layers, append the image column by column.
#
# The layer index `j` is now actually used. Previously every pass read layer 0
# (`img[:, i, 0]`), so the list contained the same layer three times instead
# of the three different layers.
#
# NOTE(review): the column index `i` is still bounded by img.shape[0] (the
# size of the FIRST axis), exactly as before, so the total length of `lst` is
# unchanged for the fixed-size windowing done later in this file. For a
# non-square image this would skip columns or index out of range -- confirm
# the input image is square, or switch the bound to img.shape[1].
lst = []
for j in range(3):
    for i in range(img.shape[0]):
        lst.extend(img[:, i, j].tolist())

print(len(lst))

# Magnitude spectrum of the whole flattened stream, zoomed to the range of
# interest.
plot(abs(fft.rfft(lst)))
ylim((0, 40000))
xlim((0, 15100))
30000
(0, 15100)
# Exploratory check: cut the first three consecutive 1024-sample windows out
# of the flattened pixel list and overlay their magnitude spectra.
probe_len = 1024
win1, win2, win3 = (lst[k * probe_len:(k + 1) * probe_len] for k in range(3))
for probe in (win1, win2):
    plot(abs(fft.rfft(probe)))
# Kept as the final expression so the notebook still echoes the last plot handle.
plot(abs(fft.rfft(win3)))
#print(abs(fft.rfft(win1)))
[<matplotlib.lines.Line2D at 0x10c3e95d0>]
---------------------------- Up to here, I tried a few things to get a better look at the signals.
# Windowed resynthesis of the flattened pixel list `lst`.
#
# For each window of `win_len` samples:
#   1. take the real FFT and keep only its magnitude, scaled by 1/(win_len/2);
#   2. pair every magnitude with the constant phase stored in `phs_spectrum`,
#      forming  mag * (cos(phs) - 1j * sin(phs))  for each bin;
#   3. pass the result through fft.ifft and append it to `signal`.
#
# The window length is defined once and the bin count is derived from it, so
# changing `win_len` no longer requires editing three separate literals.
win_len = 2000
n_bins = win_len // 2 + 1  # number of rfft bins for a win_len-point window (1001)
win_start = arange(0, len(lst), win_len)
phs_spectrum = ones(n_bins) * pi  # same phase (pi) for every bin

# Unit phasor per bin. Built with plain complex arithmetic instead of the
# `np.complex` alias, which recent NumPy releases no longer provide.
phasor = cos(phs_spectrum) - 1j * sin(phs_spectrum)

signal = []
for start in win_start:
    win = lst[start: start + win_len]
    # n=win_len zero-pads a short final window so it still yields n_bins bins
    # (a full-length window is unaffected).
    mag_spectrum = abs(fft.rfft(win, n=win_len)) / (win_len / 2.0)
    # One complex value per row: the same (n_bins, 1) layout the original
    # list-of-single-element-lists produced.
    nums = (mag_spectrum * phasor).reshape(-1, 1)
    # NOTE(review): fft.ifft transforms along the LAST axis, which has length 1
    # here, so this call returns its input unchanged and `signal` ends up
    # holding the phase-shifted magnitude spectra rather than time-domain
    # samples. A true inverse would be
    #     fft.irfft(mag_spectrum * phasor, n=win_len)
    # but that changes the shape, dtype and scale of `signal`, so the fixed
    # gain applied before the wav file is written would need re-tuning.
    # Behaviour is deliberately left as it was -- confirm before changing.
    sig = fft.ifft(nums)
    signal.extend(sig)

plot(signal);
xlabel("time")
ylabel("magniture")
title("out put signal")
/usr/local/lib/python2.7/site-packages/numpy/core/numeric.py:474: ComplexWarning: Casting complex values to real discards the imaginary part return array(a, dtype, copy=False, order=order)
<matplotlib.text.Text at 0x10c632190>
# Inspect the element type of the synthesized signal once it is converted to an array.
array(signal).dtype
dtype('complex128')
# Turn the synthesized values into playable audio: take their magnitude,
# apply a fixed gain, write the result as 16-bit samples at 44.1 kHz, and
# echo the peak value as a quick range check.
gain = 100
signal = abs(array(signal)) * gain
pcm = array(signal, dtype=int16)
wavfile.write('hw2.wav', 44100, pcm)
max(signal)
array([ 27140.9])
# Embed a player for the hw2.wav file.
IPython.display.Audio("hw2.wav")
Here is the output audio file.
# Play the signal straight from memory (cast to int16 and transposed),
# declaring a sample rate of 22050 Hz instead of the 44100 Hz used for the wav file.
Audio(signal.astype(int16).T, rate = 22050)
Play it at half of the regular sample rate.
# Direct conversion: feed the image data to the inverse real FFT, one
# first-axis slice of `img` at a time, scale each result by seglen / 2, and
# concatenate everything into a single 1-D signal.
#
# NOTE(review): fft.irfft works along the last axis of its input. If `img` is
# laid out as (rows, cols, channels) -- presumably the case for a colour JPEG,
# TODO confirm -- each call inverts the channel values of every pixel rather
# than the pixels along a row. Behaviour is kept exactly as it was.
seglen = img.shape[1]
signalSegment = array([fft.irfft(row) * seglen / 2 for row in img]).ravel()

plot(signalSegment)
xlabel('Sampling point')
ylabel('Amplitude')
title('The signal converted directly from the image');
Assume zero phase for all time, and play the audio signal produced from the IFT of the image.
# Store the directly converted signal as 16-bit samples at 44.1 kHz, then
# embed a player for the file that was just written.
pcm_segment = array(signalSegment, dtype=int16)
wavfile.write('hw2-2.wav', 44100, pcm_segment)
IPython.display.Audio("hw2-2.wav")