# MAT 201A Winter 2016
# HW 2
# Atakan Gunal
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Loading an image file
img = imread('sonos.jpg')
imshow(img);
# Checking the dimensions
img.shape
(1133, 1510, 3)
# Removing the green and blue components as the image is richer in red
redOnly = img.copy()
redOnly[:,:,1] = 0
redOnly[:,:,2] = 0
imshow(redOnly);
# Checking how a column would look as an fft bin
sig = redOnly[:,200, 0]
plot(sig);
xlabel("index");
ylabel("color value");
# Column normalized
sigNorm = sig / 255.0
plot(sigNorm);
sigNorm.dtype
xlabel("index");
ylabel("color value (normalized)");
# Normalizing the whole image
redOnlyNorm = redOnly /255.0
# Checking normalization
max(redOnly[:,200,0])
255
# I want to use the first 500 columns
z = arange(500)
# first 500 columns as spectrogram. Interpret as Horizontal: time , Vertical: frequency
r = redOnlyNorm[:,z,0]
imshow(r);
r.shape
(1133, 500)
# swapping axes to make things easier for myself in indexing
Xim = np.swapaxes(r,0,1);
imshow(Xim);
xlabel("frequency");
ylabel("time");
Xim.shape
(500, 1133)
For one row i.e. one STFT "slice" or frame
#from http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.fft.irfft.html#numpy.fft.irfft
#"the real zero-frequency term followed by the complex positive frequency terms in order of increasing frequency"
mags = [0] + (Xim[0].tolist())
len(mags)
1134
# plot of one row in image
plot(mags);
xlabel("row index");
ylabel("color value");
# generating phase spectrum. Randomizing to avoid mirroring. (Credit: Andrés)
ph = np.random.uniform(0, 2*np.pi, (len(mags), 1))
plot(ph);
len(ph)
1134
# using inverse real fft as the image is treated as the output of rfft
Xb = [np.complex(cos(phs)* mag, sin(phs)* mag) for mag, phs in zip(mags, ph)]
xb = fft.irfft(Xb) * 1133
plot(xb)
len(xb)
xlabel("time (samples)");
ylabel("amplitude");
# checking to see if the fft of signal matches the image row
plot(abs(fft.rfft(xb)));
xlabel("frequency");
ylabel("amplitude");
plot(angle(fft.fft(xb)));
xlabel("frequency");
ylabel("angle");
# N/2 + 1 (Credit: Andrés)
fftsize = 1134
x = [];
For all of the rows i.e. all slices put into irfft, results concatenated to form signal
for i in range(0, 500):
magss = ([0] + Xim[i]).tolist()
X = [np.complex(cos(phs)* mag, sin(phs)* mag) for mag, phs in zip(magss, ph)]
x[i*fftsize:(i+1)*fftsize] = fft.irfft(X) * 1133
# checking how long in seconds the output should be with a 44.1 kHz sampling rate
len(x)/44100
12
# converting from list to array
# I am no longer converting to int16 as it produces artifacts (Credit: Andrés)
x = np.asarray(x)
type(x)
numpy.ndarray
# Scaling the samples to amplitude range
sig = (real(x)) * 600
plot(sig);
xlabel("time (samples)");
ylabel("amplitude");
# spectrogram of the output signal to check resemblence to initial image
sout = specgram(sig, NFFT=1134, noverlap=0);
xlabel("time (sec*fs)");
ylabel("frequency");
colorbar();
from scipy.io import wavfile
wavfile.write('outhw2.wav', 44100, array(sig, dtype=int16))
from IPython.display import Audio
sound_file = './outhw2.wav'
from IPython.display import HTML
from base64 import b64encode
path_to_audio = "./outhw2.wav"
audio_type = "wav"
sound = open(path_to_audio, "rb").read()
sound_encoded = b64encode(sound)
sound_tag = """
<audio id="beep" controls src="data:audio/{1};base64,{0}">
</audio>""".format(sound_encoded, audio_type)
play_beep = """
<script type="text/javascript">
var audio = document.getElementById("beep");
audio.play();
</script>
"""
HTML(sound_tag)