In [381]:

# MAT 201A Winter 2016 
# HW 2
# Atakan Gunal

In [382]:

%pylab inline

Populating the interactive namespace from numpy and matplotlib

In [383]:

# Loading an image file
img = imread('sonos.jpg')
imshow(img);

In [384]:

# Checking the dimensions
img.shape

Out[384]:

(1133, 1510, 3)

In [385]:

# Removing the green and blue components as the image is richer in red
redOnly = img.copy()
redOnly[:,:,1] = 0
redOnly[:,:,2] = 0
imshow(redOnly);

In [386]:

# Checking how a column would look as an fft bin
sig = redOnly[:,200, 0]
plot(sig);
xlabel("index");
ylabel("color value");

In [387]:

# Column normalized
sigNorm = sig / 255.0
plot(sigNorm);
sigNorm.dtype
xlabel("index");
ylabel("color value (normalized)");

In [388]:

# Normalizing the whole image
redOnlyNorm = redOnly /255.0
# Checking normalization
max(redOnly[:,200,0])

Out[388]:

In [389]:

# I want to use the first 500 columns
z = arange(500)

In [390]:

# first 500 columns as spectrogram. Interpret as Horizontal: time , Vertical: frequency
r = redOnlyNorm[:,z,0]

imshow(r);

In [391]:

r.shape

Out[391]:

(1133, 500)

In [392]:

# swapping axes to make things easier for myself in indexing
Xim = np.swapaxes(r,0,1);
imshow(Xim);
xlabel("frequency");
ylabel("time");
Xim.shape

Out[392]:

(500, 1133)

For one row i.e. one STFT "slice" or frame

In [393]:

#from http://docs.scipy.org/doc/numpy-1.10.1/reference/generated/numpy.fft.irfft.html#numpy.fft.irfft
#"the real zero-frequency term followed by the complex positive frequency terms in order of increasing frequency"
mags = [0] + (Xim[0].tolist())
len(mags)

Out[393]:

In [394]:

# plot of one row in image
plot(mags);
xlabel("row index");
ylabel("color value");

In [395]:

# generating phase spectrum. Randomizing to avoid mirroring. (Credit: Andrés)
ph = np.random.uniform(0, 2*np.pi, (len(mags), 1))
plot(ph);
len(ph)

Out[395]:

In [396]:

# using inverse real fft as the image is treated as the output of rfft
Xb = [np.complex(cos(phs)* mag, sin(phs)* mag) for mag, phs in zip(mags, ph)]
xb = fft.irfft(Xb) * 1133
plot(xb)
len(xb)
xlabel("time (samples)");
ylabel("amplitude");

In [397]:

# checking to see if the fft of signal matches the image row
plot(abs(fft.rfft(xb)));
xlabel("frequency");
ylabel("amplitude");

In [398]:

plot(angle(fft.fft(xb)));
xlabel("frequency");
ylabel("angle");

In [399]:

# N/2 + 1 (Credit: Andrés)
fftsize = 1134
x = [];

For all of the rows i.e. all slices put into irfft, results concatenated to form signal

In [400]:

for i in range(0, 500):
    magss = ([0] + Xim[i]).tolist()
    X = [np.complex(cos(phs)* mag, sin(phs)* mag) for mag, phs in zip(magss, ph)]
    x[i*fftsize:(i+1)*fftsize] = fft.irfft(X) * 1133

In [401]:

# checking how long in seconds the output should be with a 44.1 kHz sampling rate
len(x)/44100

Out[401]:

In [402]:

# converting from list to array
# I am no longer converting to int16 as it produces artifacts (Credit: Andrés)
x = np.asarray(x)
type(x)

Out[402]:

numpy.ndarray

In [403]:

# Scaling the samples to amplitude range
sig = (real(x)) * 600
plot(sig);
xlabel("time (samples)");
ylabel("amplitude");

In [404]:

# spectrogram of the output signal to check resemblence to initial image
sout = specgram(sig, NFFT=1134, noverlap=0);
xlabel("time (sec*fs)");
ylabel("frequency");
colorbar();

In [405]:

from scipy.io import wavfile
wavfile.write('outhw2.wav', 44100, array(sig, dtype=int16))
from IPython.display import Audio
sound_file = './outhw2.wav'
from IPython.display import HTML
from base64 import b64encode

path_to_audio = "./outhw2.wav"
audio_type = "wav"

sound = open(path_to_audio, "rb").read()
sound_encoded = b64encode(sound)
sound_tag = """
    <audio id="beep" controls src="data:audio/{1};base64,{0}">
    </audio>""".format(sound_encoded, audio_type)

play_beep = """
<script type="text/javascript">
    var audio = document.getElementById("beep");
    audio.play();
</script>
"""

HTML(sound_tag)

Out[405]:

In [ ]: