**MAT 201A Winter 2016**

Yitian Shao

yitianshao@umail.ucsb.edu

Homework 1: Produce a sound file from image data or vice versa. Condition and select the data to make the results as interesting as possible.

*** The code converts an input audio signal to image data (a spectrogram)

In [62]:

# MAT 201A Winter 2016
# Homework 1 : Produce a soundfile from image data or vice versa.
# Try to condition and select the data to make the results as interesting as possible.
# ======================================================================================================================
__author__ = 'Yitian Shao'
# yitianshao@umail.ucsb.edu
# Created on 01/22/2016
# Updated on 02/02/2016
# The code first converts image data to an audio signal (updated)
# Then it converts an imported audio signal to a spectrogram (old version)
# ======================================================================================================================
%pylab inline
from __future__ import print_function
from __future__ import division
from scipy.io import wavfile
from IPython.display import Audio
import pylab as p


# ----------------------------------------------------------------------------------------------------------------------
# Load each source image, reduce it to a single 2-D channel, and preview it

# "wave" picture: keep only the channel at index 2
img1 = imread('wave.png')[:, :, 2]

figure(figsize=(25, 6))
imshow(img1, cmap=cm.Greys)  # grey-scale preview
ylabel("Height")
xlabel("Width")
title("The image including nine ellipses")

# "zigzag" picture: take the channel at index 3 and invert it (1 - value)
img2 = 1 - imread('zigzag.png')[:, :, 3]

figure(figsize=(25, 6))
imshow(img2, cmap=cm.Greys)  # grey-scale preview
ylabel("Height")
xlabel("Width")
title("The image including four triangles")

Populating the interactive namespace from numpy and matplotlib

Out[62]:

<matplotlib.text.Text at 0x34508be0>

In [63]:

# ----------------------------------------------------------------------------------------------------------------------
# Build a 1-D signal from the first image by laying its rows end to end (row-major order)
sig1 = img1.flatten('C')

# Plot the resulting waveform
figure(figsize=(25, 6))
plot(sig1)
title("The signal converted from the image containing nine ellipses")
ylabel("Amplitude")
xlabel("Sampling point")
# ----------------------------------------------------------------------------------------------------------------------
# Audio player for the signal; kept as the cell's last expression so it is shown as the cell output
Audio(sig1, rate=50400)

Out[63]:

In [64]:

# ----------------------------------------------------------------------------------------------------------------------
# Build a 1-D signal from the second image by laying its rows end to end (row-major order)
sig2 = img2.flatten('C')

# Plot the resulting waveform
figure(figsize=(25, 6))
plot(sig2)
title("The signal converted from the image containing four triangles")
ylabel("Amplitude")
xlabel("Sampling point")
# ----------------------------------------------------------------------------------------------------------------------
# Audio player for the signal; kept as the cell's last expression so it is shown as the cell output
Audio(sig2, rate=50400)

Out[64]:

In [67]:

# ----------------------------------------------------------------------------------------------------------------------
# Combine the two image-derived signals sample by sample (element-wise product)
sig3 = sig1 * sig2

# Plot the mixed waveform
figure(figsize=(25, 6))
plot(sig3)
title("Mixed (Element-wise multiplication) signals")
ylabel("Amplitude")
xlabel("Sampling point")
# ----------------------------------------------------------------------------------------------------------------------
# Audio player for the mixed signal; kept as the cell's last expression so it is shown as the cell output
Audio(sig3, rate=50400)

Out[67]:

In [2]:

# ----------------------------------------------------------------------------------------------------------------------
# Earlier version of the homework, kept from before the update.
# ----------------------------------------------------------------------------------------------------------------------

# Read the WAV file (Star Wars theme song, 36 sec) and keep column 0 only,
# i.e. one of the two sound tracks
sr, audio = wavfile.read('starwars.wav')
audio = audio[:, 0]
audioLen = audio.shape[0]  # number of samples in the retained track
# ----------------------------------------------------------------------------------------------------------------------
# Report basic properties of the signal
print("Amplitude range: [", audio.min(), " to ", audio.max(), "], Sampling rate = ", sr, " Hz")
print("Audio signal length = ", audioLen)
# ----------------------------------------------------------------------------------------------------------------------
# Plot the waveform
figure(figsize=(25, 6))
plot(audio)
title("Theme music")
ylabel("Amplitude")
xlabel("Sampling point")
# ----------------------------------------------------------------------------------------------------------------------
# Audio player for the theme song; kept as the cell's last expression so it is shown as the cell output
Audio(audio, rate=sr)

Amplitude range: [ -22473  to  22884 ], Sampling rate =  44100  Hz
Audio signal length =  1608394

c:\python27\lib\site-packages\scipy\io\wavfile.py:42: WavFileWarning: Unknown wave file format
  warnings.warn("Unknown wave file format", WavFileWarning)

Out[2]:

In [3]:

# ----------------------------------------------------------------------------------------------------------------------
# Compute the spectrogram of the audio signal
# Uses `audio`, `audioLen` and `sr` as defined by the audio-import cell.
# ----------------------------------------------------------------------------------------------------------------------

win_len = 2**11 # Size of each spectrogram window, must be power of 2
winNum = int(audioLen/win_len) # Number of whole windows; trailing samples that do not fill a window are dropped
print("window number = ", winNum, " and window length = ", win_len)

yLimMax = 4 # Number of y ticks (counted from 0 Hz) kept in view; sets the upper y-axis limit

# ----------------------------------------------------------------------------------------------------------------------
# Compute y-axis ticks, in unit of frequency, for the spectrogram
ytickNum = 8 # Display maximum 8 ticks on y axis
nyquist = sr/2.0 # Nyquist frequency
fInterval = int(round(floor(nyquist/ytickNum)/1000)*1000) # Interval between y ticks, multiples of 1000

# ----------------------------------------------------------------------------------------------------------------------
# Find the index of y axis corresponding to the selected frequency
ytickInd = empty(ytickNum, dtype=int)
# One frequency value per rfft output bin. The bin count must be an integer:
# with true division `win_len/2+1` is a float, which linspace rejects as `num`,
# so floor division is used instead.
fw = linspace(0, nyquist, win_len//2 + 1, endpoint=True).astype(int)
for i in range(ytickNum):
    # Last bin whose (truncated) frequency does not exceed the i-th tick frequency
    ytickInd[i] = where(fw > fInterval*i)[0][0] - 1

# ----------------------------------------------------------------------------------------------------------------------
# Compute the spectrogram: one power spectrum (in dB) per non-overlapping window
pow_spectrum = []
for start in arange(0, winNum*win_len, win_len):
    win = audio[start: start + win_len]
    X = fft.rfft(win)
    pow_spectrum.append(10*log10(abs(X)**2/float(win_len/2)))
pow_spectrum = array(pow_spectrum).T # Transpose so that rows are frequency bins and columns are windows

# sr * i / audioLen for every sample index i, computed in one vectorized step
# (not used by the plotting code below)
freqs = sr * (arange(audioLen) / float(audioLen))

# ----------------------------------------------------------------------------------------------------------------------
# Display the spectrogram
figure(figsize=(25,6))
imshow(pow_spectrum)
colorbar()
gca().invert_yaxis()
# Label the first yLimMax tick positions with their frequencies in Hz
yticks(ytickInd[:yLimMax], list(range(0, yLimMax*fInterval, fInterval)))
gca().set_ylim(0, ytickInd[yLimMax-1]) # Show only the band from 0 Hz up to the last labelled tick

xlabel("Time segment")
ylabel("Frequency (Hz)")
title("Spectrogram")

window number =  785  and window length =  2048

Out[3]:

<matplotlib.text.Text at 0x1eed3860>

In [ ]: