First question is: can we play a sound in the browser? With the help of this link, we just might. Let's see.
import scipy.constants as const
import scipy
from scipy.io import wavfile
from IPython.core.display import HTML
from __future__ import division
# this is a wrapper that take a filename and publish an html <audio> tag to listen to it
import StringIO
import base64
import struct
def wavPlayer(data, rate):
    """Display an HTML5 <audio> player for a numpy sample buffer.

    Builds a minimal RIFF/WAVE file in memory, base64-encodes it into a
    ``data:`` URI and publishes an <audio> tag so compatible browsers can
    play it inline.  There is no autoplay, to prevent the file playing as
    soon as the browser opens.  Adapted from scipy.io.wavfile.write.

    Parameters
    ----------
    data : numpy array of int16 samples, 1-D (mono) or (nsamples, nchannels)
    rate : sample rate in frames per second (floats are truncated to int)
    """
    # Local imports keep this notebook cell self-contained.
    # `sys` was missing in the original (NameError in the byte-order check
    # below); io.BytesIO replaces the Python-2-only StringIO for byte data.
    import io
    import sys
    buf = io.BytesIO()
    buf.write(b'RIFF')
    buf.write(b'\x00\x00\x00\x00')  # placeholder, patched with the real size below
    buf.write(b'WAVE')
    # fmt chunk
    buf.write(b'fmt ')
    noc = 1 if data.ndim == 1 else data.shape[1]  # number of channels
    bits = data.dtype.itemsize * 8                # bits per sample
    sbytes = rate * (bits // 8) * noc             # byte rate
    ba = noc * (bits // 8)                        # block align
    # int(rate): struct.pack rejects floats, and callers may pass e.g. 0.5*rate
    buf.write(struct.pack('<ihHIIHH', 16, 1, noc, int(rate), int(sbytes), ba, bits))
    # data chunk
    buf.write(b'data')
    buf.write(struct.pack('<i', data.nbytes))
    # WAV data is little-endian; byteswap if the array is big-endian
    if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
        data = data.byteswap()
    buf.write(data.tobytes())  # tobytes(): tostring() was removed from numpy
    # Determine file size and place it in the correct
    # position at the start of the file.
    size = buf.tell()
    buf.seek(4)
    buf.write(struct.pack('<i', size - 8))
    val = buf.getvalue()
    # b64encode (not the removed encodestring): no embedded newlines in the URI
    src = """
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Simple Test</title>
</head>
<body>
<audio controls="controls" style="width:600px" >
<source controls src="data:audio/wav;base64,{base64}" type="audio/wav" />
Your browser does not support the audio element.
</audio>
</body>
""".format(base64=base64.b64encode(val).decode('ascii'))
    display(HTML(src))
## some constants for our audio file
rate = 44100      # sampling rate: 44.1 kHz
duration = 2.5    # clip length, in seconds
# a sine with the right amplitude to be stored in a 16-bit wav file
normedsin = lambda f, t: 2**13 * np.sin(2 * np.pi * f * t)
# int(): linspace requires an integer sample count (rate*duration is a float)
time = np.linspace(0, duration, num=int(rate * duration))
# define A as a 440 Hz sin function
la = lambda t: normedsin(440, t)
# look at it on the first 25 ms
plot(time[0:1000], la(time)[0:1000])
ampl = la(time).astype(np.int16)
# show it in an HTML5 audio player; int(): the wav header needs an integer rate
# NOTE(review): played back at half the sampling rate (an octave lower) — confirm intended
wavPlayer(ampl, int(0.5 * rate))
Conclusions: the wav player works. Let's see if we can get it to play the recorded sound.
Let's read a file containing a sound of the low E-string on the guitar.
from scipy.io import wavfile
# read the recording of the guitar's low E string; returns (sample_rate, samples)
tone = wavfile.read('files/Low_tone.wav')
tone
(8000, array([ 1, -1, 1, ..., -39, -79, -90], dtype=int16))
# plot the raw waveform (tone[1] holds the int16 samples)
plot(tone[1])
[<matplotlib.lines.Line2D at 0x9b2d62c>]
Now let's play the tone associated with this waveform.
# play the recording: tone[1] is the sample array, tone[0] the sample rate
wavPlayer(tone[1], tone[0])
Wonderful, it works like a charm! On to some frequency analysis.
# magnitude spectrum of the recording (full, unshifted FFT)
plot(abs(fft.fft(tone[1])))
[<matplotlib.lines.Line2D at 0x9da3fac>]
Sure enough, we get a spectrum with harmonics for the guitar sound we recorded. According to my tuner, the low E string should be around 82.4 Hz. Is this what we have?
sample_freq = tone[0]  # sampling rate (Hz) from wavfile.read
s = tone[1]            # raw samples
s_fft = fft.fft(s)
n = len(s)
# keep only the positive-frequency half of the spectrum;
# n // 2 (integer division): a float count/index is a TypeError in Python 3
freq_vector = linspace(0, 1, num=n // 2) * sample_freq / 2
s_fft_out = s_fft[:n // 2]
plot(freq_vector, abs(s_fft_out))
[<matplotlib.lines.Line2D at 0xa9748ec>]
You're asking, what's the max amplitude frequency? I'll tell you:
# frequency (Hz) of the strongest spectral peak
freq_vector[argmax(abs(s_fft_out))]
164.57226113015116
Interesting, it's double what I was expecting! Let's get a closer look on the lower frequencies.
# zoom in on the low-frequency end of the spectrum
plot(freq_vector, abs(s_fft_out))
xlim(0, 1000)
(0, 1000)
# report the dominant peak, then the strongest peaks in the 300-400 Hz
# and 400-500 Hz bands (band edges chosen by eye from the plot above)
print ('first max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out))])
print ('second max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out) * (freq_vector > 300) * (freq_vector < 400))])
print ('third max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out) * (freq_vector > 400) * (freq_vector < 500))])
first max is at 164.57 Hz second max is at 329.61 Hz third max is at 411.86 Hz
Here, I'm a bit lost, because I don't know what these notes correspond to. Therefore, let's build ourselves some tools for this analysis.
First, let's write a function that returns the nearest note from a given frequency according to the formula
$$P_n=P_a(\sqrt[12]{2})^{(n-a)}$$In our case, the reference pitch will be $P_a = 440 Hz$ (that's the note A4).
Explanations from the Wikipedia article.
def pitch_from_ref(n):  # 440 Hz = A4
    """Return the equal-tempered pitch (Hz) n semitones away from A4."""
    reference = 440
    return reference * 2 ** (n / 12.)
# spot-check a few semitone offsets against the A4 reference
for offset in (0, 1, 2, 3, 11, 12):
    print(pitch_from_ref(offset))
440.0 466.163761518 493.883301256 523.251130601 830.60939516 880.0
def closest_note_from_pitch(pitch, verbose=False):
    """Return the name (e.g. 'A#3') of the equal-tempered note nearest to pitch (Hz)."""
    if pitch < 10:  # below any usable pitch: no note
        return None
    # climb in whole octaves (12 semitones from A4) until we overshoot, then back off
    octave = 0
    while pitch_from_ref((octave - 4) * 12) < pitch:
        octave += 1
    octave -= 1
    # same scan, one semitone at a time, inside that octave
    note = 0
    while pitch_from_ref((octave - 4) * 12 + note) < pitch:
        note += 1
    note -= 1
    below = pitch_from_ref((octave - 4) * 12 + note)
    above = pitch_from_ref((octave - 4) * 12 + note + 1)
    names = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#']
    if verbose:
        print (octave, note, below, above)
    # snap to whichever neighbour is closer; stepping to note 12 bumps the octave
    if abs(pitch - below) < abs(pitch - above):
        return names[note % 12] + str(octave)
    return names[(note + 1) % 12] + str(octave + 1 * ((note + 1) == 12))
closest_note_from_pitch(240, verbose=True)
(3, 1, 233.08188075904496, 246.94165062806206)
'A#3'
# sanity-check a spread of pitches across several octaves
for p in [24, 80, 122, 230, 400, 450, 675, 906]:
    print(closest_note_from_pitch(p))
G-1 D#1 B2 A#3 G3 A4 E4 A5
# the reference pitch itself should map to A4
closest_note_from_pitch(440)
'A4'
As it turns out, my octave count is wrong. Instead of recoding the function to get the octave right, I'm just going to calculate this with the C4 note as a reference. This comes after I crosschecked some of the frequencies above with this Wikipedia article.
# C4 is 261.626
def pitch_from_ref(n):  # 261.626 Hz = C4
    """Return the equal-tempered pitch (Hz) n semitones away from middle C (C4)."""
    reference = 261.626
    return reference * 2 ** (n / 12.)
def closest_note_from_pitch(pitch, verbose=False):
    """Return the name (e.g. 'A4') of the equal-tempered note nearest to pitch (Hz).

    Octave numbering follows the C-based convention (C4 = middle C),
    so octaves now come out right, unlike the A-based version above.
    """
    if pitch < 27.49:  # below A0: no note
        return None
    # climb in whole octaves (12 semitones from C4) until we overshoot, then back off
    octave = 0
    while pitch_from_ref((octave - 4) * 12) < pitch:
        octave += 1
    octave -= 1
    # same scan, one semitone at a time, inside that octave
    note = 0
    while pitch_from_ref((octave - 4) * 12 + note) < pitch:
        note += 1
    note -= 1
    below = pitch_from_ref((octave - 4) * 12 + note)
    above = pitch_from_ref((octave - 4) * 12 + note + 1)
    names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    if verbose:
        print (octave, note, below, above)
    # snap to whichever neighbour is closer; stepping to note 12 bumps the octave
    if abs(pitch - below) < abs(pitch - above):
        return names[note % 12] + str(octave)
    return names[(note + 1) % 12] + str(octave + 1 * ((note + 1) == 12))
closest_note_from_pitch(440)
'A4'
Let's cross check some of the notes with their expected results.
# cross-check the classifier: each pitch should map back to its known note name
notes = ['D2', 'B1', 'B0', 'F3', 'E4', 'D5']
pitches = [73.41, 61.73, 30.86, 174.6, 329.628, 587.330]
for pitch, expected in zip(pitches, notes):
    print(pitch, closest_note_from_pitch(pitch), expected)
(73.41, 'D2', 'D2') (61.73, 'B1', 'B1') (30.86, 'B0', 'B0') (174.6, 'F3', 'F3') (329.628, 'E4', 'E4') (587.33, 'D5', 'D5')
Great, seems to work! Now back to the original analysis.
# redo the low-E peak listing now that the note-naming helper exists
plot(freq_vector, abs(s_fft_out))
xlim(0, 700)
print ('first max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out))])
print ('second max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out) * (freq_vector > 300) * (freq_vector < 400))])
print ('third max is at %.2f Hz' % freq_vector[argmax(abs(s_fft_out) * (freq_vector > 400) * (freq_vector < 500))])
first max is at 164.57 Hz second max is at 329.61 Hz third max is at 411.86 Hz
# name the notes of the three peaks found above
for pitch in [164.57, 329.61, 411.86]:
    print(closest_note_from_pitch(pitch))
E3 E4 G#4
So we do have the first and second harmonics of the base pitch but not the fundamental. And we also have a G#, which as an interval corresponds to the major third of E. Mmmh... As I don't really know how to make much more progress on this, let's see if we have the same result with the high E tone.
# repeat the whole analysis for the high E string
tone = wavfile.read('files/High_tone.wav')
plot(tone[1])
wavPlayer(tone[1], tone[0])
sample_freq = tone[0]  # sampling rate (Hz)
s = tone[1]            # raw samples
s_fft = fft.fft(s)
n = len(s)
# n // 2 (integer division): a float count/index is a TypeError in Python 3
freq_vector = linspace(0, 1, num=n // 2) * sample_freq / 2
s_fft_out = s_fft[:n // 2]
plot(freq_vector, abs(s_fft_out))
xlim(0, 5000)
(0, 5000)
In this case, we have a lot more peak frequencies in the signal. Let's see what notes we can make out! First, a zoom.
# zoomed plot, then locate the strongest peak inside each hand-picked band
figure(figsize=(10, 4))
plot(freq_vector, abs(s_fft_out))
xlim(0, 3000)
grid(True)
# band edges separating the visible spectral peaks
seps = [0, 500, 750, 1200, 1500, 1800, 2150, 2500, 3000]
spectrum = abs(s_fft_out)
for i, (lo, hi) in enumerate(zip(seps, seps[1:])):
    in_band = (freq_vector > lo) * (freq_vector < hi)
    pitch = freq_vector[argmax(spectrum * in_band)]
    print('%s-th frequency is at %.2f Hz, pitch is close to %s' % (i, pitch, closest_note_from_pitch(pitch)))
0-th frequency is at 330.07 Hz, pitch is close to E4 1-th frequency is at 659.78 Hz, pitch is close to E5 2-th frequency is at 989.73 Hz, pitch is close to B5 3-th frequency is at 1318.50 Hz, pitch is close to E6 4-th frequency is at 1649.63 Hz, pitch is close to G#6 5-th frequency is at 1980.16 Hz, pitch is close to B6 6-th frequency is at 2311.52 Hz, pitch is close to D7 7-th frequency is at 2642.41 Hz, pitch is close to E7
At this point, it seems clear that a simple note is not, in fact, due to a single frequency and its harmonics. Indeed, analyzing the low and high E-strings recorded from my guitar, I found out that they contain notes that are related to the fundamental.
I found this link discussing some of the issues I ran into. A few comments would be:
And finally: a fantastic phenomenon called missing fundamental revolves around this topic of note identification with an instrument.
I can also recall one of Vi Hart's videos where she does analyze this phenomenon of harmonics and frequencies. See link below:
from IPython.display import YouTubeVideo
# Vi Hart's video on harmonics and frequencies
YouTubeVideo("i_0DXxNeaQ0")
With the help of this example, we're going to plot a spectrogram of the high E-tone.
# re-read the high-E recording, this time unpacking rate and samples separately
(rate, tone) = wavfile.read('files/High_tone.wav') #rate is the sampling frequency
samples = len(tone)
samples
68159
# duration of the clip in seconds
samples / rate
8.519875
NFFT = 1024 # the length of the windowing segments
# Pxx is the segments x freqs array of instantaneous power, freqs is
# the frequency vector, bins are the centers of the time bins in which
# the power is computed, and im is the matplotlib.image.AxesImage
# instance
figure(figsize=(10, 8))
ax1 = subplot(211)
plot(linspace(0, 1, samples) * samples / rate, tone)
xlabel('time (seconds)')
subplot(212, sharex=ax1)
# Fs=rate: the sampling frequency was unpacked as `rate` above; the
# original passed `Fs`, a name never defined in this notebook (NameError)
Pxx, freqs, bins, im = specgram(tone, NFFT=NFFT, Fs=rate, noverlap=900,
                                cmap=cm.gist_heat)
xlabel('time (seconds)')
<matplotlib.text.Text at 0xb10466c>
Observations: