import IPython
from scipy.io import wavfile
from scipy.io.wavfile import write
import matplotlib.pyplot as plt
import numpy as np
# Load a speech recording, show its waveform, and report its length.
sampling_rate, samples = wavfile.read('speech.wav')
plt.plot(samples)
IPython.display.Audio(samples, rate=sampling_rate)
print(f"len s: {len(samples)}, rate: {sampling_rate}")
# Duration in seconds = number of samples / samples per second.
print(f"The duration of the audio file is {len(samples) / sampling_rate:.2f} seconds.")
Output: len s: 29355, rate: 16000 — The duration of the audio file is 1.83 seconds.
# One full cycle of a unit-amplitude sine wave, sampled at 16 000 points.
t = np.linspace(0., 1., 16000)
sine_cycle = np.sin(2.0 * np.pi * t)
plt.plot(sine_cycle)
[<matplotlib.lines.Line2D at 0x13c818b8f40>]
Integer PCM (Pulse Code Modulation): In this encoding, audio samples are represented as signed or unsigned integers. The bit depth determines the range and precision of these integers. For example, 16-bit PCM audio uses 16-bit signed integers to represent each sample. The range of values is determined by the bit depth; for 16-bit audio, it typically ranges from -32,768 to 32,767 for signed integers.
In integer PCM (Pulse Code Modulation) audio representation, the numerical values represent the amplitude of the audio waveform at a specific point in time. These values indicate how much the air pressure (in the case of sound waves) or voltage (in the case of analog audio) deviates from a reference point (usually zero) at that particular moment.
# Generate one second of a 50 Hz sine tone at full 16-bit amplitude,
# plot it, and build a playback widget.
samplerate = 16000; fs = 50
t = np.linspace(0., 1., samplerate)
# Peak value representable by signed 16-bit PCM (32767).
amplitude = np.iinfo(np.int16).max
data = amplitude * np.sin(2. * np.pi * fs * t)
fig = plt.figure(figsize=(20, 2))
plt.plot(data)
# BUG FIX: the original passed `data1`, which is not defined until a later
# cell (NameError when run in order); play the tone generated here instead.
IPython.display.Audio(data, rate=16000)
The range of audible frequencies for the average human ear typically spans from approximately 20 Hertz (Hz) to 20,000 Hertz (20 kHz). This range can vary from person to person, and it often decreases with age. Here's a breakdown of the audible frequency range:
Low Frequencies (Bass): The lowest frequencies humans can typically perceive are around 20 Hz. These low-frequency sounds are often associated with deep bass tones in music and the rumble of thunder.
Midrange Frequencies: Most of the sounds in our everyday environment fall within the midrange frequencies, which extend from around 100 Hz to 5,000 Hz. This range encompasses speech, music, and a wide variety of everyday sounds.
High Frequencies (Treble): The upper limit of human hearing, around 20,000 Hz (20 kHz), represents the highest frequencies that most people can perceive. These high frequencies are associated with the shimmer of cymbals, the chirping of birds, and other high-pitched sounds.
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
samplerate = 16000
duration = 5 # Duration of the audio clip in seconds
# Create a time vector
t = np.linspace(0., duration, int(samplerate * duration), endpoint=False)
# Define the frequency sweep parameters
start_freq = 20 # Starting frequency in Hz
end_freq = 20000 # Ending frequency in Hz
# Instantaneous frequency, swept logarithmically from start_freq to end_freq.
inst_freq = np.logspace(np.log10(start_freq), np.log10(end_freq), len(t))
# BUG FIX: the original computed sin(2*pi*f(t)*t), but the instantaneous
# frequency of that signal is f(t) + t*f'(t), which overshoots end_freq and
# sweeps far past the intended range. The correct phase of a swept sine is
# the time integral of the frequency, approximated here by a cumulative sum.
# NOTE(review): end_freq (20 kHz) exceeds the Nyquist limit samplerate/2
# (8 kHz), so the top of the sweep will still alias; raise samplerate to
# 44100 to hear the full range.
phase = 2. * np.pi * np.cumsum(inst_freq) / samplerate
data = np.sin(phase)
# Normalize the signal to the 16-bit PCM peak value
amplitude = np.iinfo(np.int16).max
data = amplitude * data / np.max(np.abs(data))
# Plot the waveform
fig = plt.figure(figsize=(10, 4))
plt.plot(t, data)
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.title('Frequency Sweep')
plt.grid(True)
# Create an audio clip from the generated signal and display/play it
ipd.display(ipd.Audio(data, rate=samplerate))
# A quieter tone: 60 Hz at one fifth of the 16-bit peak amplitude.
samplerate, fs = 16000, 60
t = np.linspace(0., 1., samplerate)
peak = np.iinfo(np.int16).max
data3 = peak / 5 * np.sin(2. * np.pi * fs * t)
fig = plt.figure(figsize=(20, 2))
plt.plot(data3)
IPython.display.Audio(data3, rate=16000)
# Build two full-scale tones (50 Hz and 200 Hz), listen to each, then
# average them into a two-tone mix.
samplerate = 16000
fs1, fs2 = 50, 200
t = np.linspace(0., 1., samplerate)
amplitude = np.iinfo(np.int16).max
data1 = amplitude * np.sin(2. * np.pi * fs1 * t)
data2 = amplitude * np.sin(2. * np.pi * fs2 * t)

def _show_and_play(signal, n_view=1000):
    """Plot the first n_view samples of signal and build a playback widget."""
    plt.figure(figsize=(20, 2))
    plt.plot(signal[:n_view])
    return IPython.display.Audio(signal, rate=16000)

_show_and_play(data1)
_show_and_play(data2)
# Averaging (rather than summing) keeps the mix inside the int16 range.
data = (data1 + data2) / 2
_show_and_play(data)
import matplotlib.pyplot as plt
# Compare the two tones, their raw sum, and the averaged mix side by side.
# x-positions (sample indices) of the red reference lines on every plot.
marker_positions = (80, 140, 420)
views = (
    data1[:500],              # 50 Hz tone alone
    data2[:500],              # 200 Hz tone alone
    data1[:500] + data2[:500],  # raw sum (double amplitude)
    data[:500],               # averaged mix
)
for view in views:
    plt.figure(figsize=(20, 2))
    plt.plot(view)
    for x_pos in marker_positions:
        plt.axvline(x_pos, color='r')
    plt.grid(True)  # Add a grid
plt.show()  # Display all plots
# Compute and plot the magnitude spectrum of the mixed two-tone signal.
# Import necessary libraries
from scipy.fft import fft, fftfreq
import matplotlib.pyplot as plt
# Number of sample points.
# BUG FIX: the original hard-coded N = 1600 while `data` has 16000 samples,
# which mis-scaled the magnitudes by 10x (the 2/N normalization) and silently
# truncated the plotted spectrum to the first 800 FFT bins.
N = len(data)
# Sampling period (inverse of the 16 kHz sampling frequency)
T = 1.0 / 16000
# Calculate the FFT of the signal 'data'
yf = fft(data)
# Generate the frequency axis for plotting
## The [:N//2] part is used to select only the positive frequencies
## (since the FFT result is symmetric, and the negative frequencies
## are just mirror images of the positive ones).
xf = fftfreq(N, T)[:N//2]
# Create a plot of the magnitude spectrum; with 2/N scaling, each peak's
# height reads directly as the amplitude of that sinusoidal component.
plt.plot(xf, 2.0/N * np.abs(yf[0:N//2]))
# Add grid lines to the plot
plt.grid()
# Display the plot
plt.show()
from scipy.fft import fft, fftfreq
# Synthesize a three-tone test signal and inspect its spectrum.
N = 1600           # number of sample points
T = 1.0 / 800.0    # sample spacing (800 Hz sampling rate)
x = np.linspace(0.0, N*T, N, endpoint=False)
# Components: 50 Hz (amplitude 1), 190 Hz (amplitude 0.5), 30 Hz (amplitude 4).
components = [(1.0, 50.0), (0.5, 190.0), (4.0, 30.0)]
y = sum(amp * np.sin(freq * 2.0 * np.pi * x) for amp, freq in components)
yf = fft(y)
# Keep only the positive half of the (symmetric) spectrum.
xf = fftfreq(N, T)[:N//2]
import matplotlib.pyplot as plt
plt.plot(xf, 2.0/N * np.abs(yf[:N//2]))
plt.grid()
plt.show()
from scipy.fft import fft, fftfreq
# Repeat the spectrum demo with a shorter window (600 samples instead of
# 1600); the peaks broaden because frequency resolution is 1/(N*T) Hz.
N = 600            # number of sample points
T = 1.0 / 800.0    # sample spacing
x = np.linspace(0.0, N*T, N, endpoint=False)
# Same three sinusoids as before, built as named intermediates.
tone_50 = np.sin(50.0 * 2.0 * np.pi * x)
tone_190 = 0.5 * np.sin(190.0 * 2.0 * np.pi * x)
tone_30 = 4 * np.sin(30.0 * 2.0 * np.pi * x)
y = tone_50 + tone_190 + tone_30
yf = fft(y)
xf = fftfreq(N, T)[:N//2]
import matplotlib.pyplot as plt
plt.plot(xf, 2.0 / N * np.abs(yf[0:N//2]))
plt.grid()
plt.show()
# Write one second of a 100 Hz reference tone (44.1 kHz) to a 16-bit WAV.
samplerate, fs = 44100, 100
t = np.linspace(0., 1., samplerate)
peak = np.iinfo(np.int16).max
data = peak * np.sin(2. * np.pi * fs * t)
fig = plt.figure(figsize=(20, 2))
plt.plot(data)
# wavfile.write infers 16-bit PCM from the int16 dtype of the samples.
write("example1.wav", samplerate, data.astype(np.int16))
Read more: https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
Tutorial by Class.vision