audio_data = (np.sin(200*np.arange(-30, 30, .01)) + np.sin(500*np.arange(-30, 30, .01))) audio_sample = audio_data[60:1440] # take a subsample full_image = audio_data.reshape((60, -1)) sample_image = audio_sample.reshape((60, -1)) imshow(full_image) figure() imshow(sample_image) from skimage.feature import match_template print match_template.__doc__[:340] result = match_template(full_image, sample_image) result.max() def spectrogram(data, segment_size=60): end = len(data) - len(data) % segment_size stacked = data[:end].reshape((-1, segment_size)) freq_space = np.fft.fft(stacked) real = np.abs(freq_space) # fft results are mirrored, this trims the excess trimmed = real.T[:segment_size/2, :] return trimmed spec = spectrogram(audio_data) imshow(spec) sample_spec = spectrogram(audio_sample) result = match_template(spec, sample_spec) result.max() from scipy.io import wavfile sampling_rate, audio = wavfile.read('adv_time/ep1.wav') audio = np.sum(audio, 1) #sum the channels sample = audio[10000000:10200000] # ~4.5 second subsample spec = spectrogram(audio, segment_size=512) sample_spec = spectrogram(sample, segment_size=512) imshow(sample_spec) %timeit result = match_template(spec, sample_spec) plot(result[0,:]) # plot 1 dim as a line sampling_rate downsampled = audio.reshape((-1, 8)).mean(1) downsampled_sample = sample.reshape((-1, 8)).mean(1) spec = spectrogram(downsampled, segment_size=512) sample_spec = spectrogram(downsampled_sample, segment_size=512) result = match_template(spec, sample_spec) print result.max() %timeit match_template(spec, sample_spec) plot(result[0,:]) def downsample2d(a, factor): e0 = a.shape[0] - a.shape[0] % factor e1 = a.shape[1] - a.shape[1] % factor shape = a.shape[0] / factor, a.shape[1] / factor sh = shape[0], a.shape[0]//shape[0], shape[1], a.shape[1]//shape[1] return a[:e0, :e1].reshape(sh).mean(-1).mean(1) down_spec = downsample2d(spec, 2) down_sample_spec = downsample2d(sample_spec, 2) result = match_template(down_spec, down_sample_spec) plot(result[0,:]) %timeit match_template(down_spec, down_sample_spec) import os import pyaudio def process_wavfile(filename, store): """ Open the given wavfile, downsample it, compute the spectrogram, and downsample again. Store the result in the given `store` keyed under the filename. """ name = filename.split('/')[-1].split('.')[0] sampling_rate, audio = wavfile.read(filename) downsampled = audio.reshape((-1, 16)).mean(1) spec = spectrogram(downsampled, segment_size=512) down_spec = downsample2d(spec, 2) store[name] = down_spec def acquire_audio(seconds=5): """ Acquire audio for the given duration. """ rate = 11025 chunk = 1024 p = pyaudio.PyAudio() stream = p.open(format=pyaudio.paInt16, channels=1, rate=rate, input=True, frames_per_buffer=chunk) frames = [] for _ in range(int(rate / chunk * seconds)): frames.append(stream.read(chunk)) stream.stop_stream() stream.close() p.terminate() ary = np.fromstring(b''.join(frames), dtype=np.short) ary = ary.reshape((-1, 2)).mean(1) return ary def process_acquired(ary): """ Calculate the spectrogram and downsample the given audio array. """ spec = spectrogram(ary, segment_size=512) down_spec = downsample2d(spec, 2) return down_spec store = {} for filename in os.listdir('adv_time'): process_wavfile('adv_time/' + filename, store) acquired = acquire_audio(5) processed = process_acquired(acquired) results = {} for name, signature in store.iteritems(): result = match_template(signature, processed) results[name] = result top = sorted(results.items(), key=lambda i: i[1].max(), reverse=True) for name, result in top[:3]: # print the top three matches print name, result.max() for name, result in results.iteritems(): plot(result[0, :])