import os
import json
import speechpy
import numpy as np
import IPython.display as ipd
import scipy.io.wavfile as wav
import xml.etree.ElementTree as ET
from urllib.request import urlopen
cwd = os.getcwd()
xml_path = "LIFECLEF2015_BIRDAMAZON_XC_WAV_RN15568.xml"
xml_dict = {}
tree = ET.parse(xml_path)
root = tree.getroot()
for child in root:
    xml_dict[child.tag] = root.find(child.tag).text
for key in xml_dict:
    print(key, ":", xml_dict[key])
MediaId : 15568
FileName : LIFECLEF2015_BIRDAMAZON_XC_WAV_RN15568.wav
ClassId : ssmptq
Date : 2000-06-01
Time : ?
Locality : Humedal de Tibanica, Bosa, Bogotá D.C.
Latitude : 4.6030444444
Longitude : -74.2044555556
Elevation : 2546
Author : Paula Caycedo Rosales (Colección de Sonidos Ambientales - Instituto Humboldt)
AuthorID : XMFDPACYJN
Content : song
Comments : BSA 7557To obtain a wav file of the original recording, please contact csa@humboldt.org.coplayback-used:no
Quality : 1
Year : BirdCLEF2015
BackgroundSpecies : None
Order : Passeriformes
Family : Troglodytidae
Genus : Cistothorus
Species : apolinari
Sub-species : apolinari
VernacularNames : Apolinar's Wren
audio_path = "LIFECLEF2015_BIRDAMAZON_XC_WAV_RN15568.wav"
ipd.Audio(audio_path)
api_key = os.environ["PASS_GOOGLE"]
latitude = xml_dict['Latitude'].strip()
longitude = xml_dict['Longitude'].strip()
url = "https://maps.googleapis.com/maps/api/geocode/json?latlng=" + latitude + "," + longitude + "&key=" + api_key
jsonResponse=json.load(urlopen(url))
jsonRes= jsonResponse['results']
for result in jsonRes:
    components = result['address_components']
    for component in components:
        country = component['long_name']
print(country)
Colombia
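# A more targeted lookup (a sketch, not part of the original pipeline): rather than
# keeping whatever component the loops end on, filter address_components by the
# "country" type that the Geocoding API attaches to each component.
country = None
for result in jsonResponse['results']:
    for component in result['address_components']:
        if 'country' in component.get('types', []):
            country = component['long_name']
            break
    if country:
        break
print(country)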
new_audio_noise = str(audio_path).replace('.wav','_noise.wav')
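# The noise profile "speech.noise-profile" used below is assumed to exist already;
# one way to build it (a sketch, with the noise-only segment length chosen here as
# an assumption) is to profile the first half second of the recording with sox:
resp = os.system("sox " + audio_path + " -n trim 0 0.5 noiseprof speech.noise-profile")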
# remove noise
resp = os.system("sox " + audio_path + " " + new_audio_noise + " noisered speech.noise-profile .5")
ipd.Audio(new_audio_noise)
new_audio_silence = str(audio_path).replace('.wav','_silence.wav')
# create new file without silence
resp = os.system("sox " + new_audio_noise + " " + new_audio_silence + " silence 1 0.1 1% -1 0.1 1%")
ipd.Audio(new_audio_silence)
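# Optional check (a sketch, not in the original notebook): compare the duration in
# seconds before and after silence removal, using sox's info query.
import subprocess
for path in (audio_path, new_audio_silence):
    duration = subprocess.check_output(["sox", "--i", "-D", path]).decode().strip()
    print(path, ":", duration, "seconds")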
fs, signal = wav.read(audio_path)
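# speechpy expects a one-dimensional signal; if the WAV were stereo, wav.read would
# return a (n_samples, 2) array, so a defensive mono mix-down (an added safeguard,
# not in the original code) can be applied first.
if signal.ndim > 1:
    signal = signal.mean(axis=1)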
# mfcc features
mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs,
frame_length=0.020, frame_stride=0.01,
num_filters=40, fft_length=512,
low_frequency=0, high_frequency=None)
print("mfcc features:", np.shape(mfcc))
# mfcc(mean + variance normalized) features
mfcc_cmvn = speechpy.processing.cmvnw(mfcc, win_size=301,
variance_normalization=True)
print("mfcc(mean + variance normalized) features:", np.shape(mfcc_cmvn))
# mfcc feature cube
mfcc_feature_cube = speechpy.feature.extract_derivative_feature(mfcc)
print("mfcc feature cube:", np.shape(mfcc_feature_cube))
mfcc features: (961, 13)
mfcc(mean + variance normalized) features: (961, 13)
mfcc feature cube: (961, 13, 3)
import numpy as np
import collections
from sklearn.svm import SVC
from sklearn.externals import joblib  # on scikit-learn >= 0.23, use "import joblib" instead
from sklearn.model_selection import train_test_split as database_split
def classifier(feat_train, feat_test, lab_train, lab_test):
    clf = SVC(C=2**0, cache_size=300, class_weight=None, coef0=0.0,
              decision_function_shape='ovr',  # None is no longer accepted by newer scikit-learn
              degree=3, gamma='auto', kernel='rbf', probability=False,
              random_state=None, shrinking=True,
              tol=0.001, verbose=False)
    clf.fit(feat_train, lab_train)
    score_test = clf.score(feat_test, lab_test)
    return score_test
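# Toy sanity check (random data, so accuracy is only around chance level): the
# helper returns the test-set accuracy of the RBF-kernel SVC.
X_demo = np.random.rand(60, 13)
y_demo = np.random.randint(0, 2, 60)
print(classifier(X_demo[:40], X_demo[40:], y_demo[:40], y_demo[40:]))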
# Extract global features
def globalFeatures(features):
    return features.mean(0)
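# Quick illustration (toy numbers, not from the dataset): averaging over the frame
# axis turns a (n_frames, 13) MFCC matrix into one 13-dimensional vector per recording.
demo_frames = np.random.rand(961, 13)
print(globalFeatures(demo_frames).shape)   # (13,)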
# Classifier and model evaluation
def main(dataset, labels):
    # Split data into two sets (training set, test set)
    feat_train, feat_test, lab_train, lab_test = database_split(dataset,
                                                                labels, test_size=0.3)
    print("Train set shape:", np.shape(feat_train))
    print("Test set shape:", np.shape(feat_test))
    counter = collections.Counter(lab_train)
    counter = dict(counter)
    print("Distribution labels (train set):", counter)
    counter = collections.Counter(lab_test)
    counter = dict(counter)
    print("Distribution labels (test set):", counter)
    score = classifier(feat_train, feat_test,
                       lab_train, lab_test)
    print("Done!", "Score:", score)
data = joblib.load("mfcc_features.pkl.compressed")
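# "mfcc_features.pkl.compressed" is assumed to map one key per recording to its
# three feature matrices and class label, roughly as sketched below (the actual
# preprocessing script that builds it is not shown in this notebook):
# features = {"recording_id": {"mfcc": mfcc, "mfcc_cmvn": mfcc_cmvn,
#                              "mfcc_feature_cube": mfcc_feature_cube, "label": 2}}
# joblib.dump(features, "mfcc_features.pkl.compressed", compress=True)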
mfcc_data = list()
mfcc_cmvn_data = list()
mfcc_feature_cube_data = list()
labels_data = list()
for key in data:
    mfcc = data[key]["mfcc"]
    mfcc_cmvn = data[key]["mfcc_cmvn"]
    mfcc_feature_cube = data[key]["mfcc_feature_cube"]
    mfcc_feature_cube = mfcc_feature_cube.reshape((len(mfcc_feature_cube), 39))
    label = data[key]["label"]
    mfcc_global = globalFeatures(mfcc)
    mfcc_cmvn_global = globalFeatures(mfcc_cmvn)
    mfcc_feature_global = globalFeatures(mfcc_feature_cube)
    mfcc_data.append(mfcc_global)
    mfcc_cmvn_data.append(mfcc_cmvn_global)
    mfcc_feature_cube_data.append(mfcc_feature_global)
    labels_data.append(label)
# Process for mfcc
main(mfcc_data, labels_data)
print("\n")
# Process for mfcc (mean + variance normalized)
main(mfcc_cmvn_data, labels_data)
print("\n")
# Process for mfcc (cube)
main(mfcc_feature_cube_data, labels_data)
Train set shape: (245, 13)
Test set shape: (105, 13)
Distribution labels (train set): {5: 44, 4: 53, 2: 65, 3: 83}
Distribution labels (test set): {5: 21, 2: 24, 3: 39, 4: 21}
Done! Score: 0.819047619047619

Train set shape: (245, 13)
Test set shape: (105, 13)
Distribution labels (train set): {3: 84, 2: 55, 4: 57, 5: 49}
Distribution labels (test set): {3: 38, 2: 34, 4: 17, 5: 16}
Done! Score: 0.3619047619047619

Train set shape: (245, 39)
Test set shape: (105, 39)
Distribution labels (train set): {2: 60, 4: 52, 3: 86, 5: 47}
Distribution labels (test set): {3: 36, 2: 29, 5: 18, 4: 22}
Done! Score: 0.8095238095238095
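# The label distributions printed above vary between the three runs because each
# call to main() draws a fresh random split. A stratified split (a sketch, not what
# produced these numbers) would keep class proportions comparable across runs:
# feat_train, feat_test, lab_train, lab_test = database_split(
#     dataset, labels, test_size=0.3, stratify=labels)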
import numpy as np
import collections
from sklearn.svm import SVC
from sklearn.externals import joblib  # on scikit-learn >= 0.23, use "import joblib" instead
from scipy.spatial.distance import cdist
from sklearn.cluster import MiniBatchKMeans
from sklearn.model_selection import train_test_split
def normalize(data):
    std_dev = np.std(data, axis=0)
    zero_std_mask = std_dev == 0
    if zero_std_mask.any():
        std_dev[zero_std_mask] = 1.0
    result = data / std_dev
    return result, std_dev
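# Quick check (toy data): dividing by the per-dimension standard deviation gives
# each feature dimension unit spread, while constant dimensions are left untouched.
toy = np.random.rand(100, 3) * np.array([1.0, 5.0, 10.0])
scaled, sd = normalize(toy)
print(np.std(scaled, axis=0))   # roughly [1. 1. 1.]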
def k_means(data, k_guess):
    batch_size = 100
    mbk = MiniBatchKMeans(init='k-means++', n_clusters=k_guess,
                          batch_size=batch_size, n_init=10,
                          max_no_improvement=10, verbose=0)
    codebook = mbk.fit(data)
    return codebook.cluster_centers_
def histogram(std_dev, features, codebook, index):
    histogram_ = [0] * index
    distance = cdist(features, codebook, 'euclidean')
    short = ((np.argsort(distance)).transpose()[0]).tolist()
    counter = dict(collections.Counter(short))
    for key in counter:
        histogram_[key] = int(counter[key])
    return histogram_
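# Equivalence check on toy data (a sketch, not from the original notebook): picking
# the first column of the argsort above selects, per frame, the nearest codeword,
# which matches an argmin along the cluster axis.
toy_feats = np.random.rand(20, 13)
toy_codebook = np.random.rand(5, 13)
d = cdist(toy_feats, toy_codebook, 'euclidean')
assert (np.argsort(d).transpose()[0] == np.argmin(d, axis=1)).all()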
def bagOfWords(features, n_clusters):
    features, std_dev = normalize(features)
    codebook = k_means(features, n_clusters)
    histogram_ = histogram(std_dev, features, codebook, n_clusters)
    return histogram_
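# Toy usage (a sketch): a (n_frames, 13) matrix is summarized as a histogram of
# codeword counts, so the entries sum to the number of frames.
toy_frames = np.random.rand(300, 13)
hist = bagOfWords(toy_frames, 10)
print(len(hist), sum(hist))   # 10 300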
mfcc_data = list()
mfcc_cmvn_data = list()
mfcc_feature_cube_data = list()
labels_data = list()
n_clusters = 100
for key in data:
    mfcc = data[key]["mfcc"]
    mfcc_cmvn = data[key]["mfcc_cmvn"]
    mfcc_feature_cube = data[key]["mfcc_feature_cube"]
    mfcc_feature_cube = mfcc_feature_cube.reshape((len(mfcc_feature_cube), 39))
    label = data[key]["label"]
    # n_clusters = int(len(mfcc) / 2)  # per-recording alternative, unused (n_clusters = 100 is used throughout)
    mfcc_histogram = bagOfWords(mfcc, n_clusters)
    mfcc_cmvn_histogram = bagOfWords(mfcc_cmvn, n_clusters)
    mfcc_feature_cube_histogram = bagOfWords(mfcc_feature_cube, n_clusters)
    mfcc_data.append(mfcc_histogram)
    mfcc_cmvn_data.append(mfcc_cmvn_histogram)
    mfcc_feature_cube_data.append(mfcc_feature_cube_histogram)
    labels_data.append(label)
# process for mfcc
main(mfcc_data, labels_data)
print("\n")
# Process for mfcc (mean + variance normalized)
main(mfcc_cmvn_data, labels_data)
print("\n")
# Process for mfcc (cube)
main(mfcc_feature_cube_data, labels_data)
Train set shape: (245, 100)
Test set shape: (105, 100)
Distribution labels (train set): {3: 88, 4: 55, 2: 60, 5: 42}
Distribution labels (test set): {3: 34, 2: 29, 5: 23, 4: 19}
Done! Score: 0.3238095238095238

Train set shape: (245, 100)
Test set shape: (105, 100)
Distribution labels (train set): {3: 85, 2: 64, 5: 46, 4: 50}
Distribution labels (test set): {3: 37, 5: 19, 2: 25, 4: 24}
Done! Score: 0.3523809523809524

Train set shape: (245, 100)
Test set shape: (105, 100)
Distribution labels (train set): {4: 56, 2: 61, 3: 83, 5: 45}
Distribution labels (test set): {3: 39, 2: 28, 5: 20, 4: 18}
Done! Score: 0.37142857142857144