For the auto-encoder learning to converge we should rescale beforehand. As the transformation preserves energy, there is no need to rescale again.
scale: scaling (None, minmax, std).
Nvectors: number of feature vectors per song.
svm_type: C-SVM (C) or nu-SVM (nu).
kernel: C-SVM kernel (linear, rbf).
C: penalty parameter C of the error term.
nu: an upper bound on the fraction of training errors and a lower bound of the fraction of support vectors.
majority_voting: when True, each of the 2*Nvectors feature vectors votes for one label and the accuracy is computed on the classification of the whole clips; when False, the accuracy is computed on the classification of the feature vectors.
test_size: proportion of testing data for cross-validation.
Ncv: number of cross-validation runs, in multiples of 10.
dataset_classification: the dataset to use for classification (X, Z). It allows to compare with the baseline, i.e. spectrograms.
Ngenres, Nclips, Nframes: a way to reduce the size of the dataset.
folder: relative path to HDF5 files.
filename_*
: name of the HDF5 file.if 'p' in globals().keys():
    # Hyper-parameters passed by the experiment runner.
    # NOTE(review): `p` is presumably a dict injected into the notebook
    # namespace by the runner -- confirm against the experiment driver.
    for key, value in p.items():
        globals()[key] = value
else:
    # Default hyper-parameters used when the notebook runs standalone.
    scale = 'minmax'  # feature scaling: None, 'minmax' or 'std'
    Nvectors = 6  # number of feature vectors per song
    svm_type = 'C'  # 'C' (C-SVM) or 'nu' (nu-SVM)
    kernel = 'linear'  # C-SVM kernel: 'linear' or 'rbf'
    C = 1  # penalty parameter C of the error term
    nu = 0.5  # nu-SVM bound on training errors / support vectors
    majority_voting = True  # vote per clip instead of scoring single vectors
    test_size = 0.1  # proportion of testing data for cross-validation
    Ncv = 20  # number of cross-validation runs, in multiples of 10
    dataset_classification = 'Z'  # dataset to classify (allows baseline comparison)
    Ngenres, Nclips, Nframes = 10, 100, 644  # reduce the size of the dataset
    folder = 'data'  # relative path to HDF5 files
    filename_features = 'features.hdf5'  # name of the HDF5 file
import os, time
import numpy as np
import sklearn
from sklearn import svm
from sklearn import cross_validation
from sklearn import metrics
from sklearn import preprocessing
import h5py
import matplotlib.pyplot as plt
%matplotlib inline
# Record package versions for reproducibility of the experiment.
print('Software versions:')
for package in [np, sklearn]:
    print(' {}: {}'.format(package.__name__, package.__version__))

# Wall-clock reference used to report the overall runtime at the very end.
toverall = time.time()
def datinfo(X, name='Dataset'):
    """Print the size, dimensionality and shape of dataset *X*."""
    template = ('{}:\n'
                ' size: N={:,} x n={} -> {:,} floats\n'
                ' dim: {:,} features per clip\n'
                ' shape: {}')
    shape = X.shape
    # N = number of samples, n = feature dimension (last axis).
    print(template.format(name, np.prod(shape[:-1]), shape[-1],
                          np.prod(shape), np.prod(shape[2:]), shape))
# Load the chosen dataset and the genre labels from the HDF5 feature archive.
filename = os.path.join(folder, filename_features)
with h5py.File(filename, 'r') as audio:

    # Display HDF5 attributes.
    print('Attributes:')
    for attr in audio.attrs:
        print(' {} = {}'.format(attr, audio.attrs[attr]))
    labels = audio.attrs['labels']

    # Show datasets, their dimensionality and data type.
    print('Datasets:')
    for dname, dset in audio.items():
        # str(): tuples reject a width format spec ('{:24}') on Python >= 3.4.
        print(' {:2}: {:24}, {}'.format(dname, str(dset.shape), dset.dtype))

    # Choose dataset: Xa, Xs, Z.
    X = audio.get(dataset_classification)

    # Full dataset.
    n = X.shape[-1]
    datinfo(X, 'Full dataset')
    print(type(X))

    # Load data into memory as a standard NumPy array.
    # (Slicing an h5py dataset returns an in-memory ndarray copy.)
    X = X[:Ngenres,:Nclips,:Nframes,...]
    datinfo(X, 'Reduced dataset')
    print(type(X))

    # Resize in place without memory loading via hyperslab.
    # Require chunked datasets.
    #X.resize((Ngenres, Nclips, Nframes, 2, n))
Yet another (hopefully intelligent) dimensionality reduction:
# Flatten consecutive frames in time: merge the pair axis (size 2) into the
# time axis, going from (Ngenres, Nclips, Nframes, 2, n) to 2*Nframes frames.
X.resize((Ngenres, Nclips, 2*Nframes, n))
#assert np.all(X1[1,4,3,:] == X[1,4,1,1,:])
datinfo(X, 'Flattened frames')
# Parameters.
# Frames aggregated into each feature vector; the extra 0.5 in the divisor
# leaves room for the half-group shift of the overlapped vectors built below.
Nframes_per_vector = int(np.floor(2 * Nframes / (Nvectors+0.5)))
def aggregate(X, absrect=True):
    """Sum groups of Nframes_per_vector frames into Nvectors feature vectors.

    With absrect=True (default), frames are absolute-value rectified before
    the summation.
    """
    # Truncate: drop trailing frames that do not fill a complete group.
    X = X[:, :, :Nvectors * Nframes_per_vector, :]
    # Group: one set of consecutive frames per feature vector.
    X = X.reshape((Ngenres, Nclips, Nvectors, Nframes_per_vector, n))
    datinfo(X, 'Truncated and grouped')
    # Aggregate each group along the frame axis.
    if absrect:
        X = np.abs(X)
    return X.sum(axis=3)
# Feature vectors: for each clip, Nvectors aligned vectors plus Nvectors
# vectors computed from frames shifted by half a group (50% overlap).
Y = np.empty((Ngenres, Nclips, Nvectors, 2, n))
Y[:,:,:,0,:] = aggregate(X)  # Aligned.
# Integer division (//): a float slice index raises a TypeError on Python 3.
Y[:,:,:,1,:] = aggregate(X[:,:,Nframes_per_vector//2:,:])  # Overlapped.
datinfo(Y, 'Feature vectors')

# Free memory.
del X
Visualize all feature vectors of a given clip.
Observations:
# Plot the 2*Nvectors feature vectors of one chosen clip.
genre, clip = 0, 7
fig = plt.figure(figsize=(8,5))
fig.suptitle('12 feature vectors each covering 5 seconds with 50% overlap')
for vector in range(Nvectors):
    for k in range(2):
        # Subplot indices are 1-based: vector*2+k alone would pass 0 to
        # add_subplot on the first iteration and raise a ValueError.
        i = vector*2 + k + 1
        ax = fig.add_subplot(4, 3, i)
        ax.plot(Y[genre,clip,vector,k,:])
        ax.set_xlim((0, n))
        # Hide the ticks: only the curve shapes matter here.
        ax.set_xticks([])
        ax.set_yticks([])
Observations:
def prepdata(a, b, c, test_size=None, scale=None, rand=False):
    """Prepare data for classification.

    Reshape the global feature tensor Y into an (a*b, c) design matrix,
    optionally rescale each feature, build the genre labels and optionally
    split into training and testing sets.

    Parameters
    ----------
    a, b, c : dimensions of the design matrix (a*b samples, c features).
    test_size : if not None, proportion of the data held out for testing.
    scale : None, 'std' (zero mean, unit variance) or 'minmax' ([0,1] range).
    rand : if True, replace the labels by random ones (sanity check).
    """
    # Squeeze dataset to a 2D array.
    data = Y.reshape((a*b), c)
    if c == n:
        assert np.all(data[31,:] == Y[0,2,3,1,:])
    elif c == Nvectors*2*n:
        assert np.all(data[Nclips+2,:] == Y[1,2,:,:,:].reshape(-1))

    # Independently rescale each feature.
    # To be put in an sklearn Pipeline to avoid transductive learning.
    # '==' not 'is': identity comparison of strings only works by accident
    # through small-string interning.
    if scale == 'std':
        # Features have zero mean and unit standard deviation.
        data = preprocessing.scale(data, axis=0)
    elif scale == 'minmax':
        # Features in [0,1]. (A constant feature would divide by zero here.)
        data -= np.min(data, axis=0)
        data /= np.max(data, axis=0)
        #print(np.min(data, axis=0))
        #print(np.max(data, axis=0))

    # Labels: genre index repeated for every sample of the genre.
    target = np.empty((a, b), dtype=np.uint8)
    for genre in range(Ngenres):
        target[genre,:] = genre
    target.resize(data.shape[0])
    print('{} genres: {}'.format(Ngenres, ', '.join(labels[:Ngenres])))

    # Be sure that classification with random labels is no better than random.
    if rand:
        target = np.floor(np.random.uniform(0, Ngenres, target.shape))
    print('Balance: {} {}'.format(np.sum(target == 0), np.sum(target == 1)))

    # Training and testing sets.
    if test_size is not None:
        X_train, X_test, y_train, y_test = cross_validation.train_test_split(
            data, target, test_size=test_size)  # random_state=1
        print('Training data: {}, {}'.format(X_train.shape, X_train.dtype))
        print('Testing data: {}, {}'.format(X_test.shape, X_test.dtype))
        print('Training labels: {}, {}'.format(y_train.shape, y_train.dtype))
        print('Testing labels: {}, {}'.format(y_test.shape, y_test.dtype))
        return X_train, X_test, y_train, y_test
    else:
        print('Data: {}, {}'.format(data.shape, data.dtype))
        print('Labels: {}, {}'.format(target.shape, target.dtype))
        return data, target
Observations:
Open questions:
# Instantiate a classifier.
# '==' not 'is': identity comparison of strings is unreliable.
if svm_type == 'C':
    clf_svm = svm.SVC(kernel=kernel, C=C)
elif svm_type == 'nu':
    clf_svm = svm.NuSVC(kernel=kernel, nu=nu)
#clf_svm = svm.LinearSVC(C=1)
# Try the single feature vector classifier (linear SVM).
if True:
    # Split the data: one sample per feature vector.
    X_train, X_test, y_train, y_test = prepdata(
        Ngenres, Nclips * Nvectors * 2, n,
        test_size=0.4, scale=scale, rand=False)
    # Train on the training vectors.
    clf_svm.fit(X_train, y_train)
    # Evaluate on the held-out vectors.
    y_predict = clf_svm.predict(X_test)
    acc = metrics.accuracy_score(y_test, y_predict)
    print('Accuracy: {:.1f} %'.format(acc * 100))
Final dimensionality reduction step:
Observations:
# Define and instantiate our custom classifier.
class svm_vote(sklearn.base.BaseEstimator):
    """Whole-clip classifier: the embedded SVM classifies each of the
    2*Nvectors feature vectors of a clip, and the clip label is decided by
    majority voting over these per-vector predictions.
    """

    def __init__(self, svm):
        # Embedded scikit-learn SVM classifier (e.g. SVC or NuSVC).
        self.svm = svm

    def _vectors(self, X, y=None):
        """Rearrange data in feature vectors for the embedded SVM."""
        X = X.reshape(X.shape[0]*Nvectors*2, n)
        if y is not None:
            # Each of the clip's 2*Nvectors vectors inherits the clip label.
            y = np.repeat(y, Nvectors*2, axis=0)
            assert y.shape[0] == X.shape[0]
            return (X, y)
        else:
            return (X,)

    def fit(self, X, y):
        """Fit the embedded SVC and return self (scikit-learn convention)."""
        self.svm.fit(*self._vectors(X, y))
        # Returning self allows chaining and matches the estimator API.
        return self

    def svm_score(self, X, y):
        """Return SVC accuracy on feature vectors."""
        return self.svm.score(*self._vectors(X, y))

    def svm_predict(self, X):
        """Return SVC predictions on feature vectors, one row per clip."""
        y = self.svm.predict(*self._vectors(X))
        y.resize(X.shape[0], Nvectors*2)
        return y

    def confidence(self, X):
        """Return the number of votes for each class."""
        def bincount(x):
            return np.bincount(x, minlength=Ngenres)
        y = np.apply_along_axis(bincount, 1, self.svm_predict(X))
        # Every clip must cast exactly 2*Nvectors votes.
        assert np.all(np.sum(y, axis=1) == Nvectors*2)
        return y

    def predict(self, X):
        """Return majority-vote predictions on whole clips."""
        y = self.svm_predict(X)
        return np.apply_along_axis(lambda x: np.bincount(x).argmax(), 1, y)
        #return np.zeros(X.shape[0]) # Pretty bad prediction.

    def score(self, X, y):
        """Return the accuracy score. Used by sklearn cross-validation."""
        return metrics.accuracy_score(y, self.predict(X))
clf_svm_vote = svm_vote(clf_svm)

# Try the whole clip classifier (linear SVM and majority voting).
if True:
    # Split the data: one sample per clip, all its vectors concatenated.
    X_train, X_test, y_train, y_test = prepdata(
        Ngenres, Nclips, Nvectors * 2 * n,
        test_size=0.4, scale=scale, rand=False)
    # Train the embedded SVM on individual feature vectors.
    clf_svm_vote.fit(X_train, y_train)

    # Accuracy of the embedded SVM on single feature vectors.
    acc = clf_svm_vote.svm_score(X_test, y_test)
    print('Feature vectors accuracy: {:.1f} %'.format(acc*100))

    # Observe individual votes.
    #print(clf_svm_vote.svm_predict(X_test))
    #print(clf_svm_vote.confidence(X_test))

    # Accuracy after majority voting over whole clips.
    y_predict = clf_svm_vote.predict(X_test)
    acc = metrics.accuracy_score(y_test, y_predict)
    # score() must agree with the manually computed accuracy.
    assert acc == clf_svm_vote.score(X_test, y_test)
    print('Clips accuracy: {:.1f} %'.format(acc*100))
Observations:
Results:
Ideas:
# Select the classifier and the data layout according to the voting strategy:
# one sample per clip (majority voting) or one sample per feature vector.
if majority_voting:
    clf, b, c = clf_svm_vote, Nclips, Nvectors * 2 * n
else:
    clf, b, c = clf_svm, Nclips * Nvectors * 2, n

data, target = prepdata(Ngenres, b, c, scale=scale)
print('Ratio: {} training, {} testing'.format(
    (1 - test_size) * target.size, test_size * target.size))
# Cross-validate the selected classifier: Ncv runs of 10 shuffled splits.
tstart = time.time()
scores = np.empty(shape=(Ncv, 10))

# Cross-validation iterators.
cv = cross_validation.ShuffleSplit(target.size, n_iter=10, test_size=test_size)
#cv = cross_validation.StratifiedShuffleSplit(target.size, n_iter=10, test_size=test_size)
#cv = cross_validation.KFold(target.size, shuffle=True, n_folds=10)
#cv = cross_validation.StratifiedKFold(target, shuffle=True, n_folds=10)

for i in range(Ncv):
    scores[i,:] = cross_validation.cross_val_score(
        clf, data, target, cv=cv, n_jobs=1)
    # Performance: accuracy.
    mean, std = scores[i,:].mean()*100, scores[i,:].std()*100
    # Builtin int: the np.int alias was deprecated in NumPy 1.20 and removed in 1.24.
    print(' {:3.0f} (+/-{:4.1f}) <- {}'.format(mean, std, (scores[i,:]*100).astype(int)))

accuracy, accuracy_std = scores.mean()*100, scores.std()*100
print('Accuracy: {:.1f} (+/- {:.2f})'.format(accuracy, accuracy_std))

meantime = (time.time() - tstart) / Ncv
print('Mean time ({} cv): {:.2f} seconds'.format(Ncv, meantime))
print('Overall time: {:.2f} seconds'.format(time.time() - toverall))