Goal: Test the influence of $\lambda_e$.
Conclusion: Accuracy decreases once $\lambda_e$ exceeds $\lambda_d$ by an order of magnitude.
Observations:
# Experiment setup: one hyper-parameter is swept while the others stay fixed.
# Pname is the key of `p` overwritten by the sweep loop; Pvalues lists the
# values it takes, in order.
Pname = 'le'
Pvalues = [None, 1, 10, 100, 1e3]
# Regenerate the graph or the features at each iteration.
# When a flag is False, the corresponding notebook is run once before the sweep.
regen_graph = False
regen_features = True
# When False, the baseline is classified once and replicated for every value.
regen_baseline = False
# Parameter dictionary. Presumably read as a global by the notebooks executed
# through %run (their code is not visible here).
p = {}
# Preprocessing.
# Graph.
p['data_scaling_graph'] = 'features'
p['K'] = 10 + 1 # 5 to 10 + 1 for self-reference
p['dm'] = 'euclidean'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'
# Feature extraction.
p['m'] = 128 # 64, 128, 512
# Weights of the objective terms, paired as in the "unweighted objectives"
# computation of the result plots:
#   ls -> i(Z) = ||Z||_1,        ld -> g(Z) = ||X-DZ||_2^2,
#   le -> h(Z) = ||Z-EX||_2^2,   lg -> j(Z) = tr(Z^TLZ).
p['ls'] = 1
p['ld'] = 10
# Placeholder: overwritten by the sweep because Pname is 'le'.
p['le'] = None
p['lg'] = 100
# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5
p['majority_voting'] = False
# HDF5 data stores.
p['folder'] = 'data'
p['filename_gtzan'] = 'gtzan.hdf5'
p['filename_audio'] = 'audio.hdf5'
p['filename_graph'] = 'graph.hdf5'
# Replaced by a per-value file name inside the sweep when regen_features is True.
p['filename_features'] = 'features.hdf5'
# Dataset (10,100,644 | 5,100,149 | 2,10,644).
# Ngenres also defines the chance level (100 / Ngenres) shown in the plots.
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149
# Added white noise.
p['noise_std'] = 0
# Graph.
p['tol'] = 1e-5
# Feature extraction.
p['rtol'] = 1e-5 # 1e-3, 1e-5, 1e-7
p['N_inner'] = 500
p['N_outer'] = 50
# Classification.
p['test_size'] = 0.1
p['Ncv'] = 20
# 'Z' here; switched to 'X' for the spectrogram baseline later on.
p['dataset_classification'] = 'Z'
import numpy as np
import time
texperiment = time.time()
# Result dictionary: one list per recorded metric. The sweep appends one
# entry to each list per tested value of the hyper-parameter.
# A comprehension gives every key its own fresh list and avoids binding
# `res` to a temporary list of names first.
res = {key: [] for key in [
    'accuracy', 'accuracy_std',
    'sparsity', 'atoms_D',
    'objective_g', 'objective_h', 'objective_i', 'objective_j',
    'time_features', 'iterations_inner', 'iterations_outer',
]}
def separator(name, parameter=False):
    """Print a dashed banner announcing a stage of the experiment.

    name: title displayed between the two runs of dashes.
    parameter: when true, the title is suffixed with the name of the
        hyper-parameter under test and its current value, read from the
        globals Pname and p.
    """
    title = name
    if parameter:
        title = title + ', {} = {}'.format(Pname, p[Pname])
    rule = '-' * 20
    banner = '\n {} {} {} \n'.format(rule, title, rule)
    print(banner)
# Fair comparison when tuning parameters.
# Randomnesses: dictionary initialization, training and testing sets.
np.random.seed(1)
# Disabled runs (presumably the earlier dataset and preprocessing stages).
#%run gtzan.ipynb
#%run audio_preprocessing.ipynb
# Stages that are not regenerated for every value are run once, up front.
if not regen_graph:
    separator('Graph')
    %run audio_graph.ipynb
if not regen_features:
    separator('Features')
    %run audio_features.ipynb
# Hyper-parameter under test.
# The loop target is p[Pname] itself: each iteration writes the next value
# directly into the parameter dictionary before the notebooks are run.
for p[Pname] in Pvalues:
    if regen_graph:
        separator('Graph', True)
        %run audio_graph.ipynb
    if regen_features:
        separator('Features', True)
        # One features file per tested value, so runs do not share a file name.
        p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
        %run audio_features.ipynb
    separator('Classification', True)
    %run audio_classification.ipynb
    # Collect results.
    # Each metric is read from the global variable of the same name
    # (presumably set by the notebooks run above; confirm there).
    for key in res:
        res[key].append(globals()[key])
# Baseline, i.e. classification with spectrograms.
# The classification notebook is re-run on dataset 'X' instead of 'Z'.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax' # Todo: should be done in pre-processing.
if regen_baseline:
    # Re-classify for every tested value: one baseline entry per value.
    res['baseline'] = []
    res['baseline_std'] = []
    for p[Pname] in Pvalues:
        separator('Baseline', True)
        %run audio_classification.ipynb
        # accuracy and accuracy_std are globals, presumably set by the
        # notebook that was just run.
        res['baseline'].append(accuracy)
        res['baseline_std'].append(accuracy_std)
else:
    # Classify once and repeat the accuracy so that it lines up with Pvalues.
    separator('Baseline')
    %run audio_classification.ipynb
    res['baseline'] = len(Pvalues) * [accuracy]
    # Note: a scalar here, whereas the branch above stores a list.
    res['baseline_std'] = accuracy_std
-------------------- Graph -------------------- Data: (149000, 96), float32 Elapsed time: 172.79 seconds All self-referenced in the first column: True dist in [0.0, 1.5347918272] w in [0.00525977369398, 1.0] Ones on the diagonal: 149000 (over 149000) assert: True W in [0.0, 1.0] Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Overall time: 182.22 seconds -------------------- Features, le = None -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 2992 seconds
Inner loop: 1409 iterations g(Z) = ||X-DZ||_2^2 = 7.070601e+04 rdiff: 0.000305788624235 i(Z) = ||Z||_1 = 5.696315e+04 j(Z) = tr(Z^TLZ) = 7.903536e+03
Global objective: 1.355727e+05
Outer loop: 8 iterations Z in [-0.153496861458, 1.12698090076] Sparsity of Z: 3,703,625 non-zero entries out of 19,072,000 entries, i.e. 19.4%.
D in [-0.0444916076958, 0.888070523739] d in [0.999999582767, 1.00000035763] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 2999 seconds -------------------- Classification, le = None -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 76.2 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 65.2 % Clips accuracy: 73.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 76 (+/- 2.2) <- [76 73 77 76 71 77 74 79 78 76] 77 (+/- 1.6) <- [77 76 76 74 79 77 78 77 75 79] 78 (+/- 1.3) <- [78 77 78 78 79 80 79 76 78 75] 78 (+/- 1.5) <- [79 74 76 78 79 77 77 78 78 77] 77 (+/- 1.5) <- [77 78 78 78 79 77 78 76 77 73] 79 (+/- 1.0) <- [79 78 77 80 79 79 77 78 78 78] 77 (+/- 1.3) <- [78 76 79 77 76 75 78 75 77 78] 77 (+/- 1.3) <- [74 76 77 77 77 79 75 77 75 78] 78 (+/- 1.6) <- [76 78 75 79 77 76 78 81 79 78] 78 (+/- 1.8) <- [79 77 76 76 78 75 76 76 79 82] 78 (+/- 1.6) <- [80 79 77 75 77 77 79 75 78 76] 78 (+/- 0.9) <- [78 77 78 77 76 77 76 78 79 78] 77 (+/- 1.6) <- [79 77 77 78 77 79 77 74 77 75] 76 (+/- 1.6) <- [78 76 79 75 75 75 75 75 76 77] 78 (+/- 1.4) <- [76 78 77 77 78 81 80 78 78 76] 78 (+/- 2.0) <- [79 77 79 79 76 75 76 77 82 76] 77 (+/- 1.7) <- [75 78 78 75 78 75 76 79 79 75] 77 (+/- 1.3) <- [77 79 76 78 77 78 75 76 79 77] 78 (+/- 1.4) <- [77 79 77 77 77 78 80 76 76 75] 77 (+/- 2.1) <- [78 78 77 73 74 76 80 74 76 76] Accuracy: 77.5 (+/- 1.70) Mean time (20 cv): 21.41 seconds Overall time: 432.10 seconds -------------------- Features, le = 1 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 
644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 2597 seconds
Inner loop: 1686 iterations g(Z) = ||X-DZ||_2^2 = 6.903905e+04 rdiff: 0.00147964555554 h(Z) = ||Z-EX||_2^2 = 8.008671e+02 rdiff: 0.0 i(Z) = ||Z||_1 = 5.809159e+04 j(Z) = tr(Z^TLZ) = 7.798520e+03
Global objective: 1.357300e+05
Outer loop: 6 iterations Z in [-0.119608931243, 0.852648317814] Sparsity of Z: 3,513,306 non-zero entries out of 19,072,000 entries, i.e. 18.4%.
D in [-0.0305108930916, 0.928376078606] d in [0.999999582767, 1.00000023842] Constraints on D: True
E in [-0.0884121283889, 0.472104787827] e in [0.0757436454296, 0.492292612791] Constraints on E: True
Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 2611 seconds -------------------- Classification, le = 1 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 75.1 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 64.2 % Clips accuracy: 70.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 77 (+/- 1.7) <- [76 77 78 76 72 78 78 78 76 76] 77 (+/- 1.0) <- [76 78 76 74 78 77 78 77 76 76] 78 (+/- 1.3) <- [79 78 79 77 78 79 79 75 78 77] 77 (+/- 1.1) <- [77 74 77 77 77 77 77 78 76 76] 77 (+/- 0.9) <- [78 77 76 76 76 78 78 77 76 76] 78 (+/- 1.0) <- [79 76 77 79 78 79 77 80 78 78] 77 (+/- 1.5) <- [78 75 78 79 76 74 78 76 78 77] 77 (+/- 1.1) <- [75 76 78 77 77 79 77 75 76 76] 78 (+/- 1.8) <- [77 78 76 81 78 74 78 79 76 80] 77 (+/- 1.5) <- [78 76 77 76 75 78 74 76 77 80] 78 (+/- 2.3) <- [80 80 77 76 79 76 80 73 77 78] 78 (+/- 1.7) <- [79 77 79 76 74 80 78 76 79 76] 77 (+/- 1.4) <- [75 77 77 74 79 79 77 76 78 77] 77 (+/- 1.9) <- [80 77 78 74 75 75 74 76 77 79] 78 (+/- 1.9) <- [77 75 78 78 76 75 82 78 79 76] 77 (+/- 2.2) <- [76 76 77 80 74 74 76 77 80 73] 77 (+/- 2.1) <- [75 77 76 76 77 75 76 81 78 72] 77 (+/- 1.4) <- [77 78 74 76 74 77 76 75 79 76] 76 (+/- 1.2) <- [78 77 77 75 78 76 75 75 74 76] 77 (+/- 1.4) <- [77 77 75 75 76 78 78 78 77 74] Accuracy: 77.3 (+/- 1.68) Mean time (20 cv): 20.64 seconds Overall time: 416.99 seconds -------------------- Features, le = 10 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 
644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 1985 seconds
Inner loop: 1589 iterations g(Z) = ||X-DZ||_2^2 = 7.444009e+04 rdiff: 0.000644984356194 h(Z) = ||Z-EX||_2^2 = 3.133955e+03 rdiff: 0.0 i(Z) = ||Z||_1 = 5.788600e+04 j(Z) = tr(Z^TLZ) = 6.277175e+03
Global objective: 1.417372e+05
Outer loop: 7 iterations Z in [-0.0890292823315, 0.512945473194] Sparsity of Z: 4,190,298 non-zero entries out of 19,072,000 entries, i.e. 22.0%.
D in [-0.0696427598596, 0.908488214016] d in [0.999999642372, 1.00000035763] Constraints on D: True
E in [-0.0801763683558, 0.308741211891] e in [0.049411829561, 0.343382805586] Constraints on E: True
Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 2000 seconds -------------------- Classification, le = 10 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 75.6 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.2 % Clips accuracy: 70.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 75 (+/- 1.9) <- [74 74 79 73 72 76 74 77 76 75] 77 (+/- 1.6) <- [77 76 78 74 78 78 78 77 73 76] 78 (+/- 1.2) <- [77 76 79 78 78 78 79 76 80 77] 77 (+/- 1.8) <- [76 73 76 77 75 77 78 78 78 74] 77 (+/- 1.1) <- [78 77 79 77 75 77 78 77 76 75] 78 (+/- 0.7) <- [76 77 77 79 77 78 77 78 77 79] 77 (+/- 2.1) <- [77 75 80 79 75 73 79 76 77 76] 77 (+/- 1.4) <- [75 76 77 78 76 80 76 76 77 78] 77 (+/- 1.3) <- [77 77 75 79 76 75 77 76 78 78] 77 (+/- 2.0) <- [80 74 77 74 77 74 75 76 77 80] 78 (+/- 1.8) <- [77 78 79 76 78 77 79 73 79 76] 77 (+/- 1.3) <- [77 79 76 77 75 77 77 76 79 76] 77 (+/- 1.1) <- [77 77 77 76 78 77 78 76 76 74] 77 (+/- 2.1) <- [78 76 79 74 74 76 73 76 76 79] 78 (+/- 1.0) <- [75 77 77 78 76 79 77 77 78 77] 77 (+/- 1.8) <- [79 77 77 79 75 74 76 76 77 73] 77 (+/- 1.4) <- [77 78 76 77 78 75 77 76 80 74] 77 (+/- 0.9) <- [76 76 76 76 77 79 78 76 78 76] 76 (+/- 1.6) <- [75 74 75 75 78 78 78 74 74 76] 77 (+/- 1.2) <- [78 77 76 75 77 76 78 75 77 74] Accuracy: 77.0 (+/- 1.63) Mean time (20 cv): 21.27 seconds Overall time: 430.02 seconds -------------------- Features, le = 100 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 
644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 2905 seconds
Inner loop: 2976 iterations g(Z) = ||X-DZ||_2^2 = 8.288567e+04 rdiff: 0.000472577709342 h(Z) = ||Z-EX||_2^2 = 4.932835e+03 rdiff: 0.0 i(Z) = ||Z||_1 = 5.903889e+04 j(Z) = tr(Z^TLZ) = 5.362371e+03
Global objective: 1.522198e+05
Outer loop: 19 iterations Z in [-0.133742198348, 0.228960230947] Sparsity of Z: 5,615,087 non-zero entries out of 19,072,000 entries, i.e. 29.4%.
D in [-0.789868533611, 0.850951850414] d in [0.999999642372, 1.00000035763] Constraints on D: True
E in [-0.108812175691, 0.16275678575] e in [0.0255117379129, 0.256912648678] Constraints on E: True
Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 2923 seconds -------------------- Classification, le = 100 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 72.7 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 70.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 73 (+/- 2.2) <- [74 72 74 73 67 73 74 73 75 70] 74 (+/- 2.0) <- [72 74 73 69 78 74 74 73 71 72] 75 (+/- 1.6) <- [76 73 77 76 74 74 75 72 75 72] 73 (+/- 1.9) <- [75 69 72 73 74 75 73 75 72 71] 73 (+/- 1.4) <- [74 71 74 72 74 75 75 72 71 71] 75 (+/- 0.6) <- [73 74 75 74 74 75 74 74 75 74] 74 (+/- 1.5) <- [72 74 75 75 74 71 74 73 76 76] 74 (+/- 1.1) <- [74 74 75 74 75 76 72 74 73 72] 74 (+/- 1.7) <- [73 75 71 76 73 71 74 74 74 76] 74 (+/- 2.2) <- [73 73 74 71 75 69 75 72 75 78] 74 (+/- 1.3) <- [75 76 74 73 74 74 74 71 74 74] 74 (+/- 1.5) <- [76 74 76 73 71 74 75 73 74 73] 73 (+/- 1.2) <- [73 72 73 72 75 73 72 73 72 70] 73 (+/- 1.5) <- [75 72 73 71 70 72 70 74 72 74] 74 (+/- 1.2) <- [72 73 75 75 72 74 75 74 75 73] 75 (+/- 1.7) <- [76 74 75 78 72 73 74 74 74 72] 74 (+/- 1.5) <- [72 73 74 72 73 72 75 76 76 72] 74 (+/- 1.1) <- [73 72 73 74 72 75 74 72 75 74] 74 (+/- 1.5) <- [74 74 73 74 75 74 74 75 71 71] 74 (+/- 1.2) <- [74 76 74 72 72 73 75 75 75 72] Accuracy: 73.9 (+/- 1.66) Mean time (20 cv): 21.69 seconds Overall time: 438.69 seconds -------------------- Features, le = 1000.0 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 
100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2406018,), float32 L_indices : (2406018,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2406018,), float32 W_indices : (2406018,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 3914 seconds
Inner loop: 5909 iterations g(Z) = ||X-DZ||_2^2 = 3.051926e+04 rdiff: 0.00206068625012 h(Z) = ||Z-EX||_2^2 = 1.885736e+04 rdiff: 0.0 i(Z) = ||Z||_1 = 1.224685e+06 j(Z) = tr(Z^TLZ) = 2.625585e+05
Global objective: 1.536620e+06
Outer loop: 50 iterations Z in [-0.0849034264684, 0.662950575352] Sparsity of Z: 18,488,386 non-zero entries out of 19,072,000 entries, i.e. 96.9%.
D in [-0.355851888657, 0.378549784422] d in [0.999999761581, 1.00000035763] Constraints on D: True
E in [-0.0680567547679, 0.140228390694] e in [0.196911618114, 0.765983879566] Constraints on E: True
Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 3932 seconds -------------------- Classification, le = 1000.0 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 72.5 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.2 % Clips accuracy: 69.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 73 (+/- 2.1) <- [74 71 75 71 68 71 72 73 76 72] 72 (+/- 1.0) <- [72 71 70 71 73 72 71 73 70 73] 73 (+/- 1.3) <- [74 71 75 72 73 73 72 71 73 71] 73 (+/- 1.7) <- [74 74 70 71 72 75 70 71 72 72] 74 (+/- 2.1) <- [72 72 75 72 73 76 76 72 72 69] 73 (+/- 0.9) <- [74 73 73 72 72 73 71 72 74 74] 73 (+/- 1.8) <- [76 71 74 73 71 70 74 71 75 71] 73 (+/- 0.8) <- [72 73 73 73 72 74 73 74 72 73] 72 (+/- 1.7) <- [71 71 70 72 71 70 73 75 72 75] 73 (+/- 1.4) <- [74 73 72 71 72 69 73 71 72 74] 73 (+/- 1.9) <- [74 75 71 70 73 70 76 70 72 73] 73 (+/- 1.7) <- [74 74 75 71 70 72 74 71 71 73] 72 (+/- 1.4) <- [71 72 70 73 73 73 70 70 72 69] 72 (+/- 1.7) <- [74 72 72 71 68 73 70 72 72 74] 74 (+/- 1.3) <- [72 75 74 71 74 75 72 73 73 72] 73 (+/- 2.4) <- [76 73 72 75 70 69 73 74 72 68] 73 (+/- 1.4) <- [71 70 74 74 72 73 73 73 74 74] 73 (+/- 1.2) <- [73 72 74 71 71 72 72 72 75 73] 74 (+/- 1.6) <- [76 74 72 73 72 72 76 73 71 74] 73 (+/- 1.3) <- [71 74 73 72 74 74 71 73 72 70] Accuracy: 72.8 (+/- 1.67) Mean time (20 cv): 93.91 seconds Overall time: 1890.99 seconds -------------------- Baseline -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 E : (96, 128) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: 
N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 69 (+/- 2.2) <- [70 68 71 67 64 67 70 68 72 66] 69 (+/- 1.4) <- [71 68 67 67 72 69 67 70 69 70] 70 (+/- 2.0) <- [71 69 73 68 68 71 71 68 71 66] 69 (+/- 1.4) <- [70 67 66 67 67 70 68 70 70 68] 70 (+/- 2.4) <- [71 69 72 68 70 75 71 68 68 66] 71 (+/- 0.7) <- [71 71 70 70 71 71 70 69 70 70] 70 (+/- 1.4) <- [70 68 72 69 69 67 71 69 70 69] 70 (+/- 1.5) <- [69 70 68 71 65 70 70 69 70 69] 69 (+/- 1.7) <- [68 69 68 69 68 65 70 70 69 72] 69 (+/- 1.8) <- [72 69 66 68 69 66 69 69 70 71] 70 (+/- 1.4) <- [71 70 69 69 69 69 73 70 71 68] 70 (+/- 1.6) <- [71 70 71 68 69 69 73 68 69 71] 69 (+/- 1.3) <- [69 70 66 68 69 70 68 71 68 68] 69 (+/- 2.0) <- [71 66 68 66 67 69 66 69 69 72] 70 (+/- 1.1) <- [69 70 72 68 69 70 69 71 71 69] 70 (+/- 2.0) <- [72 71 69 71 67 68 71 72 69 67] 70 (+/- 1.6) <- [67 71 69 69 68 70 71 71 72 69] 70 (+/- 0.9) <- [70 68 71 70 68 70 69 69 71 69] 70 (+/- 1.2) <- [72 71 69 69 71 69 72 70 68 69] 69 (+/- 1.7) <- [67 70 71 67 67 72 67 70 70 69] Accuracy: 69.7 (+/- 1.71) Mean time (20 cv): 17.41 seconds Overall time: 352.77 seconds
# Print the tested values followed by every collected result.
print('{} = {}'.format(Pname, Pvalues))
for key, value in res.items():
    # 'atoms_D' is skipped: its entries are displayed with .show() further down.
    # Compare by value; `is not` tests object identity, which matches equal
    # strings only when the interpreter happens to intern them.
    if key != 'atoms_D':
        print('res[\'{}\'] = {}'.format(key, value))
def plot(*args, **kwargs):
    """Plot one or more result series against the tested parameter values.

    args: keys of the global ``res`` dictionary; one curve is drawn per key.
    kwargs:
        log: if exactly True, curves are drawn with plt.semilogy instead of
            plt.plot (overridden by plt.errorbar when ``err`` is given).
        err: sequence of ``res`` keys holding the error bars, one per entry
            of args, in the same order.
        ylim: y-axis limits, forwarded to plt.ylim.

    The x-axis shows the index of each entry of the global Pvalues, with the
    values themselves as tick labels. The figure is shown immediately.
    """
    plt.figure(figsize=(8,5))
    x = list(range(len(Pvalues)))
    log = kwargs.get('log') is True
    pltfunc = plt.semilogy if log else plt.plot
    params = {'linestyle': '-', 'marker': '.', 'markersize': 10}
    for i, var in enumerate(args):
        if 'err' in kwargs:
            pltfunc = plt.errorbar
            params['yerr'] = res[kwargs['err'][i]]
            params['capsize'] = 5
        pltfunc(x, res[var], label=var, **params)
        # Label every point with its value. Distinct names keep the outer
        # index `i` from being shadowed.
        for xi, yi in zip(x, res[var]):
            plt.annotate('{:.2f}'.format(yi), xy=(xi, yi), xytext=(5,5),
                         textcoords='offset points')
    # Leave some room on both sides of the first and last points.
    margin = 0.25
    plt.xlim(-margin, len(Pvalues)-1+margin)
    if 'ylim' in kwargs:
        plt.ylim(kwargs['ylim'])
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    # Same separator as the title (the original joined with ' ,').
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()
def div(l):
    """Return the weight(s) applied to the objective term controlled by l.

    l: key of the parameter dictionary p (e.g. 'ld').

    Returns a numpy array holding all of Pvalues when l is the parameter
    under test (Pname), otherwise the single value p[l]. None entries are
    replaced by 1, so dividing an objective by the result leaves it unchanged.
    """
    # Compare names by value: `is` tests identity and matches equal strings
    # only when the interpreter happens to intern them.
    weights = Pvalues if Pname == l else [p[l]]
    return np.array([1 if v is None else v for v in weights])
# Classification results.
# Chance level of a uniform guess among Ngenres classes, repeated so that it
# lines up with Pvalues; it has no spread, hence a zero error bar.
res['chance'] = len(Pvalues) * [100./p['Ngenres']]
res['chance_std'] = 0
# Error-bar keys, in the same order as the series passed to plot().
err=['accuracy_std', 'baseline_std', 'chance_std']
plot('accuracy', 'baseline', 'chance', err=err, ylim=[0,100])
# Features extraction results.
# NOTE(review): indentation was lost in this export. Everything down to the
# atoms loop is assumed to sit under this `if`, since it only uses
# feature-extraction results; confirm against the notebook.
if regen_features:
    plot('objective_g', 'objective_h', 'objective_i', 'objective_j', log=True)
    # Unweighted objectives.
    print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')
    # Divide each recorded objective by its weight; div() returns a numpy
    # array, so the divisions are element-wise over the tested values.
    res['objective_g_un'] = res['objective_g'] / div('ld')
    res['objective_h_un'] = res['objective_h'] / div('le')
    res['objective_i_un'] = res['objective_i'] / div('ls')
    res['objective_j_un'] = res['objective_j'] / div('lg')
    plot('objective_g_un', 'objective_h_un', 'objective_i_un', 'objective_j_un', log=True)
    plot('sparsity', ylim=[0,100])
    plot('time_features')
    plot('iterations_inner')
    plot('iterations_outer')
    # One stored entry per tested value; each is displayed with .show().
    for i, fig in enumerate(res['atoms_D']):
        print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
        fig.show()
# Wall-clock duration since texperiment was recorded.
print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
le = [None, 1, 10, 100, 1000.0] res['accuracy_std'] = [1.7008557894973533, 1.6808356056834735, 1.6259132049405336, 1.6566111241527577, 1.6674579371672975] res['objective_j'] = [7903.5362243652344, 7798.5198974609375, 6277.1751403808594, 5362.3710632324219, 262558.544921875] res['objective_i'] = [56963.1484375, 58091.58984375, 57885.99609375, 59038.890625, 1224685.25] res['objective_h'] = [0, 800.86712646484375, 3133.955078125, 4932.8346252441406, 18857.358932495117] res['objective_g'] = [70706.0107421875, 69039.0478515625, 74440.0927734375, 82885.673828125, 30519.2626953125] res['baseline'] = [69.729166666666742, 69.729166666666742, 69.729166666666742, 69.729166666666742, 69.729166666666742] res['time_features'] = [2991.643203020096, 2597.270812034607, 1984.734433889389, 2905.119754076004, 3913.920464992523] res['baseline_std'] = 1.71029866625 res['sparsity'] = [19.419174706375838, 18.42127726510067, 21.970941694630874, 29.441521602348992, 96.93994337248323] res['iterations_inner'] = [1409, 1686, 1589, 2976, 5909] res['iterations_outer'] = [8, 6, 7, 19, 50] res['accuracy'] = [77.497500000000002, 77.275000000000006, 77.045833333333348, 73.907500000000027, 72.834166666666661]
/usr/lib/python2.7/dist-packages/numpy/ma/core.py:3847: UserWarning: Warning: converting a masked element to nan. warnings.warn("Warning: converting a masked element to nan.")
g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)
Dictionary atoms for le = None Dictionary atoms for le = 1 Dictionary atoms for le = 10 Dictionary atoms for le = 100 Dictionary atoms for le = 1000.0 Experiment time: 18629 seconds
/usr/lib/pymodules/python2.7/matplotlib/figure.py:371: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure "matplotlib is currently using a non-GUI backend, "