Goal: Understand the influence of the graph in a noisy data setting.
Conclusion: We want to increase $\lambda_g$ but do not want the fidelity objective to be higher than the sparsity objective.
Observations:
objective_g_un
and objective_i_un
have the same value when it works best. Coincidence ?12l_ld
, 12k_ls
. Probably not a coincidence.objective_g_un
(data fidelity) is higher than objective_i_un
(sparsity). It happend with experiments 12l_ld
, 12k_ls
and 12j_lg
too.Pname = 'lg'
Pvalues = [None, 0.1, 1, 10, 100, 1e3]
# Regenerate the graph or the features at each iteration.
regen_graph = False
regen_features = True
p = {}
# Preprocessing.
# Graph.
p['K'] = 10 + 1 # 5 to 10 + 1 for self-reference
p['dm'] = 'cosine'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'
# Feature extraction.
p['m'] = 128 # 64, 128, 512
p['ls'] = 1
p['ld'] = 10
p['le'] = None
p['lg'] = 100
# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5
# HDF5 data stores.
p['folder'] = 'data'
p['filename_gtzan'] = 'gtzan.hdf5'
p['filename_audio'] = 'audio.hdf5'
p['filename_graph'] = 'graph.hdf5'
p['filename_features'] = 'features.hdf5'
# Dataset (10,100,644 | 5,100,149 | 2,10,644).
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149
# Added white noise.
p['noise_std'] = 0.1
# Graph.
p['tol'] = 1e-5
# Feature extraction.
p['rtol'] = 1e-5 # 1e-3, 1e-5, 1e-7
p['N_inner'] = 500
p['N_outer'] = 50
# Classification.
p['Nfolds'] = 10
p['Ncv'] = 40
p['dataset_classification'] = 'Z'
import numpy as np
import time
texperiment = time.time()
# Result dictionary.
res = ['accuracy', 'accuracy_std']
res += ['sparsity', 'atoms']
res += ['objective_g', 'objective_h', 'objective_i', 'objective_j']
res += ['time_features', 'iterations_inner', 'iterations_outer']
res = dict.fromkeys(res)
for key in res.keys():
res[key] = []
def separator(name, parameter=False):
if parameter:
name += ', {} = {}'.format(Pname, p[Pname])
dashes = 20 * '-'
print('\n {} {} {} \n'.format(dashes, name, dashes))
# Fair comparison when tuning parameters.
# Randomnesses: dictionary initialization, training and testing sets.
np.random.seed(1)
#%run gtzan.ipynb
#%run audio_preprocessing.ipynb
if not regen_graph:
separator('Graph')
%run audio_graph.ipynb
if not regen_features:
separator('Features')
%run audio_features.ipynb
# Hyper-parameter under test.
for p[Pname] in Pvalues:
if regen_graph:
separator('Graph', True)
%run audio_graph.ipynb
if regen_features:
separator('Features', True)
p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
%run audio_features.ipynb
separator('Classification', True)
%run audio_classification.ipynb
# Collect results.
for key in res:
res[key].append(globals()[key])
# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax' # Todo: should be done in pre-processing.
if not regen_graph and not regen_features:
# Classifier parameters are being tested.
for p[Pname] in Pvalues:
separator('Baseline', True)
%run audio_classification.ipynb
else:
separator('Baseline')
%run audio_classification.ipynb
res['baseline'] = len(Pvalues) * [accuracy]
res['baseline_std'] = accuracy_std
-------------------- Graph -------------------- Data: (149000, 96), float32 Elapsed time: 623.82 seconds All self-referenced in the first column: True dist in [0.0, 0.681089401245] w in [0.283994346857, 1.0] Ones on the diagonal: 149000 (over 149000) assert: True W in [0.0, 1.0] Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Overall time: 633.45 seconds -------------------- Features, lg = None -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 391 seconds
Inner loop: 304 iterations g(Z) = ||X-DZ||_2^2 = 3.556715e+05 rdiff: 0.000847538508528 i(Z) = ||Z||_1 = 3.337448e+05
Global objective: 6.894163e+05
Outer loop: 13 iterations Z in [-0.925734937191, 0.85433703661] Sparsity of Z: 5,470,082 non-zero entries out of 19,072,000 entries, i.e. 28.7%.
D in [-0.296960860491, 0.514295816422] d in [0.999999701977, 1.00000035763] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 398 seconds -------------------- Classification, lg = None -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 54.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 48.4 % Clips accuracy: 57.0 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 56 (+/- 7.0) <- [54 68 54 54 54 68 50 46 50 60] 56 (+/- 4.8) <- [60 57 57 48 57 57 60 60 56 46] 57 (+/- 5.2) <- [52 60 57 57 48 54 54 60 68 60] 59 (+/- 4.4) <- [50 62 60 62 62 57 64 54 62 54] 57 (+/- 8.8) <- [40 60 52 64 44 62 66 57 68 54] 57 (+/- 5.4) <- [57 56 57 57 62 52 60 54 44 64] 56 (+/- 6.5) <- [50 57 40 60 57 57 60 50 62 60] 57 (+/- 5.0) <- [62 56 60 54 66 48 54 52 56 60] 57 (+/- 6.8) <- [62 54 56 52 70 66 46 52 56 57] 56 (+/- 8.0) <- [66 54 68 46 66 52 56 44 50 57] 55 (+/- 5.9) <- [57 50 54 54 50 56 70 57 56 48] 57 (+/- 9.4) <- [57 52 57 74 57 57 50 57 66 36] 59 (+/- 9.1) <- [76 52 54 66 64 44 57 57 50 68] 56 (+/- 7.5) <- [52 52 64 57 72 48 54 60 48 48] 56 (+/- 7.8) <- [62 54 68 42 48 68 57 54 52 54] 58 (+/- 7.3) <- [60 70 52 52 48 64 66 56 48 60] 58 (+/- 5.9) <- [56 56 56 57 46 64 70 60 56 60] 60 (+/- 4.7) <- [66 66 54 54 60 57 66 54 60 57] 58 (+/- 8.3) <- [46 70 60 52 64 62 62 56 64 42] 55 (+/- 5.1) <- [56 54 50 64 57 60 52 57 48 48] 57 (+/- 5.2) <- [56 54 60 62 56 56 48 64 64 50] 57 (+/- 4.8) <- [56 56 64 50 48 60 62 60 57 60] 58 (+/- 3.5) <- [62 57 60 60 60 62 57 50 54 57] 58 (+/- 6.1) <- [54 70 56 60 62 54 62 56 46 60] 58 (+/- 6.7) <- [62 54 52 54 60 56 57 48 66 72] 57 (+/- 7.0) <- [66 57 46 60 56 60 66 56 44 62] 56 (+/- 6.9) <- [52 52 54 66 44 56 48 62 66 56] 58 (+/- 7.3) <- [60 64 66 64 40 62 52 56 54 57] 58 (+/- 5.7) <- [56 52 50 62 52 57 56 68 56 66] 55 (+/- 4.3) <- [57 52 54 57 56 52 48 60 50 62] 57 (+/- 4.2) <- [57 48 62 54 52 57 60 62 56 56] 56 (+/- 7.8) <- [44 57 64 68 52 52 48 54 52 68] 57 (+/- 2.7) <- [56 56 57 57 62 57 56 54 60 52] 56 (+/- 6.8) <- [72 60 64 52 50 50 50 54 56 54] 58 (+/- 7.7) <- [64 64 54 44 70 52 62 62 60 48] 57 (+/- 5.9) <- [57 56 44 54 57 56 62 54 66 64] 55 (+/- 9.1) <- [48 60 42 66 56 50 62 50 46 72] 56 (+/- 6.8) <- [56 66 50 60 56 44 66 56 48 54] 57 (+/- 5.7) <- [62 48 60 68 60 56 52 57 52 52] 58 (+/- 8.0) <- [57 56 50 70 56 60 66 64 60 40] Accuracy: 56.9 (+/- 6.68) Mean time (40 cv): 44.13 seconds Overall time: 1772.34 seconds -------------------- Features, lg = 0.1 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 1715 seconds
Inner loop: 814 iterations g(Z) = ||X-DZ||_2^2 = 3.521848e+05 rdiff: 0.000667817814065 i(Z) = ||Z||_1 = 3.322563e+05 j(Z) = tr(Z^TLZ) = 1.131077e+03
Global objective: 6.855722e+05
Outer loop: 50 iterations Z in [-0.871740281582, 1.23573553562] Sparsity of Z: 5,420,626 non-zero entries out of 19,072,000 entries, i.e. 28.4%.
D in [-0.306736111641, 0.716826856136] d in [0.999999701977, 1.00000023842] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 1724 seconds -------------------- Classification, lg = 0.1 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 54.9 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 48.8 % Clips accuracy: 57.5 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 61 (+/- 3.5) <- [57 66 57 57 64 62 56 60 57 66] 59 (+/- 5.4) <- [62 66 54 48 60 66 62 56 57 54] 58 (+/- 8.8) <- [56 62 70 54 44 52 56 52 76 56] 58 (+/- 5.4) <- [50 60 62 64 60 60 62 46 60 60] 59 (+/- 6.8) <- [52 68 56 57 50 57 66 57 72 54] 59 (+/- 5.8) <- [60 62 62 50 66 56 56 60 52 70] 57 (+/- 7.1) <- [44 60 50 68 64 57 52 56 56 66] 58 (+/- 8.0) <- [68 60 56 57 74 48 54 46 62 57] 58 (+/- 5.0) <- [62 56 57 56 70 56 54 50 57 57] 57 (+/- 5.3) <- [62 56 62 54 60 56 62 60 44 54] 58 (+/- 7.3) <- [66 50 50 64 52 56 70 62 48 57] 57 (+/- 9.5) <- [56 52 54 68 60 62 50 54 76 40] 59 (+/- 7.1) <- [72 62 54 66 60 48 57 60 48 60] 59 (+/- 9.4) <- [66 57 64 72 68 50 60 64 42 46] 58 (+/- 8.9) <- [66 56 66 42 54 72 64 57 46 56] 60 (+/- 6.4) <- [50 72 57 54 62 66 62 64 54 54] 57 (+/- 4.9) <- [56 54 54 57 50 66 64 60 52 60] 58 (+/- 6.2) <- [64 62 56 52 66 57 62 52 64 46] 58 (+/-11.1) <- [46 68 60 56 68 48 72 44 72 44] 58 (+/- 5.2) <- [57 52 56 70 60 62 54 52 57 62] 60 (+/- 6.0) <- [57 52 68 57 60 57 57 72 62 52] 58 (+/- 4.3) <- [56 57 60 52 56 62 68 56 54 56] 57 (+/- 4.3) <- [54 57 54 62 64 60 60 56 52 50] 60 (+/- 6.9) <- [52 68 60 66 64 54 72 57 50 56] 58 (+/- 7.5) <- [68 57 54 52 56 56 57 44 60 72] 59 (+/- 7.7) <- [57 60 54 60 66 64 70 60 40 62] 59 (+/- 4.7) <- [60 52 60 57 56 68 62 62 62 52] 59 (+/- 6.6) <- [62 62 72 66 56 48 52 56 56 60] 57 (+/- 6.6) <- [56 54 40 64 57 57 57 64 62 60] 59 (+/- 6.1) <- [64 52 62 54 56 54 52 66 62 70] 58 (+/- 4.7) <- [56 48 60 52 64 60 60 62 56 62] 59 (+/- 8.7) <- [57 62 72 72 50 62 44 56 50 60] 58 (+/- 5.2) <- [64 62 64 54 62 56 60 48 52 60] 59 (+/- 6.8) <- [76 60 64 57 54 56 50 54 62 60] 59 (+/- 6.5) <- [66 62 56 50 68 56 60 62 62 46] 56 (+/- 8.0) <- [57 64 40 46 57 64 62 54 66 52] 57 (+/- 7.6) <- [60 56 48 57 60 54 68 50 48 72] 58 (+/- 7.5) <- [54 72 62 66 60 50 66 52 50 52] 58 (+/- 4.9) <- [56 54 54 70 62 54 60 60 60 54] 59 (+/- 9.0) <- [68 50 52 76 56 57 60 72 50 52] Accuracy: 58.4 (+/- 6.93) Mean time (40 cv): 45.42 seconds Overall time: 1824.35 seconds -------------------- Features, lg = 1 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 762 seconds
Inner loop: 328 iterations g(Z) = ||X-DZ||_2^2 = 3.741959e+05 rdiff: 0.000903395552171 i(Z) = ||Z||_1 = 3.155545e+05 j(Z) = tr(Z^TLZ) = 9.733402e+03
Global objective: 6.994837e+05
Outer loop: 11 iterations Z in [-0.826561033726, 0.751893222332] Sparsity of Z: 5,668,607 non-zero entries out of 19,072,000 entries, i.e. 29.7%.
D in [-0.288511753082, 0.513043045998] d in [0.999999701977, 1.00000023842] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 772 seconds -------------------- Classification, lg = 1 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 55.6 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 48.5 % Clips accuracy: 59.0 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 57 (+/- 7.1) <- [57 66 54 52 54 68 50 46 56 66] 58 (+/- 6.9) <- [60 56 60 48 62 66 66 62 50 46] 57 (+/- 6.6) <- [52 64 57 60 48 52 52 57 72 57] 59 (+/- 4.4) <- [54 60 56 62 66 56 64 52 62 56] 57 (+/- 7.5) <- [48 60 54 66 44 56 66 56 68 54] 57 (+/- 6.4) <- [60 56 57 54 68 50 60 54 46 66] 56 (+/- 6.0) <- [54 62 42 60 52 56 62 54 60 62] 57 (+/- 6.5) <- [57 54 62 52 72 48 56 52 57 62] 58 (+/- 5.4) <- [62 57 54 54 68 66 52 52 56 60] 58 (+/- 8.3) <- [70 56 68 46 70 54 56 52 48 57] 55 (+/- 5.1) <- [56 48 54 57 50 57 64 57 54 46] 57 (+/- 8.1) <- [62 50 60 68 62 57 48 57 64 40] 59 (+/-10.8) <- [76 57 54 64 68 36 57 56 48 68] 58 (+/- 6.2) <- [60 52 68 62 62 52 60 62 50 48] 54 (+/- 8.4) <- [60 50 66 40 48 68 60 52 48 52] 59 (+/- 5.7) <- [62 68 57 56 50 64 64 56 50 60] 57 (+/- 4.0) <- [56 56 54 60 50 60 64 54 57 62] 59 (+/- 4.7) <- [68 64 56 56 62 56 62 52 62 56] 59 (+/- 8.9) <- [46 68 64 52 66 64 66 54 64 42] 57 (+/- 3.7) <- [56 60 57 62 62 54 54 57 54 50] 58 (+/- 5.0) <- [60 54 60 62 56 60 50 66 64 52] 58 (+/- 3.7) <- [56 56 57 52 52 57 62 64 60 60] 59 (+/- 4.7) <- [62 56 56 66 64 62 57 50 56 64] 58 (+/- 7.0) <- [50 70 54 54 62 54 66 54 48 64] 59 (+/- 7.6) <- [62 52 50 56 60 56 56 50 72 72] 59 (+/- 7.8) <- [64 60 48 57 54 62 74 62 46 64] 58 (+/- 5.0) <- [56 57 57 57 48 60 52 66 64 56] 59 (+/- 6.3) <- [64 64 64 66 44 62 57 56 54 56] 58 (+/- 5.7) <- [56 54 50 64 54 57 56 68 54 66] 58 (+/- 5.5) <- [64 52 56 57 54 54 54 68 54 66] 57 (+/- 5.4) <- [60 52 66 56 46 56 60 64 56 57] 57 (+/- 8.4) <- [40 56 64 70 52 54 50 56 56 68] 59 (+/- 3.9) <- [56 57 57 64 62 62 56 50 62 57] 58 (+/- 6.4) <- [72 60 66 60 56 52 50 52 56 57] 59 (+/- 6.8) <- [66 64 56 48 66 56 64 57 62 46] 58 (+/- 6.5) <- [57 57 44 54 50 60 64 60 68 60] 56 (+/- 8.6) <- [48 57 48 66 62 48 62 52 46 72] 56 (+/- 6.5) <- [50 68 54 57 57 46 64 54 48 56] 57 (+/- 5.9) <- [60 48 60 64 64 54 56 54 60 46] 58 (+/- 8.6) <- [62 50 50 72 56 54 66 68 54 44] Accuracy: 57.6 (+/- 6.69) Mean time (40 cv): 39.78 seconds Overall time: 1598.14 seconds -------------------- Features, lg = 10 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 1061 seconds
Inner loop: 430 iterations g(Z) = ||X-DZ||_2^2 = 4.885840e+05 rdiff: 0.00101656964549 i(Z) = ||Z||_1 = 2.251200e+05 j(Z) = tr(Z^TLZ) = 3.864909e+04
Global objective: 7.523531e+05
Outer loop: 14 iterations Z in [-0.409308135509, 0.48392239213] Sparsity of Z: 6,572,587 non-zero entries out of 19,072,000 entries, i.e. 34.5%.
D in [-0.246071562171, 0.592663884163] d in [0.999999701977, 1.00000035763] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 1072 seconds -------------------- Classification, lg = 10 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 59.2 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 50.6 % Clips accuracy: 61.0 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 60 (+/- 7.8) <- [68 72 60 50 62 66 54 46 60 66] 59 (+/- 5.5) <- [56 62 64 50 62 57 68 60 60 50] 60 (+/- 6.8) <- [57 57 74 60 50 64 54 62 68 54] 63 (+/- 4.6) <- [57 66 66 72 68 60 64 56 62 62] 60 (+/- 9.0) <- [44 62 56 64 46 70 70 56 70 62] 61 (+/- 4.7) <- [62 62 60 56 70 54 64 64 54 62] 61 (+/- 4.4) <- [60 60 50 66 60 62 66 57 60 64] 61 (+/- 6.8) <- [62 62 57 60 78 50 57 56 64 60] 60 (+/- 5.2) <- [60 54 60 57 70 68 56 54 60 64] 59 (+/- 5.8) <- [68 60 64 57 66 57 56 50 50 56] 59 (+/- 4.0) <- [60 57 60 66 57 56 62 60 62 50] 60 (+/-10.9) <- [57 57 62 78 62 57 46 60 78 42] 60 (+/- 9.0) <- [78 60 50 62 64 48 57 66 48 66] 60 (+/- 6.9) <- [60 68 70 62 66 54 56 64 57 46] 59 (+/- 7.3) <- [64 57 74 48 57 66 57 62 54 50] 61 (+/- 6.1) <- [56 72 57 60 52 64 66 57 54 68] 59 (+/- 4.5) <- [54 62 57 57 54 66 68 56 57 60] 61 (+/- 4.9) <- [64 62 66 60 60 57 70 52 56 57] 60 (+/- 7.9) <- [54 68 68 52 66 56 68 56 68 46] 59 (+/- 5.2) <- [57 56 66 66 62 57 50 60 64 52] 60 (+/- 7.0) <- [64 56 62 62 56 62 46 70 68 52] 60 (+/- 4.8) <- [62 56 66 57 54 52 64 64 66 62] 59 (+/- 4.8) <- [62 57 56 64 68 64 56 52 60 54] 62 (+/- 6.1) <- [52 68 57 68 66 54 72 60 62 62] 61 (+/- 4.7) <- [62 60 56 57 57 66 62 52 68 66] 61 (+/- 9.3) <- [60 62 52 72 60 64 74 54 42 70] 60 (+/- 5.2) <- [57 57 68 57 54 66 54 66 66 56] 61 (+/- 6.0) <- [57 64 72 68 50 56 57 57 60 64] 59 (+/- 6.1) <- [60 48 52 62 56 66 57 68 57 66] 61 (+/- 5.4) <- [66 60 62 57 56 56 54 66 64 72] 61 (+/- 7.4) <- [62 46 64 64 57 66 60 76 56 56] 61 (+/- 8.9) <- [46 68 72 68 54 70 52 57 54 70] 61 (+/- 4.4) <- [54 62 62 60 66 60 64 52 66 62] 60 (+/- 6.8) <- [74 66 66 50 54 57 57 57 64 54] 62 (+/- 7.2) <- [72 66 62 46 70 57 64 66 60 56] 61 (+/- 6.8) <- [60 64 50 56 52 62 68 57 74 62] 60 (+/- 9.9) <- [56 56 50 70 72 52 62 50 54 80] 62 (+/- 6.9) <- [57 72 64 68 64 50 66 52 56 66] 60 (+/- 8.1) <- [66 50 64 72 72 56 57 64 54 48] 61 (+/- 9.2) <- [74 62 56 74 57 57 64 70 52 44] Accuracy: 60.5 (+/- 6.84) Mean time (40 cv): 29.55 seconds Overall time: 1187.91 seconds -------------------- Features, lg = 100 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 632 seconds
Inner loop: 246 iterations g(Z) = ||X-DZ||_2^2 = 7.462377e+05 rdiff: 0.000593498029647 i(Z) = ||Z||_1 = 7.127560e+04 j(Z) = tr(Z^TLZ) = 2.056625e+04
Global objective: 8.380796e+05
Outer loop: 4 iterations Z in [-0.056636184454, 0.14264112711] Sparsity of Z: 8,786,613 non-zero entries out of 19,072,000 entries, i.e. 46.1%.
D in [-0.145910441875, 0.507630586624] d in [0.999999761581, 1.00000023842] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 644 seconds -------------------- Classification, lg = 100 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 63.2 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 57.2 % Clips accuracy: 62.0 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 64 (+/- 7.1) <- [68 70 57 62 74 70 60 48 64 64] 61 (+/- 4.7) <- [62 60 56 57 72 66 64 57 62 56] 62 (+/- 5.3) <- [60 64 68 62 57 66 56 54 72 60] 62 (+/- 4.4) <- [54 60 66 68 64 62 64 57 68 57] 61 (+/- 8.7) <- [52 68 70 64 48 68 68 62 68 46] 63 (+/- 7.8) <- [66 68 70 56 76 50 68 57 54 64] 63 (+/- 6.2) <- [50 72 64 64 56 72 62 62 64 64] 61 (+/- 7.0) <- [60 50 56 68 76 54 64 64 62 60] 63 (+/- 6.8) <- [72 54 56 60 74 70 62 62 56 68] 61 (+/- 3.1) <- [66 62 62 57 66 62 62 62 56 57] 62 (+/- 5.8) <- [68 60 62 66 68 66 66 60 57 48] 62 (+/- 6.4) <- [66 57 60 78 62 57 56 56 66 57] 63 (+/- 8.1) <- [76 60 54 70 70 54 60 66 50 68] 62 (+/- 6.2) <- [64 62 74 57 66 64 57 68 50 60] 61 (+/- 8.3) <- [68 62 74 50 57 72 64 52 54 52] 62 (+/- 6.1) <- [60 74 60 60 57 68 62 62 50 66] 61 (+/- 5.3) <- [62 72 56 56 57 66 68 57 57 57] 63 (+/- 7.0) <- [56 66 68 50 74 60 70 60 66 56] 62 (+/-10.0) <- [50 68 68 54 66 56 76 57 78 48] 62 (+/- 7.2) <- [60 66 62 74 70 62 60 56 62 46] 62 (+/- 5.2) <- [64 64 64 68 60 57 54 70 66 54] 62 (+/- 4.5) <- [64 64 66 57 60 56 70 66 56 60] 62 (+/- 3.2) <- [60 57 64 60 66 68 62 57 62 60] 63 (+/- 6.5) <- [57 72 56 62 62 56 74 57 57 70] 61 (+/- 6.1) <- [64 62 57 56 60 60 60 50 72 70] 60 (+/- 9.0) <- [60 57 54 62 57 64 72 57 40 74] 60 (+/- 4.5) <- [57 62 62 62 57 60 52 68 66 56] 61 (+/- 6.3) <- [56 57 70 70 48 62 57 64 57 64] 61 (+/- 4.2) <- [57 60 57 68 54 57 62 68 60 62] 61 (+/- 4.8) <- [64 60 62 57 56 54 57 72 62 64] 61 (+/- 7.0) <- [66 54 62 68 68 50 66 68 62 50] 61 (+/- 5.7) <- [56 66 62 68 50 62 52 62 62 66] 61 (+/- 3.8) <- [66 57 66 60 60 66 57 56 62 56] 63 (+/- 7.9) <- [78 62 76 54 56 57 56 60 60 66] 63 (+/- 6.8) <- [74 68 64 56 62 54 70 64 66 52] 62 (+/- 7.8) <- [64 56 48 68 62 62 66 54 78 62] 62 (+/- 7.9) <- [57 64 52 74 72 50 62 60 57 72] 61 (+/- 7.5) <- [54 70 64 66 70 56 70 52 54 52] 62 (+/- 5.9) <- [64 62 60 68 64 72 54 66 57 52] 62 (+/- 7.3) <- [62 60 57 76 66 62 57 72 50 56] Accuracy: 61.8 (+/- 6.58) Mean time (40 cv): 21.88 seconds Overall time: 880.89 seconds -------------------- Features, lg = 1000.0 -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = cosine Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2789250,), float32 L_indices : (2789250,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2789250,), float32 W_indices : (2789250,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 352 seconds
Inner loop: 137 iterations g(Z) = ||X-DZ||_2^2 = 8.337766e+05 rdiff: 0.00266783104736 i(Z) = ||Z||_1 = 3.778314e+04 j(Z) = tr(Z^TLZ) = 6.533181e+03
Global objective: 8.780929e+05
Outer loop: 3 iterations Z in [-0.00323675596155, 0.0201184488833] Sparsity of Z: 16,392,359 non-zero entries out of 19,072,000 entries, i.e. 85.9%.
D in [-0.00621616281569, 0.289856225252] d in [0.999999642372, 1.00000023842] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 365 seconds -------------------- Classification, lg = 1000.0 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 52.1 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 51.2 % Clips accuracy: 52.5 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1536), float64 Labels: (500,), uint8 53 (+/- 6.5) <- [70 52 46 48 56 56 56 48 50 52] 53 (+/- 4.1) <- [50 52 56 46 60 54 56 56 56 48] 52 (+/- 4.5) <- [48 48 56 56 48 60 52 50 57 48] 53 (+/- 4.3) <- [52 42 57 50 54 54 56 50 54 56] 52 (+/- 8.3) <- [46 56 54 60 46 42 64 44 66 46] 52 (+/- 9.5) <- [50 57 60 50 54 36 46 72 42 52] 52 (+/- 7.5) <- [46 68 48 50 52 44 56 48 48 64] 52 (+/- 3.4) <- [54 46 50 52 56 46 50 56 52 54] 52 (+/- 6.2) <- [62 40 52 56 56 54 42 50 54 52] 51 (+/- 4.2) <- [56 52 57 48 54 54 44 52 50 46] 51 (+/- 6.4) <- [56 44 48 60 56 48 60 48 40 50] 52 (+/- 6.9) <- [44 54 56 57 62 60 42 46 56 46] 53 (+/- 8.8) <- [56 64 48 57 50 48 44 66 36 57] 51 (+/- 6.6) <- [56 57 57 48 56 54 46 54 36 48] 52 (+/- 7.3) <- [60 46 60 42 56 64 46 52 46 46] 53 (+/- 5.2) <- [52 60 56 50 56 44 62 48 52 52] 51 (+/- 6.7) <- [48 54 50 44 46 60 64 46 42 52] 52 (+/- 4.4) <- [50 62 52 50 56 46 52 48 54 48] 50 (+/- 8.5) <- [40 56 56 44 56 36 62 48 57 42] 52 (+/- 5.3) <- [44 62 46 54 60 50 52 50 52 50] 53 (+/- 7.0) <- [54 44 52 57 57 48 46 62 62 42] 51 (+/- 3.0) <- [48 54 50 48 56 52 56 52 50 48] 53 (+/- 7.2) <- [60 52 52 48 62 44 54 48 66 44] 52 (+/- 7.9) <- [48 57 46 44 42 54 70 48 52 57] 52 (+/- 7.2) <- [60 48 48 44 44 54 52 46 66 60] 53 (+/- 7.4) <- [50 46 60 48 46 57 54 50 46 70] 52 (+/- 4.4) <- [57 52 48 48 48 52 44 57 54 54] 54 (+/- 7.5) <- [56 54 57 68 40 48 52 48 50 62] 51 (+/- 4.0) <- [48 56 46 56 50 48 44 54 50 54] 51 (+/- 5.6) <- [52 54 54 46 56 44 52 56 40 57] 52 (+/- 5.9) <- [60 50 56 54 50 44 64 46 50 50] 52 (+/- 5.1) <- [54 48 56 60 48 56 46 46 48 57] 51 (+/- 4.3) <- [52 48 52 48 54 50 56 42 57 50] 53 (+/- 7.4) <- [57 60 52 40 48 62 44 57 46 60] 52 (+/- 7.4) <- [57 60 56 44 56 50 64 42 50 42] 53 (+/- 9.1) <- [56 50 34 56 66 44 50 54 66 50] 52 (+/- 6.7) <- [56 56 42 54 62 44 56 46 44 57] 53 (+/- 8.7) <- [42 60 50 54 68 50 66 48 42 48] 51 (+/- 6.0) <- [54 50 36 52 56 56 46 52 50 57] 51 (+/- 5.3) <- [50 50 46 56 54 42 56 56 42 54] Accuracy: 52.0 (+/- 6.56) Mean time (40 cv): 28.49 seconds Overall time: 1146.56 seconds -------------------- Baseline -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 59.3 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 54.8 % Clips accuracy: 62.5 % 5 genres: blues, classical, country, disco, hiphop Data: (500, 1152), float64 Labels: (500,), uint8 58 (+/- 7.0) <- [64 60 60 42 52 68 64 56 56 56] 59 (+/- 6.6) <- [64 62 52 46 70 66 60 57 56 60] 61 (+/- 6.2) <- [60 66 72 54 57 62 56 54 70 56] 58 (+/- 6.4) <- [50 56 57 68 68 57 64 48 57 56] 57 (+/- 5.6) <- [48 60 54 60 62 56 68 57 54 50] 58 (+/- 4.0) <- [54 57 64 60 64 56 60 57 50 57] 58 (+/- 5.8) <- [54 56 54 62 57 66 60 52 52 70] 60 (+/- 7.2) <- [57 48 60 64 70 48 68 57 66 57] 58 (+/- 5.0) <- [66 48 57 57 60 64 52 60 60 56] 59 (+/- 5.6) <- [72 54 66 60 57 60 57 56 56 52] 58 (+/- 5.7) <- [62 50 54 66 64 57 62 54 48 57] 57 (+/- 8.5) <- [57 54 68 68 57 50 46 60 68 44] 59 (+/- 7.0) <- [64 60 64 62 52 52 56 66 44 66] 59 (+/- 7.2) <- [66 64 62 64 70 57 56 57 44 52] 60 (+/- 5.8) <- [62 57 68 50 57 68 64 60 56 52] 59 (+/- 5.7) <- [54 70 64 56 62 48 60 56 57 62] 58 (+/- 7.0) <- [57 60 60 52 46 60 74 57 52 57] 59 (+/- 5.0) <- [54 68 66 54 64 57 57 54 62 56] 57 (+/- 9.8) <- [44 62 60 56 70 44 60 54 74 46] 61 (+/- 5.0) <- [60 60 64 66 62 64 56 52 68 54] 59 (+/- 5.4) <- [60 66 60 62 60 56 54 68 60 48] 58 (+/- 8.8) <- [48 64 76 52 48 66 56 56 64 50] 60 (+/- 4.6) <- [66 62 60 52 66 56 66 57 56 60] 59 (+/- 7.5) <- [54 62 54 57 56 52 76 64 48 62] 57 (+/- 5.7) <- [62 54 62 54 50 60 54 48 66 62] 58 (+/- 7.0) <- [54 50 57 66 50 64 60 60 48 70] 59 (+/- 8.0) <- [74 60 54 64 57 68 48 64 50 52] 58 (+/- 7.7) <- [60 57 68 66 40 54 52 54 60 64] 57 (+/- 4.6) <- [57 57 52 64 50 56 56 64 54 62] 60 (+/- 5.0) <- [60 62 54 57 54 54 56 64 66 68] 58 (+/- 6.7) <- [68 52 68 52 62 57 66 54 50 54] 57 (+/- 7.5) <- [46 54 68 64 52 62 57 54 46 66] 58 (+/- 6.2) <- [62 57 64 52 66 64 62 46 52 56] 59 (+/- 7.8) <- [72 68 54 46 48 62 56 56 62 62] 58 (+/- 9.7) <- [74 66 52 42 64 52 70 50 60 52] 59 (+/- 4.2) <- [56 62 52 56 62 60 64 62 62 52] 59 (+/- 7.1) <- [60 66 48 60 72 52 62 50 56 62] 59 (+/- 8.9) <- [52 68 68 66 68 48 68 52 46 54] 58 (+/- 6.9) <- [57 64 48 66 64 70 50 54 56 54] 59 (+/- 5.9) <- [66 62 52 66 60 62 57 62 52 48] Accuracy: 58.6 (+/- 6.74) Mean time (40 cv): 19.35 seconds Overall time: 779.22 seconds
print('{} = {}'.format(Pname, Pvalues))
for key, value in res.items():
if key is not 'atoms':
print('res[\'{}\'] = {}'.format(key, value))
def plot(*args, **kwargs):
plt.figure(figsize=(8,5))
x = range(len(Pvalues))
log = 'log' in kwargs and kwargs['log'] is True
pltfunc = plt.semilogy if log else plt.plot
params = {}
params['linestyle'] = '-'
params['marker'] = '.'
params['markersize'] = 10
for i, var in enumerate(args):
if 'err' in kwargs:
pltfunc = plt.errorbar
params['yerr'] = res[kwargs['err'][i]]
params['capsize'] = 5
pltfunc(x, res[var], label=var, **params)
for i,j in zip(x, res[var]):
plt.annotate('{:.2f}'.format(j), xy=(i,j), xytext=(5,5), textcoords='offset points')
margin = 0.25
params['markersize'] = 10
plt.xlim(-margin, len(Pvalues)-1+margin)
if 'ylim' in kwargs:
plt.ylim(kwargs['ylim'])
plt.title('{} vs {}'.format(', '.join(args), Pname))
plt.xlabel(Pname)
plt.ylabel(' ,'.join(args))
plt.xticks(x, Pvalues)
plt.grid(True); plt.legend(loc='best'); plt.show()
def div(l):
div = Pvalues if Pname is 'l' else [p[l]]
return np.array([1 if v is None else v for v in div])
# Classification results.
res['chance'] = len(Pvalues) * [100./p['Ngenres']]
res['chance_std'] = 0
err=['accuracy_std', 'baseline_std', 'chance_std']
plot('accuracy', 'baseline', 'chance', err=err, ylim=[0,100])
# Features extraction results.
if regen_features:
plot('objective_g', 'objective_i', 'objective_j', log=True)
# Unweighted objectives.
print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')
res['objective_g_un'] = res['objective_g'] / div('ld')
res['objective_i_un'] = res['objective_i'] / div('ls')
res['objective_j_un'] = res['objective_j'] / div('lg')
plot('objective_g_un', 'objective_i_un', 'objective_j_un', log=True)
plot('sparsity', ylim=[0,100])
plot('time_features')
plot('iterations_inner')
plot('iterations_outer')
for i, fig in enumerate(res['atoms']):
print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
fig.show()
print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
lg = [None, 0.1, 1, 10, 100, 1000.0] res['accuracy_std'] = [6.6807110400016629, 6.9344051655495358, 6.6889292865151484, 6.8423314739933545, 6.5823703937107663, 6.5635280147189139] res['objective_j'] = [0, 1131.0767578125001, 9733.40234375, 38649.09423828125, 20566.252136230469, 6533.1807136535645] res['objective_i'] = [333744.78125, 332256.28125, 315554.46875, 225119.96875, 71275.6015625, 37783.14453125] res['objective_h'] = [0, 0, 0, 0, 0, 0] res['objective_g'] = [355671.5234375, 352184.84375, 374195.859375, 488584.0234375, 746237.734375, 833776.5625] res['baseline'] = [58.574999999999989, 58.574999999999989, 58.574999999999989, 58.574999999999989, 58.574999999999989, 58.574999999999989] res['time_features'] = [390.82487893104553, 1715.089271068573, 762.2838189601898, 1060.8061549663544, 631.646185874939, 351.8170828819275] res['baseline_std'] = 6.7423567838 res['sparsity'] = [28.681218540268457, 28.421906459731545, 29.72214240771812, 34.46197042785235, 46.07074769295302, 85.94986891778524] res['iterations_inner'] = [304, 814, 328, 430, 246, 137] res['iterations_outer'] = [13, 50, 11, 14, 4, 3] res['accuracy'] = [56.910000000000018, 58.404999999999994, 57.614999999999981, 60.450000000000024, 61.820000000000022, 52.009999999999991]
/usr/lib/python2.7/dist-packages/numpy/ma/core.py:3847: UserWarning: Warning: converting a masked element to nan. warnings.warn("Warning: converting a masked element to nan.")
g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)
Dictionary atoms for lg = None Dictionary atoms for lg = 0.1 Dictionary atoms for lg = 1 Dictionary atoms for lg = 10 Dictionary atoms for lg = 100 Dictionary atoms for lg = 1000.0 Experiment time: 14823 seconds
/usr/lib/pymodules/python2.7/matplotlib/figure.py:371: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure "matplotlib is currently using a non-GUI backend, "
The previous graph is wrong due to an error in div()
.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
def plot(*args, **kwargs):
plt.figure(figsize=(8,5))
x = range(len(Pvalues))
log = 'log' in kwargs and kwargs['log'] is True
pltfunc = plt.semilogy if log else plt.plot
params = {}
params['linestyle'] = '-'
params['marker'] = '.'
params['markersize'] = 10
for i, var in enumerate(args):
if 'err' in kwargs:
pltfunc = plt.errorbar
params['yerr'] = res[kwargs['err'][i]]
params['capsize'] = 5
pltfunc(x, res[var], label=var, **params)
for i,j in zip(x, res[var]):
plt.annotate('{:.2f}'.format(j), xy=(i,j), xytext=(5,5), textcoords='offset points')
margin = 0.25
params['markersize'] = 10
plt.xlim(-margin, len(Pvalues)-1+margin)
if 'ylim' in kwargs:
plt.ylim(kwargs['ylim'])
plt.title('{} vs {}'.format(', '.join(args), Pname))
plt.xlabel(Pname)
plt.ylabel(' ,'.join(args))
plt.xticks(x, Pvalues)
plt.grid(True); plt.legend(loc='best'); plt.show()
def div(l):
div = Pvalues if Pname is l else [p[l]]
print div
return np.array([1 if v is None else v for v in div])
res = {}
res['objective_j'] = [0, 1131.0767578125001, 9733.40234375, 38649.09423828125, 20566.252136230469, 6533.1807136535645]
res['objective_i'] = [333744.78125, 332256.28125, 315554.46875, 225119.96875, 71275.6015625, 37783.14453125]
res['objective_g'] = [355671.5234375, 352184.84375, 374195.859375, 488584.0234375, 746237.734375, 833776.5625]
res['objective_g_un'] = res['objective_g'] / div('ld')
res['objective_i_un'] = res['objective_i'] / div('ls')
res['objective_j_un'] = res['objective_j'] / div('lg')
print res['objective_j_un']
plot('objective_g_un', 'objective_i_un', 'objective_j_un', log=True)
[10] [1] [None, 0.1, 1, 10, 100, 1000.0] [ 0.00000000e+00 1.13107676e+04 9.73340234e+03 3.86490942e+03 2.05662521e+02 6.53318071e+00]
/home/michael/.virtualenvs/dlaudio/local/lib/python2.7/site-packages/numpy/ma/core.py:3900: UserWarning: Warning: converting a masked element to nan. warnings.warn("Warning: converting a masked element to nan.")