Goal:
Conclusion:
Observations:
Pname = 'test_size'
Pvalues = [.1, .2, .3, .4, .5, .6, .7, .8, .9, .92, .94, .96, .97, .98, .99]
# Regenerate the graph or the features at each iteration.
regen_graph = False
regen_features = False
regen_baseline = True
p = {}
# Preprocessing.
# Graph.
p['data_scaling_graph'] = 'features'
p['K'] = 10 + 1 # 5 to 10 + 1 for self-reference
p['dm'] = 'euclidean'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'
# Feature extraction.
p['m'] = 128 # 64, 128, 512
p['ls'] = 1
p['ld'] = 10
p['le'] = None
p['lg'] = 600
# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5
p['majority_voting'] = False
# HDF5 data stores.
p['folder'] = 'data'
p['filename_gtzan'] = 'gtzan.hdf5'
p['filename_audio'] = 'audio.hdf5'
p['filename_graph'] = 'graph.hdf5'
p['filename_features'] = 'features.hdf5'
# Dataset (10,100,644 | 5,100,149 | 2,10,644).
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149
# Added white noise.
p['noise_std'] = 0
# Graph.
p['tol'] = 1e-5
# Feature extraction.
p['rtol'] = 1e-5 # 1e-3, 1e-5, 1e-7
p['N_inner'] = 500
p['N_outer'] = 50
# Classification.
p['test_size'] = 0.1
p['Ncv'] = 20
p['dataset_classification'] = 'Z'
import numpy as np
import time
texperiment = time.time()
# Result dictionary.
res = ['accuracy', 'accuracy_std']
res += ['sparsity', 'atoms']
res += ['objective_g', 'objective_h', 'objective_i', 'objective_j']
res += ['time_features', 'iterations_inner', 'iterations_outer']
res = dict.fromkeys(res)
for key in res.keys():
res[key] = []
def separator(name, parameter=False):
if parameter:
name += ', {} = {}'.format(Pname, p[Pname])
dashes = 20 * '-'
print('\n {} {} {} \n'.format(dashes, name, dashes))
# Fair comparison when tuning parameters.
# Randomnesses: dictionary initialization, training and testing sets.
np.random.seed(1)
#%run gtzan.ipynb
#%run audio_preprocessing.ipynb
if not regen_graph:
separator('Graph')
%run audio_graph.ipynb
if not regen_features:
separator('Features')
%run audio_features.ipynb
# Hyper-parameter under test.
for p[Pname] in Pvalues:
if regen_graph:
separator('Graph', True)
%run audio_graph.ipynb
if regen_features:
separator('Features', True)
p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
%run audio_features.ipynb
separator('Classification', True)
%run audio_classification.ipynb
# Collect results.
for key in res:
res[key].append(globals()[key])
# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax' # Todo: should be done in pre-processing.
if regen_baseline:
res['baseline'] = []
res['baseline_std'] = []
for p[Pname] in Pvalues:
separator('Baseline', True)
%run audio_classification.ipynb
res['baseline'].append(accuracy)
res['baseline_std'].append(accuracy_std)
else:
separator('Baseline')
%run audio_classification.ipynb
res['baseline'] = len(Pvalues) * [accuracy]
res['baseline_std'] = accuracy_std
-------------------- Graph -------------------- Data: (149000, 96), float32 Elapsed time: 174.12 seconds All self-referenced in the first column: True dist in [0.0, 1.52197754383] w in [0.00494863791391, 1.0] Ones on the diagonal: 149000 (over 149000) assert: True W in [0.0, 1.0] Datasets: L_data : (2409282,), float32 L_indices : (2409282,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2409282,), float32 W_indices : (2409282,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Overall time: 184.07 seconds -------------------- Features -------------------- Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: Xa: (10, 100, 644, 2, 1024) , float32 Xs: (10, 100, 644, 2, 96) , float32 Full dataset: size: N=1,288,000 x n=96 -> 123,648,000 floats dim: 123,648 features per clip shape: (10, 100, 644, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Data: (149000, 96), float32 Attributes: K = 11 dm = euclidean Csigma = 1 diag = True laplacian = normalized Datasets: L_data : (2409282,), float32 L_indices : (2409282,), int32 L_indptr : (149001,) , int32 L_shape : (2,) , int64 W_data : (2409282,), float32 W_indices : (2409282,), int32 W_indptr : (149001,) , int32 W_shape : (2,) , int64 Size X: 13.6 M --> 54.6 MiB Size Z: 18.2 M --> 72.8 MiB Size D: 12.0 k --> 48.0 kiB Size E: 12.0 k --> 48.0 kiB Elapsed time: 2488 seconds
Inner loop: 1122 iterations g(Z) = ||X-DZ||_2^2 = 1.038306e+05 rdiff: 0.000175221613487 i(Z) = ||Z||_1 = 4.643225e+04 j(Z) = tr(Z^TLZ) = 8.429849e+03
Global objective: 1.586927e+05
Outer loop: 7 iterations Z in [-0.063802331686, 0.404274195433] Sparsity of Z: 7,789,559 non-zero entries out of 19,072,000 entries, i.e. 40.8%.
D in [-0.0233125798404, 0.826667249203] d in [0.999999701977, 1.00000035763] Constraints on D: True
Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Overall time: 2496 seconds -------------------- Classification, test_size = 0.1 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 72 (+/- 1.6) <- [72 71 74 71 68 73 71 69 73 72] 72 (+/- 1.5) <- [72 75 73 69 73 71 71 72 71 72] 73 (+/- 1.7) <- [72 74 75 72 72 72 71 69 74 72] 73 (+/- 2.2) <- [75 67 72 72 73 74 76 70 72 72] 73 (+/- 1.4) <- [74 73 72 74 71 74 74 70 73 71] 73 (+/- 1.5) <- [73 72 72 73 74 75 72 76 71 72] 73 (+/- 1.5) <- [74 71 73 74 72 69 73 71 73 73] 72 (+/- 1.2) <- [72 73 75 71 72 73 72 71 71 71] 73 (+/- 1.4) <- [72 75 70 74 72 71 73 73 72 71] 73 (+/- 1.5) <- [73 69 72 73 73 72 72 71 72 75] 73 (+/- 1.6) <- [74 75 74 70 72 72 71 70 73 72] 73 (+/- 1.1) <- [72 73 73 74 71 71 70 72 73 74] 73 (+/- 1.2) <- [73 71 74 74 72 72 71 72 72 70] 72 (+/- 1.9) <- [74 73 74 69 72 71 69 71 71 74] 73 (+/- 1.0) <- [73 72 72 72 72 72 75 74 72 73] 73 (+/- 1.9) <- [75 73 70 75 71 72 73 71 73 69] 72 (+/- 1.5) <- [72 73 72 71 72 69 72 74 74 70] 73 (+/- 1.6) <- [73 74 72 72 71 74 71 70 74 75] 72 (+/- 1.2) <- [71 72 71 70 74 72 72 71 72 74] 72 (+/- 1.2) <- [70 72 72 71 70 71 73 74 72 71] Accuracy: 72.6 (+/- 1.56) Mean time (20 cv): 21.64 seconds Overall time: 437.29 seconds -------------------- Classification, test_size = 0.2 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 4800.0 training, 1200.0 testing 72 (+/- 1.2) <- [71 70 72 72 69 71 73 69 73 71] 72 (+/- 1.1) <- [71 73 72 69 72 71 72 70 72 70] 73 (+/- 0.5) <- [72 73 73 72 72 72 72 71 73 72] 72 (+/- 1.1) <- [72 70 71 72 73 74 73 72 71 71] 73 (+/- 1.1) <- [72 72 74 72 72 73 72 70 71 73] 73 (+/- 1.3) <- [71 72 73 75 73 74 70 74 72 72] 73 (+/- 1.1) <- [71 71 74 73 72 70 73 72 73 72] 72 (+/- 1.3) <- [71 72 72 73 70 75 73 71 71 72] 73 (+/- 1.1) <- [72 74 71 73 72 70 73 72 72 72] 73 (+/- 1.0) <- [73 71 73 72 74 72 73 72 72 74] 72 (+/- 1.0) <- [71 73 71 70 71 70 72 70 72 72] 73 (+/- 0.8) <- [73 71 72 73 71 72 72 72 73 73] 72 (+/- 0.8) <- [72 72 71 73 70 72 71 70 72 70] 72 (+/- 1.4) <- [74 71 72 69 70 70 71 73 72 73] 73 (+/- 0.6) <- [73 72 72 73 72 72 72 73 73 72] 72 (+/- 1.2) <- [74 71 71 73 72 71 71 73 72 70] 72 (+/- 0.7) <- [72 73 71 71 72 72 71 71 71 71] 72 (+/- 0.9) <- [71 72 73 70 72 72 72 70 72 72] 73 (+/- 1.2) <- [73 72 71 69 73 72 73 74 72 73] 72 (+/- 1.3) <- [72 70 71 70 70 71 74 71 72 73] Accuracy: 72.3 (+/- 1.15) Mean time (20 cv): 18.85 seconds Overall time: 381.46 seconds -------------------- Classification, test_size = 0.3 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 4200.0 training, 1800.0 testing 72 (+/- 0.8) <- [70 71 72 72 70 71 72 71 72 71] 71 (+/- 1.0) <- [71 73 72 69 71 71 70 71 72 71] 72 (+/- 1.0) <- [72 72 71 70 73 71 71 70 74 71] 72 (+/- 1.2) <- [73 70 71 72 72 74 71 71 70 72] 72 (+/- 0.9) <- [71 71 73 72 70 72 72 70 71 72] 72 (+/- 0.7) <- [71 72 72 73 72 72 70 72 71 72] 72 (+/- 0.9) <- [72 72 73 72 72 70 73 72 72 71] 72 (+/- 1.1) <- [71 72 71 72 71 74 73 71 71 71] 72 (+/- 0.9) <- [71 73 72 72 72 72 73 72 71 70] 72 (+/- 0.9) <- [72 70 73 71 73 72 72 72 72 73] 71 (+/- 1.2) <- [74 71 71 70 71 70 71 69 70 71] 72 (+/- 0.7) <- [72 72 72 71 70 72 73 71 73 73] 72 (+/- 0.6) <- [71 71 71 72 71 71 71 71 72 70] 71 (+/- 1.2) <- [73 71 71 68 71 71 71 71 70 71] 72 (+/- 0.6) <- [72 72 71 73 71 73 72 71 72 72] 72 (+/- 0.7) <- [73 71 71 71 72 72 71 72 72 70] 72 (+/- 0.7) <- [72 73 72 70 71 72 72 72 71 71] 72 (+/- 0.7) <- [71 71 71 70 72 72 72 70 71 71] 72 (+/- 0.6) <- [71 72 71 71 72 71 73 71 71 73] 71 (+/- 1.0) <- [70 69 70 71 70 71 72 71 73 72] Accuracy: 71.9 (+/- 0.97) Mean time (20 cv): 16.53 seconds Overall time: 334.93 seconds -------------------- Classification, test_size = 0.4 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 3600.0 training, 2400.0 testing 71 (+/- 0.5) <- [71 70 71 71 70 71 71 71 70 71] 71 (+/- 0.7) <- [70 72 71 69 70 70 71 70 71 70] 71 (+/- 0.7) <- [71 71 71 70 72 71 71 70 72 70] 71 (+/- 0.9) <- [72 69 71 72 71 72 71 70 70 71] 71 (+/- 0.9) <- [70 69 73 71 71 72 71 70 71 71] 72 (+/- 0.6) <- [71 71 71 72 73 71 71 71 70 71] 72 (+/- 0.9) <- [72 71 71 71 72 69 72 71 70 70] 72 (+/- 0.9) <- [70 72 71 71 70 72 72 70 71 70] 71 (+/- 0.7) <- [69 71 71 71 71 71 72 71 71 70] 72 (+/- 0.8) <- [71 70 71 70 73 71 71 71 71 71] 71 (+/- 0.8) <- [72 71 70 70 70 70 69 69 69 70] 72 (+/- 0.7) <- [71 72 73 70 70 72 72 71 71 71] 71 (+/- 0.6) <- [70 70 71 71 70 70 70 72 72 70] 71 (+/- 0.8) <- [71 70 71 69 70 72 72 70 70 70] 71 (+/- 0.6) <- [70 71 71 70 70 72 71 70 71 71] 71 (+/- 0.4) <- [71 71 71 70 72 71 71 71 71 70] 71 (+/- 0.4) <- [71 71 71 71 71 72 71 71 70 71] 71 (+/- 0.6) <- [71 70 70 70 71 71 72 70 71 71] 72 (+/- 1.0) <- [71 72 72 71 71 70 73 70 71 72] 71 (+/- 0.9) <- [70 70 70 71 69 70 71 71 72 72] Accuracy: 71.3 (+/- 0.79) Mean time (20 cv): 14.23 seconds Overall time: 289.10 seconds -------------------- Classification, test_size = 0.5 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 3000.0 training, 3000.0 testing 70 (+/- 0.4) <- [70 69 70 70 69 70 70 70 70 70] 71 (+/- 0.3) <- [70 70 70 70 70 70 70 70 70 70] 70 (+/- 0.8) <- [71 70 70 69 71 71 70 70 69 70] 71 (+/- 0.8) <- [71 69 70 71 71 71 69 69 70 71] 71 (+/- 0.6) <- [70 69 71 71 70 71 71 70 70 70] 71 (+/- 0.5) <- [70 71 70 71 71 71 71 71 70 70] 71 (+/- 0.9) <- [71 70 71 70 72 69 71 71 69 69] 71 (+/- 0.9) <- [71 71 70 71 69 70 72 69 69 70] 71 (+/- 0.7) <- [69 70 70 70 72 71 71 70 70 70] 71 (+/- 0.8) <- [70 70 69 70 72 71 70 70 70 69] 70 (+/- 0.7) <- [71 71 70 70 71 70 69 69 69 70] 71 (+/- 0.6) <- [70 70 72 70 70 71 70 69 71 71] 70 (+/- 0.4) <- [70 70 70 70 69 70 70 71 70 70] 70 (+/- 0.6) <- [70 70 70 69 71 71 70 69 70 70] 71 (+/- 0.7) <- [70 70 71 69 70 71 70 70 70 71] 71 (+/- 0.6) <- [69 70 71 71 71 70 70 71 70 70] 71 (+/- 0.5) <- [71 71 70 70 71 71 70 71 70 71] 70 (+/- 0.7) <- [70 69 69 69 71 69 71 69 71 70] 71 (+/- 0.9) <- [70 71 72 70 70 70 72 70 69 71] 71 (+/- 0.7) <- [70 70 69 70 70 70 70 71 71 71] Accuracy: 70.7 (+/- 0.70) Mean time (20 cv): 11.77 seconds Overall time: 239.85 seconds -------------------- Classification, test_size = 0.6 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 2400.0 training, 3600.0 testing 70 (+/- 0.6) <- [70 69 69 69 69 69 68 69 69 70] 70 (+/- 0.8) <- [70 69 69 69 69 69 69 67 69 70] 70 (+/- 0.7) <- [70 69 70 68 70 69 69 69 69 69] 70 (+/- 0.7) <- [69 68 69 70 70 70 69 68 69 70] 70 (+/- 0.6) <- [70 68 70 70 70 70 69 70 70 69] 70 (+/- 0.7) <- [69 70 69 70 71 69 69 70 69 70] 70 (+/- 0.8) <- [69 69 71 70 70 68 70 70 69 69] 70 (+/- 0.8) <- [69 70 69 68 68 70 71 69 68 69] 70 (+/- 0.5) <- [69 69 69 69 70 69 70 69 70 70] 70 (+/- 0.6) <- [70 69 68 68 70 70 69 69 69 69] 70 (+/- 0.7) <- [70 70 69 68 70 70 69 69 68 69] 70 (+/- 0.4) <- [70 69 70 69 69 70 69 70 70 69] 70 (+/- 0.5) <- [70 68 70 69 69 69 68 70 69 69] 70 (+/- 0.8) <- [70 70 68 68 69 69 70 68 70 69] 70 (+/- 0.7) <- [69 69 70 69 70 70 68 69 69 70] 70 (+/- 0.5) <- [69 68 70 70 70 69 70 69 69 69] 70 (+/- 0.6) <- [70 70 69 70 70 69 68 69 69 70] 70 (+/- 0.7) <- [69 69 68 68 70 70 70 68 70 69] 70 (+/- 0.8) <- [70 70 70 70 68 69 70 69 68 71] 70 (+/- 0.9) <- [69 69 67 69 70 69 70 70 71 69] Accuracy: 69.8 (+/- 0.70) Mean time (20 cv): 9.50 seconds Overall time: 194.41 seconds -------------------- Classification, test_size = 0.7 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 1800.0 training, 4200.0 testing 69 (+/- 0.6) <- [69 68 68 69 67 68 68 68 67 69] 68 (+/- 0.8) <- [69 68 68 68 68 68 68 66 66 68] 68 (+/- 0.6) <- [68 68 68 67 68 68 67 68 68 69] 68 (+/- 0.5) <- [68 68 67 68 68 68 68 68 67 68] 69 (+/- 0.6) <- [69 67 69 68 68 69 68 68 69 68] 69 (+/- 0.7) <- [67 69 67 69 69 68 68 68 67 69] 69 (+/- 0.5) <- [68 68 69 69 69 68 69 68 68 68] 68 (+/- 0.8) <- [68 68 67 68 66 69 69 67 68 68] 69 (+/- 0.6) <- [67 69 67 68 68 68 68 68 69 69] 68 (+/- 0.3) <- [68 68 68 68 69 68 68 68 68 68] 68 (+/- 0.6) <- [68 68 68 67 68 69 67 67 68 69] 69 (+/- 0.7) <- [69 68 70 67 68 69 69 68 69 68] 68 (+/- 0.4) <- [68 67 68 68 68 68 68 68 68 68] 68 (+/- 0.7) <- [68 69 68 67 68 66 68 68 68 68] 69 (+/- 0.8) <- [68 68 69 68 68 68 66 68 69 69] 68 (+/- 0.9) <- [69 66 68 68 69 68 67 68 69 67] 69 (+/- 0.9) <- [69 69 67 69 68 69 66 69 67 68] 69 (+/- 0.7) <- [68 68 67 67 68 69 69 68 69 68] 69 (+/- 0.7) <- [69 68 68 69 67 68 69 68 67 69] 69 (+/- 0.7) <- [68 68 67 69 68 68 69 68 69 68] Accuracy: 68.6 (+/- 0.71) Mean time (20 cv): 7.16 seconds Overall time: 147.70 seconds -------------------- Classification, test_size = 0.8 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 1200.0 training, 4800.0 testing 67 (+/- 0.5) <- [67 66 66 67 67 67 67 68 66 67] 67 (+/- 0.6) <- [67 66 66 66 66 67 66 65 66 65] 66 (+/- 0.6) <- [65 66 67 66 66 66 65 66 67 66] 67 (+/- 0.6) <- [66 66 66 67 66 65 66 66 65 67] 67 (+/- 0.7) <- [67 67 68 67 66 68 66 67 67 66] 67 (+/- 0.5) <- [66 66 66 66 68 66 66 66 66 66] 67 (+/- 0.6) <- [67 66 66 67 66 65 67 65 66 67] 66 (+/- 0.7) <- [66 67 66 66 65 66 68 66 66 66] 67 (+/- 0.7) <- [65 66 66 67 67 66 67 66 67 67] 67 (+/- 0.6) <- [67 67 67 66 66 66 66 66 66 66] 67 (+/- 0.9) <- [67 66 68 65 67 68 66 66 66 67] 67 (+/- 0.4) <- [66 67 67 65 67 67 66 67 67 67] 67 (+/- 0.6) <- [66 66 67 67 67 66 65 65 66 66] 66 (+/- 0.6) <- [66 66 65 65 66 65 66 67 66 66] 67 (+/- 0.7) <- [66 66 67 67 67 66 65 65 67 67] 67 (+/- 0.6) <- [67 66 67 66 67 65 67 67 66 65] 67 (+/- 0.9) <- [67 66 65 67 65 66 64 66 67 66] 66 (+/- 0.7) <- [65 65 66 65 66 66 67 66 66 65] 66 (+/- 0.9) <- [67 65 66 67 66 65 67 65 64 66] 67 (+/- 1.0) <- [67 65 67 67 65 66 66 66 68 65] Accuracy: 66.7 (+/- 0.74) Mean time (20 cv): 4.88 seconds Overall time: 101.96 seconds -------------------- Classification, test_size = 0.9 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 600.0 training, 5400.0 testing 63 (+/- 0.5) <- [62 63 62 63 63 63 63 63 64 62] 63 (+/- 1.0) <- [63 64 63 61 63 65 63 61 63 62] 63 (+/- 1.2) <- [62 62 60 64 63 63 63 62 63 61] 63 (+/- 0.7) <- [63 63 62 63 62 62 62 61 62 62] 63 (+/- 1.1) <- [61 62 64 64 63 64 61 63 64 62] 62 (+/- 0.5) <- [62 61 62 62 62 63 62 62 62 62] 63 (+/- 0.4) <- [63 63 63 63 62 63 63 62 63 64] 62 (+/- 0.8) <- [63 61 61 62 62 62 64 61 62 62] 63 (+/- 0.6) <- [62 63 63 62 62 62 64 62 62 62] 63 (+/- 1.1) <- [62 63 63 61 64 61 64 62 61 61] 63 (+/- 0.9) <- [64 61 62 62 62 64 62 61 63 63] 63 (+/- 0.6) <- [62 62 63 62 62 63 62 63 62 63] 63 (+/- 1.0) <- [63 64 62 63 64 62 61 62 62 62] 63 (+/- 0.6) <- [62 63 62 62 63 62 62 63 64 63] 63 (+/- 0.7) <- [63 62 63 63 63 63 61 62 63 63] 63 (+/- 0.7) <- [61 64 63 63 63 63 63 63 63 63] 63 (+/- 1.0) <- [63 61 62 64 62 62 60 64 62 62] 63 (+/- 1.0) <- [61 63 64 62 63 62 64 62 63 62] 63 (+/- 1.0) <- [63 63 64 62 62 63 62 62 60 62] 62 (+/- 1.0) <- [61 62 62 61 60 62 62 63 64 63] Accuracy: 62.9 (+/- 0.91) Mean time (20 cv): 2.56 seconds Overall time: 55.71 seconds -------------------- Classification, test_size = 0.92 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 480.0 training, 5520.0 testing 62 (+/- 1.1) <- [60 61 61 61 61 61 62 64 63 60] 62 (+/- 0.9) <- [61 62 63 60 61 62 62 60 61 60] 61 (+/- 0.6) <- [61 60 60 61 63 60 61 61 61 61] 61 (+/- 0.7) <- [61 61 61 61 60 60 62 59 61 60] 62 (+/- 1.1) <- [61 61 61 62 63 62 60 62 62 61] 61 (+/- 0.6) <- [60 59 61 61 61 62 61 61 61 61] 62 (+/- 0.6) <- [63 61 63 61 61 62 62 62 61 63] 61 (+/- 0.5) <- [61 61 60 60 61 60 62 60 61 61] 62 (+/- 0.8) <- [62 62 62 61 60 62 63 61 60 60] 61 (+/- 1.0) <- [60 61 62 60 62 60 62 61 59 61] 61 (+/- 0.8) <- [62 60 61 60 61 61 61 59 62 61] 62 (+/- 0.8) <- [61 61 61 59 61 62 61 61 61 62] 61 (+/- 1.2) <- [61 63 60 62 63 59 61 61 60 59] 62 (+/- 0.9) <- [60 61 61 61 61 62 61 62 63 62] 62 (+/- 1.0) <- [62 62 60 63 61 61 60 60 62 63] 62 (+/- 0.9) <- [60 63 60 60 62 61 61 61 61 62] 61 (+/- 1.2) <- [61 59 61 61 62 60 59 63 60 62] 61 (+/- 1.2) <- [59 61 61 61 61 59 64 60 62 61] 61 (+/- 1.1) <- [62 62 62 62 60 62 59 62 59 61] 61 (+/- 1.0) <- [60 61 60 59 59 61 62 60 62 61] Accuracy: 61.5 (+/- 0.99) Mean time (20 cv): 2.10 seconds Overall time: 46.40 seconds -------------------- Classification, test_size = 0.94 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 360.0 training, 5640.0 testing 60 (+/- 1.5) <- [58 58 61 58 57 59 61 62 60 60] 60 (+/- 0.9) <- [58 61 61 60 59 60 59 60 58 59] 60 (+/- 0.9) <- [59 57 58 59 60 58 59 60 59 61] 60 (+/- 1.0) <- [59 59 61 60 59 59 59 57 60 59] 60 (+/- 1.0) <- [58 59 60 61 58 61 60 59 60 59] 59 (+/- 1.1) <- [59 58 60 57 59 59 59 57 59 61] 60 (+/- 0.9) <- [60 58 60 60 59 61 59 60 60 62] 59 (+/- 1.0) <- [60 58 60 59 59 58 61 59 58 58] 59 (+/- 1.0) <- [60 59 60 59 58 59 60 58 59 57] 59 (+/- 1.0) <- [57 60 59 59 60 59 59 60 58 58] 59 (+/- 1.1) <- [60 58 57 59 58 61 59 58 60 60] 60 (+/- 0.7) <- [59 60 59 58 60 59 58 59 59 60] 59 (+/- 1.1) <- [59 60 59 59 60 57 56 59 59 59] 60 (+/- 1.2) <- [58 60 59 60 59 57 61 61 60 60] 60 (+/- 1.1) <- [59 61 58 61 59 59 59 59 61 60] 60 (+/- 1.0) <- [57 60 58 58 59 59 60 60 59 60] 59 (+/- 1.5) <- [60 57 60 59 61 57 58 60 57 60] 60 (+/- 1.0) <- [59 59 61 60 61 58 61 59 59 60] 60 (+/- 1.0) <- [58 61 59 59 60 61 58 59 57 59] 60 (+/- 0.9) <- [59 60 60 58 59 58 60 58 61 58] Accuracy: 59.7 (+/- 1.10) Mean time (20 cv): 1.61 seconds Overall time: 36.70 seconds -------------------- Classification, test_size = 0.96 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 240.0 training, 5760.0 testing 57 (+/- 1.1) <- [55 56 55 55 55 56 58 58 56 57] 57 (+/- 1.4) <- [57 57 59 57 59 55 55 56 56 54] 56 (+/- 1.1) <- [55 55 54 56 56 56 56 56 58 57] 57 (+/- 1.4) <- [56 54 57 57 57 55 57 54 58 57] 57 (+/- 1.3) <- [54 56 56 58 57 57 58 56 54 57] 57 (+/- 1.7) <- [56 54 54 53 57 57 57 56 57 59] 57 (+/- 1.6) <- [56 56 57 57 57 59 53 58 58 59] 56 (+/- 1.4) <- [58 55 57 56 56 54 57 54 54 57] 57 (+/- 1.0) <- [58 56 55 56 56 56 58 55 56 55] 56 (+/- 1.0) <- [54 55 55 56 56 57 57 56 55 57] 57 (+/- 1.0) <- [55 55 55 57 56 56 57 55 58 57] 57 (+/- 1.0) <- [58 59 57 55 58 57 56 57 57 56] 57 (+/- 1.2) <- [56 57 58 56 57 55 55 56 58 55] 57 (+/- 1.5) <- [55 56 57 58 56 54 57 58 54 55] 56 (+/- 2.0) <- [56 58 56 57 56 53 52 56 57 59] 58 (+/- 1.2) <- [54 58 56 58 58 58 58 57 56 58] 57 (+/- 1.3) <- [55 55 57 58 55 55 56 59 54 57] 57 (+/- 1.2) <- [55 57 57 57 56 54 58 55 56 57] 57 (+/- 1.3) <- [56 57 55 58 57 57 58 59 54 56] 57 (+/- 1.3) <- [57 56 54 55 58 57 58 56 58 55] Accuracy: 56.8 (+/- 1.37) Mean time (20 cv): 1.14 seconds Overall time: 27.24 seconds -------------------- Classification, test_size = 0.97 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 180.0 training, 5820.0 testing 54 (+/- 1.4) <- [53 53 53 53 53 54 56 57 55 53] 55 (+/- 1.3) <- [52 55 56 55 55 53 54 55 52 55] 54 (+/- 1.7) <- [54 50 50 55 53 54 54 53 55 55] 55 (+/- 1.3) <- [55 54 54 56 56 53 55 52 55 56] 54 (+/- 1.4) <- [53 53 54 55 53 55 56 56 52 52] 54 (+/- 1.3) <- [52 53 53 53 56 55 54 54 54 55] 55 (+/- 1.5) <- [55 52 54 54 55 55 52 57 55 56] 55 (+/- 1.4) <- [56 54 56 56 55 52 55 52 54 54] 55 (+/- 1.7) <- [57 55 54 55 57 55 56 51 56 52] 55 (+/- 0.6) <- [54 54 54 54 53 55 55 54 54 55] 54 (+/- 1.6) <- [53 54 54 54 50 55 55 53 56 52] 55 (+/- 1.4) <- [55 57 56 53 53 54 53 54 56 55] 55 (+/- 1.5) <- [54 55 57 54 56 53 52 53 56 56] 54 (+/- 1.2) <- [55 55 53 55 52 52 55 55 54 54] 54 (+/- 2.6) <- [54 57 54 55 55 50 50 52 56 58] 55 (+/- 1.5) <- [51 56 55 55 54 55 56 56 54 56] 55 (+/- 1.2) <- [53 53 54 55 55 54 54 57 54 53] 55 (+/- 1.0) <- [52 54 55 54 53 54 56 56 54 55] 54 (+/- 1.5) <- [54 56 55 55 52 52 55 56 52 54] 55 (+/- 1.3) <- [53 55 54 52 57 55 53 55 55 53] Accuracy: 54.7 (+/- 1.51) Mean time (20 cv): 0.88 seconds Overall time: 22.07 seconds -------------------- Classification, test_size = 0.98 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 120.0 training, 5880.0 testing 50 (+/- 4.4) <- [49 51 50 49 50 52 54 37 52 50] 52 (+/- 1.6) <- [51 54 51 50 54 51 49 51 49 52] 51 (+/- 2.1) <- [51 47 48 55 49 50 50 51 51 50] 52 (+/- 1.6) <- [52 52 53 52 54 53 51 48 53 52] 51 (+/- 1.8) <- [49 52 50 52 49 50 52 50 46 52] 51 (+/- 1.6) <- [50 49 49 51 53 51 51 50 51 54] 52 (+/- 1.7) <- [52 47 54 52 51 52 51 53 53 52] 52 (+/- 1.6) <- [52 51 52 51 50 47 52 51 52 54] 52 (+/- 1.1) <- [53 51 51 51 53 50 53 51 52 50] 51 (+/- 1.8) <- [48 50 50 52 53 52 48 51 53 53] 52 (+/- 2.0) <- [52 50 51 53 47 54 52 51 55 51] 52 (+/- 1.9) <- [52 56 54 50 51 52 50 50 51 50] 52 (+/- 1.0) <- [53 52 53 51 53 51 51 51 52 51] 52 (+/- 1.1) <- [53 53 51 54 51 52 51 51 52 52] 51 (+/- 2.2) <- [49 52 52 51 51 46 48 49 53 53] 52 (+/- 1.7) <- [48 52 51 52 53 53 52 54 49 52] 52 (+/- 2.1) <- [51 46 51 53 51 51 51 55 51 51] 52 (+/- 1.8) <- [50 53 53 52 49 52 52 51 47 52] 52 (+/- 1.9) <- [51 51 53 53 48 51 54 54 49 50] 52 (+/- 1.3) <- [50 52 52 52 53 52 51 51 54 49] Accuracy: 51.7 (+/- 2.04) Mean time (20 cv): 0.63 seconds Overall time: 17.12 seconds -------------------- Classification, test_size = 0.99 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 149, 2, 128) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=128 -> 19,072,000 floats dim: 38,144 features per clip shape: (5, 100, 298, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Truncated and grouped: size: N=135,000 x n=128 -> 17,280,000 floats dim: 34,560 features per clip shape: (5, 100, 6, 45, 128) Feature vectors: size: N=6,000 x n=128 -> 768,000 floats dim: 1,536 features per clip shape: (5, 100, 6, 2, 128)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 128), float64 Testing data: (2400, 128), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 70.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1536), float64 Testing data: (200, 1536), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 63.5 % Clips accuracy: 69.0 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 128), float64 Labels: (6000,), uint8 Ratio: 60.0 training, 5940.0 testing 46 (+/- 2.5) <- [46 49 45 43 44 47 44 43 48 50] 47 (+/- 2.3) <- [45 51 48 42 47 47 49 46 45 48] 47 (+/- 2.5) <- [47 41 47 49 47 44 48 49 45 48] 48 (+/- 2.4) <- [48 48 48 46 52 48 47 42 47 48] 46 (+/- 2.5) <- [41 47 45 47 43 46 46 49 44 49] 47 (+/- 1.5) <- [45 45 45 48 49 48 44 46 46 48] 47 (+/- 1.3) <- [46 44 49 47 45 46 46 47 46 47] 47 (+/- 1.3) <- [47 47 44 46 46 46 48 46 46 49] 47 (+/- 2.1) <- [51 43 46 46 46 48 47 45 45 48] 46 (+/- 4.0) <- [46 44 47 50 47 47 46 46 49 35] 47 (+/- 1.6) <- [47 46 50 47 43 46 48 47 46 48] 48 (+/- 1.5) <- [46 50 49 48 49 47 45 47 47 46] 48 (+/- 1.9) <- [48 49 51 46 47 48 48 47 44 47] 48 (+/- 1.2) <- [48 50 47 46 49 48 46 48 47 48] 48 (+/- 2.9) <- [45 50 51 47 45 42 46 48 49 52] 48 (+/- 2.3) <- [48 46 47 43 51 49 48 49 46 50] 48 (+/- 2.4) <- [50 42 47 51 45 47 46 49 47 47] 48 (+/- 1.6) <- [44 48 48 46 46 48 47 48 46 50] 47 (+/- 1.6) <- [50 47 45 45 46 46 48 47 43 46] 47 (+/- 1.9) <- [49 46 49 45 44 49 46 44 49 48] Accuracy: 47.3 (+/- 2.25) Mean time (20 cv): 0.38 seconds Overall time: 11.94 seconds -------------------- Baseline, test_size = 0.1 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 5400.0 training, 600.0 testing 69 (+/- 2.2) <- [70 68 71 67 64 67 70 68 72 66] 69 (+/- 1.4) <- [71 68 67 67 72 69 67 70 69 70] 70 (+/- 2.0) <- [71 69 73 68 68 71 71 68 71 66] 69 (+/- 1.4) <- [70 67 66 67 67 70 68 70 70 68] 70 (+/- 2.4) <- [71 69 72 68 70 75 71 68 68 66] 71 (+/- 0.7) <- [71 71 70 70 71 71 70 69 70 70] 70 (+/- 1.4) <- [70 68 72 69 69 67 71 69 70 69] 70 (+/- 1.5) <- [69 70 68 71 65 70 70 69 70 69] 69 (+/- 1.7) <- [68 69 68 69 68 65 70 70 69 72] 69 (+/- 1.8) <- [72 69 66 68 69 66 69 69 70 71] 70 (+/- 1.4) <- [71 70 69 69 69 69 73 70 71 68] 70 (+/- 1.6) <- [71 70 71 68 69 69 73 68 69 71] 69 (+/- 1.3) <- [69 70 66 68 69 70 68 71 68 68] 69 (+/- 2.0) <- [71 66 68 66 67 69 66 69 69 72] 70 (+/- 1.1) <- [69 70 72 68 69 70 69 71 71 69] 70 (+/- 2.0) <- [72 71 69 71 67 68 71 72 69 67] 70 (+/- 1.6) <- [67 71 69 69 68 70 71 71 72 69] 70 (+/- 0.9) <- [70 68 71 70 68 70 69 69 71 69] 70 (+/- 1.2) <- [72 71 69 69 71 69 72 70 68 69] 69 (+/- 1.7) <- [67 70 71 67 67 72 67 70 70 69] Accuracy: 69.7 (+/- 1.71) Mean time (20 cv): 17.53 seconds Overall time: 354.40 seconds -------------------- Baseline, test_size = 0.2 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 4800.0 training, 1200.0 testing 69 (+/- 1.2) <- [70 69 69 69 66 67 70 68 70 68] 69 (+/- 1.3) <- [70 70 67 69 68 71 68 68 67 70] 70 (+/- 1.5) <- [70 69 72 67 68 71 69 70 70 68] 69 (+/- 1.1) <- [70 68 69 66 67 69 69 68 68 69] 70 (+/- 1.4) <- [71 70 71 67 70 72 70 68 69 69] 70 (+/- 1.4) <- [70 70 69 72 72 68 67 69 70 69] 70 (+/- 1.2) <- [68 68 71 70 69 68 71 68 70 69] 69 (+/- 1.3) <- [67 70 67 69 67 71 70 68 68 69] 69 (+/- 1.0) <- [68 67 69 70 69 68 68 68 70 71] 69 (+/- 1.2) <- [71 69 68 67 70 68 69 70 69 68] 70 (+/- 1.0) <- [70 69 69 68 71 68 71 69 69 69] 69 (+/- 1.0) <- [70 69 70 67 68 69 71 68 68 68] 69 (+/- 0.7) <- [70 69 67 68 68 68 68 69 69 69] 69 (+/- 1.6) <- [71 69 68 65 68 69 69 70 70 70] 70 (+/- 0.9) <- [69 71 71 69 69 70 69 71 71 68] 70 (+/- 1.6) <- [71 69 68 70 68 68 69 73 68 68] 69 (+/- 0.9) <- [68 69 68 69 70 70 69 70 70 68] 69 (+/- 0.8) <- [70 69 70 68 68 68 69 67 69 69] 70 (+/- 1.0) <- [71 70 68 69 69 68 70 70 71 70] 69 (+/- 0.8) <- [69 68 69 68 67 68 68 67 69 70] Accuracy: 69.4 (+/- 1.26) Mean time (20 cv): 15.31 seconds Overall time: 310.07 seconds -------------------- Baseline, test_size = 0.3 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 4200.0 training, 1800.0 testing 68 (+/- 1.3) <- [69 68 69 66 66 68 68 70 68 68] 69 (+/- 0.7) <- [69 70 67 69 68 69 69 69 68 69] 69 (+/- 0.9) <- [70 69 69 68 69 70 69 68 70 67] 69 (+/- 0.9) <- [70 68 68 68 68 69 68 68 69 70] 70 (+/- 0.4) <- [69 69 70 69 69 69 69 69 69 68] 69 (+/- 1.2) <- [70 71 68 71 70 68 67 67 68 69] 69 (+/- 0.9) <- [69 68 70 68 68 69 71 67 70 68] 69 (+/- 0.8) <- [68 69 67 69 68 70 69 69 69 69] 69 (+/- 0.7) <- [69 67 70 70 68 69 69 68 69 70] 69 (+/- 0.8) <- [70 68 68 68 68 68 69 69 69 69] 69 (+/- 1.3) <- [72 69 69 67 69 68 69 67 68 68] 69 (+/- 0.9) <- [69 69 70 67 67 69 70 69 69 68] 68 (+/- 0.8) <- [68 69 67 68 67 68 67 70 69 67] 69 (+/- 1.2) <- [69 68 68 65 68 69 68 68 69 70] 70 (+/- 0.6) <- [69 70 69 68 69 70 69 69 70 69] 69 (+/- 0.9) <- [69 68 70 69 67 67 69 70 68 68] 70 (+/- 0.8) <- [68 69 69 69 70 70 69 70 70 67] 69 (+/- 0.7) <- [67 68 69 67 68 68 69 69 68 69] 70 (+/- 0.7) <- [70 69 68 69 70 69 69 69 70 70] 69 (+/- 0.8) <- [68 68 69 69 66 68 68 68 69 68] Accuracy: 69.1 (+/- 0.96) Mean time (20 cv): 13.32 seconds Overall time: 270.20 seconds -------------------- Baseline, test_size = 0.4 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 3600.0 training, 2400.0 testing 69 (+/- 0.8) <- [69 68 68 66 67 68 68 69 69 68] 68 (+/- 0.6) <- [68 69 68 68 67 68 68 68 68 67] 69 (+/- 0.7) <- [68 69 69 67 68 69 69 67 69 67] 68 (+/- 0.7) <- [68 69 68 68 68 68 66 67 68 68] 69 (+/- 0.6) <- [68 68 70 69 70 69 68 68 69 68] 69 (+/- 0.9) <- [68 69 69 70 70 67 69 68 67 69] 69 (+/- 0.3) <- [68 69 69 68 68 68 68 68 68 68] 69 (+/- 0.7) <- [67 69 67 68 69 69 68 68 69 68] 69 (+/- 0.8) <- [67 67 69 69 68 68 67 68 69 69] 69 (+/- 0.8) <- [68 67 68 66 69 68 68 69 69 69] 68 (+/- 1.0) <- [70 68 68 66 68 67 68 67 68 67] 69 (+/- 0.8) <- [68 68 70 68 68 69 67 69 69 68] 68 (+/- 0.8) <- [68 68 67 67 67 68 68 69 69 68] 69 (+/- 0.9) <- [68 69 68 66 67 69 68 68 70 68] 69 (+/- 0.8) <- [69 69 67 66 68 69 68 68 69 69] 68 (+/- 0.6) <- [68 67 69 68 67 67 69 68 68 68] 69 (+/- 0.8) <- [67 69 68 68 69 69 68 68 69 67] 68 (+/- 0.7) <- [68 67 68 68 68 67 69 68 68 69] 69 (+/- 0.7) <- [69 69 68 70 67 68 68 67 68 68] 68 (+/- 0.6) <- [67 69 68 68 67 68 69 68 68 68] Accuracy: 68.6 (+/- 0.77) Mean time (20 cv): 11.41 seconds Overall time: 231.89 seconds -------------------- Baseline, test_size = 0.5 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 3000.0 training, 3000.0 testing 68 (+/- 0.7) <- [68 67 68 66 66 67 67 68 68 68] 68 (+/- 0.7) <- [67 68 69 68 66 68 68 67 67 68] 68 (+/- 0.6) <- [68 68 68 67 67 68 69 67 68 67] 68 (+/- 0.7) <- [68 68 67 67 68 68 67 66 68 67] 68 (+/- 0.6) <- [68 67 69 68 69 69 68 68 68 67] 68 (+/- 0.7) <- [68 68 67 68 70 68 68 67 67 68] 68 (+/- 0.7) <- [69 67 68 67 67 67 67 67 66 67] 68 (+/- 0.6) <- [67 68 67 67 69 68 67 67 67 66] 68 (+/- 0.8) <- [66 67 67 68 69 68 67 67 68 68] 68 (+/- 0.7) <- [68 66 67 66 67 67 67 67 68 68] 68 (+/- 0.7) <- [68 69 67 67 68 67 68 67 67 69] 68 (+/- 0.5) <- [68 67 69 67 68 68 67 68 69 68] 68 (+/- 0.6) <- [68 67 66 67 67 68 67 68 67 67] 68 (+/- 0.7) <- [68 67 68 67 67 68 68 67 69 68] 68 (+/- 0.7) <- [68 67 66 66 67 68 68 67 68 68] 68 (+/- 0.5) <- [67 67 68 67 67 66 68 67 67 68] 68 (+/- 0.8) <- [68 68 68 67 68 69 67 68 68 67] 68 (+/- 0.6) <- [67 67 66 67 67 68 68 66 68 68] 68 (+/- 0.5) <- [68 69 68 68 67 67 68 67 68 68] 68 (+/- 0.4) <- [68 68 68 68 67 68 68 67 67 68] Accuracy: 68.1 (+/- 0.70) Mean time (20 cv): 9.57 seconds Overall time: 195.26 seconds -------------------- Baseline, test_size = 0.6 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 2400.0 training, 3600.0 testing 67 (+/- 0.7) <- [67 67 66 66 65 66 66 67 67 67] 67 (+/- 0.5) <- [68 67 67 67 66 67 67 67 66 68] 68 (+/- 0.6) <- [67 67 68 66 68 67 68 66 67 66] 67 (+/- 0.5) <- [67 67 67 67 67 68 68 66 67 67] 67 (+/- 0.7) <- [67 66 68 67 68 67 66 67 67 67] 67 (+/- 0.8) <- [67 67 67 67 68 66 67 67 66 68] 67 (+/- 0.5) <- [67 66 68 67 67 67 67 67 66 66] 67 (+/- 0.6) <- [66 67 67 67 67 68 67 66 66 66] 67 (+/- 0.5) <- [67 66 66 67 67 67 67 67 67 68] 67 (+/- 0.7) <- [67 67 65 66 66 67 67 67 66 66] 67 (+/- 0.8) <- [67 68 66 66 68 67 67 66 66 68] 68 (+/- 0.6) <- [67 67 67 67 68 67 66 68 67 67] 67 (+/- 0.9) <- [66 66 67 67 67 68 64 66 67 66] 67 (+/- 0.6) <- [66 67 67 66 66 67 67 66 68 67] 67 (+/- 0.5) <- [66 66 67 66 67 67 67 66 67 67] 67 (+/- 0.5) <- [67 66 67 67 67 66 67 68 66 67] 68 (+/- 0.7) <- [68 67 68 68 67 67 66 68 67 66] 67 (+/- 0.6) <- [67 67 67 66 67 68 67 66 67 67] 68 (+/- 0.5) <- [67 67 68 68 67 67 67 68 68 67] 67 (+/- 0.5) <- [67 67 67 66 66 67 68 67 67 67] Accuracy: 67.3 (+/- 0.65) Mean time (20 cv): 7.71 seconds Overall time: 157.97 seconds -------------------- Baseline, test_size = 0.7 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 1800.0 training, 4200.0 testing 66 (+/- 0.6) <- [66 66 65 65 65 66 66 67 66 66] 66 (+/- 0.4) <- [65 66 66 66 65 65 66 66 65 66] 66 (+/- 0.7) <- [66 67 65 65 66 66 67 65 66 65] 67 (+/- 0.2) <- [66 66 66 66 66 66 67 66 66 66] 67 (+/- 0.6) <- [67 66 66 66 66 66 65 66 66 66] 66 (+/- 0.6) <- [66 66 66 67 67 66 66 66 64 66] 66 (+/- 0.5) <- [66 65 66 67 65 66 66 66 65 66] 66 (+/- 0.5) <- [65 66 66 66 66 66 66 65 65 65] 66 (+/- 0.7) <- [65 65 65 66 65 66 65 67 66 67] 66 (+/- 0.7) <- [66 66 64 65 66 65 65 66 65 65] 66 (+/- 0.6) <- [65 66 65 65 67 65 66 66 66 66] 67 (+/- 0.5) <- [66 66 67 65 67 66 66 67 66 66] 66 (+/- 0.7) <- [66 65 66 66 65 66 64 66 66 65] 66 (+/- 0.5) <- [66 66 67 66 65 65 66 66 67 66] 66 (+/- 0.5) <- [66 65 67 65 66 66 66 66 66 66] 66 (+/- 0.5) <- [66 66 66 66 67 65 66 66 66 65] 66 (+/- 0.6) <- [66 66 65 66 66 65 65 66 65 65] 66 (+/- 0.5) <- [66 66 66 66 66 67 66 65 67 66] 66 (+/- 0.5) <- [65 65 66 67 65 65 66 65 66 66] 66 (+/- 0.5) <- [65 65 66 66 65 66 66 67 66 66] Accuracy: 66.3 (+/- 0.59) Mean time (20 cv): 5.81 seconds Overall time: 119.91 seconds -------------------- Baseline, test_size = 0.8 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 1200.0 training, 4800.0 testing 65 (+/- 1.0) <- [64 65 63 63 65 64 66 64 65 64] 64 (+/- 0.4) <- [64 64 64 64 64 64 64 64 63 63] 65 (+/- 0.8) <- [63 64 65 64 63 65 65 63 64 64] 65 (+/- 0.4) <- [64 64 64 65 64 65 65 64 65 64] 64 (+/- 0.5) <- [64 64 64 64 64 64 63 65 63 63] 64 (+/- 0.7) <- [65 64 64 63 64 63 63 64 63 63] 64 (+/- 0.9) <- [64 62 64 64 63 65 64 63 64 66] 65 (+/- 0.9) <- [64 66 63 64 64 64 65 65 65 63] 64 (+/- 0.7) <- [62 64 64 65 64 63 63 65 64 64] 65 (+/- 0.6) <- [65 65 64 64 64 64 63 64 63 64] 64 (+/- 0.5) <- [64 64 63 64 65 64 64 64 64 65] 65 (+/- 0.8) <- [64 64 66 64 64 64 65 65 65 66] 64 (+/- 0.7) <- [64 63 63 65 64 65 63 64 65 63] 64 (+/- 0.7) <- [64 63 64 63 63 63 63 64 65 62] 65 (+/- 0.7) <- [65 63 65 63 65 63 65 64 64 65] 65 (+/- 0.7) <- [64 63 65 65 65 64 65 64 63 63] 64 (+/- 0.3) <- [64 63 64 64 64 64 64 64 64 63] 64 (+/- 0.7) <- [64 64 64 62 63 64 64 63 64 63] 64 (+/- 0.3) <- [63 63 64 63 64 63 64 64 64 64] 65 (+/- 0.7) <- [63 64 64 64 63 65 64 65 65 65] Accuracy: 64.5 (+/- 0.75) Mean time (20 cv): 3.95 seconds Overall time: 82.85 seconds -------------------- Baseline, test_size = 0.9 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 600.0 training, 5400.0 testing 61 (+/- 1.2) <- [60 61 60 58 62 61 62 60 61 62] 61 (+/- 0.8) <- [59 60 59 60 61 61 61 61 59 60] 61 (+/- 0.6) <- [61 59 60 60 61 60 61 60 60 61] 61 (+/- 0.8) <- [62 61 60 61 60 62 61 60 62 60] 61 (+/- 1.0) <- [60 60 61 62 62 60 59 63 60 60] 61 (+/- 0.7) <- [60 61 59 59 61 61 60 61 61 61] 61 (+/- 1.0) <- [61 61 59 60 60 60 61 61 59 62] 61 (+/- 1.0) <- [60 62 60 61 59 59 62 62 62 60] 61 (+/- 0.9) <- [61 58 60 60 60 62 61 60 60 59] 61 (+/- 1.0) <- [61 62 60 62 62 61 61 60 59 59] 61 (+/- 0.7) <- [61 61 61 60 59 59 61 60 61 61] 61 (+/- 0.7) <- [60 61 61 60 60 60 60 60 61 62] 61 (+/- 0.6) <- [61 60 60 60 61 61 60 60 61 59] 61 (+/- 0.8) <- [60 61 61 60 60 59 61 61 60 60] 61 (+/- 0.9) <- [62 60 61 60 61 59 60 60 61 61] 62 (+/- 0.6) <- [61 61 60 62 61 62 62 61 60 62] 61 (+/- 0.6) <- [61 60 61 61 62 60 60 61 61 60] 61 (+/- 0.6) <- [60 60 61 59 59 60 61 60 61 60] 61 (+/- 0.7) <- [60 60 60 61 60 62 59 60 60 61] 61 (+/- 0.6) <- [60 60 61 60 61 61 61 61 62 61] Accuracy: 61.0 (+/- 0.86) Mean time (20 cv): 2.08 seconds Overall time: 45.42 seconds -------------------- Baseline, test_size = 0.92 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 480.0 training, 5520.0 testing 60 (+/- 1.0) <- [58 59 59 57 58 60 60 59 59 60] 60 (+/- 0.9) <- [58 59 59 59 61 60 59 60 59 59] 60 (+/- 0.9) <- [60 58 59 58 60 58 60 59 59 60] 60 (+/- 0.9) <- [61 60 58 59 59 60 59 58 61 58] 60 (+/- 1.3) <- [58 58 60 61 60 59 58 62 59 60] 60 (+/- 0.5) <- [59 60 60 59 58 60 59 60 60 60] 60 (+/- 1.0) <- [59 59 59 59 58 59 60 59 57 61] 60 (+/- 0.9) <- [60 61 59 59 58 58 60 61 60 59] 60 (+/- 0.9) <- [60 57 59 59 59 60 60 59 59 58] 60 (+/- 0.8) <- [59 60 59 60 61 61 59 59 58 59] 60 (+/- 1.0) <- [60 60 60 59 58 57 59 59 60 60] 60 (+/- 0.9) <- [60 60 59 58 57 59 60 59 60 60] 59 (+/- 0.8) <- [60 59 59 60 59 57 59 59 59 58] 60 (+/- 0.8) <- [58 60 59 59 60 58 59 60 60 59] 60 (+/- 0.8) <- [60 60 59 59 59 59 58 60 61 61] 60 (+/- 0.7) <- [60 61 59 60 59 61 60 60 59 60] 60 (+/- 0.6) <- [60 59 60 59 61 59 58 60 60 60] 60 (+/- 0.7) <- [59 59 58 59 60 59 61 58 60 59] 60 (+/- 0.9) <- [57 60 59 60 59 61 58 59 59 59] 60 (+/- 0.7) <- [59 59 59 59 58 60 60 59 59 60] Accuracy: 59.8 (+/- 0.90) Mean time (20 cv): 1.69 seconds Overall time: 37.61 seconds -------------------- Baseline, test_size = 0.94 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 360.0 training, 5640.0 testing 58 (+/- 1.1) <- [57 58 58 55 56 59 57 57 58 58] 58 (+/- 1.4) <- [55 58 58 58 58 59 56 59 55 58] 58 (+/- 1.0) <- [58 56 57 57 58 56 58 58 57 60] 58 (+/- 0.7) <- [59 58 57 57 57 58 58 57 59 57] 58 (+/- 1.1) <- [56 56 57 59 57 58 56 58 57 57] 58 (+/- 1.0) <- [57 57 57 55 57 57 57 58 59 59] 58 (+/- 1.3) <- [59 56 57 56 56 58 58 58 56 59] 58 (+/- 0.8) <- [57 59 57 58 58 57 59 59 57 58] 58 (+/- 0.8) <- [59 56 59 57 58 58 58 57 57 58] 58 (+/- 1.0) <- [57 59 56 59 59 59 57 56 57 58] 58 (+/- 0.8) <- [58 57 57 57 57 56 59 57 58 59] 58 (+/- 0.8) <- [59 57 58 57 56 57 58 57 59 59] 58 (+/- 1.2) <- [59 58 57 57 59 56 56 56 58 56] 58 (+/- 1.5) <- [55 59 58 58 58 54 59 58 57 59] 58 (+/- 0.9) <- [58 57 58 58 59 55 57 58 57 58] 59 (+/- 1.0) <- [58 59 57 56 59 59 59 58 58 59] 58 (+/- 1.3) <- [59 55 58 56 59 57 56 59 57 59] 58 (+/- 1.1) <- [58 57 56 58 59 57 59 58 58 60] 58 (+/- 1.1) <- [56 59 57 59 58 59 56 58 58 58] 58 (+/- 0.6) <- [58 58 58 57 57 59 57 58 58 57] Accuracy: 58.1 (+/- 1.08) Mean time (20 cv): 1.31 seconds Overall time: 30.05 seconds -------------------- Baseline, test_size = 0.96 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 240.0 training, 5760.0 testing 55 (+/- 1.4) <- [56 56 52 53 53 57 55 53 54 55] 55 (+/- 1.5) <- [54 56 55 56 55 55 53 56 51 56] 56 (+/- 1.1) <- [54 53 55 55 56 55 55 56 55 57] 56 (+/- 1.0) <- [56 55 55 55 57 57 57 54 57 55] 54 (+/- 1.4) <- [53 52 54 56 53 55 53 55 52 54] 56 (+/- 1.5) <- [56 55 53 53 57 53 56 56 55 57] 55 (+/- 1.2) <- [55 53 55 56 53 57 55 55 54 56] 56 (+/- 1.2) <- [56 55 54 58 55 54 57 56 55 58] 55 (+/- 1.0) <- [56 53 55 56 56 56 55 54 55 54] 56 (+/- 1.1) <- [55 56 54 57 54 57 54 56 54 56] 55 (+/- 1.0) <- [56 52 55 55 55 55 55 55 56 55] 56 (+/- 1.1) <- [57 56 55 54 54 55 55 54 57 55] 55 (+/- 1.2) <- [54 57 55 54 56 52 54 53 56 54] 56 (+/- 1.9) <- [55 56 56 57 56 52 55 56 51 56] 56 (+/- 1.3) <- [54 56 56 57 56 53 54 54 54 56] 56 (+/- 1.2) <- [57 58 54 55 56 55 56 57 56 56] 56 (+/- 1.5) <- [55 52 55 55 56 53 55 57 56 57] 56 (+/- 1.1) <- [54 56 54 56 57 54 54 54 55 56] 55 (+/- 1.2) <- [53 56 52 55 55 56 54 55 54 54] 55 (+/- 0.8) <- [55 54 55 54 56 56 55 54 56 55] Accuracy: 55.5 (+/- 1.37) Mean time (20 cv): 0.91 seconds Overall time: 22.03 seconds -------------------- Baseline, test_size = 0.97 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 180.0 training, 5820.0 testing 53 (+/- 2.0) <- [53 52 50 49 52 54 56 53 54 52] 54 (+/- 2.2) <- [50 54 54 56 56 53 53 54 49 55] 54 (+/- 1.3) <- [53 50 53 53 54 53 52 54 54 54] 55 (+/- 0.8) <- [54 55 53 55 54 53 54 53 55 53] 52 (+/- 1.6) <- [51 51 52 55 49 52 52 54 51 52] 54 (+/- 1.0) <- [54 52 52 53 55 52 54 54 53 54] 54 (+/- 1.4) <- [54 50 54 54 52 54 54 54 53 55] 54 (+/- 1.3) <- [54 54 51 55 54 53 54 54 52 55] 54 (+/- 1.2) <- [55 51 54 53 55 55 54 53 53 52] 53 (+/- 1.4) <- [52 54 53 54 50 55 51 53 53 53] 53 (+/- 1.2) <- [53 50 53 54 51 53 53 52 54 53] 54 (+/- 1.8) <- [56 56 54 54 53 53 52 50 56 53] 54 (+/- 1.5) <- [52 55 54 54 54 52 51 51 55 53] 54 (+/- 1.1) <- [52 54 53 55 54 52 54 54 52 54] 54 (+/- 1.5) <- [53 55 54 54 54 50 52 53 55 54] 54 (+/- 1.0) <- [54 56 54 54 55 52 53 54 54 54] 54 (+/- 1.8) <- [52 50 52 53 54 53 53 57 54 55] 55 (+/- 1.3) <- [53 54 55 54 56 53 53 54 52 55] 53 (+/- 1.7) <- [51 54 52 53 52 51 52 56 51 51] 54 (+/- 1.0) <- [54 53 55 53 56 54 54 53 54 53] Accuracy: 53.7 (+/- 1.55) Mean time (20 cv): 0.71 seconds Overall time: 18.03 seconds -------------------- Baseline, test_size = 0.98 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 120.0 training, 5880.0 testing 49 (+/- 4.5) <- [50 52 50 47 49 51 53 36 52 50] 51 (+/- 2.3) <- [49 51 49 52 53 52 52 51 46 52] 50 (+/- 2.1) <- [52 45 50 50 50 49 47 51 52 51] 51 (+/- 1.6) <- [51 53 49 52 50 49 50 49 53 52] 50 (+/- 1.8) <- [47 51 48 52 48 47 51 51 49 52] 52 (+/- 1.5) <- [51 50 49 53 54 52 51 49 52 52] 51 (+/- 1.8) <- [51 47 52 52 51 53 52 50 49 52] 51 (+/- 1.5) <- [49 51 47 51 50 49 52 52 52 49] 51 (+/- 1.5) <- [53 47 49 51 52 50 50 51 52 49] 51 (+/- 1.2) <- [50 50 52 51 48 51 49 52 52 52] 51 (+/- 1.8) <- [53 47 51 52 48 51 52 51 51 50] 51 (+/- 2.5) <- [54 55 52 51 51 51 47 47 51 47] 51 (+/- 1.5) <- [49 54 50 52 51 50 50 50 50 52] 51 (+/- 2.0) <- [51 51 51 54 53 50 48 49 48 53] 51 (+/- 2.0) <- [50 51 52 51 50 45 49 51 53 50] 52 (+/- 1.6) <- [51 53 49 52 53 50 51 55 50 50] 51 (+/- 2.2) <- [49 46 48 54 51 49 51 53 52 52] 52 (+/- 1.7) <- [50 53 53 53 53 52 51 53 48 50] 51 (+/- 1.9) <- [50 52 50 53 50 46 51 51 48 48] 52 (+/- 1.6) <- [51 53 53 52 54 51 53 49 53 49] Accuracy: 51.1 (+/- 2.15) Mean time (20 cv): 0.51 seconds Overall time: 13.90 seconds -------------------- Baseline, test_size = 0.99 -------------------- Software versions: numpy: 1.8.2 sklearn: 0.14.1 Attributes: sr = 22050 labels = ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop' 'reggae' 'rock'] Datasets: D : (128, 96) , float32 X : (5, 100, 149, 2, 96) , float32 Z : (5, 100, 149, 2, 128) , float32 Full dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <class 'h5py._hl.dataset.Dataset'> Reduced dataset: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 149, 2, 96) <type 'numpy.ndarray'> Flattened frames: size: N=149,000 x n=96 -> 14,304,000 floats dim: 28,608 features per clip shape: (5, 100, 298, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Truncated and grouped: size: N=135,000 x n=96 -> 12,960,000 floats dim: 25,920 features per clip shape: (5, 100, 6, 45, 96) Feature vectors: size: N=6,000 x n=96 -> 576,000 floats dim: 1,152 features per clip shape: (5, 100, 6, 2, 96)
5 genres: blues, classical, country, disco, hiphop Training data: (3600, 96), float64 Testing data: (2400, 96), float64 Training labels: (3600,), uint8 Testing labels: (2400,), uint8 Accuracy: 68.8 % 5 genres: blues, classical, country, disco, hiphop Training data: (300, 1152), float64 Testing data: (200, 1152), float64 Training labels: (300,), uint8 Testing labels: (200,), uint8 Feature vectors accuracy: 62.1 % Clips accuracy: 68.5 % 5 genres: blues, classical, country, disco, hiphop Data: (6000, 96), float64 Labels: (6000,), uint8 Ratio: 60.0 training, 5940.0 testing 46 (+/- 2.6) <- [46 49 46 41 46 47 45 40 47 45] 46 (+/- 2.0) <- [46 47 43 42 49 47 48 47 44 46] 46 (+/- 2.4) <- [44 40 46 49 48 44 45 46 47 46] 47 (+/- 2.5) <- [45 49 42 46 51 45 45 43 48 46] 46 (+/- 2.9) <- [41 46 44 48 44 41 46 50 44 48] 47 (+/- 1.4) <- [48 45 44 47 48 47 46 45 47 48] 47 (+/- 2.8) <- [48 39 50 45 44 50 46 46 46 46] 45 (+/- 3.7) <- [49 44 35 44 48 44 48 46 44 47] 46 (+/- 1.6) <- [44 42 48 45 45 46 48 45 46 46] 45 (+/- 4.8) <- [47 42 46 46 45 46 46 46 47 30] 46 (+/- 2.0) <- [49 42 46 47 47 42 46 45 45 47] 45 (+/- 4.6) <- [49 48 48 48 35 49 39 43 48 43] 47 (+/- 2.4) <- [44 50 48 48 49 47 50 45 42 47] 49 (+/- 1.5) <- [48 48 46 47 51 48 51 48 46 49] 46 (+/- 3.8) <- [42 47 47 49 42 37 45 48 49 48] 48 (+/- 2.5) <- [48 48 44 44 50 46 50 51 44 48] 47 (+/- 2.9) <- [51 41 44 49 45 44 46 49 45 50] 48 (+/- 1.8) <- [44 47 48 47 49 49 46 50 45 49] 46 (+/- 2.4) <- [47 47 42 45 45 41 48 49 45 44] 47 (+/- 2.1) <- [46 49 48 46 46 47 46 42 50 47] Accuracy: 46.5 (+/- 2.95) Mean time (20 cv): 0.30 seconds Overall time: 9.75 seconds
print('{} = {}'.format(Pname, Pvalues))
for key, value in res.items():
if key is not 'atoms':
print('res[\'{}\'] = {}'.format(key, value))
def plot(*args, **kwargs):
plt.figure(figsize=(8,5))
x = range(len(Pvalues))
log = 'log' in kwargs and kwargs['log'] is True
pltfunc = plt.semilogy if log else plt.plot
params = {}
params['linestyle'] = '-'
params['marker'] = '.'
params['markersize'] = 10
for i, var in enumerate(args):
if 'err' in kwargs:
pltfunc = plt.errorbar
params['yerr'] = res[kwargs['err'][i]]
params['capsize'] = 5
pltfunc(x, res[var], label=var, **params)
for i,j in zip(x, res[var]):
plt.annotate('{:.2f}'.format(j), xy=(i,j), xytext=(5,5), textcoords='offset points')
margin = 0.25
params['markersize'] = 10
plt.xlim(-margin, len(Pvalues)-1+margin)
if 'ylim' in kwargs:
plt.ylim(kwargs['ylim'])
plt.title('{} vs {}'.format(', '.join(args), Pname))
plt.xlabel(Pname)
plt.ylabel(' ,'.join(args))
plt.xticks(x, Pvalues)
plt.grid(True); plt.legend(loc='best'); plt.show()
def div(l):
div = Pvalues if Pname is l else [p[l]]
return np.array([1 if v is None else v for v in div])
# Classification results.
res['chance'] = len(Pvalues) * [100./p['Ngenres']]
res['chance_std'] = 0
err=['accuracy_std', 'baseline_std', 'chance_std']
plot('accuracy', 'baseline', 'chance', err=err, ylim=[0,100])
# Features extraction results.
if regen_features:
plot('objective_g', 'objective_i', 'objective_j', log=True)
# Unweighted objectives.
print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')
res['objective_g_un'] = res['objective_g'] / div('ld')
res['objective_i_un'] = res['objective_i'] / div('ls')
res['objective_j_un'] = res['objective_j'] / div('lg')
plot('objective_g_un', 'objective_i_un', 'objective_j_un', log=True)
plot('sparsity', ylim=[0,100])
plot('time_features')
plot('iterations_inner')
plot('iterations_outer')
for i, fig in enumerate(res['atoms']):
print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
fig.show()
print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
test_size = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.92, 0.94, 0.96, 0.97, 0.98, 0.99] res['accuracy_std'] = [1.5608471257621608, 1.1485328805819084, 0.97097535124021084, 0.79022851828512952, 0.69960711196435987, 0.69625874110699781, 0.71097445784304725, 0.74333974883956044, 0.91061356766518298, 0.992902374341881, 1.1010037236195309, 1.3692128406671615, 1.5058190643845542, 2.0381446354892905, 2.246605962044653] res['objective_j'] = [8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992, 8429.8490524291992] res['objective_i'] = [46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25, 46432.25] res['objective_h'] = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] res['objective_g'] = [103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625, 103830.56640625] res['baseline'] = [69.729166666666742, 69.439166666666665, 69.101388888888863, 68.599375000000009, 68.060166666666689, 67.333888888888907, 66.250476190476164, 64.493020833333333, 60.973703703703706, 59.755344202898506, 58.062677304964552, 55.507986111111116, 53.744501718213066, 51.096088435374142, 46.473737373737357] res['time_features'] = [2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125, 2488.0675921440125] res['baseline_std'] = [1.7102986662503645, 1.2586840019109906, 0.95690211162614125, 0.77039038512049951, 0.69960065990058462, 0.65298436447354025, 0.58584473963407446, 0.7472381947577772, 0.85954329596629586, 0.89706844615576686, 1.0826780027089868, 1.3656555460323629, 1.5531957132032486, 2.1520828759669808, 2.9536838364939091] res['sparsity'] = [40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914, 40.842905830536914] res['iterations_inner'] = [1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122, 1122] res['iterations_outer'] = [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] res['accuracy'] = [72.622499999999988, 72.305000000000021, 71.944722222222197, 71.313333333333361, 70.669000000000025, 69.772222222222211, 68.565595238095241, 66.707812500000017, 62.917685185185171, 61.527173913043441, 59.656826241134773, 56.793402777777757, 54.67457044673543, 51.681717687074858, 47.269528619528636]
Experiment time: 6933 seconds