#!/usr/bin/env python
# coding: utf-8

# # Genre recognition: experiment
# Goal: Test if we have an edge with low training / testing ratios, i.e. when
# there is little training data.
#
# Conclusion: As we cannot yet train the auto-encoder and extract features on
# a subset of the data (a limitation of the current implementation), only the
# classifier's ability to generalize is actually tested. The extracted
# features may well generalize better, but there is no indication of it in
# the results.
#
# Observations:
# * Classification accuracy drops as `test_size` grows.
# * So does our margin over the baseline.

# ## Hyper-parameters

# ### Parameter under test

# In[1]:


Pname = 'test_size'
Pvalues = [0.1, 0.3, 0.5, 0.7, 0.9]

# Regenerate the graph or the features at each iteration.
regen_graph = False
regen_features = False
regen_baseline = True


# ### Model parameters

# In[2]:


p = {}

# Preprocessing.

# Graph. (A hedged sketch of the construction these parameters suggest
# appears after the processing loop below.)
p['data_scaling_graph'] = 'features'
p['K'] = 10 + 1  # tried 5 to 10; +1 for the self-reference
p['dm'] = 'euclidean'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'

# Feature extraction.
p['m'] = 128  # 64, 128, 512
p['ls'] = 1
p['ld'] = 10
p['le'] = None
p['lg'] = 100

# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5
p['majority_voting'] = False


# ### Data parameters

# In[3]:


# HDF5 data stores.
p['folder'] = 'data'
p['filename_gtzan'] = 'gtzan.hdf5'
p['filename_audio'] = 'audio.hdf5'
p['filename_graph'] = 'graph.hdf5'
p['filename_features'] = 'features.hdf5'

# Dataset: (Ngenres, Nclips, Nframes) in (10,100,644 | 5,100,149 | 2,10,644).
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149

# Added white noise.
p['noise_std'] = 0


# ### Numerical parameters

# In[4]:


# Graph.
p['tol'] = 1e-5

# Feature extraction.
p['rtol'] = 1e-5  # 1e-3, 1e-5, 1e-7
p['N_inner'] = 500
p['N_outer'] = 50

# Classification.
p['test_size'] = 0.1
p['Ncv'] = 20
p['dataset_classification'] = 'Z'


# ## Processing

# In[5]:


import numpy as np
import time

texperiment = time.time()

# Result dictionary.
res = ['accuracy', 'accuracy_std']
res += ['sparsity', 'atoms']
res += ['objective_g', 'objective_h', 'objective_i', 'objective_j']
res += ['time_features', 'iterations_inner', 'iterations_outer']
res = dict.fromkeys(res)
for key in res.keys():
    res[key] = []


def separator(name, parameter=False):
    if parameter:
        name += ', {} = {}'.format(Pname, p[Pname])
    dashes = 20 * '-'
    print('\n {} {} {} \n'.format(dashes, name, dashes))


# Fair comparison when tuning parameters.
# Sources of randomness: dictionary initialization, training and testing sets.
np.random.seed(1)


# In[6]:


#%run gtzan.ipynb
#%run audio_preprocessing.ipynb

if not regen_graph:
    separator('Graph')
    get_ipython().run_line_magic('run', 'audio_graph.ipynb')

if not regen_features:
    separator('Features')
    get_ipython().run_line_magic('run', 'audio_features.ipynb')

# Hyper-parameter under test.
for p[Pname] in Pvalues:

    if regen_graph:
        separator('Graph', True)
        get_ipython().run_line_magic('run', 'audio_graph.ipynb')

    if regen_features:
        separator('Features', True)
        p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
        get_ipython().run_line_magic('run', 'audio_features.ipynb')

    separator('Classification', True)
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')

    # Collect results.
    for key in res:
        res[key].append(globals()[key])
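
# The graph itself is built by audio_graph.ipynb. As a minimal, illustrative
# sketch of what the graph parameters above suggest -- a K-nearest-neighbor
# graph (K = 10 + 1 including the self-reference) under the euclidean metric,
# Gaussian edge weights, and a normalized Laplacian -- assuming scikit-learn
# and scipy are available. The actual construction may differ, and
# `knn_normalized_laplacian` and its arguments are hypothetical.

def knn_normalized_laplacian(X, K=11, sigma=1.0):
    """Sketch: symmetrized K-NN graph, then L = I - D^{-1/2} W D^{-1/2}."""
    import scipy.sparse
    from sklearn.neighbors import kneighbors_graph
    # Euclidean distances to the K nearest neighbors, self-edge included.
    dist = kneighbors_graph(X, K, mode='distance', metric='euclidean',
                            include_self=True)
    W = dist.copy()
    W.data = np.exp(-W.data**2 / sigma**2)   # Gaussian kernel weights
    W = (W + W.T) / 2                        # enforce symmetry
    d = np.asarray(W.sum(axis=1)).squeeze()  # node degrees
    Dinv2 = scipy.sparse.diags(1 / np.sqrt(d))
    I = scipy.sparse.identity(W.shape[0], format='csr')
    return I - Dinv2 @ W @ Dinv2
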
# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax'  # TODO: should be done in pre-processing.

if regen_baseline:
    res['baseline'] = []
    res['baseline_std'] = []
    for p[Pname] in Pvalues:
        separator('Baseline', True)
        get_ipython().run_line_magic('run', 'audio_classification.ipynb')
        res['baseline'].append(accuracy)
        res['baseline_std'].append(accuracy_std)
else:
    separator('Baseline')
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')
    res['baseline'] = len(Pvalues) * [accuracy]
    res['baseline_std'] = len(Pvalues) * [accuracy_std]


# ## Results

# In[7]:


import matplotlib.pyplot as plt

print('{} = {}'.format(Pname, Pvalues))
for key, value in res.items():
    if key != 'atoms':
        print("res['{}'] = {}".format(key, value))


def plot(*args, **kwargs):
    plt.figure(figsize=(8, 5))
    x = range(len(Pvalues))
    log = 'log' in kwargs and kwargs['log'] is True
    pltfunc = plt.semilogy if log else plt.plot
    params = {}
    params['linestyle'] = '-'
    params['marker'] = '.'
    params['markersize'] = 10
    for i, var in enumerate(args):
        if 'err' in kwargs:
            pltfunc = plt.errorbar
            params['yerr'] = res[kwargs['err'][i]]
            params['capsize'] = 5
        pltfunc(x, res[var], label=var, **params)
        for xi, yi in zip(x, res[var]):
            plt.annotate('{:.2f}'.format(yi), xy=(xi, yi), xytext=(5, 5),
                         textcoords='offset points')
    margin = 0.25
    plt.xlim(-margin, len(Pvalues) - 1 + margin)
    if 'ylim' in kwargs:
        plt.ylim(kwargs['ylim'])
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()


def div(l):
    vals = Pvalues if Pname == l else [p[l]]
    return np.array([1 if v is None else v for v in vals])


# Classification results.
res['chance'] = len(Pvalues) * [100. / p['Ngenres']]
res['chance_std'] = 0
err = ['accuracy_std', 'baseline_std', 'chance_std']
plot('accuracy', 'baseline', 'chance', err=err, ylim=[0, 100])

# Feature extraction results.
if regen_features:
    plot('objective_g', 'objective_i', 'objective_j', log=True)

    # Unweighted objectives. (A numpy sketch of the weighted forms closes
    # this script.)
    print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')
    res['objective_g_un'] = res['objective_g'] / div('ld')
    res['objective_i_un'] = res['objective_i'] / div('ls')
    res['objective_j_un'] = res['objective_j'] / div('lg')
    plot('objective_g_un', 'objective_i_un', 'objective_j_un', log=True)

    plot('sparsity', ylim=[0, 100])
    plot('time_features')
    plot('iterations_inner')
    plot('iterations_outer')

    for i, fig in enumerate(res['atoms']):
        print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
        fig.show()

print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
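
# Appendix: a hedged numpy sketch of the weighted objectives reported above.
# It follows the printed definitions and the unweighting divisions
# (objective_g carries the weight ld, objective_i the weight ls, objective_j
# the weight lg; the encoder term h is weighted by le, taken as 1 when
# p['le'] is None, as in div()). Shapes are assumed conformable with the
# printed trace; `weighted_objectives` is illustrative only -- the real
# computation lives in audio_features.ipynb.

def weighted_objectives(X, Z, D, E, L, ld=10, le=1, ls=1, lg=100):
    """Sketch: g = ld*||X-DZ||_F^2, h = le*||Z-EX||_F^2,
    i = ls*||Z||_1, j = lg*tr(Z^T L Z)."""
    g = ld * np.linalg.norm(X - D @ Z, 'fro')**2  # data fidelity
    h = le * np.linalg.norm(Z - E @ X, 'fro')**2  # auto-encoder fidelity
    i = ls * np.abs(Z).sum()                      # sparsity (entrywise l1)
    j = lg * np.trace(Z.T @ L @ Z)                # graph smoothness
    return g, h, i, j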