#!/usr/bin/env python
# coding: utf-8

# # Genre recognition: experiment

# Goal: Check the influence of the distance metric (euclidean or cosine).
#
# Conclusion: The cosine metric seems more appropriate.
#
# Observations:
# * Higher accuracy with the cosine distance.
# * Why are the atoms and the accuracy different from the experiment on
#   $\lambda_d$? With the initialized seed, the results should really be the
#   same. The baseline is, however, strictly the same.
# * Ran for 2h10.

# ## Hyper-parameters

# ### Parameter under test

# In[1]:

Pname = 'dm'
Pvalues = ['cosine', 'euclidean']

# Regenerate the graph or the features at each iteration.
regen_graph = True
regen_features = True

# ### Model parameters

# In[2]:

p = {}

# Preprocessing.

# Graph.
p['K'] = 10 + 1  # 5 to 10 + 1 for self-reference.
p['dm'] = 'cosine'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'

# Feature extraction.
p['m'] = 128  # 64, 128, 512.
p['ls'] = 1
p['ld'] = 10
p['le'] = None
p['lg'] = 100

# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5

# ### Numerical parameters

# In[3]:

# HDF5 data stores.
p['folder'] = 'data'
p['filename_gtzan'] = 'gtzan.hdf5'
p['filename_audio'] = 'audio.hdf5'
p['filename_graph'] = 'graph.hdf5'
p['filename_features'] = 'features.hdf5'

# Dataset (10,100,644 | 5,100,149 | 2,10,644).
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149

# Graph.
p['tol'] = 1e-5

# Feature extraction.
p['rtol'] = 1e-5  # 1e-3, 1e-5, 1e-7.
p['N_inner'] = 500
p['N_outer'] = 50

# Classification.
p['Nfolds'] = 10
p['Ncv'] = 40
p['dataset_classification'] = 'Z'

# ## Processing

# In[4]:

import time

import numpy as np
import matplotlib.pyplot as plt  # Needed by plot() in the results section.

texperiment = time.time()

# Result dictionary.
res = ['accuracy', 'accuracy_std']
res += ['sparsity', 'atoms']
res += ['objective_g', 'objective_h', 'objective_i', 'objective_j']
res += ['time_features', 'iterations_inner', 'iterations_outer']
res = {key: [] for key in res}

def separator(name, parameter=False):
    if parameter:
        name += ', {} = {}'.format(Pname, p[Pname])
    dashes = 20 * '-'
    print('\n {} {} {} \n'.format(dashes, name, dashes))

# Fair comparison when tuning parameters.
# Randomnesses: dictionary initialization, training and testing sets.
np.random.seed(1)

# In[5]:

#%run gtzan.ipynb
#%run audio_preprocessing.ipynb

if not regen_graph:
    separator('Graph')
    get_ipython().run_line_magic('run', 'audio_graph.ipynb')

if not regen_features:
    separator('Features')
    get_ipython().run_line_magic('run', 'audio_features.ipynb')

# Hyper-parameter under test.
for p[Pname] in Pvalues:

    if regen_graph:
        separator('Graph', True)
        get_ipython().run_line_magic('run', 'audio_graph.ipynb')

    if regen_features:
        separator('Features', True)
        p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
        get_ipython().run_line_magic('run', 'audio_features.ipynb')

    separator('Classification', True)
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')

    # Collect results.
    for key in res:
        res[key].append(globals()[key])
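# Aside: a minimal sketch of the difference between the two metrics under
# test, on two illustrative frame vectors (the actual k-NN graph is built in
# audio_graph.ipynb). The euclidean distance is sensitive to the magnitude
# (energy) of the frames, while the cosine distance only compares their
# orientation, which is likely why it suits spectrogram frames better.
from scipy.spatial.distance import cosine, euclidean
u = np.array([1., 2., 3.])
v = np.array([2., 4., 6.])  # Same direction as u, twice the energy.
print('euclidean(u, v) = {:.2f}'.format(euclidean(u, v)))  # 3.74.
print('cosine(u, v) = {:.2f}'.format(cosine(u, v)))  # ~0: parallel vectors.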
# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax'  # TODO: should be done in pre-processing.

if not regen_graph and not regen_features:
    # Classifier parameters are being tested.
    for p[Pname] in Pvalues:
        separator('Baseline', True)
        get_ipython().run_line_magic('run', 'audio_classification.ipynb')
else:
    separator('Baseline')
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')

res['baseline'] = len(Pvalues) * [accuracy]
res['baseline_std'] = accuracy_std

# ## Results

# In[6]:

print('{} = {}'.format(Pname, Pvalues))
for key, value in res.items():
    if key != 'atoms':
        print("res['{}'] = {}".format(key, value))

def plot(*args, **kwargs):
    plt.figure(figsize=(8, 5))
    x = range(len(Pvalues))
    log = 'log' in kwargs and kwargs['log'] is True
    pltfunc = plt.semilogy if log else plt.plot
    params = {}
    params['linestyle'] = '-'
    params['marker'] = '.'
    params['markersize'] = 10
    for i, var in enumerate(args):
        if 'err' in kwargs:
            pltfunc = plt.errorbar
            params['yerr'] = res[kwargs['err'][i]]
            params['capsize'] = 5
        pltfunc(x, res[var], label=var, **params)
        for i, j in zip(x, res[var]):
            plt.annotate('{:.2f}'.format(j), xy=(i, j), xytext=(5, 5),
                         textcoords='offset points')
    margin = 0.25 / (len(Pvalues) - 1)
    plt.xlim(-margin, len(Pvalues) - 1 + margin)
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True)
    plt.legend(loc='best')
    plt.show()

# Classification results.
res['chance'] = len(Pvalues) * [100. / p['Ngenres']]
res['chance_std'] = 0
err = ['accuracy_std', 'baseline_std', 'chance_std']
plot('accuracy', 'baseline', 'chance', err=err)

# Feature extraction results.
if regen_features:
    plot('objective_g', 'objective_i', 'objective_j', log=True)

    # Unweighted objectives.
    print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')
    div = np.array(Pvalues if Pname == 'ld' else p['ld'])
    res['objective_g_un'] = res['objective_g'] / div
    div = np.array(Pvalues if Pname == 'ls' else p['ls'])
    res['objective_i_un'] = res['objective_i'] / div
    div = np.array(Pvalues if Pname == 'lg' else p['lg'])
    res['objective_j_un'] = res['objective_j'] / div
    plot('objective_g_un', 'objective_i_un', 'objective_j_un', log=True)

    plot('sparsity')
    plot('time_features')
    plot('iterations_inner')
    plot('iterations_outer')

    for i, fig in enumerate(res['atoms']):
        print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
        fig.show()

print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
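# Aside: a minimal sketch of the four objective terms printed above,
# evaluated on toy matrices. Shapes and names are chosen only so that the
# printed formulas type-check; the real terms are computed in
# audio_features.ipynb, whose conventions (in particular the Laplacian
# dimensions) may differ.
rng = np.random.RandomState(0)  # Local RNG: does not disturb the global seed.
n, m, N = 96, 128, 20  # Frame size, dictionary size, number of frames.
X = rng.uniform(size=(n, N))  # Data frames.
D = rng.uniform(size=(n, m))  # Dictionary of m atoms.
E = rng.uniform(size=(m, n))  # Encoder.
Z = rng.uniform(size=(m, N))  # Sparse codes.
L = np.eye(m)  # Placeholder Laplacian with matching dimensions.
g = np.linalg.norm(X - D.dot(Z))**2  # Data fidelity ||X-DZ||_2^2.
h = np.linalg.norm(Z - E.dot(X))**2  # Encoder fidelity ||Z-EX||_2^2.
i = np.abs(Z).sum()  # Sparsity ||Z||_1.
j = np.trace(Z.T.dot(L).dot(Z))  # Graph smoothness tr(Z^TLZ).
print('g={:.1f}, h={:.1f}, i={:.1f}, j={:.1f}'.format(g, h, i, j))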