#!/usr/bin/env python
# coding: utf-8

# # Genre recognition: experiment
# Goal: Explore the effect of $\lambda_d$.
#
# Conclusion: A value of $\lambda_d$ between 10 and 100 seems reasonable (when
# $\lambda_g=100$ and $\lambda_s=1$). It corresponds to the range of
# $\lambda_s$, i.e. between 1 and 10 for $\lambda_d=\lambda_g=100$. We want a
# ratio $\frac{\lambda_d}{\lambda_s}$ between 10 and 100. This ratio controls
# sparsity and, indirectly, speed.
#
# Observations:
# * In the previous experiment with $\lambda_d = \lambda_g = 100$,
#   $\lambda_s = 1$ was found to be the best option. So we fixed
#   $\lambda_g = 100$ and $\lambda_s=1$.
# * The experiment with $\lambda_d = \lambda_g = 100$ and $\lambda_s=1$ is
#   1.5 times faster with `rtol=1e-5` (compared with the previous
#   experiment). Accuracy dropped from 73.06 to 72.49.
# * Time to extract features increases with $\lambda_d$. It is the term
#   which couples the two variables we optimize for.
# * Ran for 16h30.

# ## Hyper-parameters

# ### Parameter under test

# In[1]:

# Name of the hyper-parameter under test and the values it sweeps through.
Pname = 'ld'
Pvalues = [1, 10, 100, 1e3, 1e4]

# Regenerate the graph or the features at each iteration.
regen_graph = False
regen_features = True

# ### Model parameters

# In[2]:

# Model parameters, gathered in a single literal (same keys and values as
# individual assignments would produce).
p = {
    # Preprocessing.
    # Graph.
    'K': 10 + 1,            # 5 to 10 + 1 for self-reference
    'dm': 'cosine',
    'Csigma': 1,
    'diag': True,
    'laplacian': 'normalized',
    # Feature extraction.
    'm': 128,               # 64, 128, 512
    'ls': 1,
    'ld': 100,
    'le': None,
    'lg': 100,
    # Classification.
    'scale': None,
    'Nvectors': 6,
    'svm_type': 'C',
    'kernel': 'linear',
    'C': 1,
    'nu': 0.5,
}

# ### Numerical parameters

# In[3]:

# HDF5 data stores.
p.update(
    folder='data',
    filename_gtzan='gtzan.hdf5',
    filename_audio='audio.hdf5',
    filename_graph='graph.hdf5',
    filename_features='features.hdf5',
)

# Dataset (10,100,644 | 5,100,149 | 2,10,644).
p.update(Ngenres=5, Nclips=100, Nframes=149)

# Graph.
p['tol'] = 1e-5

# Feature extraction.
p['rtol'] = 1e-5  # 1e-3, 1e-5, 1e-7
p['N_inner'] = 500
p['N_outer'] = 50

# Classification.
p['Nfolds'] = 10
p['Ncv'] = 40
p['dataset_classification'] = 'Z'

# ## Processing

# In[4]:

import numpy as np
import time

texperiment = time.time()

# Result dictionary: one list per metric, appended to once per tested value
# of the hyper-parameter.
res = ['accuracy', 'accuracy_std']
res += ['sparsity', 'atoms']
res += ['objective_g', 'objective_h', 'objective_i', 'objective_j']
res += ['time_features', 'iterations_inner', 'iterations_outer']
res = {key: [] for key in res}

def separator(name, parameter=False):
    """Print a banner separating the stages of the experiment.

    If `parameter` is True, append the current value of the hyper-parameter
    under test to the banner.
    """
    if parameter:
        name += ', {} = {}'.format(Pname, p[Pname])
    dashes = 20 * '-'
    print('\n {} {} {} \n'.format(dashes, name, dashes))

# Fair comparison when tuning parameters.
# Randomnesses: dictionary initialization, training and testing sets.
np.random.seed(1)

# In[5]:

#%run gtzan.ipynb
#%run audio_preprocessing.ipynb

# If the graph / features do not depend on the tested parameter, compute
# them once, outside the sweep.
if not regen_graph:
    separator('Graph')
    get_ipython().run_line_magic('run', 'audio_graph.ipynb')
if not regen_features:
    separator('Features')
    get_ipython().run_line_magic('run', 'audio_features.ipynb')

# Hyper-parameter under test. Note that the loop target is p[Pname] itself:
# each iteration assigns the tested entry of the parameter dictionary.
for p[Pname] in Pvalues:

    if regen_graph:
        separator('Graph', True)
        get_ipython().run_line_magic('run', 'audio_graph.ipynb')
    if regen_features:
        separator('Features', True)
        # One features file per tested value.
        p['filename_features'] = 'features_{}_{}.hdf5'.format(Pname, p[Pname])
        get_ipython().run_line_magic('run', 'audio_features.ipynb')
    separator('Classification', True)
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')

    # Collect results: the executed notebooks leave each metric as a
    # module-level global of the same name.
    for key in res:
        res[key].append(globals()[key])

# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax'  # Todo: should be done in pre-processing.
if not regen_graph and not regen_features:
    # Classifier parameters are being tested: the baseline depends on the
    # tested parameter, so re-run it for each value.
    # NOTE(review): this branch does not record res['baseline'] /
    # res['baseline_std'], which plot('accuracy', 'baseline') below reads —
    # confirm against the original notebook how baseline results are
    # collected in this configuration.
    for p[Pname] in Pvalues:
        separator('Baseline', True)
        get_ipython().run_line_magic('run', 'audio_classification.ipynb')
else:
    # The baseline is independent of the tested parameter: run it once and
    # replicate the result across all tested values.
    separator('Baseline')
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')
    res['baseline'] = len(Pvalues) * [accuracy]
    res['baseline_std'] = len(Pvalues) * [accuracy_std]

# ## Results

# In[6]:

print('{}: {}'.format(Pname, Pvalues))
for key, value in res.items():
    # Fix: the original compared with `is not`, i.e. object identity with a
    # str literal — interning-dependent and a SyntaxWarning on CPython 3.8+.
    if key != 'atoms':
        print('{}: {}'.format(key, value))

def plot(*args, **kwargs):
    """Plot the metrics named in `args` (keys of `res`) against the tested
    hyper-parameter.

    Keyword arguments:
      log: if True, use a logarithmic y-axis.
      err: list of `res` keys giving the error bars of each metric (one per
           entry of `args`); switches to plt.errorbar.
    """
    plt.figure(figsize=(8,5))
    x = range(len(Pvalues))
    log = 'log' in kwargs and kwargs['log'] is True
    pltfunc = plt.semilogy if log else plt.plot
    params = {}
    params['linestyle'] = '-'
    params['marker'] = '.'
    params['markersize'] = 10
    for i, var in enumerate(args):
        if 'err' in kwargs:
            pltfunc = plt.errorbar
            params['yerr'] = res[kwargs['err'][i]]
            params['capsize'] = 5
        pltfunc(x, res[var], label=var, **params)
        # Annotate each data point with its value. The loop variables are
        # distinct from the enumerate index `i` above (the original shadowed
        # it).
        for xi, yi in zip(x, res[var]):
            plt.annotate('{:.2f}'.format(yi), xy=(xi,yi), xytext=(5,5),
                         textcoords='offset points')
    # Guard against a single-value sweep (len(Pvalues) == 1) which would
    # otherwise divide by zero.
    margin = 0.25 / max(len(Pvalues) - 1, 1)
    plt.xlim(-margin, len(Pvalues)-1+margin)
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    # Fix: the original joined with ' ,' (typo); the title uses ', '.
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True); plt.legend(loc='best'); plt.show()

# Classification results.
plot('accuracy', 'baseline', err=['accuracy_std', 'baseline_std'])

# Features extraction results.
# Feature-extraction diagnostics are only meaningful when the features were
# regenerated for each tested value.
if regen_features:
    plot('objective_g', 'objective_i', 'objective_j', log=True)
    plot('sparsity')
    plot('time_features')
    plot('iterations_inner')
    plot('iterations_outer')

# Show the learned dictionary atoms for each tested value. `res['atoms']`
# presumably holds one matplotlib figure per value — confirm against
# audio_features.ipynb.
for i, fig in enumerate(res['atoms']):
    print('Dictionary atoms for {} = {}'.format(Pname, Pvalues[i]))
    fig.show()

print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))

# ### Unweighted objectives
# This cell is self-contained (its own imports, `plot` and hard-coded
# results) so it can be re-run without repeating the whole experiment.

# In[8]:

import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

def plot(*args, **kwargs):
    """Plot the metrics named in `args` (keys of `res`) against the tested
    hyper-parameter.

    Keyword arguments:
      log: if True, use a logarithmic y-axis.
      err: list of `res` keys giving the error bars of each metric (one per
           entry of `args`); switches to plt.errorbar.
    """
    plt.figure(figsize=(8,5))
    x = range(len(Pvalues))
    log = 'log' in kwargs and kwargs['log'] is True
    pltfunc = plt.semilogy if log else plt.plot
    params = {}
    params['linestyle'] = '-'
    params['marker'] = '.'
    params['markersize'] = 10
    for i, var in enumerate(args):
        if 'err' in kwargs:
            pltfunc = plt.errorbar
            params['yerr'] = res[kwargs['err'][i]]
            params['capsize'] = 5
        pltfunc(x, res[var], label=var, **params)
        # Annotate each data point with its value. The loop variables are
        # distinct from the enumerate index `i` above (the original shadowed
        # it).
        for xi, yi in zip(x, res[var]):
            plt.annotate('{:.2f}'.format(yi), xy=(xi,yi), xytext=(5,5),
                         textcoords='offset points')
    # Guard against a single-value sweep (len(Pvalues) == 1) which would
    # otherwise divide by zero.
    margin = 0.25 / max(len(Pvalues) - 1, 1)
    plt.xlim(-margin, len(Pvalues)-1+margin)
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    # Fix: the original joined with ' ,' (typo); the title uses ', '.
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True); plt.legend(loc='best'); plt.show()

Pname = 'ld'
Pvalues = np.array([1,10,100,1e3,1e4])

# Hard-coded final objective values recorded from the experiment run.
res = {}
res['objective_j'] = [8.0171190202236176, 10749.341583251953, 90460.6201171875, 180343.73779296875, 456038.427734375]
res['objective_i'] = [415.35968017578125, 58292.4140625, 166819.109375, 382056.78125, 669909.3125]
res['objective_g'] = [26194.47265625, 72384.501953125, 78647.03369140625, 18604.650497436523, 3124.8548626899719]

# Undo the weighting applied during optimization so the raw objective terms
# are comparable. (The original relied on `list /= ndarray` falling back to
# ndarray.__rtruediv__; this spells the conversion out — same values.)
res['objective_j'] = np.array(res['objective_j']) / 100   # lg
res['objective_i'] = np.array(res['objective_i']) / 1     # ls
res['objective_g'] = np.array(res['objective_g']) / Pvalues  # ld

plot('objective_g', 'objective_i', 'objective_j', log=True)
print('g(Z) = ||X-DZ||_2^2, h(Z) = ||Z-EX||_2^2, i(Z) = ||Z||_1, j(Z) = tr(Z^TLZ)')