#!/usr/bin/env python
# coding: utf-8

# # Genre recognition: experiment
#
# Goal: observe the effect of $\lambda_g$ on a small dataset.
#
# Conclusion: no ideal value for $\lambda_g$ was found. This experiment gave the idea of trading smoothness against sparsity.
#
# Observations:
# * Accuracy is almost constant and drops for $\lambda_g$ greater than 1000.
# * Running time is roughly constant, about 50% higher than for the model without a graph.
# * Sparsity increases linearly with $\lambda_g$.
# * Todo: compare the learned atoms.
# * Large variance between cross-validation runs, e.g. accuracy ranging from 70 to 73. We should report the mean over the runs (see the sketch at the end of the notebook); done for the next experiment.
# * The gap between the Dirichlet energy and the other objectives shrinks from two orders of magnitude to one as $\lambda_g$ grows beyond 100 (a sketch of the smoothness and sparsity measures follows below).
# * $Z$ is much more constrained than $D$ (inner loop iterations).
# * We want to trade smoothness against sparsity, not against the reconstruction error (controlled by $\lambda_d$). Try to set $\lambda_d = \lambda_g$, i.e. vary only the redundant $\lambda_s$.
# * The experiment ran for 4h50.
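# The observations above compare smoothness (the Dirichlet energy) against
# sparsity. The cell below is a minimal, illustrative sketch of how these two
# quantities could be measured; it assumes that the encoding matrix `Z`
# (atoms x samples, dense NumPy array) and the graph Laplacian `L`
# (samples x samples) are available, e.g. from audio_features.ipynb and
# audio_graph.ipynb. The function names are hypothetical and not part of the
# pipeline.

# In[ ]:

import numpy as np

def sparsity_percent(Z, tol=1e-8):
    """Percentage of entries of Z that are numerically non-zero."""
    return 100 * np.mean(np.abs(Z) > tol)

def dirichlet_energy(Z, L):
    """Smoothness of the rows of Z on the graph, i.e. tr(Z L Z^T)."""
    return np.trace(Z.dot(L).dot(Z.T))

# Example (hypothetical variables):
# print(sparsity_percent(Z), dirichlet_energy(Z, L))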
# In[53]:

# Results collected from the runs below, one value per tested lambda_g.
time_features = [1915, 3067, 3095, 3485, 3213, 916]
inner_iterations = [1555, 1711, 1610, 1683, 1509, 425]
outer_iterations = [26, 29, 14, 10, 12, 3]
sparsity = [4.6, 5.1, 7.7, 18.4, 46.2, 88.8]
objective_g = [4.156377e+04, 4.197443e+04, 4.792657e+04, 7.133035e+04, 1.145442e+05, 1.600268e+05]
objective_i = [6.914378e+04, 6.879492e+04, 6.621795e+04, 5.868650e+04, 4.573782e+04, 3.421723e+04]
objective_j = [0, 1.349397e+03, 6.038886e+03, 1.143229e+04, 1.177662e+04, 6.200758e+03]
accuracy = [73, 74, 72, 73, 69, 53]

import numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

def plot(*args, **kwargs):
    plt.figure(figsize=(8, 5))
    x = range(len(Pvalues))
    log = kwargs.get('log') is True
    pltfunc = plt.semilogy if log else plt.plot
    for var in args:
        pltfunc(x, globals()[var], '.-', label=var)
    plt.xlim(0, len(Pvalues) - 1)
    plt.title('{} vs {}'.format(', '.join(args), Pname))
    plt.xlabel(Pname)
    plt.ylabel(', '.join(args))
    plt.xticks(x, Pvalues)
    plt.grid(True); plt.legend(loc='best'); plt.show()

plot('accuracy')
plot('objective_g', 'objective_i', 'objective_j', log=True)
plot('sparsity')
plot('time_features')
plot('inner_iterations')
plot('outer_iterations')


# ## Hyper-parameters
#
# ### Parameter under test

# In[1]:

Pname = 'lg'
Pvalues = [None, 1, 10, 1e2, 1e3, 1e4]

# Regenerate the graph or the features at each iteration.
regen_graph = False
regen_features = True


# ### Model parameters

# In[2]:

p = {}

# Preprocessing.

# Graph.
p['K'] = 10 + 1  # 5 to 10, + 1 for self-reference.
p['dm'] = 'cosine'
p['Csigma'] = 1
p['diag'] = True
p['laplacian'] = 'normalized'

# Feature extraction.
p['m'] = 128  # 64, 128, 512
p['ld'] = 10
p['le'] = None
p['lg'] = 100

# Classification.
p['scale'] = None
p['Nvectors'] = 6
p['svm_type'] = 'C'
p['kernel'] = 'linear'
p['C'] = 1
p['nu'] = 0.5


# ### Numerical parameters

# In[3]:

# Dataset (10,100,644 | 5,100,149 | 2,10,644).
p['Ngenres'] = 5
p['Nclips'] = 100
p['Nframes'] = 149

# Graph.
p['tol'] = 1e-5

# Feature extraction.
p['rtol'] = 1e-6  # 1e-3, 1e-5, 1e-7
p['N_outer'] = 40  # 10, 15, 20

# Classification.
p['Ncv'] = 10
p['dataset_classification'] = 'Z'


# ## Processing

# In[4]:

import numpy as np
import time

texperiment = time.time()

def separator():
    print('\n' + 50 * '-' + '\n')

# Fair comparison when tuning parameters.
np.random.seed(1)

#%run gtzan.ipynb
#%run audio_preprocessing.ipynb

if not regen_graph:
    get_ipython().run_line_magic('run', 'audio_graph.ipynb')
    separator()
if not regen_features:
    get_ipython().run_line_magic('run', 'audio_features.ipynb')
    separator()

# Hyper-parameter under test.
for p[Pname] in Pvalues:
    if regen_graph:
        get_ipython().run_line_magic('run', 'audio_graph.ipynb')
        separator()
    if regen_features:
        get_ipython().run_line_magic('run', 'audio_features.ipynb')
        separator()
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')
    separator()

# Baseline, i.e. classification with spectrograms.
p['dataset_classification'] = 'X'
p['scale'] = 'minmax'  # Todo: should be done in pre-processing.
if not regen_graph and not regen_features:
    # Only the classifier parameters are being tested.
    for p[Pname] in Pvalues:
        get_ipython().run_line_magic('run', 'audio_classification.ipynb')
        separator()
else:
    get_ipython().run_line_magic('run', 'audio_classification.ipynb')
    separator()

print('Experiment time: {:.0f} seconds'.format(time.time() - texperiment))
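# As noted in the observations, single cross-validation runs differ by a few
# percent in accuracy, so the mean (and standard deviation) over the p['Ncv']
# folds is a more robust figure to report. A minimal sketch, assuming the
# per-fold accuracies are collected in a list such as `accuracies_cv`
# (a hypothetical name; audio_classification.ipynb would have to expose it):

# In[ ]:

import numpy as np

def summarize_cv(accuracies_cv):
    """Mean and standard deviation of the per-fold accuracies (in percent)."""
    acc = np.asarray(accuracies_cv, dtype=float)
    return acc.mean(), acc.std()

# Example: summarize_cv([70, 73, 72, 71]) returns (71.5, 1.118...).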