The audio genre recognition pipeline:
Open questions:
- m: number of atoms in the dictionary, sparse code length.
- ls: weight of the sparse codes l1 penalty (redundant).
- ld: weight of the dictionary l2 penalty.
- le: weight of the encoder l2 penalty.
- lg: weight of the Dirichlet energy (via the graph Laplacian).
- rtol: stopping criterion for inner and outer loops.
- N_inner: hard limit on inner iterations.
- N_outer: hard limit on outer iterations.
- Ngenres, Nclips, Nframes: a way to reduce the size of the dataset.
- noise_std: standard deviation of the Gaussian noise to be added to the data.
- folder: relative path to HDF5 files.
- filename_*: name of the HDF5 file.
# filename_*: name of the HDF5 file.
# Hyper-parameter setup: take values from the experiment runner when it
# injected a dict `p` into the global namespace, otherwise use defaults.
if 'p' in globals():
    # Hyper-parameters passed by the experiment runner.
    for key, value in p.items():
        globals()[key] = value
else:
    # Default hyper-parameters for an interactive run.
    m = 64  # 64, 128, 512
    ls = 1
    ld = 10
    le = None
    lg = 1
    rtol = 1e-5  # 1e-3, 1e-5, 1e-7
    N_inner = 500
    N_outer = 50
    Ngenres, Nclips, Nframes = 10, 100, 644
    noise_std = 0
    folder = 'data'
    filename_audio = 'audio.hdf5'
    filename_graph = 'graph.hdf5'
    filename_features = 'features.hdf5'
# Standard library, numerical stack and HDF5 I/O.
import os, time
import numpy as np
import scipy.sparse
import h5py
import matplotlib.pyplot as plt
%matplotlib inline
# Import auto-encoder definition.
# NOTE(review): `%run -n` executes the notebook's definitions without its
# __main__ guard; `auto_encoder`, `sparse_codes`, `atoms` and `dictenc`
# used below presumably come from there — confirm against auto_encoder.ipynb.
%run -n auto_encoder.ipynb
#import auto_encoder
# Profiling.
%reload_ext memory_profiler
%reload_ext line_profiler
import objgraph
#%load_ext autoreload
#%autoreload 2
toverall = time.time()  # wall-clock start; reported at the very end of the notebook
def datinfo(X, name='Dataset'):
    r"""Print dataset size and dimensionality"""
    template = ('{}:\n'
                ' size: N={:,} x n={} -> {:,} floats\n'
                ' dim: {:,} features per clip\n'
                ' shape: {}')
    # N = all leading axes collapsed, n = feature dimension (last axis).
    n_samples = np.prod(X.shape[:-1])
    n_features = X.shape[-1]
    n_floats = np.prod(X.shape)
    per_clip = np.prod(X.shape[2:])
    print(template.format(name, n_samples, n_features, n_floats,
                          per_clip, X.shape))
# Load the audio features from HDF5 and preprocess them.
filename = os.path.join(folder, filename_audio)
with h5py.File(filename, 'r') as audio:
    # Display HDF5 attributes.
    print('Attributes:')
    for attr in audio.attrs:
        print(' {} = {}'.format(attr, audio.attrs[attr]))
    sr = audio.attrs['sr']
    labels = audio.attrs['labels']
    # Show datasets, their dimensionality and data type.
    print('Datasets:')
    for dname, dset in audio.items():
        # str() is required: tuples reject non-empty format specs such as
        # {:24} in Python 3 (TypeError).
        print(' {:2}: {:24}, {}'.format(dname, str(dset.shape), dset.dtype))
    # Choose dataset: Xa, Xs.
    X = audio.get('Xs')
    # Full dataset.
    n = X.shape[-1]
    datinfo(X, 'Full dataset')
    print(type(X))
    # Load data into memory as a standard NumPy array (slicing an h5py
    # dataset returns a fresh ndarray, usable after the file is closed).
    X = X[:Ngenres, :Nclips, :Nframes, ...]
datinfo(X, 'Reduced dataset')
print(type(X))
# Resize in place without memory loading via hyperslab.
# Require chunked datasets.
#X.resize((Ngenres, Nclips, Nframes, 2, n))
# Squeeze dataset to a 2D array. The auto-encoder does not
# care about the underlying structure of the dataset.
X.resize(Ngenres * Nclips * Nframes * 2, n)
print('Data: {}, {}'.format(X.shape, X.dtype))
# Independently rescale each feature.
# To be put in an sklearn Pipeline to avoid transductive learning.
X -= np.min(X, axis=0)
X /= np.max(X, axis=0)
# Independently normalize each sample.
#X /= np.linalg.norm(X, axis=1)[:,np.newaxis]
# Add Gaussian noise. (`!=`, not `is not`: identity comparison with an
# int literal is unreliable and a SyntaxWarning on Python 3.8+.)
if noise_std != 0:
    X += np.random.normal(scale=noise_std, size=X.shape)
# Load the graph Laplacian (CSR parts stored as separate datasets).
filename = os.path.join(folder, filename_graph)
with h5py.File(filename, 'r') as graph:
    # Display HDF5 attributes.
    print('Attributes:')
    for attr in graph.attrs:
        print(' {} = {}'.format(attr, graph.attrs[attr]))
    # Show datasets, their dimensionality and data type.
    print('Datasets:')
    for dname, dset in graph.items():
        # str() is required: tuples reject non-empty format specs such as
        # {:10} in Python 3 (TypeError).
        print(' {:10}: {:10}, {}'.format(dname, str(dset.shape), dset.dtype))
    # Data: Laplacian matrix. Must be assembled before the file closes,
    # as the h5py datasets in `pars` become invalid afterwards.
    pars = []
    for par in ('data', 'indices', 'indptr', 'shape'):
        pars.append(graph.get('L_'+par))
    L = scipy.sparse.csr_matrix(tuple(pars[:3]), shape=pars[3])
if L.shape != (X.shape[0], X.shape[0]):
    raise ValueError('Graph size does not correspond to data size.')
Size of the training data and of the model parameters:
# Report the memory footprint of the data (X), codes (Z),
# dictionary (D) and encoder (E).
N = Ngenres * Nclips * Nframes * 2
MEGA = 2.**20
KILO = 2.**10
sizeX = N * n / MEGA
sizeZ = N * m / MEGA
sizeD = n * m / KILO
sizeE = m * n / KILO
# 32 bits float
print('Size X: {:.1f} M --> {:.1f} MiB'.format(sizeX, sizeX * 4))
print('Size Z: {:.1f} M --> {:.1f} MiB'.format(sizeZ, sizeZ * 4))
print('Size D: {:.1f} k --> {:.1f} kiB'.format(sizeD, sizeD * 4))
print('Size E: {:.1f} k --> {:.1f} kiB'.format(sizeE, sizeE * 4))
# Fit the auto-encoder on the data and extract the sparse codes Z,
# regularized by the graph Laplacian L.
hyperparams = dict(m=m, ls=ls, ld=ld, le=le, lg=lg,
                   rtol=rtol, xtol=None,
                   N_inner=N_inner, N_outer=N_outer)
ae = auto_encoder(**hyperparams)
tstart = time.time()
Z = ae.fit_transform(X, L)
time_features = time.time() - tstart
print('Elapsed time: {:.0f} seconds'.format(time_features))
Observations:
Time analysis:
# Optional CPU profiling of the feature extraction; flip to True to run.
if False:
    %prun Z = ae.fit_transform(X)
Space analysis:
# Optional memory profiling of the feature extraction; flip to True to run.
if False:
    import gc
    gc.collect()
    objgraph.show_most_common_types()
    from pyunlocbox import solvers, functions
    # Line-by-line memory profile of the solver internals; report in profile.txt.
    %mprun -f ae.fit_transform -f ae._minD -f ae._minZ -f solvers.solve -f solvers.forward_backward._pre -f solvers.forward_backward._fista -f functions.norm_l1._prox -T profile.txt ae.fit_transform(X)
    #%mprun -f solvers.solve -f solvers.forward_backward._pre -f solvers.forward_backward._fista -f functions.norm_l1._prox -T profile.txt ae.fit_transform(X)
    gc.collect()
    objgraph.show_most_common_types()
# Alternative: track object growth with pympler; flip to True to run.
if False:
    from pympler import tracker
    tr = tracker.SummaryTracker()
    Z = ae.fit_transform(X)
    tr.print_diff()
# Plot convergence and unpack the returned statistics:
# iteration counts first, then the four objective-term histories.
# NOTE(review): assumes plot_objective() returns exactly 6 values —
# confirm against auto_encoder.ipynb.
ret = ae.plot_objective()
iterations_inner, iterations_outer = ret[:2]
objective_g, objective_h, objective_i, objective_j = ret[2:]
# Measure the sparsity of the learned codes.
sparsity = sparse_codes(Z)
Observations:
# Visualize the learned dictionary D (if the dictionary penalty was
# active) and the learned encoder E (if the encoder penalty was active).
if ld is not None:
    dictenc(ae.D)
    atoms_D = atoms(ae.D)
if le is not None:
    dictenc(ae.E, enc=True)
    atoms(ae.E)
We will store more versions of Z once the various approximations are implemented.
# Persist the extracted features (and learned model) to HDF5.
filename = os.path.join(folder, filename_features)
# Remove existing HDF5 file without warning if non-existent.
# FileNotFoundError only: a PermissionError or other OSError should
# surface instead of being silently swallowed.
try:
    os.remove(filename)
except FileNotFoundError:
    pass
# Create HDF5 file and datasets.
with h5py.File(filename, 'w') as features:
    # Metadata.
    features.attrs['sr'] = sr
    features.attrs['labels'] = labels
    # Data, restored to the (genre, clip, frame, channel, feature) layout.
    features.create_dataset('X', data=X.reshape(Ngenres, Nclips, Nframes, 2, n))
    features.create_dataset('Z', data=Z.reshape(Ngenres, Nclips, Nframes, 2, Z.shape[-1]))
    if ld is not None:
        features.create_dataset('D', data=ae.D)
    if le is not None:
        features.create_dataset('E', data=ae.E)
    # Show datasets, their dimensionality and data type.
    print('Datasets:')
    for dname, dset in features.items():
        # str() is required: tuples reject non-empty format specs such as
        # {:22} in Python 3 (TypeError).
        print(' {:2}: {:22}, {}'.format(dname, str(dset.shape), dset.dtype))
    # Display HDF5 attributes.
    print('Attributes:')
    for name, value in features.attrs.items():
        print(' {} = {}'.format(name, value))
print('Overall time: {:.0f} seconds'.format(time.time() - toverall))