Equivalence of Neural Architectures with Mixed Circular Ensembles

(c) 2020

Mehmet Suezen

suzen at /the domain name/ acm dot org

  • Motif in Brain networks
  • Complex brain networks: graph theoretical analysis of structural and functional systems.
    Bullmore, E., Sporns, O. Nat Rev Neurosci 10, 186–198 (2009). doi
  • Ensemble equivalence for dense graphs
    euclid
  • Ensemble inequivalence in random graphs
    doi
  • Correlations between eigenvalues of a random matrix
    Freeman Dyson
    euclid.

  • PyTorch vision
    VGG
    ResNet
    DenseNet
    torchvision models

Load required packages and report their versions

In [ ]:
import bristol
import torchvision
from bristol.ensembles import Circular
from bristol import cPSE
import torchvision.models as vision_models
import numpy as np
from numpy import linalg as LA

# Report the versions of the key packages. Printed explicitly because a bare
# expression is only echoed by the notebook when it is the last statement of
# a cell, and this one is followed by further assignments.
print(
    f"bristol {bristol.__version__}, "
    f"numpy {np.__version__}, "
    f"torchvision {torchvision.__version__}"
)

# Cache and plot filenames
import os

read_cache = True  # make False to re-generate the data


# Change this for your local path
local_path = "/Users/msuzen/data/"

# All output files live directly under local_path. os.path.join is used
# instead of string concatenation so that a local_path given without a
# trailing separator still produces a valid file path.

#
# Cache data
#
fname = os.path.join(local_path, "mixed_ensemble_layer_circular_spectra.pickle")

#
# Plot files: Change this for your local path
#
sdiff = os.path.join(
    local_path, "mixed_ensemble_layer_circular_spectra_diff.eps"
)
sdecay_resnet = os.path.join(
    local_path, "mixed_ensemble_layer_circular_spectra_decay_resnet.eps"
)
sdecay_vgg = os.path.join(
    local_path, "mixed_ensemble_layer_circular_spectra_decay_vgg.eps"
)
sdecay_densenet = os.path.join(
    local_path, "mixed_ensemble_layer_circular_spectra_decay_densenet.eps"
)

Generate spectral density data on half-space

We generate spectral data for a range of architectures and their corresponding mixed circular ensembles on the half-space, i.e. positive eigenvalues up to 6.0.

In [ ]:
#
# Given  : netname, ebins, erange, upper_order
# Output : _spectras and bin centres
#

def compute_layer_circular_spectra(netname, erange=(0.0, 6.0), ebins=1000, upper_order=1000):
    """
    Build the spectral density of an architecture and of its mixed circular ensemble.

    The torchvision model named ``netname`` is instantiated with
    ``pretrained=True``, its layer matrices are obtained from
    ``cPSE.get_layer_matrix_set`` and the eigenvalues of all of them are
    pooled into one density histogram. A CUE matrix is then generated for
    every layer-matrix order that does not exceed ``upper_order``; the
    eigenvalues of the element-wise absolute value of those matrices are
    pooled, and the imaginary parts are histogrammed on the same grid.

    Parameters :
        netname     : attribute name of the model constructor in torchvision.models.
        erange      : (low, high) range of the histograms.
        ebins       : number of histogram bins.
        upper_order : largest matrix order for which a circular matrix is generated.

    Return :
        Tuple of (spectral density of the architecture,
                  spectral density of the mixed circular ensemble,
                  bin centres shared by both histograms).
    """
    # Both histograms are built on exactly the same grid.
    histogram_options = dict(range=erange, bins=ebins, density=True)

    # --- architecture side -------------------------------------------------
    model = getattr(vision_models, netname)(pretrained=True)
    layer_matrices, layer_orders, _ = cPSE.get_layer_matrix_set(model)
    layer_eigenvalues = np.concatenate(
        [np.linalg.eigvals(matrix) for matrix in layer_matrices]
    )
    eigen_spectra, bin_edges = np.histogram(layer_eigenvalues, **histogram_options)
    eigen_spectra_bin_centres = (bin_edges[1:] + bin_edges[:-1]) / 2

    # --- circular ensemble side --------------------------------------------
    # The first entry of each reported order is taken as the matrix size.
    orders = [order[0] for order in layer_orders]
    generator = Circular()
    seeds = [2927361, 719732]  # two seeds, reused for every generated matrix
    cue_matrices = [
        generator.gen_cue(order, seed=seeds, set_seed=True)
        for order in orders
        if order < upper_order + 1
    ]
    cue_eigenvalues = np.concatenate(
        [np.linalg.eigvals(np.abs(matrix)) for matrix in cue_matrices]
    )
    ce_eigen, _ = np.histogram(np.imag(cue_eigenvalues), **histogram_options)

    return eigen_spectra, ce_eigen, eigen_spectra_bin_centres
In [ ]:
#
# Compute spectral densities
#
# VGG      : https://arxiv.org/abs/1409.1556
# ResNet   : https://arxiv.org/abs/1512.03385
# DenseNet : https://arxiv.org/abs/1608.06993
#
# https://pytorch.org/docs/stable/torchvision/models.html
#

if not read_cache:
    # Architectures to process, in the order their results are stored.
    netname_list = [
        'densenet121', 'densenet161', 'densenet169', 'densenet201',
        'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50',
        'vgg11', 'vgg11_bn',
        'vgg13', 'vgg13_bn',
        'vgg16', 'vgg16_bn',
        'vgg19', 'vgg19_bn',
    ]
    # Maps architecture name to the tuple
    # (eigen_spectra, ce_eigen, eigen_spectra_bin_centres).
    mixed_ensemble_layer_circular_spectra = {}
    for netname in netname_list:
        mixed_ensemble_layer_circular_spectra[netname] = compute_layer_circular_spectra(netname)
In [ ]:
#
# Cache produced data if running fresh.
#
import pickle
In [ ]:
if not read_cache:
    # Write the freshly computed spectra to the cache file. The context
    # manager closes the handle deterministically, so the pickle is fully
    # flushed to disk before any later cell reads it back.
    with open(fname, "wb") as cache_file:
        pickle.dump(mixed_ensemble_layer_circular_spectra, cache_file)  # create cache
In [ ]:
#
# Read data from cache
#
if read_cache:
    # Load previously computed spectra instead of recomputing them.
    # NOTE: unpickling can execute arbitrary code; only load a cache file
    # that was produced by this notebook or comes from a trusted source.
    with open(fname, "rb") as cache_file:
        mixed_ensemble_layer_circular_spectra = pickle.load(cache_file)  # read cache
In [ ]:
# Names of the architectures whose spectral densities are available.
anames = list(mixed_ensemble_layer_circular_spectra)

Variance of CSD

The variance of the spectral differences is reported. This measures fluctuations in the Circular Spectral Difference (CSD).

In [ ]:
# Print, for every architecture, the variance of the real part of the
# difference between its layer spectrum and its circular-ensemble spectrum.
for aname in anames:
    # Each entry is (eigen_spectra, ce_eigen, eigen_spectra_bin_centres);
    # the bin centres are not needed for the variance.
    layer_density, circular_density, _ = mixed_ensemble_layer_circular_spectra[aname]
    spectral_difference = np.real(layer_density - circular_density)
    csd_variance = np.var(spectral_difference)
    print(f"Architecture {aname} has variance of difference {csd_variance} ")

Compiled variances in a LaTeX table:

The performance figures (Top-1 and Top-5 errors) come from the PyTorch team.

\begin{table}[]
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Architecture  & Top-1 error  &  Top-5 error  & Variance CSD   \\ \hline
vgg11         & 30.98        &   11.37       &  0.19   \\ \hline
vgg13         & 30.07        &   10.75       &  0.20   \\ \hline
vgg16         & 28.41        &    9.63       &  0.19   \\ \hline
vgg19         & 27.62        &    9.12       &  0.18   \\ \hline
vgg11bn       & 29.62        &   10.19       &  0.10   \\ \hline
vgg13bn       & 28.45        &    9.63       &  0.09   \\ \hline
vgg16bn       & 26.63        &    8.50       &  0.10   \\ \hline
vgg19bn       & 25.76        &    8.15       &  0.09   \\ \hline
resnet18      & 30.24        &   10.92       &  0.20   \\ \hline
resnet34      & 26.70        &    8.58       &  0.23   \\ \hline
resnet50      & 23.85        &    7.13       &  1.45   \\ \hline
resnet101     & 22.63        &    6.44       &  1.86   \\ \hline
resnet152     & 21.69        &    5.94       &  1.98   \\ \hline
densenet121   & 25.35        &    7.83       &  0.42   \\ \hline
densenet161   & 22.35        &    6.20       &  0.29   \\ \hline
densenet169   & 24.00        &    7.00       &  0.52   \\ \hline
densenet201   & 22.80        &    6.43       &  0.54   \\ \hline
\end{tabular}
\
\caption{Variance of CSD per architecture with the corresponding Top-1 and Top-5 classification errors on the ImageNet dataset.}
\label{corr}
\end{table}

Visualisations for Circular Spectral Difference

  • (eigen_spectra-ce_eigen) vs. eigen_spectra_bin_centres (oscillations) ['vgg19', 'resnet152', 'densenet201']
  • cumsum(eigen_spectra-ce_eigen) vs. eigen_spectra_bin_centres (decay) all
In [ ]:
import matplotlib.pylab as plt
%matplotlib inline
font = {"family": "normal", "weight": "bold", "size": 11}
plt.rc("font", **font)

anames0 = ['vgg19', 'resnet152', 'densenet201']

for aname in anames0:
    eigen_spectra, ce_eigen, eigen_spectra_bin_centres = mixed_ensemble_layer_circular_spectra[aname]
    x = np.real(eigen_spectra_bin_centres)[5:100]
    y = np.real(eigen_spectra-ce_eigen)[5:100]
    plt.plot(x, y, label=aname)
plt.legend(loc="lower right")
plt.xlabel("Spectra Locations (partial)", **font)
plt.ylabel("Spectral Difference", **font)
plt.title("Pre-trained Architectures:  \n Layer/Circular Spectral Difference", **font)
plt.savefig(
    sdiff, format="eps", dpi=300, bbox_inches="tight"
)

plt.cla()
plt.clf()
plt.gca()
plt.gcf()
plt.close()
In [ ]:
import matplotlib.pylab as plt
%matplotlib inline
font = {"family": "normal", "weight": "bold", "size": 11}
plt.rc("font", **font)



anames0 =  ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']

for aname in anames0:
    eigen_spectra, ce_eigen, eigen_spectra_bin_centres = mixed_ensemble_layer_circular_spectra[aname]
    x = np.real(eigen_spectra_bin_centres)
    y = np.cumsum(np.real(eigen_spectra-ce_eigen))
    plt.plot(x, y, label=aname)
plt.legend(loc="lower right")
plt.xlabel("Spectra Locations (partial)", **font)
plt.ylabel("Spectral Difference Cumulative Sum", **font)
plt.title("Pre-trained Architectures:  \n Layer/Circular Spectral Difference Decay", **font)
plt.savefig(
    sdecay_resnet, format="eps", dpi=300, bbox_inches="tight"
)

plt.cla()
plt.clf()
plt.gca()
plt.gcf()
plt.close()
In [ ]:
import matplotlib.pylab as plt
%matplotlib inline
font = {"family": "normal", "weight": "bold", "size": 11}
plt.rc("font", **font)



anames0 =   ['vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn']

for aname in anames0:
    eigen_spectra, ce_eigen, eigen_spectra_bin_centres = mixed_ensemble_layer_circular_spectra[aname]
    x = np.real(eigen_spectra_bin_centres)
    y = np.cumsum(np.real(eigen_spectra-ce_eigen))
    plt.plot(x, y, label=aname)
plt.legend(loc="lower right")
plt.xlabel("Spectra Locations (partial)", **font)
plt.ylabel("Spectral Difference Cumulative Sum", **font)
plt.title("Pre-trained Architectures:  \n Layer/Circular Spectral Difference Decay", **font)
plt.savefig(
    sdecay_vgg, format="eps", dpi=300, bbox_inches="tight"
)

plt.cla()
plt.clf()
plt.gca()
plt.gcf()
plt.close()
In [ ]:
import matplotlib.pylab as plt
%matplotlib inline
font = {"family": "normal", "weight": "bold", "size": 11}
plt.rc("font", **font)

anames0 = ['densenet121', 'densenet161', 'densenet169', 'densenet201']

for aname in anames0:
    eigen_spectra, ce_eigen, eigen_spectra_bin_centres = mixed_ensemble_layer_circular_spectra[aname]
    x = np.real(eigen_spectra_bin_centres)
    y = np.cumsum(np.real(eigen_spectra-ce_eigen))
    plt.plot(x, y, label=aname)
plt.legend(loc="lower right")
plt.xlabel("Spectra Locations (partial)", **font)
plt.ylabel("Spectral Difference Cumulative Sum", **font)
plt.title("Pre-trained Architectures:  \n Layer/Circular Spectral Difference Decay", **font)
plt.savefig(
    sdecay_densenet, format="eps", dpi=300, bbox_inches="tight"
)

plt.cla()
plt.clf()
plt.gca()
plt.gcf()
plt.close()