(c) 2019
Developed by
Mehmet Süzen
suzen at acm dot org
periodic spectral ergodicity (PSE)
.periodic spectral ergodicity (PSE)
measure for pre-trained networks.('numpy version:',
'1.17.2',
'matplotlib :',
'3.1.1',
'Python version:',
'3.7.3 (default, Mar 27 2019, 16:54:48) \n[Clang 4.0.1 (tags/RELEASE_401/final)]',
'Bristol version',
'0.2.5',
'pytorch:',
'1.3.0',
'torchvision:',
'0.4.1a0+d94043a')
%load_ext lab_black
%matplotlib inline
import numpy as np
import sys
import matplotlib
import matplotlib.pyplot as plt
import bristol
from bristol.spectral import Ergodicity
import json
ergo = Ergodicity()
import torchvision
import torchvision.models as models
import torch
"numpy version:", np.__version__, "matplotlib :", matplotlib.__version__, "Python version:", sys.version, "Bristol version", bristol.__version__, "pytorch:", torch.__version__, "torchvision:", torchvision.__version__
Charles pulls slices from a single layer multiple times on convolutional layers; here we instead reshape the weights and obtain a single weight matrix per layer rather than slices.
def get_layer_matrix_set(pmodel):
    """
    Return layer matrix set of a given pre-trained model

    Every sub-module of ``pmodel`` whose type string contains
    ``torch.nn.modules`` and whose ``weight`` is a tensor with at least
    two dimensions is flattened to a 2D matrix ``Ap`` of shape (N, M):
    N is the first dimension, M the product of the remaining ones.
    The square product ``A = Ap Ap^T`` is what gets collected.
    Layers with N <= 1 or M <= 1 are skipped.

    Modules without a usable weight (containers, activations,
    ``weight=None``) are skipped explicitly instead of relying on a
    blanket ``except``, so genuine programming errors are no longer
    silently swallowed.

    Input
        pmodel : pytorch torchvision pre-trained model

    Returns:
        A tuple (A_set, A_set_N, A_set_types)
        A_set : A list of 2D np-array, the NxN matrices Ap Ap^T
        A_set_N : Shape of NxN matrices.
        A_set_types : Layer type, pytorch object type that is
                      extracted as 2D weight matrix.
    """
    A_set = []
    A_set_N = []
    A_set_types = []
    for x in pmodel.modules():
        type_mod = str(type(x))  # module/method name
        if "torch.nn.modules" not in type_mod:
            continue
        # Many modules have no weight at all, and some expose weight=None.
        layer_weights = getattr(x, "weight", None)
        if not isinstance(layer_weights, torch.Tensor):
            continue
        shape_layer = list(layer_weights.shape)
        if len(shape_layer) < 2:
            continue
        N = int(shape_layer[0])
        M = int(np.prod(shape_layer[1:]))
        if N <= 1 or M <= 1:
            continue
        try:
            flat = layer_weights.detach().cpu().reshape(N, M)
            Ap = np.array(flat.numpy())
        except (RuntimeError, TypeError):
            # Best effort: a weight that cannot be converted to numpy
            # is skipped rather than aborting the whole extraction.
            continue
        A = np.matmul(Ap, np.transpose(Ap))
        A_set.append(A)
        A_set_N.append(A.shape)
        A_set_types.append(type_mod)
    return (A_set, A_set_N, A_set_types)
# Try the extraction on a single pre-trained torchvision model (ResNet-18).
pmodel = models.resnet18(pretrained=True)
# A_t is the tuple (A_set, A_set_N, A_set_types) returned by get_layer_matrix_set.
A_t = get_layer_matrix_set(pmodel)
def get_eigenvals_layer_matrix_set(A_set):
    """
    Eigenvalue spectrum of ``A A^T`` for every matrix ``A`` in a collection.

    Input:
        A_set : list of 2D ndarrays, square real
    Output
        eigenvals_set : list holding one array of eigenvalues per input
                        matrix, in the same order as ``A_set``
    """

    def _spectrum(matrix):
        # Eigenvalues are taken of the product matrix @ matrix^T,
        # not of the matrix itself.
        gram = np.matmul(matrix, np.transpose(matrix))
        return np.linalg.eigvals(gram)

    return [_spectrum(matrix) for matrix in A_set]
# Eigenvalue spectra for the matrices in A_t[0] (the A_set element of the tuple).
eset = get_eigenvals_layer_matrix_set(A_t[0])
# Quick inspection helpers, left disabled:
# len(eset)
# [len(ei) for ei in eset]
from itertools import cycle
def list2plist(lst, upper_bound):
    """
    Given list lst and upper_bound.
    Return period_lst, cycle.

    The items of ``lst`` are repeated cyclically until exactly
    ``upper_bound`` values have been produced.  Each value is stored as
    its absolute value; for complex items of any precision only the
    real part is kept before taking the absolute value.

    Input
        lst : iterable of numbers (e.g. eigenvalues of one layer)
        upper_bound : number of items in the returned list

    Returns
        lst_period : list of ``upper_bound`` non-negative values; empty
                     when ``lst`` is empty or ``upper_bound`` <= 0
    """
    # Local import keeps this block self-contained.
    from itertools import cycle, islice

    n_items = int(upper_bound)
    if n_items <= 0:
        return []
    lst_period = []
    for item in islice(cycle(lst), n_items):
        if isinstance(item, (complex, np.complexfloating)):
            # catch for numerical small-unstable numbers: drop the
            # imaginary part for every complex type, not only complex64
            item = item.real
        lst_period.append(abs(item))
    return lst_period
def eigenvals_set_to_periodic(layer_eigens):
    """
    Layer matrix set eigenvalues to periodic set

    Every eigenvalue list in ``layer_eigens`` is extended cyclically
    (via ``list2plist``) to the length of the longest list, so that all
    layers end up with the same number of values.

    Input
        layer_eigens : list of eigenvalue sequences, one per layer

    Returns
        eset_period : list of equally long lists, one per layer;
                      empty when ``layer_eigens`` is empty
    """
    # Operate on the argument; the previous version read the module-level
    # ``eset`` and silently ignored what the caller passed in.
    if len(layer_eigens) == 0:
        return []
    upper_bound = max(len(e) for e in layer_eigens)
    return [list2plist(e, upper_bound) for e in layer_eigens]
# Extend every layer spectrum cyclically to a common length.
eset_per = eigenvals_set_to_periodic(eset)
# Spot check: value at index 999 of the first layer's periodic spectrum.
np.array(eset_per)[0][999]
These methods actually do not know whether the eigenvalues come from a pre-trained network.
PSE is quantified by symmetric distance.
def d_layers_pse(eset_per):
    """
    Progression of D_layers given periodic set

    For depth l = 1 .. len(eset_per) - 2 the first l and the first l + 1
    periodic spectra are each flattened and passed to
    ``ergo.thirumalai_mountain``; the two results are compared with
    ``ergo.kl_distance_symmetric``.  The last entry of ``eset_per`` is
    never included.

    Input
        eset_per : list of equally long eigenvalue lists, one per layer

    Returns
        D_layer : list with one distance value per depth l
    """
    nlayers = len(eset_per) - 1  # minus 1 for the last layer
    print(nlayers)
    spectrum_len = len(eset_per[0])

    def _omega(depth):
        # Flatten the spectra of the first `depth` layers into one vector.
        stacked = np.array(eset_per[:depth]).ravel()
        return ergo.thirumalai_mountain(stacked, depth, spectrum_len)

    return [
        ergo.kl_distance_symmetric(_omega(depth), _omega(depth + 1))
        for depth in np.arange(1, nlayers)
    ]
# Names of the torchvision model constructors to analyse, grouped by family:
# plain VGG, VGG with batch normalisation, and ResNet.
netnames = (
    ["vgg11", "vgg13", "vgg16", "vgg19"]
    + ["vgg11_bn", "vgg13_bn", "vgg16_bn", "vgg19_bn"]
    + ["resnet18", "resnet34", "resnet50", "resnet101", "resnet152"]
)
# Distance progression (output of d_layers_pse) per architecture, keyed by name.
d_layers_dict = {}
for netname in netnames:
    print("d_layer for ", netname)
    # Look up the torchvision constructor by name and build the pre-trained model.
    pmodel = getattr(models, netname)(pretrained=True)
    print(type(pmodel))
    # Pipeline: layer matrices -> eigenvalue spectra -> periodic spectra -> distances.
    # NOTE: these assignments rebind the module-level names used by the cells above.
    A_t = get_layer_matrix_set(pmodel)
    eset = get_eigenvals_layer_matrix_set(A_t[0])
    eset_per = eigenvals_set_to_periodic(eset)
    d_layers = d_layers_pse(eset_per)
    d_layers_dict[netname] = d_layers
# Store the per-network results as JSON (absolute, machine-specific path).
with open("/Users/msuzen/data/d_layers_dict.json", "w") as fp:
    json.dump(d_layers_dict, fp)
# Read the stored results back from the same file.
with open("/Users/msuzen/data/d_layers_dict.json", "r") as fp:
    d_layers_dict = json.load(fp)
# Show which networks are available.
d_layers_dict.keys()
%matplotlib inline
# Plot log10 of the layer-wise distance for the ResNet family and save it as EPS.
# "sans-serif" replaces "normal", which is not a valid font family name and only
# triggered a findfont fallback warning.
font = {"family": "sans-serif", "weight": "bold", "size": 14}
plt.rc("font", **font)
# One curve per network: x is the index 1..m, y is log10 of the stored distances.
Dl_18 = d_layers_dict["resnet18"]
Dl_18l = np.log10(Dl_18)
m = len(Dl_18)
plt.plot(np.arange(1, m + 1), Dl_18l, "-", label="resnet18")
Dl_34 = d_layers_dict["resnet34"]
Dl_34l = np.log10(Dl_34)
m = len(Dl_34)
plt.plot(np.arange(1, m + 1), Dl_34l, "x-", label="resnet34")
Dl_50 = d_layers_dict["resnet50"]
Dl_50l = np.log10(Dl_50)
m = len(Dl_50)
plt.plot(np.arange(1, m + 1), Dl_50l, "o-", label="resnet50")
Dl_101 = d_layers_dict["resnet101"]
Dl_101l = np.log10(Dl_101)
m = len(Dl_101)
plt.plot(np.arange(1, m + 1), Dl_101l, "--", label="resnet101")
Dl_152 = d_layers_dict["resnet152"]
Dl_152l = np.log10(Dl_152)
m = len(Dl_152)
plt.plot(np.arange(1, m + 1), Dl_152l, ".-", label="resnet152")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("Resnet Pre-trained Architectures ", **font)
# The "plots" directory must already exist.
plt.savefig(
    "plots/resnet_symmetric_resnet.eps", format="eps", dpi=1000, bbox_inches="tight"
)
# plt.cla()
# plt.clf()
# plt.gca()
# plt.gcf()
# plt.close()
# Mean of the log10 distance curve for each ResNet, in the order
# resnet18, resnet34, resnet50, resnet101, resnet152.
mean_pse = [
    np.mean(Dl_18l),
    np.mean(Dl_34l),
    np.mean(Dl_50l),
    np.mean(Dl_101l),
    np.mean(Dl_152l),
]
mean_pse
# Classification errors in the same network order.
top1_err = [30.24, 26.70, 23.85, 22.63, 21.69]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.92, 8.58, 7.13, 6.44, 5.94]
np.corrcoef(mean_pse, top1_err)
# NOTE(review): this line previously repeated the top-1 correlation and
# top5_err was never used; correlating against top-5 is the presumed intent.
np.corrcoef(mean_pse, top5_err)
np.corrcoef(
    mean_pse, [1.1, 4.5, 2.3, 2.6, 0.5]
)  # some random seq. to see the correlation
%matplotlib inline
# Plot log10 of the layer-wise distance for the plain VGG family and save it as EPS.
# "sans-serif" replaces "normal", which is not a valid font family name and only
# triggered a findfont fallback warning.
font = {"family": "sans-serif", "weight": "bold", "size": 14}
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
plt.rc("font", **font)
# One curve per network: x is the index 1..m, y is log10 of the stored distances.
Dl = d_layers_dict["vgg11"]
Dl_11 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11, "-", label="vgg11")
Dl = d_layers_dict["vgg13"]
Dl_13 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13, "x-", label="vgg13")
Dl = d_layers_dict["vgg16"]
Dl_16 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16, "o-", label="vgg16")
Dl = d_layers_dict["vgg19"]
Dl_19 = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19, "--", label="vgg19")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures", **font)
# The "plots" directory must already exist.
plt.savefig("plots/vgg_symmetric_pse.eps", format="eps", dpi=1000, bbox_inches="tight")
%matplotlib inline
# Plot log10 of the layer-wise distance for the batch-normalised VGG family
# and save it as EPS.
# "sans-serif" replaces "normal", which is not a valid font family name and only
# triggered a findfont fallback warning.
font = {"family": "sans-serif", "weight": "bold", "size": 14}
# 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'vgg11_bn', 'vgg13_bn', 'vgg16_bn', 'vgg19_bn'
# Font settings are applied before the first plot call, as in the other plot cells.
plt.rc("font", **font)
# One curve per network: x is the index 1..m, y is log10 of the stored distances.
Dl = d_layers_dict["vgg11_bn"]
Dl_11_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_11_bn, "-", label="vgg11_bn")
Dl = d_layers_dict["vgg13_bn"]
Dl_13_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_13_bn, "x-", label="vgg13_bn")
Dl = d_layers_dict["vgg16_bn"]
Dl_16_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_16_bn, "o-", label="vgg16_bn")
Dl = d_layers_dict["vgg19_bn"]
Dl_19_bn = np.log10(Dl)
m = len(Dl)
plt.plot(np.arange(1, m + 1), Dl_19_bn, "--", label="vgg19_bn")
plt.legend(loc="upper right")
plt.xlabel("Network layer depth", **font)
# Raw string: "\l" is not a valid escape sequence in a normal string literal.
plt.ylabel(r"Approach to PSE: $\log_{10} D_{pse}$", **font)
plt.title("VGG Pre-trained Architectures with BN", **font)
# The "plots" directory must already exist.
plt.savefig(
    "plots/vgg_symmetric_pse_bn.eps", format="eps", dpi=1000, bbox_inches="tight"
)
# --- Plain VGG: mean log10 distance vs. classification error ---
# Order: vgg11, vgg13, vgg16, vgg19.
mean_pse = [np.mean(Dl_11), np.mean(Dl_13), np.mean(Dl_16), np.mean(Dl_19)]
mean_pse
top1_err = [30.98, 30.07, 28.41, 27.62]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [11.37, 10.75, 9.62, 9.12]
np.corrcoef(mean_pse, top1_err)
# NOTE(review): mean_pse used to be rebound to the batch-norm values right
# here, which correlated BN distances with the plain-VGG top-5 errors.
# The plain-VGG values are kept so both inputs describe the same networks.
np.corrcoef(mean_pse, top5_err)
# --- VGG with batch normalisation ---
# Order: vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn.
mean_pse = [np.mean(Dl_11_bn), np.mean(Dl_13_bn), np.mean(Dl_16_bn), np.mean(Dl_19_bn)]
top1_err = [29.62, 28.45, 26.63, 25.76]
# https://pytorch.org/docs/stable/torchvision/models.html
top5_err = [10.19, 9.63, 8.5, 8.15]
mean_pse
np.corrcoef(mean_pse, top1_err)
np.corrcoef(mean_pse, top5_err)
\begin{table}[]
\centering
\begin{tabular}{|l|l|l|l|}
\hline
Architecture & Top-1 error & Top-5 error & cPSE \\ \hline
vgg11 & 30.98 & 11.37 & 0.04 \\ \hline
vgg13 & 30.07 & 10.75 & 0.41 \\ \hline
vgg16 & 28.41 & 9.62 & 0.14 \\ \hline
vgg19 & 27.62 & 9.12 &-0.10 \\ \hline
vgg11bn & 29.62 & 10.19 & 0.38 \\ \hline
vgg13bn & 28.45 & 9.63 & 0.36 \\ \hline
vgg16bn & 26.63 & 8.50 & 0.18 \\ \hline
vgg19bn & 25.76 & 8.15 &-0.07 \\ \hline
resnet18 & 30.24 & 10.92 &-0.19 \\ \hline
resnet34 & 26.70 & 8.58 &-0.74 \\ \hline
resnet50 & 23.85 & 7.13 &-1.03 \\ \hline
resnet101 & 22.63 & 6.44 &-1.77 \\ \hline
resnet152 & 21.69 & 5.94 &-2.29 \\ \hline
\end{tabular}
\
\caption{Classification performance and cPSE of the investigated architectures. The correlation between
classification performance and cPSE is $\rho=0.94$ for ResNet and $\rho=0.44$ for VGG ($\rho_{bn}=0.93$
with batch normalisation).}
\label{corr}
\end{table}