MDI 720 : Statistiques

Ridge

Joseph Salmon

This notebook reproduces the figures used in the course "Ridge_fr".

In [ ]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt  # for plots
from matplotlib import rc
from sklearn.linear_model import RidgeCV
from os import mkdir, path
from mpl_toolkits.mplot3d import Axes3D
# interaction mode better for 3D
%matplotlib notebook
In [ ]:
dirname = "../prebuiltimages/"
if not path.exists(dirname):
    mkdir(dirname)


np.random.seed(seed=44)

###############################################################################
# Plot initialization

plt.close('all')
dirname = "../srcimages/"
imageformat = '.pdf'


rc('font', **{'family': 'sans-serif', 'sans-serif': ['Computer Modern Roman']})
params = {'axes.labelsize': 12,
          'font.size': 16,
          'legend.fontsize': 16,
          'text.usetex': True,
          'figure.figsize': (8, 6)}
plt.rcParams.update(params)

sns.set_context("poster")
sns.set_palette("colorblind")
sns.set_style("white")
sns.axes_style()


###############################################################################
# display function:

saving = False


def my_saving_display(fig, dirname, filename, imageformat):
    """Save a figure to disk (personal helper)."""
    filename = filename.replace('.', 'pt')  # replace "." to avoid issues with floats in names
    if saving:
        image_name = dirname + filename + imageformat
        fig.savefig(image_name)

Ridge path / Ridge CV
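For reference, the cell below uses the closed form of the ridge estimator obtained from the thin SVD $X = U\,\mathrm{diag}(s)\,V^\top$: for a tuning parameter $\lambda > 0$,

$$\hat\theta_\lambda = (X^\top X + \lambda I_p)^{-1} X^\top y = V\,\mathrm{diag}\!\left(\frac{s_j}{s_j^2 + \lambda}\right) U^\top y\,,$$

so the whole path over a grid of $\lambda$'s can be computed from a single SVD.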

In [ ]:
n_features = 50
n_samples = 51

eps = 1e-6

alpha_max = 1e4
n_alphas = 100
alphas = np.logspace(np.log10(alpha_max * eps),
                     np.log10(alpha_max), num=n_alphas)

# Observe the striking phenomenon obtained with an ill-conditioned design matrix X such as:
# x = 10. ** (-np.arange(n_samples,))
# X = (np.column_stack([x ** (i) for i in range(n_features)]))

X = np.random.randn(n_samples, n_features)
theta_true = np.zeros([n_features, ])
theta_true[0:5] = 2
y_true = np.dot(X, theta_true)
sigma = 1.
noise = sigma * np.random.randn(n_samples,)
y = np.dot(X, theta_true) + noise


def ridge_path(X, y, alphas):
    """Compute the ridge path over a grid of tuning parameters."""
    U, s, Vt = np.linalg.svd(X, full_matrices=False)
    theta_ridge = np.zeros((X.shape[1], len(alphas)))
    UTy = np.dot(U.T, y)
    for index, alpha in enumerate(alphas):
        # shrinkage factors s / (s ** 2 + alpha), applied in the SVD basis
        mat_d = np.diag(s / (s ** 2 + alpha))
        coef_alpha = np.dot(Vt.T, np.dot(mat_d, UTy))
        theta_ridge[:, index] = coef_alpha

    return theta_ridge, s

# Possible alternative: scikit-learn's private helper
# from sklearn.linear_model._ridge import _solve_svd

theta_ridge, s = ridge_path(X, y, np.asarray(alphas).ravel())

fig2 = plt.figure(figsize=(10, 8))
sns.despine()
plt.title("Ridge path: " +
          r"$p={0}, n={1} $".format(n_features, n_samples), fontsize=16)
ax1 = fig2.add_subplot(111)
ax1.plot(alphas, np.transpose(theta_ridge), linewidth=3)
ax1.set_xscale('log')
ax1.set_xlabel(r"$\lambda$")
ax1.set_ylabel("Coefficients values")
plt.tight_layout()
plt.show()

my_saving_display(fig2, dirname, "Ridge_path", imageformat)
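As a quick sanity check (not part of the original notebook), one column of the hand-computed path can be compared with scikit-learn's own ridge solver; the two should agree up to numerical precision.

In [ ]:
from sklearn.linear_model import Ridge

idx = n_alphas // 2  # any index of the grid works
ridge_sk = Ridge(alpha=alphas[idx], fit_intercept=False, solver='svd')
ridge_sk.fit(X, y)
print(np.allclose(ridge_sk.coef_, theta_ridge[:, idx]))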
In [ ]:
###############################################################################
# Cross Validation for Ridge

# If cv is not specified, an efficient leave-one-out scheme (GCV) is used,
# which seems to perform poorly here; we use 5-fold cross-validation instead.
cv_fold = 5
clf = RidgeCV(alphas=alphas, fit_intercept=False, cv=cv_fold)
clf.fit(X, y)

ax1.axvline(clf.alpha_, color='k', linestyle='-', linewidth=3,
            label=r"$\lambda_{CV}$")
plt.annotate('$CV=5$', xy=(3 * clf.alpha_, 0.5), xycoords='data',
             xytext=(0, 150), textcoords='offset points', fontsize=18)

my_saving_display(fig2, dirname, "Ridge_path_CV", imageformat)

Bias / Variance trade-off
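The cell below evaluates the two terms of the (fixed-design) prediction risk of ridge in closed form: with the singular values $s_j$ of $X$ and the true coefficients $\theta^*$,

$$\mathrm{Bias}^2(\lambda) = \big\|\lambda\, X (X^\top X + \lambda I_p)^{-1} \theta^*\big\|_2^2\,, \qquad \mathrm{Var}(\lambda) = \sigma^2 \sum_{j} \frac{s_j^4}{(s_j^2 + \lambda)^2}\,,$$

and the plotted risk is their sum.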

In [ ]:
# Bias contribution to the prediction risk
Biais2_tab = np.zeros(np.shape(alphas))
Gram = np.dot(X.T, X)
for index, alpha in enumerate(alphas):
    # Biais2 = np.dot(np.dot(Gram, theta_true),
    #                 np.linalg.solve(np.dot(Gram + alpha * np.eye(n_features),
    #                                 Gram + alpha * np.eye(n_features)),
    #                                 theta_true))
    intermed = alpha * X.dot(np.linalg.solve(Gram + alpha * np.eye(n_features),
                                             theta_true))
    Biais2 = np.linalg.norm(intermed) ** 2
    Biais2_tab[index] = Biais2

# Variance contribution to the prediction risk
Var_tab = np.zeros(np.shape(alphas))

for index, alpha in enumerate(alphas):
    Var_tab[index] = np.sum(s ** 4 / (s ** 2 + alpha) ** 2) * sigma ** 2

Risk = Var_tab + Biais2_tab
fig1 = plt.figure(figsize=(10, 8))

plt.title("Bias-Variance trade-off: " +
          r"$p={0}, n={1} $".format(n_features, n_samples), fontsize=16)
ax1 = fig1.add_subplot(111)
ax1.set_ylim([5 * sigma ** 2, Risk[-1] * 1.3])
plt.loglog(alphas, Var_tab, label="Variance", linewidth=6)
plt.loglog(alphas, Biais2_tab, label="Squared Bias", linewidth=6)
plt.loglog(alphas, Risk, label="Risk", linewidth=6)
plt.xlabel(r'$\lambda$')
plt.ylabel("Risk")
plt.legend(loc="upper left", fontsize=20)
plt.tight_layout()
plt.show()
my_saving_display(fig1, dirname, "Bias_variance_trade_off", imageformat)
In [ ]:
plt.axvline(clf.alpha_, color='k', linestyle='-', linewidth=8,
            label=r"$\lambda_{CV}$")
plt.annotate('$CV=5$', xy=(3 * clf.alpha_, 3), xycoords='data',
             xytext=(0, 80), textcoords='offset points', fontsize=18)

my_saving_display(fig1, dirname, "Bias_variance_trade_off_C", imageformat)
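As a small follow-up (not among the original figures), the value of $\lambda$ minimizing the theoretical risk computed above can be read off the grid and compared with the cross-validated choice; `lambda_oracle` is just an illustrative name.

In [ ]:
lambda_oracle = alphas[np.argmin(Risk)]
print("lambda minimizing the theoretical risk: %.3g" % lambda_oracle)
print("lambda chosen by 5-fold CV:             %.3g" % clf.alpha_)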