In [1]:
%matplotlib inline
In [2]:
from __future__ import division
In [3]:
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
from statsmodels import api as sm
In [4]:
from rpy2.robjects import pandas2ri, r
pandas2ri.activate()

We are trying to follow Wood, Generalized Additive Models, Chapter 3 in Python to build our own additive model.

Engine Wear

In [5]:
import warnings
warnings.filterwarnings('ignore')
!mkdir -p /tmp/Rpackages
r('install.packages("gamair", lib="/tmp/Rpackages/", repos="http://cran.us.r-project.org")');
warnings.filterwarnings('default')
The downloaded source packages are in
	‘/tmp/RtmpO2Xdeg/downloaded_packages’
In [6]:
r('library(gamair, lib="/tmp/Rpackages/")');
r.data('engine');
engine_df = pandas2ri.ri2py(r['engine'])
In [7]:
def scale(x):
    return (x - x.min()) / np.ptp(x)
In [8]:
engine_df['scaled_size'] = scale(engine_df['size'])
In [9]:
engine_df.head()
Out[9]:
size wear scaled_size
0 2.78 3.0 0.871795
1 2.43 3.1 0.647436
2 2.32 4.8 0.576923
3 2.43 3.3 0.647436
4 2.98 2.8 1.000000
In [10]:
fig, ax = plt.subplots(figsize=(8, 6))

blue = sns.color_palette()[0]
ax.scatter(engine_df['scaled_size'], engine_df.wear, color=blue, alpha=0.5);

ax.set_xlim(-0.05, 1.05);
ax.set_ylim(1.5, 5);
In [11]:
n = engine_df.shape[0]
In [12]:
def R(x, z):
    return ((z - 0.5)**2 - 1 / 12) * ((x - 0.5)**2 - 1 / 12) / 4 - ((np.abs(x - z) - 0.5)**4 - 0.5 * (np.abs(x - z) - 0.5)**2 + 7 / 240) / 24

R = np.frompyfunc(R, 2, 1)
In [13]:
def univariate_design_matrix(x, knots, intercept=True):
    n = x.shape[0]
    
    if intercept:
        q = knots.size + 2
    else:
        q = knots.size + 1
    
    X = np.empty((n, q))
    
    if intercept:
        X[:, 0] = 1
        X[:, 1] = x
        X[:, 2:] = R.outer(x, knots)
    else:
        X[:, 0] = x
        X[:, 1:] = R.outer(x, knots)
    
    return X
In [14]:
def design_matrix(x, knots):
    if x.ndim == 1:
        x = np.row_stack(x)
        knots = np.row_stack(knots)
    
    return np.hstack(univariate_design_matrix(x[:, i], knots[:, i], intercept=(i == 0)) for i in xrange(x.shape[1]))
In [15]:
def univariate_penalty_matrix(knots, intercept=True):
    if intercept:
        q = knots.size + 2
    else:
        q = knots.size + 1
    
    S = np.zeros((q, q))
    
    if intercept:
        S[2:, 2:] = R.outer(knots, knots)
    else:
        S[1:, 1:] = R.outer(knots, knots)
    
    return S
In [16]:
def penalty_matrix(knots, lambda_=None):
    if knots.ndim == 1:
        knots = np.row_stack(knots)
    
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    
    return sp.linalg.block_diag(*[lambda_[i] * univariate_penalty_matrix(knots[:, i], intercept=(i == 0)) for i in xrange(knots.shape[1])])
In [17]:
def matrix_sqrt(m):
    e, V = np.linalg.eig(m)
    e = np.maximum(e, 0)
    
    return np.dot(V, np.dot(np.diag(np.sqrt(e)), V.T))
In [18]:
def fit(y, x, knots, lambda_=None):
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    else:
        lambda_ = np.atleast_1d(lambda_)

    X = design_matrix(x, knots)
    S = penalty_matrix(knots, lambda_=lambda_)
    
    y_ = np.hstack((y, np.zeros(S.shape[0]))).T
    
    B = matrix_sqrt(S)
    X_ = np.vstack((X, B))
    
    return sm.OLS(y_, X_).fit()
In [19]:
def predict(x, knots, model, link=None):
    X = design_matrix(x, knots)
    
    if link is None:
        return model.predict(X)
    else:
        return link(model.predict(X))
In [20]:
lambdas = [10**-2, 10**-4, 10**-6]
In [21]:
q = 10
qs = np.linspace(0, 1, q - 2)
In [22]:
engine_y = engine_df.wear.values
engine_x = engine_df.scaled_size.values
engine_knots = engine_df.scaled_size.quantile(qs).values
In [23]:
fig, axes = plt.subplots(ncols=3, sharex=True, sharey=True, figsize=(24, 6))

plot_sizes = np.linspace(0, 1, 1000)

for lambda_, ax in zip(lambdas, axes):
    model = fit(engine_y, engine_x, engine_knots, lambda_=lambda_)
    ax.scatter(engine_x, engine_y, color=blue, alpha=0.5);
    ax.plot(plot_sizes, predict(plot_sizes, engine_knots, model));

    ax.set_xlim(-0.05, 1.05);
    ax.set_xlabel('Scaled capacity');
    
    ax.set_ylim(1.5, 5);
    ax.set_ylabel('Wear');
    
    ax.set_title('$\lambda = {}$'.format(lambda_));
In [24]:
def gcv_score(y, x, knots, lambda_=None):
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    else:
        lambda_ = np.atleast_1d(lambda_)

    model = fit(y, x, knots, lambda_=lambda_)
    y_hat = predict(x, knots, model)
    
    n = x.shape[0]
    
    X = design_matrix(x, knots)
    hat_matrix_trace = model.get_influence().hat_matrix_diag[:n].sum()
    
    return n * np.power(y - y_hat, 2).sum() / np.power(n - hat_matrix_trace, 2)
In [25]:
gcv_lambdas = np.power(1.5, np.linspace(0, 60, 100)) * 10**-8
In [26]:
gcv_scores = np.array([gcv_score(engine_y, engine_x, engine_knots, lambda_=lambda_) for lambda_ in gcv_lambdas])
In [27]:
lambda_best = gcv_lambdas[gcv_scores.argmin()]
In [28]:
lambda_best
Out[28]:
0.0021681971002683468
In [29]:
fig, (gcv_ax, plot_ax) = plt.subplots(ncols=2, figsize=(16, 6))

gcv_ax.plot(gcv_lambdas, gcv_scores);

gcv_ax.set_xscale('log');
gcv_ax.set_xlabel('$\lambda$');

gcv_ax.set_ylabel('$V$');

gcv_ax.set_title('GCV score');

model = fit(engine_y, engine_x, engine_knots, lambda_=lambda_best)
plot_ax.scatter(engine_x, engine_y, color=blue, alpha=0.5);
plot_ax.plot(plot_sizes, predict(plot_sizes, engine_knots, model));

plot_ax.set_xlim(-0.05, 1.05);
plot_ax.set_xlabel('Scaled capacity');

plot_ax.set_ylim(1.5, 5);
plot_ax.set_ylabel('Wear');

plot_ax.set_title('GCV optimal fit');

Volume

Additive model

In [30]:
tree_df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/datasets/trees.csv',
                      usecols=['Girth', 'Height', 'Volume'])
In [31]:
tree_df['scaled_girth'] = scale(tree_df.Girth)
tree_df['scaled_height'] = scale(tree_df.Height)
In [32]:
tree_df.head()
Out[32]:
Girth Height Volume scaled_girth scaled_height
0 8.3 70 10.3 0.000000 0.291667
1 8.6 65 10.3 0.024390 0.083333
2 8.8 63 10.2 0.040650 0.000000
3 10.5 72 16.4 0.178862 0.375000
4 10.7 81 18.8 0.195122 0.750000
In [33]:
tree_x = tree_df[['scaled_girth', 'scaled_height']].values
tree_knots = tree_df[['scaled_girth', 'scaled_height']].quantile(qs, axis=0).values
In [34]:
girth_lambdas = np.power(2., np.arange(30)) * 1e-5
height_lambdas = np.power(2., np.arange(30)) * 1e-5
In [35]:
tree_gcv_scores = np.zeros((30, 30))
In [36]:
tree_y = tree_df.Volume.values
In [37]:
for i in xrange(30):
    for j in xrange(30):
        lambda_ = np.array([girth_lambdas[i], height_lambdas[j]])
        tree_gcv_scores[i, j] = gcv_score(tree_y, tree_x, tree_knots, lambda_=lambda_)
In [38]:
tree_gcv_row_min_col_ix = tree_gcv_scores.argmin(axis=1)
girth_lambda_ix = tree_gcv_scores[np.arange(30), tree_gcv_row_min_col_ix].argmin()
height_lambda_ix = tree_gcv_row_min_col_ix[girth_lambda_ix]

tree_lambda = np.array([girth_lambdas[girth_lambda_ix], height_lambdas[height_lambda_ix]])
In [39]:
tree_lambda
Out[39]:
array([  1.02400000e-02,   2.68435456e+03])
In [40]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

xv, yv = np.meshgrid(np.arange(30), np.arange(30))

ax.plot_wireframe(xv, yv, tree_gcv_scores, color='k', lw=0.5);

ax.set_xticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_xticks()]);
ax.set_xlabel(r'$\lambda_2 \times 1e-5$');

ax.set_yticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_yticks()]);
ax.set_ylabel(r'$\lambda_1 \times 1e-5$');

ax.set_zlabel('GCV score');
In [41]:
model = fit(tree_y, tree_x, tree_knots, lambda_=tree_lambda)
In [42]:
fig, ax = plt.subplots(figsize=(8, 6))

ax.scatter(tree_y, predict(tree_x, tree_knots, model), color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()
MARGIN = 5

ax.plot((v_min - MARGIN, v_max + MARGIN), (v_min - MARGIN, v_max + MARGIN), '--',
        c='k', label='Perfect agreement');

ax.set_xlim(v_min - MARGIN, v_max + MARGIN);
ax.set_xlabel('Actual volume');

ax.set_ylim(v_min - MARGIN, v_max + MARGIN);
ax.set_ylabel('Predicted volume');

ax.legend(loc=2);
In [43]:
def predict_component_function(x, icol, knots, model):
    X = design_matrix(x, knots)
    
    q = knots.shape[0] + 2
    first_col = min(1, icol) * q + max(0, icol - 1) * (q - 1)
    last_col = first_col + q - min(1, icol)
    
    X_ = np.zeros_like(X)
    X_[:, first_col:last_col] = X[:, first_col:last_col]
        
    return model.predict(X_)
In [44]:
fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 0], predict_component_function(tree_plot_x, 0, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 0], predict_component_function(tree_x, 0, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(bottom=0);
ax.set_ylabel('$f_1$');
In [45]:
fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 1], predict_component_function(tree_plot_x, 1, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 1], predict_component_function(tree_x, 1, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(bottom=-0.25);
ax.set_ylabel('$f_1$');

Generalized additive model

The model is

$$\log(\mathbb{E}(\textrm{Volume})) = f_1(\textrm{Girth}) + f_2(\textrm{Height}),$$

where volume is gamma distributed

In [46]:
def penalized_irls_step(y, X, B, knots, model=None):    
    n = y.size
    
    # model is None for the first step
    if model is None:
        eta = np.log(y)
    else:
        eta = model.predict(X)

    mu = np.exp(eta)
    
    X_ = np.vstack((X, B))
    
    z = (y - mu) / mu + eta
    z_ = np.hstack((z, np.zeros(B.shape[0]))).T
    
    new_model = sm.OLS(z_, X_).fit()
    rss = np.power(z - new_model.predict(X), 2).sum()
    gcv_score = rss / np.power(n - new_model.get_influence().hat_matrix_diag[:n].sum(), 2)
    
    return rss, gcv_score, new_model
In [47]:
def fit_gam(y, x, knots, lambda_=None, rtol=1e-4):
    X = design_matrix(x, knots)
    S = penalty_matrix(knots, lambda_=lambda_)
    B = matrix_sqrt(S)    
    
    model = None
    old_rss, rss = 0, 1

    while np.abs(rss - old_rss) > rtol * old_rss:
        old_rss = rss
        
        rss, gcv_score, model = penalized_irls_step(y, X, B, knots, model=model)

    return gcv_score, model
In [48]:
tree_gam_gcv_scores = np.empty((30, 30))
In [49]:
for i in xrange(30):
    for j in xrange(30):
        if j == 0 and i % 5 == 0:
            print 'i = {}'.format(i)

        lambda_ = np.array([girth_lambdas[i], height_lambdas[j]])
        tree_gam_gcv_scores[i, j], _ = fit_gam(tree_y, tree_x, tree_knots, lambda_=lambda_)
i = 0
i = 5
i = 10
i = 15
i = 20
i = 25
In [50]:
tree_gam_gcv_row_min_col_ix = tree_gam_gcv_scores.argmin(axis=1)
gam_girth_lambda_ix = tree_gam_gcv_scores[np.arange(30), tree_gam_gcv_row_min_col_ix].argmin()
gam_height_lambda_ix = tree_gam_gcv_row_min_col_ix[gam_girth_lambda_ix]

tree_gam_lambda = np.array([girth_lambdas[gam_girth_lambda_ix], height_lambdas[gam_height_lambda_ix]])
In [51]:
tree_gam_lambda
Out[51]:
array([  1.02400000e-02,   2.68435456e+03])
In [52]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

xv, yv = np.meshgrid(np.arange(30), np.arange(30))

ax.plot_wireframe(xv, yv, tree_gam_gcv_scores, color='k', lw=0.5);

ax.set_xticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_xticks()]);
ax.set_xlabel(r'$\lambda_2 \times 1e-5$');

ax.set_yticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_yticks()]);
ax.set_ylabel(r'$\lambda_1 \times 1e-5$');

ax.set_zlabel('GCV score');
In [53]:
_, model = fit_gam(tree_y, tree_x, tree_knots, lambda_=tree_gam_lambda)
In [54]:
fig, ax = plt.subplots(figsize=(8, 6))

ax.scatter(tree_y, predict(tree_x, tree_knots, model, link=np.exp), color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()
MARGIN = 5

ax.plot((v_min - MARGIN, v_max + MARGIN), (v_min - MARGIN, v_max + MARGIN), '--',
        c='k', label='Perfect agreement');

ax.set_xlim(v_min - MARGIN, v_max + MARGIN);
ax.set_xlabel('Actual volume');

ax.set_ylim(v_min - MARGIN, v_max + MARGIN);
ax.set_ylabel('Predicted volume');

ax.legend(loc=2);
In [55]:
fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 0], predict_component_function(tree_plot_x, 0, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 0], predict_component_function(tree_x, 0, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(2, 4.05);
ax.set_ylabel('$f_1$');
In [56]:
fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 1], predict_component_function(tree_plot_x, 1, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 1], predict_component_function(tree_x, 1, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(-0.01, 0.4);
ax.set_ylabel('$f_1$');