In [1]:

%matplotlib inline

In [2]:

from __future__ import division

In [3]:

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
from statsmodels import api as sm

In [4]:

from rpy2.robjects import pandas2ri, r
pandas2ri.activate()

We are trying to follow Wood, Generalized Additive Models, Chapter 3 in Python to build our own additive model.

Engine Wear¶

In [5]:

import warnings
warnings.filterwarnings('ignore')
!mkdir -p /tmp/Rpackages
r('install.packages("gamair", lib="/tmp/Rpackages/", repos="http://cran.us.r-project.org")');
warnings.filterwarnings('default')

The downloaded source packages are in
	‘/tmp/RtmpO2Xdeg/downloaded_packages’

In [6]:

r('library(gamair, lib="/tmp/Rpackages/")');
r.data('engine');
engine_df = pandas2ri.ri2py(r['engine'])

In [7]:

def scale(x):
    return (x - x.min()) / np.ptp(x)

In [8]:

engine_df['scaled_size'] = scale(engine_df['size'])

In [9]:

engine_df.head()

Out[9]:

	size	wear	scaled_size
0	2.78	3.0	0.871795
1	2.43	3.1	0.647436
2	2.32	4.8	0.576923
3	2.43	3.3	0.647436
4	2.98	2.8	1.000000

In [10]:

fig, ax = plt.subplots(figsize=(8, 6))

blue = sns.color_palette()[0]
ax.scatter(engine_df['scaled_size'], engine_df.wear, color=blue, alpha=0.5);

ax.set_xlim(-0.05, 1.05);
ax.set_ylim(1.5, 5);

In [11]:

n = engine_df.shape[0]

In [12]:

def R(x, z):
    return ((z - 0.5)**2 - 1 / 12) * ((x - 0.5)**2 - 1 / 12) / 4 - ((np.abs(x - z) - 0.5)**4 - 0.5 * (np.abs(x - z) - 0.5)**2 + 7 / 240) / 24

R = np.frompyfunc(R, 2, 1)

In [13]:

def univariate_design_matrix(x, knots, intercept=True):
    n = x.shape[0]
    
    if intercept:
        q = knots.size + 2
    else:
        q = knots.size + 1
    
    X = np.empty((n, q))
    
    if intercept:
        X[:, 0] = 1
        X[:, 1] = x
        X[:, 2:] = R.outer(x, knots)
    else:
        X[:, 0] = x
        X[:, 1:] = R.outer(x, knots)
    
    return X

In [14]:

def design_matrix(x, knots):
    if x.ndim == 1:
        x = np.row_stack(x)
        knots = np.row_stack(knots)
    
    return np.hstack(univariate_design_matrix(x[:, i], knots[:, i], intercept=(i == 0)) for i in xrange(x.shape[1]))

In [15]:

def univariate_penalty_matrix(knots, intercept=True):
    if intercept:
        q = knots.size + 2
    else:
        q = knots.size + 1
    
    S = np.zeros((q, q))
    
    if intercept:
        S[2:, 2:] = R.outer(knots, knots)
    else:
        S[1:, 1:] = R.outer(knots, knots)
    
    return S

In [16]:

def penalty_matrix(knots, lambda_=None):
    if knots.ndim == 1:
        knots = np.row_stack(knots)
    
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    
    return sp.linalg.block_diag(*[lambda_[i] * univariate_penalty_matrix(knots[:, i], intercept=(i == 0)) for i in xrange(knots.shape[1])])

In [17]:

def matrix_sqrt(m):
    e, V = np.linalg.eig(m)
    e = np.maximum(e, 0)
    
    return np.dot(V, np.dot(np.diag(np.sqrt(e)), V.T))

In [18]:

def fit(y, x, knots, lambda_=None):
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    else:
        lambda_ = np.atleast_1d(lambda_)

    X = design_matrix(x, knots)
    S = penalty_matrix(knots, lambda_=lambda_)
    
    y_ = np.hstack((y, np.zeros(S.shape[0]))).T
    
    B = matrix_sqrt(S)
    X_ = np.vstack((X, B))
    
    return sm.OLS(y_, X_).fit()

In [19]:

def predict(x, knots, model, link=None):
    X = design_matrix(x, knots)
    
    if link is None:
        return model.predict(X)
    else:
        return link(model.predict(X))

In [20]:

lambdas = [10**-2, 10**-4, 10**-6]

In [21]:

q = 10
qs = np.linspace(0, 1, q - 2)

In [22]:

engine_y = engine_df.wear.values
engine_x = engine_df.scaled_size.values
engine_knots = engine_df.scaled_size.quantile(qs).values

In [23]:

fig, axes = plt.subplots(ncols=3, sharex=True, sharey=True, figsize=(24, 6))

plot_sizes = np.linspace(0, 1, 1000)

for lambda_, ax in zip(lambdas, axes):
    model = fit(engine_y, engine_x, engine_knots, lambda_=lambda_)
    ax.scatter(engine_x, engine_y, color=blue, alpha=0.5);
    ax.plot(plot_sizes, predict(plot_sizes, engine_knots, model));

    ax.set_xlim(-0.05, 1.05);
    ax.set_xlabel('Scaled capacity');
    
    ax.set_ylim(1.5, 5);
    ax.set_ylabel('Wear');
    
    ax.set_title('$\lambda = {}$'.format(lambda_));

In [24]:

def gcv_score(y, x, knots, lambda_=None):
    if lambda_ is None:
        lambda_ = np.ones(knots.shape[1])
    else:
        lambda_ = np.atleast_1d(lambda_)

    model = fit(y, x, knots, lambda_=lambda_)
    y_hat = predict(x, knots, model)
    
    n = x.shape[0]
    
    X = design_matrix(x, knots)
    hat_matrix_trace = model.get_influence().hat_matrix_diag[:n].sum()
    
    return n * np.power(y - y_hat, 2).sum() / np.power(n - hat_matrix_trace, 2)

In [25]:

gcv_lambdas = np.power(1.5, np.linspace(0, 60, 100)) * 10**-8

In [26]:

gcv_scores = np.array([gcv_score(engine_y, engine_x, engine_knots, lambda_=lambda_) for lambda_ in gcv_lambdas])

In [27]:

lambda_best = gcv_lambdas[gcv_scores.argmin()]

In [28]:

lambda_best

Out[28]:

0.0021681971002683468

In [29]:

fig, (gcv_ax, plot_ax) = plt.subplots(ncols=2, figsize=(16, 6))

gcv_ax.plot(gcv_lambdas, gcv_scores);

gcv_ax.set_xscale('log');
gcv_ax.set_xlabel('$\lambda$');

gcv_ax.set_ylabel('$V$');

gcv_ax.set_title('GCV score');

model = fit(engine_y, engine_x, engine_knots, lambda_=lambda_best)
plot_ax.scatter(engine_x, engine_y, color=blue, alpha=0.5);
plot_ax.plot(plot_sizes, predict(plot_sizes, engine_knots, model));

plot_ax.set_xlim(-0.05, 1.05);
plot_ax.set_xlabel('Scaled capacity');

plot_ax.set_ylim(1.5, 5);
plot_ax.set_ylabel('Wear');

plot_ax.set_title('GCV optimal fit');

Volume¶

Additive model¶

In [30]:

tree_df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/datasets/trees.csv',
                      usecols=['Girth', 'Height', 'Volume'])

In [31]:

tree_df['scaled_girth'] = scale(tree_df.Girth)
tree_df['scaled_height'] = scale(tree_df.Height)

In [32]:

tree_df.head()

Out[32]:

	Girth	Height	Volume	scaled_girth	scaled_height
0	8.3	70	10.3	0.000000	0.291667
1	8.6	65	10.3	0.024390	0.083333
2	8.8	63	10.2	0.040650	0.000000
3	10.5	72	16.4	0.178862	0.375000
4	10.7	81	18.8	0.195122	0.750000

In [33]:

tree_x = tree_df[['scaled_girth', 'scaled_height']].values
tree_knots = tree_df[['scaled_girth', 'scaled_height']].quantile(qs, axis=0).values

In [34]:

girth_lambdas = np.power(2., np.arange(30)) * 1e-5
height_lambdas = np.power(2., np.arange(30)) * 1e-5

In [35]:

tree_gcv_scores = np.zeros((30, 30))

In [36]:

tree_y = tree_df.Volume.values

In [37]:

for i in xrange(30):
    for j in xrange(30):
        lambda_ = np.array([girth_lambdas[i], height_lambdas[j]])
        tree_gcv_scores[i, j] = gcv_score(tree_y, tree_x, tree_knots, lambda_=lambda_)

In [38]:

tree_gcv_row_min_col_ix = tree_gcv_scores.argmin(axis=1)
girth_lambda_ix = tree_gcv_scores[np.arange(30), tree_gcv_row_min_col_ix].argmin()
height_lambda_ix = tree_gcv_row_min_col_ix[girth_lambda_ix]

tree_lambda = np.array([girth_lambdas[girth_lambda_ix], height_lambdas[height_lambda_ix]])

In [39]:

tree_lambda

Out[39]:

array([  1.02400000e-02,   2.68435456e+03])

In [40]:

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

xv, yv = np.meshgrid(np.arange(30), np.arange(30))

ax.plot_wireframe(xv, yv, tree_gcv_scores, color='k', lw=0.5);

ax.set_xticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_xticks()]);
ax.set_xlabel(r'$\lambda_2 \times 1e-5$');

ax.set_yticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_yticks()]);
ax.set_ylabel(r'$\lambda_1 \times 1e-5$');

ax.set_zlabel('GCV score');

In [41]:

model = fit(tree_y, tree_x, tree_knots, lambda_=tree_lambda)

In [42]:

fig, ax = plt.subplots(figsize=(8, 6))

ax.scatter(tree_y, predict(tree_x, tree_knots, model), color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()
MARGIN = 5

ax.plot((v_min - MARGIN, v_max + MARGIN), (v_min - MARGIN, v_max + MARGIN), '--',
        c='k', label='Perfect agreement');

ax.set_xlim(v_min - MARGIN, v_max + MARGIN);
ax.set_xlabel('Actual volume');

ax.set_ylim(v_min - MARGIN, v_max + MARGIN);
ax.set_ylabel('Predicted volume');

ax.legend(loc=2);

In [43]:

def predict_component_function(x, icol, knots, model):
    X = design_matrix(x, knots)
    
    q = knots.shape[0] + 2
    first_col = min(1, icol) * q + max(0, icol - 1) * (q - 1)
    last_col = first_col + q - min(1, icol)
    
    X_ = np.zeros_like(X)
    X_[:, first_col:last_col] = X[:, first_col:last_col]
        
    return model.predict(X_)

In [44]:

fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 0], predict_component_function(tree_plot_x, 0, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 0], predict_component_function(tree_x, 0, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(bottom=0);
ax.set_ylabel('$f_1$');

In [45]:

fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 1], predict_component_function(tree_plot_x, 1, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 1], predict_component_function(tree_x, 1, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(bottom=-0.25);
ax.set_ylabel('$f_1$');

Generalized additive model¶

The model is

$$\log(\mathbb{E}(\textrm{Volume})) = f_1(\textrm{Girth}) + f_2(\textrm{Height}),$$

where volume is gamma distributed

In [46]:

def penalized_irls_step(y, X, B, knots, model=None):    
    n = y.size
    
    # model is None for the first step
    if model is None:
        eta = np.log(y)
    else:
        eta = model.predict(X)

    mu = np.exp(eta)
    
    X_ = np.vstack((X, B))
    
    z = (y - mu) / mu + eta
    z_ = np.hstack((z, np.zeros(B.shape[0]))).T
    
    new_model = sm.OLS(z_, X_).fit()
    rss = np.power(z - new_model.predict(X), 2).sum()
    gcv_score = rss / np.power(n - new_model.get_influence().hat_matrix_diag[:n].sum(), 2)
    
    return rss, gcv_score, new_model

In [47]:

def fit_gam(y, x, knots, lambda_=None, rtol=1e-4):
    X = design_matrix(x, knots)
    S = penalty_matrix(knots, lambda_=lambda_)
    B = matrix_sqrt(S)    
    
    model = None
    old_rss, rss = 0, 1

    while np.abs(rss - old_rss) > rtol * old_rss:
        old_rss = rss
        
        rss, gcv_score, model = penalized_irls_step(y, X, B, knots, model=model)

    return gcv_score, model

In [48]:

tree_gam_gcv_scores = np.empty((30, 30))

In [49]:

for i in xrange(30):
    for j in xrange(30):
        if j == 0 and i % 5 == 0:
            print 'i = {}'.format(i)

        lambda_ = np.array([girth_lambdas[i], height_lambdas[j]])
        tree_gam_gcv_scores[i, j], _ = fit_gam(tree_y, tree_x, tree_knots, lambda_=lambda_)

i = 0
i = 5
i = 10
i = 15
i = 20
i = 25

In [50]:

tree_gam_gcv_row_min_col_ix = tree_gam_gcv_scores.argmin(axis=1)
gam_girth_lambda_ix = tree_gam_gcv_scores[np.arange(30), tree_gam_gcv_row_min_col_ix].argmin()
gam_height_lambda_ix = tree_gam_gcv_row_min_col_ix[gam_girth_lambda_ix]

tree_gam_lambda = np.array([girth_lambdas[gam_girth_lambda_ix], height_lambdas[gam_height_lambda_ix]])

In [51]:

tree_gam_lambda

Out[51]:

array([  1.02400000e-02,   2.68435456e+03])

In [52]:

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')

xv, yv = np.meshgrid(np.arange(30), np.arange(30))

ax.plot_wireframe(xv, yv, tree_gam_gcv_scores, color='k', lw=0.5);

ax.set_xticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_xticks()]);
ax.set_xlabel(r'$\lambda_2 \times 1e-5$');

ax.set_yticklabels([r'$2^{' + str(int(tick)) + r'}$' for tick in ax.get_yticks()]);
ax.set_ylabel(r'$\lambda_1 \times 1e-5$');

ax.set_zlabel('GCV score');

In [53]:

_, model = fit_gam(tree_y, tree_x, tree_knots, lambda_=tree_gam_lambda)

In [54]:

fig, ax = plt.subplots(figsize=(8, 6))

ax.scatter(tree_y, predict(tree_x, tree_knots, model, link=np.exp), color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()
MARGIN = 5

ax.plot((v_min - MARGIN, v_max + MARGIN), (v_min - MARGIN, v_max + MARGIN), '--',
        c='k', label='Perfect agreement');

ax.set_xlim(v_min - MARGIN, v_max + MARGIN);
ax.set_xlabel('Actual volume');

ax.set_ylim(v_min - MARGIN, v_max + MARGIN);
ax.set_ylabel('Predicted volume');

ax.legend(loc=2);

In [55]:

fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 0], predict_component_function(tree_plot_x, 0, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 0], predict_component_function(tree_x, 0, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(2, 4.05);
ax.set_ylabel('$f_1$');

In [56]:

fig, ax = plt.subplots(figsize=(8, 6))

tree_plot_x = np.column_stack((np.linspace(0, 1, 100), np.linspace(0, 1, 100)))
ax.plot(tree_plot_x[:, 1], predict_component_function(tree_plot_x, 1, tree_knots, model),
        '--', c='k', alpha=0.5, lw=0.5);

ax.scatter(tree_x[:, 1], predict_component_function(tree_x, 1, tree_knots, model),
           color=blue, alpha=0.5);

v_min, v_max = tree_df.Volume.min(), tree_df.Volume.max()

ax.set_xlim(-0.05, 1.05);
ax.set_xlabel('Scaled girth');

ax.set_ylim(-0.01, 0.4);
ax.set_ylabel('$f_1$');

In [ ]: