%pylab inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext rmagic

def scale_design_mtx(X):
    """Utility to scale the design matrix for display"""
    mi, ma = X.min(axis=0), X.max(axis=0)
    # Columns that are not constant; avoids dividing by a zero range
    col_neq = (ma - mi) > 1.e-8
    Xs = np.ones_like(X)
    mi = mi[col_neq]
    ma = ma[col_neq]
    Xs[:, col_neq] = (X[:, col_neq] - mi) / (ma - mi)
    return Xs

def show_x(X, title='Design matrix'):
    """Utility to show a design matrix"""
    N, P = X.shape
    plt.imshow(scale_design_mtx(X), interpolation='nearest', cmap=plt.cm.gray)
    plt.title(title)
    # Only integer ticks for the design matrix columns
    ax = plt.gca()
    ticks = ax.get_xticks()
    ax.set_xticks(ticks[(ticks == np.round(ticks)) & (ticks >= 0)])
    ax.set_xlim(-0.5, P - 0.5)

show_x(np.random.normal(size=(10, 3)), 'My design')

N = 15
x1 = [49., 50, 45, 53, 69, 73, 35, 40, 36, 30, 49, 36, 73, 61, 74]
Y = [132.03, 98.64, 146.24, 97.9, 145.12, 191.78, 155.34, 123.08,
     140.87, 122.51, 146.08, 89.3, 134.52, 89.16, 136.08]

%%R -i x1,Y -o X
XYfit = lm(Y ~ x1)
X = model.matrix(XYfit)
X

show_x(X, 'Simple linear regression')

%R model.matrix(~ x1)

%R model.matrix(~x1 + 1)

C = np.ones((N,))
%Rpush C
%R model.matrix(~x1 + C)

%R model.matrix(~x1 - 1)

x2 = np.arange(N)
%Rpush x2
X = %R model.matrix(~ x1 + x2)
show_x(X, 'Two variables')

X = %R model.matrix(~ x2 + x1)
show_x(X, 'Same two variables')

X = %R model.matrix(~x1 + x2 + x1:x2)
show_x(X, "Two variables and their interaction")

X[:, 3] == x1 * x2

X2 = %R model.matrix(~ x1:x2 + x1 + x2)
if np.all(X == X2):
    print "Designs are the same"

%R print(coef(lm(Y ~ x1:x2 + x1 + x2)))

x3 = np.random.normal(size=(N,))
x4 = np.random.normal(size=(N,))
%Rpush x3 x4
%R print(coef(lm(Y ~ x1 + x2 + x1:x2 + x3 + x4)))

nationality = ['USA'] * 5 + ['UK'] * 5 + ['France'] * 5
%Rpush nationality
%R print(model.matrix(~ nationality))

%%R
nationality = factor(nationality)
print(levels(nationality))
print(model.matrix(~ nationality))

%R print(model.matrix(~ C(nationality, helmert)))

%R print(model.matrix(~ C(nationality, c(2, -1, -1))))

nationality = np.array(nationality)
indicate_usa = nationality == 'USA'
indicate_uk = nationality == 'UK'
indicate_france = nationality == 'France'
X = np.column_stack((indicate_usa, indicate_uk, indicate_france)).astype(int)
X

W = np.column_stack((np.ones(N), X))
W

Z = %R model.matrix(~ nationality)
Z

C = np.array([[0, 0, 1], [0, 1, 0], [1, -1, -1]]).T
np.dot(Z, C)

np.all(np.dot(X, np.linalg.inv(C)) == Z)

X_indicator = %R model.matrix(~ nationality-1)
X_contrast = %R model.matrix(~ nationality)
C_map = %R contr.treatment(3)  # Three levels for nationality
np.all(np.dot(X_indicator, C_map) == X_contrast[:, 1:])

C_map_with_inter = np.column_stack(([1, 1, 1], C_map))
np.all(np.dot(X_indicator, C_map_with_inter) == X_contrast)

Z = %R model.matrix(~ C(nationality, helmert))
C = np.array([[1./3, -0.5, -1./6],
              [1./3, 0.5, -1./6],
              [1./3, 0, 1./3]]).T
np.set_printoptions(suppress=True)  # Suppress very small floating point values
np.dot(Z, C)  # Same as X_indicator

%R model.matrix(~ nationality-1)

awesome = np.array(["Yes", "No"] * 7 + ["Yes"])
awesome

%%R -i awesome
awesome = factor(awesome)
print(model.matrix(~ nationality + awesome))

indicator_columns = []
for country in 'USA', 'UK', 'France':
    for quality in 'Yes', 'No':
        indicator_columns.append((nationality == country) & (awesome == quality))
X = np.column_stack(indicator_columns).astype(int)
X

show_x(X, 'Interaction of nationality and awesome')
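# A minimal vectorized sketch of the same interaction matrix (our own
# construction, not R's): broadcasting each label vector against its levels
# gives the one-hot indicator matrices, and an outer product of columns
# gives every pairwise interaction. The *_levels and *_onehot names are
# made up here, and we assume the same level ordering as the loop above.
nat_levels = np.array(['USA', 'UK', 'France'])
awe_levels = np.array(['Yes', 'No'])
nat_onehot = (nationality[:, None] == nat_levels).astype(int)  # N x 3
awe_onehot = (awesome[:, None] == awe_levels).astype(int)  # N x 2
X_vec = (nat_onehot[:, :, None] * awe_onehot[:, None, :]).reshape(N, -1)
print np.all(X_vec == X)  # Same matrix as the nested-loop version above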
nat_cols = []
for country in 'USA', 'UK', 'France':
    nat_cols.append(nationality == country)
awe_cols = []
for quality in 'Yes', 'No':
    awe_cols.append(awesome == quality)
all_cols = []
for nat_col in nat_cols:
    for awe_col in awe_cols:
        all_cols.append(nat_col * awe_col)
X_again = np.column_stack(all_cols).astype(int)
show_x(X_again, 'Interaction by product of indicator columns')

X_r = %R model.matrix(~ awesome:nationality-1)
show_x(X_r, 'R interaction using indicator coding')

X_nations = %R model.matrix(~ nationality-1)
X_nations[:, 0] == X_r[:, 0] + X_r[:, 1]

X_awesome = %R model.matrix(~ awesome-1)
X_awesome[:, 0] == np.sum(X_r[:, (0, 2, 4)], axis=1)

%%R
fit_with_intercept = lm(Y ~ nationality)
print(model.matrix(fit_with_intercept))

%R print(summary(fit_with_intercept))

%%R
fit_no_intercept = lm(Y ~ nationality - 1)
print(summary(fit_no_intercept))

%R sum(abs(fit_no_intercept$residuals - fit_with_intercept$residuals))

%R fit_main_inters = lm(Y ~ nationality + awesome + nationality:awesome)
X_main_inters = %R model.matrix(fit_main_inters)
print X_main_inters

names = %R colnames(model.matrix(fit_main_inters))
for i, name in enumerate(names):
    print i, name

print np.all(X_main_inters[:, 4] == X_main_inters[:, 1] * X_main_inters[:, 3])
print np.all(X_main_inters[:, 5] == X_main_inters[:, 2] * X_main_inters[:, 3])

%R print(summary(fit_main_inters))

%R fit_inters = lm(Y ~ nationality:awesome-1)
X_inters = %R model.matrix(fit_inters)
print X_inters

%R print(summary(fit_inters))

C = np.dot(np.linalg.pinv(X_inters), X_main_inters)
np.allclose(np.dot(X_inters, C), X_main_inters)

C_ind2treat = %R contr.treatment(3)  # 3 factor levels for nationality
C_N2W = np.column_stack(([1, 1, 1], C_ind2treat))  # Add intercept-generating column
X_N = %R model.matrix(fit_no_intercept)
X_W = %R model.matrix(fit_with_intercept)
print np.all(np.dot(X_N, C_N2W) == X_W)  # Confirm we have the C' that maps X_N to X_W
C_W2N = np.linalg.inv(C_N2W)  # The C we want
print np.all(np.dot(X_W, C_W2N) == X_N)  # Confirm it's the C we want

B_N = %R coef(fit_no_intercept)  # No-intercept coefficients
B_N = B_N.reshape(-1, 1)  # Make into a column vector for the dot product
B_I_est = np.dot(C_W2N, B_N)  # Estimate coefficients for the with-intercept model
B_I = %R coef(fit_with_intercept)
print "Coefficients from intercept model", B_I
print "Coefficients recreated from non-intercept model", B_I_est.T

%%R
fit_sequential = lm(Y ~ nationality + awesome + nationality:awesome)
print(anova(fit_sequential))

%R print(anova(lm(Y ~ nationality)))

(2179.8 / 2) / (3481.3 / 9)

%%R
print(anova(lm(Y ~ nationality + awesome)))
print(anova(lm(Y ~ awesome + nationality)))

%R print(anova(lm(Y ~ nationality:awesome)))

%%R
print(attributes(terms(~A * B))$term)
print(attributes(terms(~A * B * C))$term)
print(attributes(terms(~A %in% B))$term)
print(attributes(terms(~A / B))$term)
print(attributes(terms(~A + B - A))$term)
print(attributes(terms(~A + B + A:B - B))$term)
print(attributes(terms(~(A + B + C)^2))$term)
print(attributes(terms(~(A + B + C)^3))$term)

%%R
print(attributes(terms(~A:B + 1 + B))$term)  # In fact the intercept does not appear in the terms
print(attributes(terms(~D + A:B:C + A + B + E))$term)

%R print(model.matrix(~ x1 + x2 + x1:x2 - 1))

x1 * x2

X_nat = %R model.matrix(~ nationality - 1)  # indicator coding
X_awe = %R model.matrix(~ awesome - 1)  # indicator coding
X_both = %R model.matrix(~ nationality:awesome - 1)
print X_nat
print X_awe
print X_both

%R model.matrix(~ x1:nationality - 1)

%R model.matrix(~ 1 + nationality)

%R model.matrix(~ nationality - 1)

%R print(model.matrix(~ nationality + awesome + nationality:awesome - 1))

%R print(model.matrix(~ nationality + nationality:awesome - 1))
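# The last two designs look different from the pure indicator coding of the
# interaction, but they can still span the same column space. A sketch of a
# check (ours, not part of the R output), reusing the pinv trick from above;
# X_a, X_b and C_ab are names we have made up for this check.
X_a = %R model.matrix(~ nationality + nationality:awesome - 1)
X_b = %R model.matrix(~ nationality:awesome - 1)
C_ab = np.dot(np.linalg.pinv(X_b), X_a)  # Least-squares mapping from X_b to X_a
# Should print True if every column of X_a lies in the span of X_b
print np.allclose(np.dot(X_b, C_ab), X_a)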
%R model.matrix(~ x1:nationality + x1:awesome)

X_nat = %R model.matrix(~ nationality - 1)  # indicator coding
X_awe = %R model.matrix(~ awesome - 1)  # indicator coding
X = np.column_stack((X_nat, X_awe))  # Our design with indicator coding for both factors
# Do an estimation. This is ordinary least squares,
# but it doesn't matter how we get the estimate.
B = np.dot(np.linalg.pinv(X), Y)
Y_hat = np.dot(X, B)  # Fitted values
print "OLS coefficient estimate", B
print "Fitted values", Y_hat

S = 100  # Any old constant value
# Make a new parameter vector with the constant applied. Note the copy:
# B[:] would give a view, so the += below would silently modify B as well.
B2 = B.copy()
B2[:3] += S
B2[3:] -= S
print "Parameters with constant added, subtracted", B2
print "Same Yhat?", np.allclose(Y_hat, np.dot(X, B2))  # Are the fitted values the same?

%R print(lm(Y ~ nationality:awesome))

popular = ['Yes'] * 8 + ['No'] * 7
overrated = ['Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No',
             'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No']

%%R -i popular,overrated
popular = factor(popular)
overrated = factor(overrated)
print(lm(Y ~ awesome + popular + overrated + awesome:popular:overrated))

X_3 = %R model.matrix(~ awesome + popular + overrated + awesome:popular:overrated)
show_x(X_3)
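# A closing check of our own (not part of the original R output): the formula
# above omits the two-way interactions, so it is worth asking numpy whether
# the coding R chose for X_3 still gives linearly independent columns.
print "Columns:", X_3.shape[1]
print "Rank:", np.linalg.matrix_rank(X_3)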