%pylab inline
import seaborn
seaborn.set()
colors = seaborn.color_palette()
import moss

n_obs = 100
X = column_stack((randn(n_obs, 4), ones(n_obs)))
w = rand(5)
noise = randn(n_obs) * 5
y = dot(X, w) + noise

ols_fit = lambda X, y: dot(dot(inv(dot(X.T, X)), X.T), y)
w_ols = ols_fit(X, y)

bar(arange(5) + .1, w, .4, label="actual weights")
bar(arange(5) + .5, w_ols, .4, color=colors[1], label="estimated weights");

iterations = 1000
w_boot = zeros((iterations, len(w_ols)))
for i in xrange(iterations):
    # Get an index vector to sample with replacement
    samp = randint(0, n_obs, n_obs)
    # Resample the data
    samp_X = X[samp, :]
    samp_y = y[samp]
    # Fit the model on this iteration
    w_boot[i, :] = ols_fit(samp_X, samp_y)

# Get the 95% confidence interval for each weight
# (across each column in the design matrix)
w_ols_ci = moss.percentiles(w_boot, [2.5, 97.5], axis=0)

# The bar() function expects errorbar coordinates to be relative to bar
# height; seaborn has a convenience function to convert them
ebar_coords = seaborn.ci_to_errsize(w_ols_ci, w_ols)

# Plot the confidence intervals as error bars on the barplot from above
bar(arange(5) + .1, w, .4, label="actual weights")
bar(arange(5) + .5, w_ols, .4, yerr=ebar_coords,
    color=colors[1], ecolor="gray", label="estimated weights");

X = column_stack((randn(1000, 4), ones(1000)))
w = rand(5)

# Large sample, high noise: N = 1000, noise std = 5
y_large_n = dot(X, w) + randn(1000) * 5

# Small sample, low noise: high signal-to-noise ratio
y_lownoise = dot(X[:100], w) + randn(100)

# Small sample, high noise
y_noisy = dot(X[:100], w) + randn(100) * 5

# Bootstrap all three models
n_boot = 1000
w_boot1 = moss.bootstrap(X, y_large_n, n_boot=n_boot, func=ols_fit)
w_boot2 = moss.bootstrap(X[:100], y_lownoise, n_boot=n_boot, func=ols_fit)
w_boot3 = moss.bootstrap(X[:100], y_noisy, n_boot=n_boot, func=ols_fit)

# The median of the bootstrapped estimates gives the weights for each
# model; you could also use the mean if you prefer.
w_model1 = median(w_boot1, axis=0)
w_model2 = median(w_boot2, axis=0)
w_model3 = median(w_boot3, axis=0)

# Compute the 95% confidence intervals on the parameter estimates
pcts = [2.5, 97.5]
ci1 = moss.percentiles(w_boot1, pcts, axis=0)
ci2 = moss.percentiles(w_boot2, pcts, axis=0)
ci3 = moss.percentiles(w_boot3, pcts, axis=0)

barx = linspace(0, 1, 6)[:-1]
models = [w, w_model1, w_model2, w_model3]
cis = [None] + map(seaborn.ci_to_errsize, [ci1, ci2, ci3], models[1:])
for i, model in enumerate(models):
    bar(barx + i, model, 0.2, yerr=cis[i], color=colors[i], ecolor="gray")
xticks([.5, 1.5, 2.5, 3.5], ["model", "large N", "low noise", "noisy"]);

# We will use the R^2 function from scikit-learn
from sklearn.metrics import r2_score
r2_model1 = r2_score(y_large_n, dot(X, w_model1))
r2_model2 = r2_score(y_lownoise, dot(X[:100], w_model2))
r2_model3 = r2_score(y_noisy, dot(X[:100], w_model3))
print 'Model 1 is reliable but not accurate. R^2 = %.2f' % r2_model1
print 'Model 2 is reliable and accurate. R^2 = %.2f' % r2_model2
print 'Model 3 is neither reliable nor accurate. R^2 = %.2f' % r2_model3
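A quick cross-check, offered as a sketch rather than part of the original analysis: for OLS the sampling covariance of the weights has the closed form sigma^2 * inv(X'X), so bootstrap intervals like the ones above should roughly agree with the analytic standard errors. The `ols_standard_errors` helper below is a hypothetical illustration in plain pylab-style numpy.

# Sketch (not from the original notebook): analytic OLS standard errors
# as a sanity check on the bootstrap intervals.
def ols_standard_errors(X, y):
    # cov(w_ols) = sigma^2 * inv(X'X), with sigma^2 estimated from
    # the residuals as RSS / (n - p)
    n, p = X.shape
    resid = y - dot(X, ols_fit(X, y))
    sigma2 = dot(resid, resid) / (n - p)
    return sqrt(diag(sigma2 * inv(dot(X.T, X))))

# Normal-approximation 95% intervals for the noisy model; these should
# be in the same ballpark as the percentile interval ci3 above
se3 = ols_standard_errors(X[:100], y_noisy)
print w_model3 - 1.96 * se3
print w_model3 + 1.96 * se3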
# Here are the weights
w = rand(3)

# Simulate data from 3 uncorrelated regressors
X_orth = randn(100, 3)

# Signal with no noise
y_orth = dot(X_orth, w)

# Add noise with a std proportional to the signal std
y_orth = y_orth + std(y_orth) * randn(100)

# Simulate data from 3 correlated regressors. To do this, first define
# a covariance matrix, S
cor = 0.8  # the correlation to induce between the variables
S = eye(3)
S[S == 0] = cor

# Then simulate data from a 3-D gaussian (mean 0) with this covariance matrix
X_corr = random.multivariate_normal(zeros(3), S, 100)
y_corr = dot(X_corr, w)
y_corr = y_corr + std(y_corr) * randn(100)

# Bootstrap the model fits for the correlated and uncorrelated data
n_boot = 1000
boot_orth = moss.bootstrap(X_orth, y_orth, n_boot=n_boot, func=ols_fit)
boot_corr = moss.bootstrap(X_corr, y_corr, n_boot=n_boot, func=ols_fit)

# Calculate the 95% confidence intervals on the two sets of weights
pcts = [2.5, 97.5]
ci1 = moss.percentiles(boot_orth, pcts, axis=0)
ci2 = moss.percentiles(boot_corr, pcts, axis=0)

barx = linspace(0, 1, 4)[:-1]
models = [w, median(boot_orth, axis=0), median(boot_corr, axis=0)]
cis = [None] + map(seaborn.ci_to_errsize, [ci1, ci2], models[1:])
for i, model in enumerate(models):
    bar(barx + i, model, 1. / 3, yerr=cis[i], color=colors[i], ecolor="gray")
xticks([.5, 1.5, 2.5], ["model", "orthogonal", "correlated"]);
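One way to see why the correlated design produces wider intervals, again as a sketch rather than part of the original notebook: the variance inflation factor (VIF) measures how well each regressor can be predicted from the others, and it grows with collinearity. The `vif` helper below is a hypothetical illustration; it assumes zero-mean regressors (true of both simulated designs here), so it omits an intercept term.

# Sketch (not from the original notebook): variance inflation factors.
# VIF_j = 1 / (1 - R^2_j), where R^2_j comes from regressing column j
# on the remaining columns. Assumes zero-mean regressors, so no
# intercept column is added.
def vif(X):
    vifs = []
    for j in range(X.shape[1]):
        others = delete(X, j, axis=1)
        resid = X[:, j] - dot(others, ols_fit(others, X[:, j]))
        r2_j = 1 - resid.var() / X[:, j].var()
        vifs.append(1 / (1 - r2_j))
    return array(vifs)

print "orthogonal design:", vif(X_orth)  # should be close to 1
print "correlated design:", vif(X_corr)  # inflated by the 0.8 correlations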
# Assume that this is the population of data
x_pop = randn(1000) + 4
noise = randn(1000) * 5
y_pop = 1.5 * x_pop ** 2 + noise

# And this is a random sample we collect in an experiment
n = 10
samp = permutation(arange(len(y_pop)))[:n]
x_samp = x_pop[samp]
y_samp = y_pop[samp]

plot(x_pop, y_pop, "o", label="population")
plot(x_samp, y_samp, "o", color=colors[2], label="sample");

from scipy.optimize import curve_fit
modelfunc = lambda x, a, b, c: a * x ** b + c

# Bootstrap the linear and nonlinear fitting procedures
w_ols_boot, w_nonlin_boot = [], []
for i in xrange(100):
    # Get the bootstrap index
    boot_samp = random.randint(0, n, n)
    boot_x = x_samp[boot_samp]
    boot_y = y_samp[boot_samp]
    # Fit a linear model
    w_ols_boot.append(lstsq(moss.add_constant(boot_x), boot_y)[0])
    # Fit the nonlinear model
    w_nonlin_boot.append(curve_fit(modelfunc, boot_x, boot_y, maxfev=100000)[0])
w_ols_boot = array(w_ols_boot)
w_nonlin_boot = array(w_nonlin_boot)

# Visualize the reliability of the parameter estimates for both models
ols_model = median(w_ols_boot, axis=0)
pcts = [16, 84]
ols_ci = percentile(w_ols_boot, pcts, axis=0)
nonlin_model = median(w_nonlin_boot, axis=0)
nonlin_ci = percentile(w_nonlin_boot, pcts, axis=0)

ols_ebars = seaborn.ci_to_errsize(ols_ci, ols_model)
bar(range(2), ols_model, 1, yerr=ols_ebars, ecolor="gray", label="OLS")
nonlin_ebars = seaborn.ci_to_errsize(nonlin_ci, nonlin_model)
bar(range(2, 5), nonlin_model, 1, yerr=nonlin_ebars,
    ecolor="gray", color=colors[1], label="nonlinear")
xticks(arange(5) + .5, ["slope", "constant", "slope", "exponent", "constant"])
legend();

scatter(x_pop, y_pop, facecolor='none')
l, r = xlim()
xx = linspace(l, r, 200)

# Loop over each bootstrap iteration and compute that model's
# prediction for all of the x values
y_model = []
for w_i in w_ols_boot:
    y_i = w_i[0] * xx + w_i[1]
    y_model.append(y_i)

# Plot the linear prediction with a transparent error band
pcts = [16, 84]
plot(xx, median(y_model, axis=0))
lin_ci = moss.percentiles(y_model, pcts, axis=0)
fill_between(xx, *lin_ci, color=colors[0], alpha=.2)

# Now derive the nonlinear predictions and plot them
y_model_nlin = []
for w_i in w_nonlin_boot:
    y_i = modelfunc(xx, *w_i)
    y_model_nlin.append(y_i)
plot(xx, median(y_model_nlin, axis=0))
nlin_ci = moss.percentiles(y_model_nlin, pcts, axis=0)
fill_between(xx, *nlin_ci, color=colors[1], alpha=.2);

scatter(x_pop, y_pop, facecolor='none')
plot(xx, mean(y_model_nlin, 0), label="mean", color=colors[3])
plot(xx, median(y_model_nlin, 0), label="median", color=colors[4])
legend(loc="best");

figure(figsize=(9, 9))
plot_type = ["slope", "exponent", "constant"]
for pi in range(3):
    subplot(3, 1, pi + 1)
    hist(w_nonlin_boot[:, pi], 25, color=colors[pi])
    title(plot_type[pi]);

figure(figsize=(9, 6))
plot_type = ["slope", "constant"]
for pi in range(2):
    subplot(2, 1, pi + 1)
    hist(w_ols_boot[:, pi], 25, color=colors[pi])
    title(plot_type[pi]);
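The histograms make one more point worth stating: the bootstrap distributions of the nonlinear parameters are visibly skewed, which is why percentile intervals are a safer summary than mean plus or minus a multiple of the std. A quick check, sketched with scipy.stats (not part of the original notebook):

# Sketch (not from the original notebook): quantify the skew that the
# histograms above suggest, and compare interval constructions.
from scipy.stats import skew
print "skew of nonlinear parameters:", skew(w_nonlin_boot, axis=0)

# For the exponent (column 1), a percentile interval respects the
# asymmetry; a normal approximation centered on the mean does not
b = w_nonlin_boot[:, 1]
print "percentile interval:", percentile(b, [16, 84])
print "normal approximation:", (b.mean() - b.std(), b.mean() + b.std())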