import numpy as np
import pandas as pd
import patsy
import seaborn as sns
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import itertools


%matplotlib inline

sns.set_style("white")

betas = np.array([1, 1, -2.])

x = np.linspace(0, 1) # vector for x 
y = np.linspace(0, 1)  # vector for y
X, Y = np.meshgrid(x, y)  # grid of X and Y
# the height Z is the sum of the weighted X and Ys (weighted by our betas):
Z = X*betas[0] + \
    Y*betas[1] + \
    X*Y*betas[2]

# create a matplotlib figure window, add 3D axes
# http://matplotlib.org/examples/mplot3d/subplot3d_demo.html
fig = plt.figure(figsize=(12, 4))
# fig = plt.figure(figsize=plt.figaspect(0.3))


# first view:
ax = fig.add_subplot(1, 3, 1, projection='3d')
ax.plot_surface(X, Y, Z, alpha=1, cmap=cm.coolwarm)
ax.view_init(elev=15, azim=20)
ax.set_xlabel('Fire')
ax.set_xticks([0, .5, 1])
ax.set_ylabel('Hydrogen')
ax.set_yticks([0, .5, 1])
ax.set_zlabel('Lift')
ax.set_zticks([])

# second view:
ax = fig.add_subplot(1, 3, 2, projection='3d')
ax.plot_surface(X, Y, Z, alpha=1, cmap=cm.coolwarm)
ax.view_init(elev=20, azim=55)
ax.set_xlabel('Fire')
ax.set_xticks([0, .5, 1])
ax.set_ylabel('Hydrogen')
ax.set_yticks([0, .5, 1])
ax.set_zlabel('Lift')
ax.set_zticks([])

# third view:
ax = fig.add_subplot(1, 3, 3, projection='3d')
ax.plot_surface(X, Y, Z, alpha=1, cmap=cm.coolwarm)
ax.view_init(elev=10, azim=80)
ax.set_xlabel('Fire')
ax.set_xticks([0, .5, 1])
ax.set_ylabel('Hydrogen')
ax.set_yticks([0, .5, 1])
ax.set_zlabel('Lift')
ax.set_zticks([])

plt.show()

# make a z vector
z = x*betas[0] + \
    y*betas[1] + \
    x*y*betas[2]
    
# put all the vectors into a new dataframe:
df = pd.DataFrame({'fire': x,
                   'hydrogen': y,
                   'lift': z})

# fit regression with interaction, plot:
sns.interactplot('fire', 'hydrogen', 'lift', df,
                 cmap="coolwarm", filled=True, levels=25);

# import the dataset csv file into a Pandas dataframe object:
sleep = pd.read_csv('../Datasets/sleep.txt', sep='\t') 
sleep = sleep.dropna(axis=0)  # drop on axis 0 (i.e. rows)

# variable creation:
sleep['log_BodyWt'] = np.log(sleep['BodyWt'])

sleep['danger_cat'] = 'low'  # set as a string, "low" for all.
sleep.loc[sleep['Danger'] > 3, 'danger_cat'] = 'high'  # set values of 4 and 5 to "high"

sleep['life_cat'] = pd.qcut(sleep['LifeSpan'], [0, .33, .66, 1])
sleep['life_cat'] = sleep['life_cat'].cat.rename_categories(['short', 'med', 'long']) # rename categories
sleep['life_cat'] = sleep['life_cat'].astype('object')  # make it an object, rather than a category

sleep.info()

X = patsy.dmatrix('log_BodyWt * Gestation', sleep) 
X

# convert to pandas df so seaborn is happy:
tmp = pd.DataFrame(X, columns = ['intercept', 'log_bodyweight', 'gestation', 'interaction'] )
# plot pairplot:
sns.pairplot(tmp, vars=['log_bodyweight', 'gestation', 'interaction']);

fit_2 = smf.glm('TotalSleep ~ log_BodyWt * Gestation', sleep).fit()
print(fit_2.summary())

x = np.linspace(sleep['log_BodyWt'].min(), sleep['log_BodyWt'].max()) # vector for x (log bodyweight)
y = np.linspace(sleep['Gestation'].min(), sleep['Gestation'].max())  # vector for y (gestation)
X, Y = np.meshgrid(x, y)  # grid of X and Y
# the height Z is the sum of the weighted X and Ys (weighted by our betas):
Z = fit_2.params[0] + \
    X*fit_2.params[1] + \
    Y*fit_2.params[2] + \
    X*Y*fit_2.params[3] # note the addition of the interaction term here!

# create a matplotlib figure window, add 3D axes
# http://matplotlib.org/examples/mplot3d/subplot3d_demo.html
fig = plt.figure(figsize=(12, 4))

# first view:
ax = fig.add_subplot(1, 3, 1, projection='3d')
ax.plot_surface(X, Y, Z, cmap=cm.coolwarm)
ax.view_init(elev=10, azim=20)
ax.set_xlabel('log bodyweight (kg)')
ax.set_ylabel('gestation (days)')
ax.set_zlabel('Total sleep (hours)')

# second view:
ax = fig.add_subplot(1, 3, 2, projection='3d')
ax.plot_surface(X, Y, Z, cmap=cm.coolwarm)
ax.view_init(elev=10, azim=45)
ax.set_xlabel('log bodyweight (kg)')
ax.set_ylabel('gestation (days)')
ax.set_zlabel('Total sleep (hours)')

# third view:
ax = fig.add_subplot(1, 3, 3, projection='3d')
ax.plot_surface(X, Y, Z, cmap=cm.coolwarm)
ax.view_init(elev=10, azim=70)
ax.set_xlabel('log bodyweight (kg)')
ax.set_ylabel('gestation (days)')
ax.set_zlabel('Total sleep (hours)')

plt.show()

sns.interactplot("log_BodyWt", "Gestation", "TotalSleep", sleep, 
                 cmap="coolwarm", filled=True, levels=25);

# read in the data, rename some values:
planes = pd.read_csv('../Datasets/planes.txt', sep='\t')

# column names to lowercase:
planes.columns = [c.lower() for c in planes.columns]

# turn "paper" variable into an object, not int64:
planes['paper'] = planes['paper'].astype(object)

planes.replace(to_replace={'paper': {1: '80',
                                     2: '50'},
                           'angle': {1: 'horz',
                                     2: '45deg '},
                           'plane': {1: 'sleek',
                                     2: 'simple'}}, 
                inplace=True)

# rename "plane" to "design":
planes = planes.rename(columns={'plane': 'design'})

print(planes)

fit = smf.glm('distance ~ design * paper', planes).fit()
print(fit.summary())

X = patsy.dmatrix('~ design * paper', planes)
X

# make a data frame containing the predictions, using the expand grid function.

# define expand grid function:
def expand_grid(data_dict):
    """ A port of R's expand.grid function for use with Pandas dataframes.
    
    from http://pandas.pydata.org/pandas-docs/stable/cookbook.html?highlight=expand%20grid
    
    """
    rows = itertools.product(*data_dict.values())
    return pd.DataFrame.from_records(rows, columns=data_dict.keys())


# build a new matrix with expand grid:
preds = expand_grid({'paper': ['80', '50'],
                     'design': ['sleek', 'simple']})
preds_2 = preds.copy()

preds['distance'] = fit.predict(preds)

# also fit a model with no interaction for comparison:
fit_2 = smf.glm('distance ~ paper + design', planes).fit()
preds_2['distance'] = fit_2.predict(preds_2)

# to plot model predictions against data, we should merge them together:
# see answer (https://stackoverflow.com/questions/28239359/showing-data-and-model-predictions-in-one-plot-using-seaborn-and-statsmodels)
merged = pd.concat(dict(data=planes, 
                        additive=preds_2,
                        interaction=preds),
                   names=['type']).reset_index()
print(merged)

# plot with seaborn:
g = sns.factorplot('design', 'distance', 'paper', 
                   data=merged, 
                   col='type',
                   kind='point',
                   size=3.5)
g.fig.subplots_adjust(wspace=0.3)
sns.despine(offset=10);

print(fit_2.summary())

X = patsy.dmatrix('~ log_BodyWt * danger_cat', sleep)
X

fit_2 = smf.glm('TotalSleep ~ log_BodyWt * danger_cat', sleep).fit()  # interaction model
print(fit_2.summary())

# Plot the two models against each other. 

# we do this slightly differently to the categorical examples above, 
# due to the need for some data re-arranging to plot the data as points
# and the model predictions as lines.

preds = expand_grid(
                {'log_BodyWt': np.linspace(-6, 9, 2),
                 'life_cat': ['short', 'med', 'long'],
                 'danger_cat': ['low', 'high']})

fit = smf.glm('TotalSleep ~ log_BodyWt + danger_cat', sleep).fit()  # additive model
fit_2 = smf.glm('TotalSleep ~ log_BodyWt * danger_cat', sleep).fit()  # interaction model

# add model predictions to data frames:
preds['yhat'] = fit.predict(preds)
preds['yhat_2'] = fit_2.predict(preds)

merged = sleep.append(preds)


# Plot additive model: 
g = sns.FacetGrid(merged, hue='danger_cat', size=4)
# use the `map` method to add stuff to the facetgrid axes:
g.map(plt.scatter, "log_BodyWt", "TotalSleep")
g.map(plt.plot, "log_BodyWt", "yhat")
sns.despine(offset=10)
g.set_axis_labels('Bodyweight [log kg]', 'Sleep [hours]')
g.add_legend(title='Danger')


# plot interaction model:

g = sns.FacetGrid(merged, hue='danger_cat', size=4)
# use the `map` method to add stuff to the facetgrid axes:
g.map(plt.scatter, "log_BodyWt", "TotalSleep")
g.map(plt.plot, "log_BodyWt", "yhat_2")
sns.despine(offset=10)
g.set_axis_labels('Bodyweight [log kg]', 'Sleep [hours]')
g.add_legend(title='Danger')


from IPython.core.display import HTML


def css_styling():
    styles = open("../custom_style.css", "r").read()
    return HTML(styles)
css_styling()