%pylab inline --no-import import pylab as pl import seaborn as sns import numpy as np from scipy import stats import pandas as pd from IPython.display import HTML, display from sklearn.preprocessing import label_binarize, LabelEncoder sns.set(context = 'notebook', style = 'whitegrid') x = np.linspace(0, 14, 100) y1 = np.sin(x + .5) y2 = np.sin(x + 4*.5) * 3 #c1, c2 = sns.color_palette("deep", n_colors = 2, ) pl.plot(x, y1) pl.fill_between(x, y1 -.5, y1+.5, alpha = .2) pl.plot(x, y2) pl.fill_between(x, y2 -.5, y2+.5, alpha = .2) weather = pd.read_csv('../data/weather.csv', parse_dates=['Date']) weather.columns sns.boxplot(weather.loc[:, ['MinTemp', 'MaxTemp', 'Humidity9am', 'Humidity3pm']]) sns.boxplot(weather.loc[:, 'MinTemp'], groupby=weather.RainTomorrow) pl.title('MinTemp ~ RainTomorrow') sns.regplot(x = weather.Pressure9am, y = weather.Temp9am, xlabel='Pressure9am', ylabel = 'Temp9am',) ## A more data frame friendly way sns.regplot(x = 'Pressure3pm', y = 'Temp3pm', data = weather, corr_func=stats.kendalltau,) sns.lmplot(x = 'Pressure3pm', y = 'Temp3pm', color = 'RainToday', order = 2, data = weather, ) sns.lmplot(x = 'Pressure3pm', y = 'Temp3pm', col = 'RainToday', fit_reg=False, data = weather, ) sns.lmplot(x = 'Pressure3pm', y = 'Temp3pm', col = 'RainToday', color = 'RainTomorrow', data = weather, ) sns.lmplot(x = 'Pressure3pm', y = 'Temp3pm', col = 'RainToday', row = 'RainTomorrow', data = weather, ) df = weather.copy() df['RainTodayEncoded'] = LabelEncoder().fit_transform(df.RainToday) sns.lmplot(x = 'RainTodayEncoded', y = 'Temp3pm', color = 'RainTomorrow', data = df, ) df = weather.copy() df['RainTomorrowBinary'] = label_binarize(weather.RainTomorrow, classes=['Yes', 'No']).ravel() sns.lmplot(x = 'Pressure3pm', y = 'RainTomorrowBinary', logistic=True, data = df, ) ## partial regression of Temp9am, Temp3pm, conditioning on MaxTemp sns.lmplot('Temp9am', 'Temp3pm', weather) sns.lmplot('Temp9am', 'Temp3pm', weather, x_partial='MaxTemp') pl.figure(figsize=(25, 25)) sns.corrplot(weather) sns.coefplot('Temp3pm ~ RainToday + Pressure3pm + RainToday * Pressure3pm + MinTemp + MaxTemp + MinTemp * MaxTemp', data = weather, intercept=True) print stats.skew(weather.loc[:, ['MinTemp', 'MaxTemp']]) sns.kdeplot(weather.loc[:, ['MinTemp']], label = 'MinTemp') sns.kdeplot(weather.loc[:, ['MaxTemp']], label = 'MaxTemp') pl.title('kde') pl.legend(loc = 'best') ## estimate lambda for Box-Cox transform for lamb in np.linspace(-1, 1, 50): print lamb, stats.skew(map(lambda x: (x**lamb - 1.)/lamb, np.array(weather.loc[:, ['MaxTemp']]))) sns.kdeplot(map(lambda x: (x**.3 - 1.)/.3, np.array(weather.loc[:, ['MaxTemp']])), label = '$log(MaxTemp)$') print stats.skew(map(lambda x: (x**.3 - 1.)/.3, np.array(weather.loc[:, ['MaxTemp']]))) n_pts = 29 x = np.linspace(0, 14, n_pts) true_data = np.sin(x) np.random.seed(9221999) n_subjs = 20 subj_noise = np.random.rand(20) * 2 subj_data = np.array([true_data + # real signal np.random.randn() + # subject specific offset from real signal np.random.randn(n_pts) * (subj_noise[s]) # sample specific error with subject specific variance for s in range(n_subjs)]) sns.tsplot(x, subj_data, err_style="ci_bars") df = weather.copy() df.sort('Date') sns.tsplot(df.Date, df.Cloud3pm, interpolate=False, )