from ggplot import ggplot
import ggplot as gg
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import pandas as pd
import qgrid
import seaborn as sns
# Notebook setup: draw matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): presumably installs qgrid's notebook assets so show_grid()
# can render -- confirm against the qgrid version in use.
qgrid.nbinstall()
# Pull in the CSV, drop NAs
# header=7 takes the column names from row index 7 of the file, 'Date' is
# parsed into datetimes, and every row containing a missing value is dropped.
df = pd.read_csv('mthood_snotel.csv', header=7, parse_dates=['Date']).dropna()
# Display the cleaned frame as a grid.
qgrid.show_grid(df, remote_js=True)
# Let's start with some basic histograms of our key dimensions
# Set the figure size used by the plots that follow.
sns.set_context(rc={"figure.figsize": (15, 7)})
# Distribution of the precipitation accumulation column, using 50 bins.
sns.distplot(df['Precipitation Accumulation (in)'], bins=50)
<matplotlib.axes._subplots.AxesSubplot at 0x111854650>
# Distribution of the snow water equivalent column, using 100 bins.
sns.distplot(df['Snow Water Equivalent (in)'], bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0x111972710>
# Shaded kernel density estimates of the maximum, minimum and average air
# temperature columns, drawn one after another in that order.
for temp_col in ('Air Temperature Maximum (degF)',
                 'Air Temperature Minimum (degF)',
                 'Air Temperature Average (degF)'):
    sns.kdeplot(df[temp_col], shade=True)
# We can use Seaborn + IPython interact widgets to do quick comparison of dimensions
# Work on a copy without the 'Date' column; its column names become the
# choices offered for both axes.
subset = df.drop(['Date'], axis=1)
dims = subset.columns.tolist()


@interact
def linear_comp(x=dims, y=dims):
    """Draw a joint plot of column ``x`` against column ``y`` of ``subset``.

    The list-valued defaults are deliberate: ``interact`` reads them to
    build one selection widget per argument and then calls this function
    with the chosen column names.
    """
    sns.jointplot(x, y, data=subset, size=9)
# How closely do minimum and maximum temps follow one another?
# Minimum temperature on x, maximum temperature on y.
sns.lmplot("Air Temperature Minimum (degF)", "Air Temperature Maximum (degF)", df, size=10)
<seaborn.axisgrid.FacetGrid at 0x11287b650>
# Now to use some Pandas timeseries magic to look at monthly trends
# First we need to set the Date column as the Index
indexed = df.set_index('Date')
# Resample to month-start ('MS') frequency, then drop rows with missing values.
# NOTE(review): calling .dropna() straight after resample() relies on older
# pandas, where resample() returned an already-aggregated (mean) frame. Newer
# pandas returns a Resampler that needs an explicit aggregation such as
# .mean() first -- confirm the pandas version this notebook targets.
resampled = indexed.resample('MS').dropna()
# Display the monthly frame as a grid.
qgrid.show_grid(resampled, remote_js=True)
# Exploratory: Pandas plotting should let us take a nice quick look at the data
# Going to use Seaborn to set our plot context
sns.set_context(rc={"figure.figsize": (18, 9)})
# Plot the columns of the monthly frame against its date index.
resampled.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x1156aee50>
# ggplot is quite good at handling timeseries. Let's use it to look at long-term trends
# aes() refers to columns by name, so copy the date index into a 'Date' column.
resampled['Date'] = resampled.index
# Snow water equivalent over time: a line layer plus a smoothing layer.
(ggplot(gg.aes(x='Date', y='Snow Water Equivalent (in)'), data=resampled)
+ gg.geom_line()
+ gg.stat_smooth())
<ggplot: (290809233)>
# What about temperatures?
# Same line + smoothing layers, this time for the average air temperature.
(ggplot(gg.aes(x='Date', y='Air Temperature Average (degF)'), data=resampled)
+ gg.geom_line()
+ gg.stat_smooth())
<ggplot: (286913285)>
# I want to look at monthly statistics, so need to create a column that's just months
# Month number taken from the date index.
resampled['Month'] = resampled.index.month
# Mean of each column over the rows sharing a month number; the result is
# indexed by 'Month'.
monthly_grouped = resampled.groupby('Month').mean()
# Matplotlib now has context managers to set styles. Let's try the bmh style
with plt.style.context('bmh'):
    # Both calls sit inside the context so the plot is drawn while the
    # 'bmh' style is active.
    sns.set_context(rc={"figure.figsize": (18, 9)})
    monthly_grouped.plot()
# Column names of the monthly frame, offered as choices for both axes.
res_dims = resampled.columns.tolist()


@interact
def res_comp(x=res_dims, y=res_dims):
    """Draw a joint plot of column ``x`` against column ``y`` of ``resampled``.

    The list-valued defaults are deliberate: ``interact`` reads them to
    build one selection widget per argument and then calls this function
    with the chosen column names.
    """
    sns.jointplot(x, y, data=resampled, size=9)


# Display the per-month averages as a grid.
qgrid.show_grid(monthly_grouped)
# Back to ggplot
monthly_grouped['Month'] = monthly_grouped.index
# monthly_grouped is indexed by month number (1-12), so it has no row
# labelled 0. Rendering this plot raised "KeyError: 0", which appears to come
# from ggplot looking up label 0 in the frame. Hand it a copy with a default
# 0-based index instead; the 'Month' column added above keeps the month values.
(ggplot(gg.aes(x='Month', y='Snow Water Equivalent (in)'),
        data=monthly_grouped.reset_index(drop=True))
 + gg.geom_line())
<repr(<ggplot.ggplot.ggplot at 0x115348450>) failed: KeyError: 0>
# Let's do some faceting to look at some monthly statistics
# Density of the monthly average temperature, one facet per 'Month' value.
(ggplot(gg.aes(x='Air Temperature Average (degF)'), data=resampled)
+ gg.geom_density(alpha=0.25)
+ gg.facet_wrap('Month')
+ gg.labs("Air Temperature Average (degF)", "Freq"))
<ggplot: (288846193)>
# Same faceted density plot, this time for snow water equivalent.
(ggplot(gg.aes(x='Snow Water Equivalent (in)'), data=resampled)
+ gg.geom_density(alpha=0.25)
+ gg.facet_wrap('Month')
+ gg.labs("Snow Water Equivalent (in)", "Freq"))
<ggplot: (292700801)>
# Seaborn also has very powerful faceting mechanisms. Let's look at the monthly average temperatures
# again, but in a FacetGrid
# Distinct month values, sorted in place, used below to order rows and hues.
months = resampled['Month'].unique()
months.sort()
months
# One grid row per month, each coloured by month.
g = sns.FacetGrid(resampled, row="Month", hue="Month", palette="deep",
size=1.8, aspect=4, hue_order=months, row_order=months)
# Draw the average-temperature distribution in every facet.
g.map(sns.distplot, 'Air Temperature Average (degF)');
# Keep three measurement columns plus 'Month', and replace the date index
# with a default integer index.
pair_cols = resampled[['Snow Water Equivalent (in)', 'Precipitation Accumulation (in)',
'Air Temperature Average (degF)', 'Month']].reset_index(drop=True)
pair_cols.head()
# Pairwise scatter plots of the selected columns, coloured by month.
pair = sns.PairGrid(pair_cols, hue="Month", palette="GnBu_d")
pair.map(plt.scatter)
pair.add_legend()
from IPython.core.display import HTML
# Use the following if running locally:
# styles = open("styles/custom.css", "r").read()
# This is for nbviewer:
# Read the stylesheet inside a context manager so the file handle is closed
# as soon as the contents are loaded.
with open("custom.css", "r") as css_file:
    styles = css_file.read()
# Last expression of the cell: wrap the file contents in an HTML display object.
HTML(styles)