#!/usr/bin/env python
# coding: utf-8
# ![matplotlib](http://matplotlib.org/_static/logo2.svg)
# # Week 10 - Advanced Visualization
#
# ## Today's Agenda
# Today we will be focusing on plotting and on how to produce __`publication-ready`__ plots
# - Modifying your `matplotlibrc` file
# - Matplotlib Style sheets
# - Seaborn
# - Bokeh
# # Modifying your matplotlibrc file
# Matplotlib reads its default settings from a _special_ configuration file called `matplotlibrc`.
# This file is usually located at __~/.matplotlib/matplotlibrc__
#
# You can check the location of this file by typing:
# In[1]:
# Importing modules.
# `get_ipython` is not imported anywhere in this file, so it has to be
# provided by the runtime (an IPython/Jupyter session).
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib
# Looking up `fname` for `matplotlibrc`.
# NOTE: this is a bare expression; its value is only shown when the cell
# is evaluated interactively, a plain script run prints nothing here.
matplotlib.matplotlib_fname()
# In[2]:
import numpy as np
import matplotlib.pyplot as plt
# You can also __edit__ the different parameters of the `matplotlibrc` file.
# In[3]:
# Every setting that can be changed lives in the `rcParams` mapping
import matplotlib as mpl
matplotlib.rcParams
# Report the current default line width and line colour
print('Linewidth: {0} Color: {1}'.format(
    *(mpl.rcParams[key] for key in ('lines.linewidth', 'lines.color'))))
# In[4]:
# Override the defaults of the `lines` group, then report them again
mpl.rc('lines', linewidth=10, color='g')
print('Linewidth: {0} Color: {1}'.format(
    *(mpl.rcParams[key] for key in ('lines.linewidth', 'lines.color'))))
# # Matplotlib Style sheets
# Newer versions of matplotlib offer the option to setup a __style sheet__.
# For example, one can make a plot look as if it were produced with `ggplot` or `SuperMongo`.
# In[5]:
def plotting(stylename='classic'):
    """
    Draw a small random line plot using the given matplotlib style sheet.

    The x-values are the integers 0-9 and the y-values are random integers
    drawn from [20, 30), so the curve differs on every call.

    Parameters
    ----------
    stylename : str, optional
        Name of the style sheet passed to `plt.style.use`.
        Defaults to 'classic'.

    Raises
    ------
    IOError
        If `plt.style.use` raises `IOError` for `stylename`; it is
        re-raised with the message '<stylename> not found'.

    Notes
    -----
    The style is applied with `plt.style.use` (not a context manager), so
    per matplotlib's documented behaviour it stays active for later plots.
    """
    # Defining data
    x = np.arange(0, 10)
    y = np.random.randint(20, 30, x.size)
    # Defining style sheet
    try:
        plt.style.use(stylename)
    except IOError:
        msg = '{0} not found'.format(stylename)
        raise IOError(msg)
    # Plotting (clear the current figure first so plots do not pile up)
    plt.clf()
    plt.plot(x, y, '-ro', label=stylename)
    plt.xlabel('X label')
    # Fixed: the y-axis used to be labelled 'X label' as well
    plt.ylabel('Y label')
    plt.legend(loc=1)
    plt.title('Plot using "{0}" Style'.format(stylename), fontsize=20)
    plt.show()
# In[6]:
plotting('classic')
# The same plot rendered with a different __style sheet__
# In[7]:
plotting('ggplot')
# In[8]:
# Render the plot once for each of the first five available styles
for style in plt.style.available[:5]:
    plotting(style)
# To get a list of all the __styles available__:
# In[9]:
plt.style.available
# - A nice website to look at the different style sheet is: [https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html](https://tonysyu.github.io/raw_content/matplotlib-style-gallery/gallery.html)
# - For more information on `style sheets`, see: [http://www.delaytolerantnetworks.com/Customizing-plots-with-style-sheets-in-matplotlib/](http://www.delaytolerantnetworks.com/Customizing-plots-with-style-sheets-in-matplotlib/)
# # Seaborn
# - Website: [https://seaborn.pydata.org/](https://seaborn.pydata.org/)
#
# Seaborn is a visualization library based on matplotlib. It provides a high-level interface for drawing attractive statistical graphics.
#
# Some of the data here was taken from: [http://blog.insightdatalabs.com/advanced-functionality-in-seaborn/](http://blog.insightdatalabs.com/advanced-functionality-in-seaborn/)
# ## The Data
#
# We'll be using the [ UCI "Auto MPG"](https://archive.ics.uci.edu/ml/datasets/Auto+MPG) data for the purpose of this module.
#
# We'll be using pandas along with `Seaborn`.
# In[10]:
# Imports for the Seaborn section (matplotlib, pyplot and numpy were
# already imported earlier in this file; pandas and seaborn are new here)
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
get_ipython().run_line_magic('matplotlib', 'inline')
import seaborn as sns
# Select seaborn's 'darkgrid' style
sns.set_style('darkgrid')
# Reading in the data that we'll be using:
# In[11]:
# Names of columns (the data file itself has no header row)
names = [
    'mpg',
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model_year',
    'origin',
    'car_name',
]
# Reading in ASCII file with motor data.
# `sep` is a raw string so that `\s` reaches the regex engine untouched
# instead of being an invalid string escape sequence.
df = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
    sep=r'\s+',
    names=names,
)
# Creating new column named `maker` (first word of `car_name`)
df['maker'] = df.car_name.map(lambda x: x.split()[0])
# Assigning continents to the corresponding `origin` codes
# (codes missing from the mapping become `NaN` and are dropped below)
df.origin = df.origin.map({1: 'America', 2: 'Europe', 3: 'Asia'})
# Turning the '?' placeholders into `NaN` and dropping the affected rows
df = df.replace('?', np.nan).dropna()
# Changing the data type of `horsepower`
df['horsepower'] = df.horsepower.astype(float)
df.head()
# ### Factorplot and FacetGrid
# In[12]:
# Switch seaborn to its 'notebook' plotting context
sns.set_context('notebook')
# `mpg` as a function of `model_year` for the whole data set
sns.factorplot(x="model_year", y="mpg", data=df)
# We can start off by visualizing '`model_year`' vs '`mpg`' for each type of '`origin`' class.
#
# We can do this by using the __factorplot__ command:
# In[13]:
# One `mpg` vs `model_year` panel per value of `origin`
sns.factorplot(x="model_year", y="mpg", col="origin", data=df)
# You can easily change the type of graph that you're plotting
# In[14]:
# Distribution of `mpg`, one column per `origin`
g = sns.FacetGrid(data=df, col="origin")
g.map(sns.distplot, "mpg")
# Or look at a scatter plot of the data
# In[15]:
g = sns.FacetGrid(data=df, col="origin")
g.map(plt.scatter, "horsepower", "mpg")
# You can easily compute and plot a regression of the data
# In[16]:
g = sns.FacetGrid(data=df, col="origin")
g.map(sns.regplot, "horsepower", "mpg")
# Axis limits are set through pyplot
plt.xlim(0, 250)
plt.ylim(0, 60)
# Let's say you want to visualize the "Kernel Density Estimation" for each type
# In[17]:
# New column `tons`: `weight` divided by 2000, truncated to an integer
# (presumably pounds -> short tons; confirm the unit of `weight`)
df['tons'] = (df['weight'] / 2000).astype(int)
# Grid with one column per `origin` and one row per `tons` value
g = sns.FacetGrid(data=df, row="tons", col="origin")
# Map the plotting function onto every facet, giving it the
# 'x' and 'y' variables to use
g.map(sns.kdeplot, "horsepower", "mpg")
# Set the x- and y-limits through pyplot
plt.xlim(0, 250)
plt.ylim(0, 60)
# ## pairplot and PairGrid
#
# These functions allow you to plot _pairwise relations_ in a dataset.
#
# Let's say we want to plot the relation between `mpg`, `horsepower`, `weight`, and `origin`.
#
# And we also want to separate them based on the `origin`.
# __No types__
# In[18]:
# Regressions above the diagonal, residuals below it, histograms on it
g = sns.PairGrid(
    df[["mpg", "horsepower", "weight", "origin"]],
)
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
# Tilt the x tick labels of every subplot
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), rotation=45)
g.set(alpha=0.5)
# We can specify the types of `origin`
# In[19]:
# Same layout, coloured by `origin`
g = sns.PairGrid(
    df[["mpg", "horsepower", "weight", "origin"]],
    hue="origin",
)
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), rotation=45)
g.add_legend()
g.set(alpha=0.5)
# Or `maker`
# In[20]:
# Same layout, coloured by `maker`
g = sns.PairGrid(
    df[["mpg", "horsepower", "weight", "origin", "maker"]],
    hue="maker",
)
g.map_upper(sns.regplot)
g.map_lower(sns.residplot)
g.map_diag(plt.hist)
for ax in g.axes.flat:
    plt.setp(ax.get_xticklabels(), rotation=45)
g.add_legend()
g.set(alpha=0.5)
# As you can see, you have some freedom when it comes to what you want to plot.
# In[21]:
# KDE curves on the diagonal, KDE contours everywhere else
g = sns.PairGrid(
    df[["mpg", "horsepower", "weight", "origin"]],
)
g.map_diag(sns.kdeplot)
g.map_offdiag(sns.kdeplot, n_levels=6, cmap="Blues_d");
# ## jointplot and JointGrid
# You can also visualize 2D-data in different ways:
# ### KDE and Histograms
# You can estimate the [Pearson correlation factor](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient), along with the [p-value](https://en.wikipedia.org/wiki/P-value) of a linear fitting.
# In[22]:
# `x` and `y` are passed by keyword, matching the other jointplot /
# JointGrid calls in this file; newer seaborn releases no longer accept
# them positionally.
sns.jointplot(x="mpg", y="horsepower", data=df, kind='kde')
# In[23]:
# KDE joint plot with the individual observations drawn on top
g = sns.jointplot(x="mpg", y="horsepower", data=df, kind="kde", color="r")
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")
# Make the first collection on the joint axes fully transparent
g.ax_joint.collections[0].set_alpha(0)
# Or show the linear regression of data
# In[24]:
sns.jointplot(x="horsepower", y="mpg", data=df, kind="reg")
# Or a 2nd-order fitting to the data:
# In[25]:
# Second-order regression in the centre, distributions on the margins
g = sns.JointGrid(data=df, x="horsepower", y="mpg")
g.plot_joint(sns.regplot, order=2)
g.plot_marginals(sns.distplot)
# You can also plot __2D-Histograms__
# In[26]:
# The "white" axes style is only active inside the `with` block
with sns.axes_style("white"):
    sns.jointplot(data=df, x="horsepower", y="mpg", kind="hex")
# In[27]:
# `horsepower` per `origin` as box plots
sns.boxplot(data=df, x="origin", y="horsepower", palette="PRGn")
sns.despine(trim=True, offset=10)
# In[28]:
sns.boxplot(data=df, x="origin", y="horsepower", palette="PRGn")
sns.despine(trim=True, offset=10)
# ### tsplot
# Plot one or more timeseries with flexible representation of uncertainty.
# In[29]:
# Load seaborn's "gammas" example data set and plot it as time series
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(
    data=gammas,
    time="timepoint",
    unit="subject",
    condition="ROI",
    value="BOLD signal",
)
# In[30]:
# Synthetic data: 10 rows of 31 samples, each a sine curve plus uniform
# noise per sample plus one normally-distributed offset per row
x = np.linspace(0, 15, 31)
data = np.sin(x) + np.random.rand(10, 31) + np.random.randn(10, 1)
ax = sns.tsplot(data=data)
# In[31]:
# Same data with the "unit_traces" error style
ax = sns.tsplot(err_style="unit_traces", data=data)
# In[32]:
# Same data with the "boot_traces" error style and 500 bootstrap samples
ax = sns.tsplot(err_style="boot_traces", n_boot=500, data=data)
# ### Scatterplot
# In[33]:
sns.set(palette="muted", style="whitegrid")
# Load the "iris" example data set
iris = sns.load_dataset("iris")
# Reshape to long form: `species` is kept as identifier, the remaining
# column names go into `measurement` and their values into `value`
iris = pd.melt(iris, id_vars="species", var_name="measurement")
# Draw a categorical scatterplot to show each observation
sns.swarmplot(data=iris, x="measurement", y="value", hue="species")
# ### Heatmaps
# In[34]:
sns.set(font="monospace", context="paper")
# Load the "brain_networks" data set (three header rows, first column as
# index). NOTE: this rebinds `df`, replacing the Auto MPG frame used above.
df = sns.load_dataset("brain_networks", header=[0, 1, 2], index_col=0)
corrmat = df.corr()
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(12, 9))
# Draw the heatmap using seaborn
sns.heatmap(corrmat, vmax=.8, square=True)
# Use matplotlib directly to emphasize known networks: draw a white line
# wherever the "network" label differs from that of the previous column
networks = corrmat.columns.get_level_values("network")
for i, network in enumerate(networks):
    if i == 0 or network == networks[i - 1]:
        continue
    ax.axhline(len(networks) - i, c="w")
    ax.axvline(i, c="w")
f.tight_layout()
# # See also
# - Look into [http://blog.insightdatalabs.com/advanced-functionality-in-seaborn/](http://blog.insightdatalabs.com/advanced-functionality-in-seaborn/)
# - [http://seaborn.pydata.org/generated/seaborn.FacetGrid.html](http://seaborn.pydata.org/generated/seaborn.FacetGrid.html)
# - [https://seaborn.pydata.org/tutorial.html](https://seaborn.pydata.org/tutorial.html)
# - [https://github.com/InsightDataLabs/ipython-notebooks/blob/master/seaborn.ipynb](https://github.com/InsightDataLabs/ipython-notebooks/blob/master/seaborn.ipynb)
# # Next Week
# - Open Hack Session
# - Make sure to bring some ideas or problems, and we will try to answer them or help you out with them.