#!/usr/bin/env python
# coding: utf-8
"""Three seaborn views of storage-backend operation costs.

Loads a small CSV of timings (one row per storage option, one column per
operation), melts it to long form with the columns ``storage``,
``operation`` and ``duration``, and draws the same data three ways:

1. a horizontal nested bar plot,
2. a bar plot faceted by operation,
3. a dot plot.

Exported from a Jupyter notebook; the ``# In[n]:`` markers are the original
cell boundaries.
"""

# In[1]:

# The inline-backend magic only exists inside IPython/Jupyter. Skip it when
# the file is run as a plain Python script instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# In[2]:

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


# Note that this is using current seaborn master:

# In[3]:

# Function-call form works on both Python 2 and Python 3.
print(sns.__version__)


# I think the "whitegrid" style looks best with barplots and other plots
# that have large patches of color.

# In[4]:

sns.set(style="whitegrid", font_scale=1.3)


# In[5]:

df = pd.read_csv(("https://gist.githubusercontent.com/mrocklin/56830db18bd71ba7d9e3/"
                  "raw/c2adcfd160f05b199a47d071791676a8a316dda1/costs.csv"),
                 index_col=0)


# In[6]:

# Turn the index into a regular, named column so it can be used as a
# plotting variable.
df.index.name = "storage"
df = df.reset_index()


# In[7]:

df.head()


# Melt the dataframe to long-form.

# In[8]:

df = pd.melt(df, "storage", value_name="duration", var_name="operation")


# In[9]:

df.head()


# ### Option 1: a horizontal nested bar plot
#
# Drawing the bars horizontally works best when the category values are long
# strings, as they don't overlap or have to get awkwardly rotated.
#
# Stacked bar plots can be hard to read and don't work all that well except
# in certain circumstances, so we'll draw separate plots for each operation.
#
# Using nested plots emphasizes the comparison between the two operations
# for each storage option, but makes it slightly harder to compare across
# storage options for a given operation (though still easier than with
# stacked bar plots):

# In[10]:

f, ax = plt.subplots(figsize=(5, 7))

# Draw the plot. Variables are passed by keyword (recent seaborn releases no
# longer accept them positionally) and the target axes is given explicitly.
sns.barplot(x="duration", y="storage", hue="operation", data=df, ax=ax)

# The default legend doesn't look great here
# let's move it and add a frame and title
ax.legend(loc="lower center", ncol=2, frameon=True, title="operation")

# Seaborn automatically labels the axes with the variable names
# which can be useful, but for production plots it's usually good
# to tweak them. Let's add units to the x axis label and remove
# the y axis label, which is not overly necessary and takes up space.
ax.set(xlabel="Duration (s)", ylabel="")

# Removing the axes spines gives a cleaner look
sns.despine(bottom=True)


# ### Option 2: faceted bar plot
#
# The other option would be to make two plots, one for each operation. This
# will make it easier to compare across storage options within each
# operation, but it will be somewhat more difficult to compare across
# operations within each storage option.

# In[11]:

# seaborn 0.9 renamed ``factorplot`` to ``catplot`` and its ``size``
# parameter to ``height``. Prefer the new name when it exists and fall back
# to the old spelling on older installations.
if hasattr(sns, "catplot"):
    g = sns.catplot(x="duration", y="storage", col="operation", data=df,
                    kind="bar", height=7, aspect=.6, palette="Set3")
else:
    g = sns.factorplot(x="duration", y="storage", col="operation", data=df,
                       kind="bar", size=7, aspect=.6, palette="Set3")
g.set_axis_labels("Duration (s)", "")
g.despine(bottom=True)


# ### Option 3: dot plot
#
# Another option would be a dot plot, which makes it easy to compare both
# across operations and across storage.

# In[14]:

f, ax = plt.subplots(figsize=(5, 8))

# Draw the plot (points only, no connecting lines)
sns.pointplot(x="duration", y="storage", hue="operation", data=df,
              join=False, palette="Set1", ax=ax)

# The default legend doesn't look great here
# let's move it and add a frame
ax.legend(loc="lower right", ncol=2, frameon=True)

# Seaborn automatically labels the axes with the variable names
# which can be useful, but for production plots it's usually good
# to tweak them. Let's add units to the x axis label and remove
# the y axis label, which is not overly necessary and takes up space.
ax.set(xlabel="Duration (s)", ylabel="", xlim=(-.2, None))

# A dot plot looks better with a grid on the categorical axis
# to help connect each point to its label. Removing the grid on the
# numerical axis makes the plot cleaner, though it is a bit hard to
# read off exact values for each point.
ax.xaxis.grid(False)
ax.yaxis.grid(True)

# Removing the axes spines gives a cleaner look
sns.despine(left=True, bottom=True)


# In[ ]: