!date import numpy as np, pandas as pd, matplotlib.pyplot as plt, mpld3, seaborn as sns %matplotlib inline # get some of my favorite data df = pd.read_csv('http://ghdx.healthdata.org/sites/default/files/' 'record-attached-files/IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv', low_memory=False) df.head() # make a summary table that would like to inspect df['Field Site'] = df.site df['Underlying Cause'] = df.gs_text34 g = df.groupby('Field Site')['Underlying Cause'] t = g.value_counts().unstack(0) t = t.fillna(0) t['Mean'] = t.mean(axis=1) t['Max'] = t.max(axis=1) t['Min'] = t.min(axis=1) t t = t.sort('Mean') # sort the table in a meaningful way fig = plt.figure(figsize=(12,16)) # make a nice, big figure for the plot y = np.arange(len(t.index)) # select points on the y-axis for each bar # do actual plotting plt.barh(y+.05, t.Pemba, height=.45, color=sns.color_palette()[0], label='Pemba') plt.barh( y+.5, t.Mean, height=.45, xerr=[t.Mean - t.Min, t.Max - t.Mean], # annoying format for error bars color=sns.color_palette()[1], ecolor='k', label='Cross-site Mean') plt.axis(xmin=0) # silly error-bars go below zero, but don't show that plt.legend(loc=(.5,.1)) # legend uses label values from calls to plt.barh plt.yticks(y+.5, t.index) # label each tick on the y-axis with the corresponding cause plt.subplots_adjust(left=.5) # make sure there is enough room to read the tick labels plt.xlabel('Verbal Autopsies Collected') # label the plot so that it is easy to remember what it is in the future pass # do something with no output to keep display clean # make the figure interactive mpld3.display(fig)