In [1]:
!date
Mon Oct  6 11:42:13 PDT 2014
In [2]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, mpld3, seaborn as sns
%matplotlib inline
In [3]:
# Load the IHME GBD 2010 age-specific mortality table (by country, 1970-2010)
# directly from its GHDx download URL into a DataFrame.
mortality_csv_url = (
    'http://ghdx.healthdata.org/sites/default/files/record-attached-files/'
    'IHME_GBD_2010_MORTALITY_AGE_SPECIFIC_BY_COUNTRY_1970_2010.CSV'
)
df = pd.read_csv(mortality_csv_url)
In [4]:
# Preview the first rows to check the column names and how values are formatted.
df.head()
Out[4]:
iso3 country_name year age_name sex_name death_abs death_abs_ui death_rate death_rate_ui
0 AFG Afghanistan 1970 0-6 days Male 19,241 (22,918–15,579) 318,292.9 (379,126.5–257,719.0)
1 AFG Afghanistan 1970 0-6 days Female 12,600 (16,898–9,109) 219,544.2 (294,448.5–158,713.0)
2 AFG Afghanistan 1970 0-6 days Both 31,840 (39,837–24,672) 270,200.7 (338,056.6–209,366.9)
3 AFG Afghanistan 1970 7-27 days Male 15,939 (17,890–13,751) 92,701.0 (104,045.9–79,977.0)
4 AFG Afghanistan 1970 7-27 days Female 11,287 (14,521–8,585) 68,594.5 (88,249.1–52,171.7)
In [5]:
# keep only the rows of one country: ISO3 code 'ZAF' (South Africa)
df = df.loc[df['iso3'] == 'ZAF']
In [6]:
# then narrow those rows further to a single sex
df = df.loc[df['sex_name'] == 'Male']
In [47]:
# and plot deaths over time by age group
#
# Builds an interactive mpld3 figure from the filtered table: one line per
# age group, an HTML tooltip on every data point, and a clickable legend.

fig, ax = plt.subplots(figsize=(12,8))

labels = []
line_collections = []

# sort=False keeps the age groups in the order they occur in the table;
# the default alphabetical ordering of the group names is not age order
# in general.
# NOTE(review): assumes the rows are already ordered by age, as the first
# rows of the file suggest ('0-6 days' precedes '7-27 days') -- confirm.
for age_group, group in df.groupby('age_name', sort=False):
    if age_group == 'All ages':
        # skip the all-ages rows so only individual age groups are drawn
        continue

    x = group.year
    # death_abs holds text with thousands separators (e.g. '19,241'),
    # so drop the commas before converting to numbers
    y = group.death_abs.str.replace(',', '').astype(float)

    line, = ax.plot(x, y, 'o-', lw=4, ms=15)
    labels.append(age_group)
    line_collections.append(line)

    # one HTML snippet per plotted point, in plotting order
    # (%d renders the float death count without decimals)
    pt_labels = ['Age %s<br/>Year %s<br/>%d Deaths'%(age_group, year, deaths)
                 for year, deaths in zip(x, y)]
    tooltip = mpld3.plugins.PointHTMLTooltip(line, labels=pt_labels)
    mpld3.plugins.connect(fig, tooltip)

# pad the x-range slightly beyond the decade ticks
year_ticks = [1970, 1980, 1990, 2000, 2010]
ax.set_xlim(1968, 2012)
ax.set_xticks(year_ticks)
# explicit string labels so the years are rendered verbatim
ax.set_xticklabels([str(year) for year in year_ticks])

# label the figure so it can be read on its own; the title is derived from
# the rows actually plotted, so it follows whatever filters were applied
countries = ', '.join(map(str, df.country_name.dropna().unique()))
sexes = ', '.join(map(str, df.sex_name.dropna().unique()))
ax.set_xlabel('Year')
ax.set_ylabel('Deaths')
ax.set_title('Deaths by age group: %s (%s)' % (countries, sexes))

# shrink the axes to 70% of the figure width
# (presumably to leave room on the right for the interactive legend)
fig.subplots_adjust(right=.7)

# NOTE(review): alpha_sel / alpha_unsel are the keyword names this notebook
# was written with; confirm they match the installed mpld3 release.
interactive_legend = mpld3.plugins.InteractiveLegendPlugin(line_collections, labels, alpha_sel=.2, alpha_unsel=1)
mpld3.plugins.connect(fig, interactive_legend)

# render this specific figure rather than relying on the current-figure state
mpld3.display(fig)
Out[47]: