In [1]:
# Shell out to `date` to print the current timestamp
# (presumably to record when the notebook was last run).
!date
Mon Mar 10 16:08:52 PDT 2014
In [2]:
import numpy as np, pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt, seaborn
In [3]:
# Source CSV hosted on GHDx; judging by the file name, these are the
# household-death records of the IHME Iraq Mortality Study, 2001-2011.
DEATHS_CSV_URL = 'http://ghdx.healthmetricsandevaluation.org/sites/default/files/record-attached-files/IHME_IRAQ_MORTALITY_STUDY_2001_2011_HH_DEATHS.CSV'

# Read straight from the URL (requires network access), then preview the
# first rows to check the columns parsed as expected.
df = pd.read_csv(DEATHS_CSV_URL)
df.head()
Out[3]:
cluster hh gov sex mod yod cod death_cert war_death war_cod respo distance year_hh_formed
0 1 6 18 F 7 2007 cardiovascular death cert not available N NaN NaN NaN 1980
1 1 10 18 M 3 2002 injury (not war) >=18 able to see death cert N NaN NaN NaN 1989
2 1 10 18 M 4 2006 injury (not war) >=18 able to see death cert N NaN NaN NaN 1989
3 1 15 18 F 10 2003 injury (war) <18 able to see death cert Y gunshot coalition forces Yes <1 KM 1976
4 1 16 18 F 5 2009 injury (not war) <18 able to see death cert N NaN NaN NaN 1983

5 rows × 13 columns

In [5]:
# Percentage of rows in df falling into each death_cert category.
# value_counts() skips missing values by default while the denominator is
# the full row count, so the bars need not sum to 100 if death_cert has NaNs.
s = df.death_cert.value_counts() / float(len(df.index))
s *= 100

# Series.order() was removed from pandas; sort_values() is the equivalent
# ascending sort, which puts the largest category at the top of a barh chart.
ax = s.sort_values().plot(kind='barh', fontsize=24)

# Slightly smaller x tick labels than the category labels set above.
ax.tick_params(axis='x', labelsize=18)
ax.set_xlabel('Percent of Deaths in HH Survey', fontsize=24)

# Pin the upper limit to the full percentage scale so bar lengths read
# directly as shares of all deaths; trailing ';' suppresses the repr.
ax.set_xlim(right=100);