!date
Mon Oct 6 11:42:13 PDT 2014
import numpy as np, pandas as pd, matplotlib.pyplot as plt, mpld3, seaborn as sns
%matplotlib inline
# Load the mortality table straight from the GHDx download URL (the file
# name identifies it as IHME GBD 2010 age-specific mortality by country,
# 1970-2010), then preview the first rows.
# NOTE(review): the *_ui columns contain a non-ASCII range separator; if it
# renders as a replacement character or decoding fails, pass an explicit
# `encoding=` to read_csv -- confirm the file's actual encoding first.
DATA_URL = ('http://ghdx.healthdata.org/sites/default/files/record-attached-files/'
            'IHME_GBD_2010_MORTALITY_AGE_SPECIFIC_BY_COUNTRY_1970_2010.CSV')
df = pd.read_csv(DATA_URL)
df.head()
| | iso3 | country_name | year | age_name | sex_name | death_abs | death_abs_ui | death_rate | death_rate_ui |
|---|---|---|---|---|---|---|---|---|---|
| 0 | AFG | Afghanistan | 1970 | 0-6 days | Male | 19,241 | (22,918–15,579) | 318,292.9 | (379,126.5–257,719.0) |
| 1 | AFG | Afghanistan | 1970 | 0-6 days | Female | 12,600 | (16,898–9,109) | 219,544.2 | (294,448.5–158,713.0) |
| 2 | AFG | Afghanistan | 1970 | 0-6 days | Both | 31,840 | (39,837–24,672) | 270,200.7 | (338,056.6–209,366.9) |
| 3 | AFG | Afghanistan | 1970 | 7-27 days | Male | 15,939 | (17,890–13,751) | 92,701.0 | (104,045.9–79,977.0) |
| 4 | AFG | Afghanistan | 1970 | 7-27 days | Female | 11,287 | (14,521–8,585) | 68,594.5 | (88,249.1–52,171.7) |
# Select the rows for one country (by ISO3 code) and one sex.
# NOTE: `df` is narrowed in place, so the loading cell must be re-run
# before selecting a different country or sex.
df = df[df.iso3 == 'ZAF']
df = df[df.sex_name == 'Male']

# Plot deaths over time, one line per age group, with an HTML tooltip on
# every data point.
fig, ax = plt.subplots(figsize=(12, 8))
labels = []            # legend text for each plotted line
line_collections = []  # plotted line handles, parallel to `labels`
for g, dfg in df.groupby('age_name'):
    # The 'All ages' group is left out of the per-age-group plot.
    if g == 'All ages':
        continue

    x = dfg.year
    # death_abs is stored as text with thousands separators (e.g. '19,241');
    # strip the commas and convert the whole column at once.
    y = dfg.death_abs.str.replace(',', '').astype(float)

    line, = ax.plot(x, y, 'o-', lw=4, ms=15)
    labels.append(g)
    line_collections.append(line)

    # One HTML snippet per point, in the same order as the plotted points.
    pt_labels = ['Age %s<br/>Year %s<br/>%d Deaths' % (g, year, deaths)
                 for year, deaths in zip(x, y)]
    tooltip = mpld3.plugins.PointHTMLTooltip(line, labels=pt_labels)
    mpld3.plugins.connect(fig, tooltip)

# Pad the x-range slightly beyond the tick years 1970-2010.
plt.axis(xmin=1968, xmax=2012)
plt.xticks([1970, 1980, 1990, 2000, 2010], [1970, 1980, 1990, 2000, 2010])
# Shrink the axes to 70% of the figure width (presumably to leave room on
# the right for the interactive legend -- confirm against the rendering).
plt.subplots_adjust(right=.7)

interactive_legend = mpld3.plugins.InteractiveLegendPlugin(line_collections, labels, alpha_sel=.2, alpha_unsel=1)
mpld3.plugins.connect(fig, interactive_legend)

mpld3.display()