%matplotlib inline import pandas as pd import numpy as np import numpy.ma as ma from datetime import datetime import matplotlib.pyplot as plt import pdb from IPython.core.display import HTML def css_styling(): styles = open("styles/custom.css", "r").read() return HTML(styles) css_styling() data_dir = "data/" !rm -rf ~/.theano measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0) measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION) measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH) measles_data.ONSET = pd.to_datetime(measles_data.ONSET) measles_data = measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}}) sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0) _names = sp_pop.index.values _names[_names=='BRASILANDIA'] = 'BRAZILANDIA' sp_pop.set_index(_names, inplace = True) sp_pop.head() vaccination_data = pd.read_csv('data/BrazilVaxRecords.csv', index_col=0) vaccination_data.head() vaccination_data.VAX[:18] vax_97 = np.r_[[0]*(1979-1921+1), vaccination_data.VAX[:17]] n = len(vax_97) FOI_mat = np.resize((1 - vax_97*0.9), (n,n)).T # Mean age of infection for those born prior to vaccination coverage, assuming R0=16 A = 4.375 (1 - vax_97*0.9)[:-1] np.tril(FOI_mat).sum(0) natural_susc = np.exp((-1/A) * np.tril(FOI_mat).sum(0))[::-1] vacc_susc = (1 - vax_97*0.9)[::-1] vacc_susc[0] = 0.5 vacc_susc sia_susc = np.ones(len(vax_97)) birth_year = np.arange(1922, 1998)[::-1] by_mask = (birth_year > 1983) & (birth_year < 1992) sia_susc[by_mask] *= 0.2 total_susc = sia_susc * vacc_susc * natural_susc total_susc pd.Series(total_susc).plot(); measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0) measles_onset_dist.cumsum().plot(legend=False, grid=False) total_district_cases = measles_onset_dist.sum() totals = measles_onset_dist.sum() totals.sort(ascending=False) totals[:5] by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5)) age_classes = [0,5,10,15,20,25,30,35,40,100] measles_data.dropna(subset=['YEAR_AGE'], inplace=True) measles_data['YEAR_AGE'] = measles_data.YEAR_AGE.astype(int) measles_data['AGE_GROUP'] = pd.cut(measles_data.AGE, age_classes, right=False) CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED' CLINICAL = measles_data.CONCLUSION == 'CLINICAL' DISCARDED = measles_data.CONCLUSION == 'DISCARDED' lab_subset = measles_data[(CONFIRMED | CLINICAL) & measles_data.COUNTY.notnull()].copy() age = lab_subset.YEAR_AGE.values ages = lab_subset.YEAR_AGE.unique() counties = lab_subset.COUNTY.unique() y = (lab_subset.CONCLUSION=='CONFIRMED').values _lab_subset = lab_subset.replace({"CONCLUSION": {"CLINICAL": "UNCONFIRMED"}}) by_conclusion = _lab_subset.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5), grid=False) (measles_data[CONFIRMED].YEAR_AGE>20).mean() #Extract cases by age and time. age_group = pd.cut(age, age_classes, right=False) age_index = np.array([age_group.categories.tolist().index(i) for i in age_group]) age_groups = age_group.categories age_groups # Get index from full crosstabulation to use as index for each district dates_index = measles_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().index unique_districts = measles_data.DISTRICT.dropna().unique() excludes = ['BOM RETIRO'] N = sp_pop.drop(excludes).ix[unique_districts].sum().drop('Total') N N_age = N.iloc[:8] N_age.index = age_groups[:-1] N_age[age_groups[-1]] = N.iloc[8:].sum() N_age age_slice_endpoints = [g[1:-1].split(',') for g in age_groups.values] age_slices = [slice(int(i[0]), int(i[1])) for i in age_slice_endpoints] p_susc_age = np.array([total_susc[s].mean() for s in age_slices]) sp_counts_2w = lab_subset.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).resample('2W', how='sum') # All confirmed cases, by district confirmed_data = lab_subset[lab_subset.CONCLUSION=='CONFIRMED'] confirmed_counts = confirmed_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).sum() all_confirmed_cases = confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique()).fillna(0) # Ensure the age groups are ordered I_obs = sp_counts_2w.reindex_axis(measles_data['AGE_GROUP'].unique(), axis=1).fillna(0).values.astype(int) assert I_obs.shape == (28, len(age_groups)) from pymc import rbeta plt.hist(rbeta(5, 100, 10000)) I_obs # Extract observed data obs_date = '1997-06-15' obs_index = sp_counts_2w.index <= obs_date I_obs_t = I_obs[obs_index] # Specify confirmation model confirmation = True from theano.printing import pp from pymc3 import * import theano.tensor as tt from theano import shared, scan from theano.tensor.nlinalg import matrix_inverse as inv invlogit = tt.nnet.sigmoid with Model() as model: n_periods, n_groups = I_obs_t.shape if confirmation: mu = Normal('mu', mu=0, tau=0.0001, shape=n_groups) sig = HalfCauchy('sig', 25, shape=n_groups, testval=np.ones(n_groups)) Tau = tt.diag(tt.pow(sig, -2)) # Age-specific probabilities of confirmation as multivariate normal # random variables beta_age = MvNormal('beta_age', mu=mu, tau=Tau, shape=n_groups) p_age = Deterministic('p_age', invlogit(beta_age)) p_confirm = invlogit(beta_age[age_index]) # Confirmation likelihood lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, observed=y) # Confirmed infecteds by age I_conf = Binomial('I_conf', I_obs_t, p_age, shape=I_obs_t.shape) I = I_conf else: I = I_obs_t # Transmission parameter beta = HalfCauchy('beta', 25, testval=9) ### Calculate initial number of susceptibles # Downsample annual series to observed age groups downsample = lambda x: np.array([x[s].mean() for s in age_slices]) A = Deterministic('A', 75./(beta - 1)) lt_sum = downsample(np.tril(FOI_mat).sum(0)[::-1]) natural_susc = tt.exp((-1/A) * lt_sum) total_susc = downsample(sia_susc) * downsample(vacc_susc) * natural_susc # Estimated total initial susceptibles, by age S_0 = Binomial('S_0', n=shared(N_age.values), p=total_susc, shape=N_age.shape) # Susceptibles at each time step (subtract cumulative cases from initial S) S = S_0 - I.cumsum(0) # Force of infection lam = tt.transpose(beta * I.sum(1) * tt.transpose(S / N_age.values)) # Likelihood of observed cases as function of FOI of prevous period like = Potential('like', dist_math.logpow(lam[:-1], I[1:]) - dist_math.factln(I[1:]) - lam[:-1]) with model: step1 = NUTS(vars=model.cont_vars) step2 = Metropolis(vars=model.disc_vars) trace = sample(2000, step=(step1, step2)) burn = 0 traceplot(trace[burn:], vars=['beta', 'A']) forestplot(trace[burn:], vars=['p_age']) forestplot(trace[burn:], vars=['S_0'])