#!/usr/bin/env python # coding: utf-8 # # Disease Outbreak Response Decision-making Under Uncertainty: A retrospective analysis of measles in Sao Paulo # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import numpy as np import numpy.ma as ma from datetime import datetime import matplotlib.pyplot as plt import seaborn as sb sb.set() import pdb from IPython.core.display import HTML def css_styling(): styles = open("styles/custom.css", "r").read() return HTML(styles) css_styling() # In[2]: data_dir = "data/" # Import outbreak data # In[3]: measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0) measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION) measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH) measles_data.ONSET = pd.to_datetime(measles_data.ONSET) # In[4]: measles_data = measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}}) # Sao Paulo population by district # In[5]: sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0) # In[6]: _names = sp_pop.index.values _names[_names=='BRASILANDIA'] = 'BRAZILANDIA' sp_pop.set_index(_names, inplace = True) # In[7]: sp_pop.head() # Plot of cumulative cases by district # In[8]: measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0) measles_onset_dist.cumsum().plot(legend=False, grid=False) # In[9]: total_district_cases = measles_onset_dist.sum() # Top 5 districts by number of cases # In[10]: totals = measles_onset_dist.sum() totals.sort(ascending=False) totals[:5] # Age distribution of cases, by confirmation status # In[11]: by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5)) # ### Vaccination Data # In[12]: vaccination_data = pd.read_csv('data/BrazilVaxRecords.csv', index_col=0) vaccination_data.head() # In[13]: vax_97 = np.r_[[0]*(1979-1921+1), vaccination_data.VAX[:17]] n = len(vax_97) FOI_mat = np.resize((1 - vax_97*0.9), (n,n)).T # In[14]: vacc_susc = (1 - vax_97*0.9)[::-1] vacc_susc[0] = 0.5 # In[15]: sia_susc = np.ones(len(vax_97)) birth_year = np.arange(1922, 1998)[::-1] by_mask = (birth_year > 1983) & (birth_year < 1992) sia_susc[by_mask] *= 0.2 # ## Stochastic Disease Transmission Model # # As a baseline for comparison, we can fit a model to all the clinically-confirmed cases, regardless of lab confirmation status. For this, we will use a simple SIR disease model, which will be fit using MCMC. # # This model fits the series of 2-week infection totals in each district $i$ as a set of Poisson models: # # \\[Pr(I(t)_{i} | \lambda(t)_i) = \text{Poisson}(\lambda(t)_i) \\] # # Where the outbreak intensity is modeled as: # # \\[\lambda(t)_i = \beta [I^{(w)}(t-1)_i]^{\alpha} S(t-1)_i\\] # # \\[\alpha \sim \text{Exp}(1)\\] # # We will assume here that the transmission rate is constant over time (and across districts): # # \\[\beta \sim \text{Gamma}(1, 0.1)\\] # # To account for the influence of infected individuals from neighboring districts on new infections, the outbreak intensity was modeled using a spatial-weighted average of infecteds across districts, where populations were weighted as an exponential function of the distance between district centroids: # # \\[w_{d} = \text{exp}(-\theta d)\\] # # \\[\theta \sim \text{Exp}(1)\\] # # ### Confirmation Sub-model # # Rather than assume all clinical cases are true cases, we can adjust the model to account for lab confirmation probability. This is done by including a sub-model that estimates age group-specific probabilities of confirmation, and using these probabilities to estimate the number of lab-confirmed cases. These estimates are then plugged into the model in place of the clinically-confirmed cases. # # We specified a structured confirmation model to retrospectively determine the age group-specific probabilities of lab confirmation for measles, conditional on clinical diagnosis. Individual lab confirmation events $c_i$ were modeled as Bernoulli random variables, with the probability of confirmation being allowed to vary by age group: # # $$c_i \sim \text{Bernoulli}(p_{a(i)})$$ # # where $a(i)$ denotes the appropriate age group for the individual indexed by i. There were 16 age groups, the first 15 of which were 5-year age intervals $[0,5), [5, 10), \ldots , [70, 75)$, with the 16th interval including all individuals 75 years and older. # # Since the age interval choices were arbitrary, and the confirmation probabilities of adjacent groups likely correlated, we modeled the correlation structure directly, using a multivariate logit-normal model. Specifically, we allowed first-order autocorrelation among the age groups, whereby the variance-covariance matrix retained a tridiagonal structure. # # $$\begin{aligned} # \Sigma = \left[{ # \begin{array}{c} # {\sigma^2} & {\sigma^2 \rho} & 0& \ldots & {0} & {0} \\ # {\sigma^2 \rho} & {\sigma^2} & \sigma^2 \rho & \ldots & {0} & {0} \\ # {0} & \sigma^2 \rho & {\sigma^2} & \ldots & {0} & {0} \\ # \vdots & \vdots & \vdots & & \vdots & \vdots\\ # {0} & {0} & 0 & \ldots & {\sigma^2} & \sigma^2 \rho \\ # {0} & {0} & 0 & \ldots & \sigma^2 \rho & {\sigma^2} # \end{array} # }\right] # \end{aligned}$$ # # From this, the confirmation probabilities were specified as multivariate normal on the inverse-logit scale. # # $$ \text{logit}(p_a) = \{a\} \sim N(\mu, \Sigma)$$ # # Priors for the confirmation sub-model were specified by: # # $$\begin{aligned} # \mu_i &\sim N(0, 100) \\ # \sigma &\sim \text{HalfCauchy}(25) \\ # \rho &\sim U(-1, 1) # \end{aligned}$$ # Age classes are defined in 5-year intervals. # In[16]: age_classes = [0,5,10,15,20,25,30,35,40,100] measles_data.dropna(subset=['YEAR_AGE'], inplace=True) measles_data['YEAR_AGE'] = measles_data.YEAR_AGE.astype(int) measles_data['AGE_GROUP'] = pd.cut(measles_data.AGE, age_classes, right=False) # Lab-checked observations are extracted for use in estimating lab confirmation probability. # In[17]: CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED' CLINICAL = measles_data.CONCLUSION == 'CLINICAL' DISCARDED = measles_data.CONCLUSION == 'DISCARDED' # Extract confirmed and clinical subset, with no missing county information. # In[18]: lab_subset = measles_data[(CONFIRMED | CLINICAL) & measles_data.COUNTY.notnull()].copy() # In[19]: age = lab_subset.YEAR_AGE.values ages = lab_subset.YEAR_AGE.unique() counties = lab_subset.COUNTY.unique() y = (lab_subset.CONCLUSION=='CONFIRMED').values # In[20]: _lab_subset = lab_subset.replace({"CONCLUSION": {"CLINICAL": "UNCONFIRMED"}}) by_conclusion = _lab_subset.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5), grid=False) # In[21]: lab_subset.shape # In[22]: y.sum() # Proportion of lab-confirmed cases older than 20 years # In[23]: (measles_data[CONFIRMED].YEAR_AGE>20).mean() # In[24]: age_classes # In[30]: age_group = pd.cut(age, age_classes, right=False) age_index = np.array([age_group.categories.tolist().index(i) for i in age_group]) age_groups = age_group.categories age_groups # In[31]: #Extract cases by age and time. age_group = pd.cut(age, age_classes, right=False) age_index = np.array([age_group.categories.tolist().index(i) for i in age_group]) # In[32]: age_group.categories # In[33]: age_slice_endpoints = [g[1:-1].split(',') for g in age_groups] age_slices = [slice(int(i[0]), int(i[1])) for i in age_slice_endpoints] # In[34]: # Get index from full crosstabulation to use as index for each district dates_index = measles_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().index # In[35]: unique_districts = measles_data.DISTRICT.dropna().unique() # In[36]: excludes = ['BOM RETIRO'] # In[37]: N = sp_pop.ix[unique_districts, 'Total'].dropna() N = N.drop(excludes).sum() N # Compile bi-weekly confirmed and unconfirmed data by Sao Paulo district # In[38]: sp_counts_2w = lab_subset.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).resample('2W', how='sum') # All confirmed cases, by district confirmed_data = lab_subset[lab_subset.CONCLUSION=='CONFIRMED'] confirmed_counts = confirmed_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).sum() all_confirmed_cases = confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique()).fillna(0) # In[39]: # Ensure the age groups are ordered I_obs = sp_counts_2w.reindex_axis(measles_data['AGE_GROUP'].unique(), axis=1).fillna(0).values.astype(int) # In[40]: I_obs.max() # In[41]: I_obs.sum() # In[42]: age_groups = np.sort(measles_data['AGE_GROUP'].unique()) age_groups # Check shape of data frame # # - 28 bi-monthly intervals, 9 age groups # In[43]: assert I_obs.shape == (28, len(age_groups)) # Prior distribution on susceptible proportion: # # $$p_s \sim \text{Beta}(2, 100)$$ # In[44]: from pymc import rbeta plt.hist(rbeta(2, 100, 10000)) # In[45]: I_obs # In[46]: obs_date = '1997-06-15' obs_index = sp_counts_2w.index <= obs_date I_obs_t = I_obs[obs_index] # In[47]: np.sum(I_obs_t, (0)) / float(I_obs_t.sum()) # In[48]: I_obs_t # In[49]: I_obs_t.sum((0,1)) # In[50]: I_obs_t.sum(1).cumsum() # In[150]: from pymc import MCMC, Matplot, AdaptiveMetropolis from pymc import (Uniform, DiscreteUniform, Beta, Binomial, Normal, CompletedDirichlet, Poisson, NegativeBinomial, negative_binomial_like, poisson_like, Lognormal, Exponential, binomial_like, TruncatedNormal, Binomial, Gamma, HalfCauchy, normal_like, MvNormalCov, Bernoulli, Uninformative, Multinomial, rmultinomial, rbinomial, Dirichlet, multinomial_like) from pymc import (Lambda, observed, invlogit, deterministic, potential, stochastic,) def measles_model(obs_date, confirmation=True, spatial_weighting=False, all_traces=False): n_periods, n_age_groups = I_obs.shape ### Confirmation sub-model if confirmation: # Specify priors on age-specific means age_classes = np.unique(age_index) mu = Normal("mu", mu=0, tau=0.0001, value=[0]*len(age_classes)) sig = HalfCauchy('sig', 0, 25, value=1) var = sig**2 cor = Uniform('cor', -1, 1, value=0) # Build variance-covariance matrix with first-order correlation # among age classes @deterministic def Sigma(var=var, cor=cor): I = np.eye(len(age_classes))*var E = np.diag(np.ones(len(age_classes)-1), k=-1)*var*cor return I + E + E.T # Age-specific probabilities of confirmation as multivariate normal # random variables beta_age = MvNormalCov("beta_age", mu=mu, C=Sigma, value=[1]*len(age_classes)) p_age = Lambda('p_age', lambda t=beta_age: invlogit(t)) @deterministic(trace=False) def p_confirm(beta=beta_age): return invlogit(beta[age_index]) # Confirmation likelihood lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, value=y, observed=True) ''' Truncate data at observation period ''' obs_index = sp_counts_2w.index <= obs_date I_obs_t = I_obs[obs_index] # Index for observation date, used to index out values of interest # from the model. t_obs = obs_index.sum() - 1 if confirmation: @stochastic(trace=all_traces, dtype=int) def I(value=(I_obs_t*0.5).astype(int), n=I_obs_t, p=p_age): # Binomial confirmation process return np.sum([binomial_like(xi, ni, p) for xi,ni in zip(value,n)]) age_dist_init = np.sum(I.value, 0)/ float(I.value.sum()) else: I = I_obs_t age_dist_init = np.sum(I, 0) / float(I.sum()) assert I.shape == (t_obs +1, n_age_groups) # Calcuate age distribution from observed distribution of infecteds to date _age_dist = Dirichlet('_age_dist', np.ones(n_age_groups), value=age_dist_init[:-1]/age_dist_init.sum()) age_dist = CompletedDirichlet('age_dist', _age_dist) @potential def age_dist_like(p=age_dist, I=I): return multinomial_like(I.sum(0), I.sum(), p) # age_dist = age_dist_init # Transmission parameter beta = Uniform('beta', 1, 100, value=10) # Downsample annual series to observed age groups downsample = lambda x: np.array([x[s].mean() for s in age_slices]) # Weakly-informative prior on proportion susceptible being # between 0 and 0.07 p_susceptible = Beta('p_susceptible', 2, 100) # Estimated total initial susceptibles S_0 = Binomial('S_0', n=int(N), p=p_susceptible) S = Lambda('S', lambda I=I, S_0=S_0: S_0 - I.cumsum(axis=0)) # Check shape assert S.value.shape == (t_obs+1., n_age_groups) S_t = Lambda('S_t', lambda S=S: S[-1]) # Susceptibles at time t, by age S_age = Multinomial('S_age', S_t, age_dist) # Force of infection @deterministic def lam(beta=beta, I=I, S=S, N=N): return beta * I.sum(axis=1) * S.sum(axis=1) / N # Check shape assert lam.value.shape == (t_obs+1,) # n_age_groups) # FOI in observation period lam_t = Lambda('lam_t', lambda lam=lam: lam[-1]) # Poisson likelihood for observed cases @potential def new_cases(I=I, lam=lam): # return negative_binomial_like(I[1:].sum(1), lam[:-1], I[:-1].sum(1)) return poisson_like(I.sum(axis=1), lam) ''' Vaccination targets ''' @deterministic def vacc_5(S=S_age): # Vaccination of 5 and under p = [0.95] + [0]*(n_age_groups - 1) return rbinomial(S, p) # Proportion of susceptibles vaccinated pct_5 = Lambda('pct_5', lambda V=vacc_5, S=S_age: V.sum()/S.sum()) @deterministic def vacc_15(S=S_age): # Vaccination of 15 and under p = [0.95]*3 + [0]*(n_age_groups - 3) return rbinomial(S, p) # Proportion of susceptibles vaccinated pct_15 = Lambda('pct_15', lambda V=vacc_15, S=S_age: V.sum()/S.sum()) @deterministic def vacc_30(S=S_age): # Vaccination of 30 and under p = [0.95]*6 + [0]*(n_age_groups - 6) return rbinomial(S, p) # Proportion of 30 and under susceptibles vaccinated pct_30 = Lambda('pct_30', lambda V=vacc_30, S=S_age: V.sum()/S.sum()) @deterministic def vacc_adult(S=S_age): # Vaccination of adults under 30 (and young kids) p = [0.95, 0, 0, 0, 0.95, 0.95] + [0]*(n_age_groups - 6) return rbinomial(S, p) # Proportion of adults under 30 (and young kids) pct_adult = Lambda('pct_adult', lambda V=vacc_adult, S=S_age: V.sum()/S.sum()) return locals() # Run models for June 15 and July 15 observation points, both with and without clinical confirmation. # In[151]: db = 'ram' n_iterations = 100000 n_burn = 50000 # June 15, with lab confirmation # In[152]: model = measles_model # In[153]: model_june = MCMC(model('1997-06-15'), db=db, dbname='model_june') # model_june.use_step_method(AdaptiveMetropolis, model_june._age_dist) # model_june.use_step_method(AdaptiveMetropolis, [model_june.beta, # model_june.p_susceptible]) # In[154]: model_june.sample(n_iterations, n_burn) # July 15, with lab confirmation # In[155]: model_july = MCMC(model('1997-07-15'), db=db, dbname='model_july') # model_july.use_step_method(AdaptiveMetropolis, model_july.age_dist) # model_july.use_step_method(AdaptiveMetropolis, [model_july.beta, model_july.p_susceptible]) # In[156]: model_july.sample(n_iterations, n_burn) # June 15, no lab confirmation # In[170]: model_june_noconf = MCMC(model('1997-06-15', confirmation=False), db=db, dbname='model_june_noconf') # model_june_noconf.use_step_method(AdaptiveMetropolis, [model_june_noconf.β, model_june_noconf.p_susceptible]) # model_june_noconf.use_step_method(AdaptiveMetropolis, model_june_noconf.age_dist) # In[171]: model_june_noconf.sample(n_iterations, n_burn) # July 15, no lab confirmation # In[172]: model_july_noconf = MCMC(model('1997-07-15', confirmation=False), db=db, dbname='model_july_noconf') # In[173]: model_july_noconf.sample(n_iterations, n_burn) # ## Summary of model output # # Distance weighting parameter for june model with confirmation # In[160]: Matplot.plot(model_june.p_susceptible) # In[161]: Matplot.plot(model_june.beta) # Lab confirmation rates, June model # In[162]: p_age = pd.DataFrame(model_june.p_age.trace(), columns=age_groups) f, axes = plt.subplots(figsize=(14,6)) sb.boxplot(data=p_age, linewidth=0.3, fliersize=0, ax=axes, color=sb.color_palette("coolwarm", 5)[0], order=age_group.categories) axes.set_ylabel('Confirmation rate') axes.set_xlabel('Age group') # Proportion of population susceptible, June model. # In[163]: Matplot.plot(model_july.beta) # Proportion of population susceptible, June model with no confirmation correction # In[174]: Matplot.plot(model_june_noconf.p_susceptible) # Epidemic intensity estimates at June and July, per district. # In[175]: Matplot.plot(model_june.lam_t) # In[168]: Matplot.plot(model_july.lam_t) # Epidemic intensity for lab- versus clinical-confirmation models # In[176]: lam_june = model_june.lam.stats() fig, axes = plt.subplots(2, 1, sharey=True) axes[0].plot(lam_june['quantiles'][50].T, 'b-', alpha=0.4) axes[0].set_ylabel('Epidemic intensity') axes[0].set_xlabel('time (2-week periods)') axes[0].set_title('Lab confirmation') lam_june_noconf = model_june_noconf.lam.stats() axes[1].plot(lam_june_noconf['quantiles'][50].T, 'b-', alpha=0.4) axes[1].set_ylabel('Epidemic intensity') axes[1].set_xlabel('time (2-week periods)') axes[1].set_title('Clinical confirmation') plt.tight_layout() # In[177]: model_june.S_age.trace()[0] # In[178]: S_age_june = pd.DataFrame(model_june.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index() S_age_june.columns = 'Age', 'Iteration', 'S' S_age_june['Confirmation'] = 'Lab' # In[179]: S_age_june = pd.DataFrame(model_june.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index() S_age_june.columns = 'Age', 'Iteration', 'S' S_age_june['Confirmation'] = 'Lab' S_age_june_noconf = pd.DataFrame(model_june_noconf.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index() S_age_june_noconf.columns = 'Age', 'Iteration', 'S' S_age_june_noconf['Confirmation'] = 'Clinical' S_age_june = pd.concat([S_age_june, S_age_june_noconf], ignore_index=True) # In[180]: S_age_july = pd.DataFrame(model_july.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index() S_age_july.columns = 'Age', 'Iteration', 'S' S_age_july['Confirmation'] = 'Lab' S_age_july_noconf = pd.DataFrame(model_july_noconf.S_age.trace().squeeze(), columns=age_groups).unstack().reset_index() S_age_july_noconf.columns = 'Age', 'Iteration', 'S' S_age_july_noconf['Confirmation'] = 'Clinical' S_age_july = pd.concat([S_age_july, S_age_july_noconf], ignore_index=True) # Numbers of suscepibles in each age group, under lab vs clinical confirmation # In[181]: g = sb.factorplot("Age", "S", "Confirmation", S_age_june, kind="box", palette="hls", size=6, aspect=2, linewidth=0.3, fliersize=0, order=age_group.categories) g.despine(offset=10, trim=True) g.set_axis_labels("Age Group", "Susceptibles"); # In[182]: june_lam = pd.DataFrame(model_june.lam_t.trace()).unstack().reset_index() june_lam.columns = ('district', 'iteration', 'λ') june_lam['month'] = 'June' # In[183]: june_lam_noconf = pd.DataFrame(model_june_noconf.lam_t.trace()).unstack().reset_index() june_lam_noconf.columns = ('district', 'iteration', 'λ') june_lam_noconf['month'] = 'June' # In[184]: july_lam = pd.DataFrame(model_july.lam_t.trace()).unstack().reset_index() july_lam.columns = ('district', 'iteration', 'λ') july_lam['month'] = 'July' # In[185]: july_lam_noconf = pd.DataFrame(model_july_noconf.lam_t.trace()).unstack().reset_index() july_lam_noconf.columns = ('district', 'iteration', 'λ') july_lam_noconf['month'] = 'July' # In[186]: confirmed_lam = june_lam.append(july_lam, ignore_index=True) # In[187]: june_means = june_lam.groupby('district')['λ'].mean() june_means.sort(ascending=False) # In[188]: july_means = july_lam.groupby('district')['λ'].mean() july_means.sort(ascending=False) # In[ ]: sorted_districts = june_means.index.values # Epidemic intensity by district in June and July (with lab confirmation), sorted by June means. # In[ ]: sb.set_context("talk", font_scale=0.8) f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('June') ax_1.set_title(r'Epidemic intensity (λ) estimates, ordered by June means') sb.boxplot('district', 'λ', data=july_lam, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2) ax_2.set_xticks([]) ax_2.set_ylabel('July') f.tight_layout() # Epidemic intensity by district in June for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means. # In[ ]: f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=june_means.index.values) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('Lab') ax_1.set_title(r'June epidemic intensity (λ) estimates, ordered by lab-confirmed means') sb.boxplot('district', 'λ', data=june_lam_noconf, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=june_means.index.values) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_2.set_xticks([]) ax_2.set_ylabel('Clinical') f.tight_layout() # Epidemic intensity by district in July for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means. # In[ ]: july_means = july_lam.groupby('district')['λ'].mean() july_means.sort(ascending=False) # In[ ]: f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=july_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=july_means.index.values) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('Lab') # ax_1.set_yticks(np.arange(13, step=2)) ax_1.set_title(r'July epidemic intensity (λ) estimates, ordered by lab-confirmed means') sb.boxplot('district', 'λ', data=july_lam_noconf, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_2.set_xticks([]) ax_2.set_ylabel('Clinical') f.tight_layout() # In[189]: model_june.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[190]: june_coverage = pd.DataFrame({name: model_june.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) june_coverage['Month'] = 'June' june_coverage['Confirmation'] = 'Lab' # In[191]: june_noconf_coverage = pd.DataFrame({name: model_june_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) june_noconf_coverage['Month'] = 'June' june_noconf_coverage['Confirmation'] = 'Clinical' july_coverage = pd.DataFrame({name: model_july.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) july_coverage['Month'] = 'July' july_coverage['Confirmation'] = 'Lab' july_noconf_coverage = pd.DataFrame({name: model_july_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) july_noconf_coverage['Month'] = 'July' july_noconf_coverage['Confirmation'] = 'Clinical' # In[192]: coverage = pd.concat([june_coverage, june_noconf_coverage, july_coverage, july_noconf_coverage], ignore_index=True) # In[193]: sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box', row_order=['June', 'July'], order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True) # In[194]: sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box', row_order=['June', 'July'], order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True) # In[195]: axes = sb.boxplot(data=june_coverage, order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], color=sb.color_palette("coolwarm", 5)[0]) axes.set_xticklabels(['Under 5', 'Under 15', 'Under 30', 'Under 5 + 20-30']) axes.set_ylabel('% susceptibles vaccinated') sb.despine(offset=10, trim=True) # In[196]: model_june_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[197]: model_july.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[198]: model_july_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[199]: Matplot.summary_plot(model_june.p_age) # ## Mapping spatial effects # In[ ]: from mpl_toolkits.basemap import Basemap import geopandas as gpd lllat=-24 urlat=-23.3 lllon=-47 urlon=-46.3 SP_base = Basemap(ax=None, lon_0=(urlon + lllon) / 2, lat_0=(urlat + lllat) / 2, llcrnrlat=lllat, urcrnrlat=urlat, llcrnrlon=lllon, urcrnrlon=urlon, resolution='i', epsg='4326') # In[ ]: SP_dist = gpd.GeoDataFrame.from_file('Sao Paulo/Brazil_full/BRA_adm3.shp').to_crs({'proj': 'longlat', 'ellps': 'WGS84', 'datum': 'WGS84'}) # In[ ]: SP_dist['DIST_NAME'] = [trans.trans(_).upper() for _ in SP_dist.NAME_3] # In[ ]: λ_june = pd.Series(model_june.λ_t.stats()['mean'], index=sp_districts) # In[ ]: λ_june # In[ ]: SP_dist_merged = SP_dist.merge(pd.DataFrame(λ_june, columns=['λ']), left_on='DIST_NAME', right_index=True) # In[ ]: measles_onset_conf = measles_data[CONFIRMED].groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0).sum() # In[ ]: measles_onset_conf # In[ ]: _rates = measles_onset_conf/sp_pop.sum(1) # In[ ]: SP_dist_conf = SP_dist.merge(pd.DataFrame(_rates, columns=['rate']), left_on='DIST_NAME', right_index=True) # Estimated expected value for infecteds, by district # In[ ]: from matplotlib.pyplot import cm map_fig = plt.figure(figsize=(16,12)) map_ax = plt.gca() SP_base.drawcoastlines() SP_base.drawrivers() SP_dist_merged.plot(column='λ', colormap=cm.Reds, axes=map_ax) # Observed confirmed cases, by district # In[ ]: map_fig = plt.figure(figsize=(16,12)) map_ax = plt.gca() SP_base.drawcoastlines() SP_base.drawrivers() SP_dist_conf.plot(column='rate', colormap=cm.Reds, axes=map_ax)