#!/usr/bin/env python # coding: utf-8 # # Disease Outbreak Response Decision-making Under Uncertainty: A retrospective analysis of measles in Sao Paulo # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import pandas as pd import numpy as np import numpy.ma as ma from datetime import datetime import matplotlib.pyplot as plt import pdb from IPython.core.display import HTML def css_styling(): styles = open("styles/custom.css", "r").read() return HTML(styles) css_styling() # In[2]: data_dir = "data/" # Import outbreak data # In[3]: measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0) measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION) measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH) measles_data.ONSET = pd.to_datetime(measles_data.ONSET) # In[4]: measles_data = measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}}) # Sao Paulo population by district # In[5]: sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0) # In[6]: _names = sp_pop.index.values _names[_names=='BRASILANDIA'] = 'BRAZILANDIA' sp_pop.set_index(_names, inplace = True) # In[7]: sp_pop.head() # Plot of cumulative cases by district # In[8]: measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0) measles_onset_dist.cumsum().plot(legend=False, grid=False) # In[9]: total_district_cases = measles_onset_dist.sum() # Top 5 districts by number of cases # In[10]: totals = measles_onset_dist.sum() totals.sort(ascending=False) totals[:5] # Age distribution of cases, by confirmation status # In[11]: by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5)) # ## Stochastic Disease Transmission Model # # As a baseline for comparison, we can fit a model to all the clinically-confirmed cases, regardless of lab confirmation status. For this, we will use a simple SIR disease model, which will be fit using MCMC. # # This model fits the series of 2-week infection totals in each district $i$ as a set of Poisson models: # # \\[Pr(I(t)_{i} | \lambda(t)_i) = \text{Poisson}(\lambda(t)_i) \\] # # Where the outbreak intensity is modeled as: # # \\[\lambda(t)_i = \beta [I^{(w)}(t-1)_i]^{\alpha} S(t-1)_i\\] # # \\[\alpha \sim \text{Exp}(1)\\] # # We will assume here that the transmission rate is constant over time (and across districts): # # \\[\beta \sim \text{Gamma}(1, 0.1)\\] # # To account for the influence of infected individuals from neighboring districts on new infections, the outbreak intensity was modeled using a spatial-weighted average of infecteds across districts, where populations were weighted as an exponential function of the distance between district centroids: # # \\[w_{d} = \text{exp}(-\theta d)\\] # # \\[\theta \sim \text{Exp}(1)\\] # # ### Confirmation Sub-model # # Rather than assume all clinical cases are true cases, we can adjust the model to account for lab confirmation probability. This is done by including a sub-model that estimates age group-specific probabilities of confirmation, and using these probabilities to estimate the number of lab-confirmed cases. These estimates are then plugged into the model in place of the clinically-confirmed cases. # # We specified a structured confirmation model to retrospectively determine the age group-specific probabilities of lab confirmation for measles, conditional on clinical diagnosis. Individual lab confirmation events $c_i$ were modeled as Bernoulli random variables, with the probability of confirmation being allowed to vary by age group: # # $$c_i \sim \text{Bernoulli}(p_{a(i)})$$ # # where $a(i)$ denotes the appropriate age group for the individual indexed by i. There were 16 age groups, the first 15 of which were 5-year age intervals $[0,5), [5, 10), \ldots , [70, 75)$, with the 16th interval including all individuals 75 years and older. # # Since the age interval choices were arbitrary, and the confirmation probabilities of adjacent groups likely correlated, we modeled the correlation structure directly, using a multivariate logit-normal model. Specifically, we allowed first-order autocorrelation among the age groups, whereby the variance-covariance matrix retained a tridiagonal structure. # # $$\begin{aligned} # \Sigma = \left[{ # \begin{array}{c} # {\sigma^2} & {\sigma^2 \rho} & 0& \ldots & {0} & {0} \\ # {\sigma^2 \rho} & {\sigma^2} & \sigma^2 \rho & \ldots & {0} & {0} \\ # {0} & \sigma^2 \rho & {\sigma^2} & \ldots & {0} & {0} \\ # \vdots & \vdots & \vdots & & \vdots & \vdots\\ # {0} & {0} & 0 & \ldots & {\sigma^2} & \sigma^2 \rho \\ # {0} & {0} & 0 & \ldots & \sigma^2 \rho & {\sigma^2} # \end{array} # }\right] # \end{aligned}$$ # # From this, the confirmation probabilities were specified as multivariate normal on the inverse-logit scale. # # $$ \text{logit}(p_a) = \{a\} \sim N(\mu, \Sigma)$$ # # Priors for the confirmation sub-model were specified by: # # $$\begin{aligned} # \mu_i &\sim N(0, 100) \\ # \sigma &\sim \text{HalfCauchy}(25) \\ # \rho &\sim U(-1, 1) # \end{aligned}$$ # Age classes are defined in 5-year intervals. # In[19]: age_classes = [0,5,10,15,20,25,30,35,40,100] measles_data.dropna(subset=['YEAR_AGE'], inplace=True) measles_data['YEAR_AGE'] = measles_data.YEAR_AGE.astype(int) measles_data['AGE_GROUP'] = pd.cut(measles_data.AGE, age_classes, right=False) # Lab-checked observations are extracted for use in estimating lab confirmation probability. # In[20]: CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED' CLINICAL = measles_data.CONCLUSION == 'CLINICAL' DISCARDED = measles_data.CONCLUSION == 'DISCARDED' # Extract confirmed and clinical subset, with no missing county information. # In[21]: lab_subset = measles_data[(CONFIRMED | CLINICAL) & measles_data.COUNTY.notnull()].copy() # In[22]: age = lab_subset.YEAR_AGE.values ages = lab_subset.YEAR_AGE.unique() counties = lab_subset.COUNTY.unique() y = (lab_subset.CONCLUSION=='CONFIRMED').values # In[23]: _lab_subset = lab_subset.replace({"CONCLUSION": {"CLINICAL": "UNCONFIRMED"}}) by_conclusion = _lab_subset.groupby(["YEAR_AGE", "CONCLUSION"]) counts_by_cause = by_conclusion.size().unstack().fillna(0) ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5), grid=False) # In[24]: lab_subset.shape # In[25]: y.sum() # Proportion of lab-confirmed cases older than 20 years # In[26]: (measles_data[CONFIRMED].YEAR_AGE>20).mean() # In[27]: age_classes # In[28]: #Extract cases by age and time. age_group = pd.cut(age, age_classes, right=False) age_index = np.array([age_group.categories.tolist().index(i) for i in age_group]) # In[29]: age_group.categories # In[30]: # Get index from full crosstabulation to use as index for each district dates_index = measles_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().index # In[31]: unique_districts = measles_data.DISTRICT.dropna().unique() # In[32]: excludes = ['BOM RETIRO'] # In[33]: N = sp_pop.ix[unique_districts, 'Total'].dropna() N = N.drop(excludes) # In[34]: sp_districts = N.index.values len(sp_districts) # Compile bi-weekly confirmed and unconfirmed data by Sao Paulo district # In[35]: all_district_data = [] all_confirmed_cases = [] for d in sp_districts: # All bi-weekly unconfirmed and confirmed cases district_data = lab_subset[lab_subset.DISTRICT==d] district_counts_2w = district_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).resample('2W', how='sum') all_district_data.append(district_counts_2w) # All confirmed cases, by district confirmed_data = district_data[district_data.CONCLUSION=='CONFIRMED'] confirmed_counts = confirmed_data.groupby( ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).sum() all_confirmed_cases.append(confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique()).fillna(0)) # Time series of cases by district, summarized in 2-week intervals # In[36]: # Sum over ages for susceptibles sp_cases_2w = [dist.sum(1) for dist in all_district_data] len(sp_cases_2w) # In[37]: # Ensure the age groups are ordered I_obs = np.array([dist.reindex_axis(measles_data['AGE_GROUP'].unique(), axis=1).fillna(0).values.astype(int) for dist in all_district_data]) # In[38]: I_obs.max() # In[39]: I_obs.sum() # In[40]: age_groups = np.sort(measles_data['AGE_GROUP'].unique()) age_groups # Check shape of data frame # # - 92 districts, 28 bi-monthly intervals, 9 age groups # In[42]: assert I_obs.shape == (92, 28, len(age_groups)) # ### Spatial distance between districts # In[43]: import geopandas as gpd shp = gpd.GeoDataFrame.from_file("Sao Paulo/Brazil_full/BRA_adm3.shp") # In[44]: district_names = N.index.unique() # In[45]: import trans shp['district_name'] = shp.NAME_3.apply( lambda x: trans.trans(x).upper()) # In[46]: sp_shp = shp[shp.NAME_2=='São Paulo'].set_index('district_name') # In[47]: centroids = sp_shp.geometry.centroid # In[48]: distance_matrix = pd.concat([sp_shp.geometry.distance(o) for o in sp_shp.geometry], axis=1) distance_matrix.columns = sp_shp.index # In[49]: assert (distance_matrix.index == centroids.index).all() # In[50]: distance_matrix = distance_matrix.ix[sp_districts, sp_districts] # In[51]: assert not distance_matrix.isnull().values.sum() # In[52]: min_x, min_y = sp_shp.bounds.min()[:2] max_x, max_y = sp_shp.bounds.max()[2:] # In[53]: centroid_xy = np.array([[c.x, c.y] for c in sp_shp.geometry.centroid]) # Here is an arbitrary distance metric for an arbitrary district, as an example. # In[54]: _beta = -1 np.exp(_beta*distance_matrix).values.round(2)[0] # Specifying a neighborhood, based on shared borders. This can be used to develop a conditional autoregressive (CAR) model. # In[55]: neighbors = np.array([sp_shp.ix[district_names].geometry.touches(v).values for i, v in sp_shp.ix[district_names].geometry.iteritems()]) # In[56]: lon = centroids.ix[district_names].apply(lambda x: x.x) lat = centroids.ix[district_names].apply(lambda x: x.y) coords = pd.DataFrame({'x': lon.values-lon.mean(), 'y': lat.values-lat.mean()}, index=lon.index) # Prior distribution on susceptible proportion: # # $$p_s \sim \text{Beta}(2, 100)$$ # In[57]: from pymc import rbeta plt.hist(rbeta(2, 100, 10000)) # In[58]: obs_date = '1997-06-15' obs_index = all_district_data[0].index <= obs_date I_obs_t = np.array([I_dist[obs_index] for I_dist in I_obs]) # In[59]: np.sum(I_obs_t, (0,1)) / I_obs_t.sum() # In[60]: I_obs_t.sum((0,1)) # In[150]: get_ipython().run_line_magic('pinfo', 'negative_binomial_like') # In[250]: N.shape # In[480]: from pymc import MCMC, Matplot from pymc import (Uniform, DiscreteUniform, Beta, Lambda, Binomial, Normal, Poisson, NegativeBinomial, observed, negative_binomial_like, poisson_like, Lognormal, Exponential, binomial_like, stochastic, potential, invlogit, TruncatedNormal, Binomial, Gamma, HalfCauchy, normal_like, deterministic, MvNormalCov, Bernoulli, potential, Uninformative, Multinomial, rmultinomial, rbinomial, AdaptiveMetropolis, Dirichlet, multinomial_like) def measles_model(obs_date, confirmation=True, spatial_weighting=False, all_traces=False): n_districts, n_periods, n_age_groups = I_obs.shape ### Confirmation sub-model if confirmation: # Specify priors on age-specific means age_classes = np.unique(age_index) mu = Normal("mu", mu=0, tau=0.0001, value=[0]*len(age_classes)) sig = HalfCauchy('sig', 0, 25, value=1) var = sig**2 cor = Uniform('cor', -1, 1, value=0) # Build variance-covariance matrix with first-order correlation # among age classes @deterministic def Sigma(var=var, cor=cor): I = np.eye(len(age_classes))*var E = np.diag(np.ones(len(age_classes)-1), k=-1)*var*cor return I + E + E.T # Age-specific probabilities of confirmation as multivariate normal # random variables beta_age = MvNormalCov("beta_age", mu=mu, C=Sigma, value=[1]*len(age_classes)) p_age = Lambda('p_age', lambda t=beta_age: invlogit(t)) @deterministic(trace=False) def p_confirm(beta=beta_age): return invlogit(beta[age_index]) # Confirmation likelihood lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, value=y, observed=True) ''' Truncate data at observation period ''' obs_index = all_district_data[0].index <= obs_date I_obs_t = np.array([I_dist[obs_index] for I_dist in I_obs]) # Index for observation date, used to index out values of interest # from the model. t_obs = obs_index.sum() - 1 if confirmation: @stochastic(trace=all_traces, dtype=int) def I(value=(I_obs_t).astype(int), n=I_obs_t, p=p_age): # Binomial confirmation process: confirm by age, then re-combine return np.sum([[binomial_like(value[d,:,a], n[d,:,a], p[a]) for a in range(n_age_groups)] for d in range(n_districts)]) age_dist_init = np.sum(I.value, (0,1))/I.value.sum() else: I = I_obs_t age_dist_init = np.sum(I, (0,1))/I.sum() assert I.shape == (n_districts, t_obs +1, n_age_groups) # Calcuate age distribution from observed distribution of infecteds to date age_dist = Dirichlet('age_dist', np.ones(n_age_groups), value=age_dist_init[:-1]/age_dist_init.sum()) @potential def age_dist_like(p=age_dist, I=I): p = np.append(p, 1-p.sum()) return sum([multinomial_like(I_dist.sum(0), I_dist.sum(), p) for I_dist in I]) # Weakly-informative prior on proportion susceptible being # between 0 and 0.07 p_susceptible = Beta('p_susceptible', 2, 100, value=0.09) # Estimated total initial susceptibles by district S_0 = Binomial('S_0', n=N.values.astype(int), p=p_susceptible) @deterministic(trace=all_traces) def S(I=I, S_0=S_0): # Calculate susceptibles from total number of infections return np.array([S_0[d] - np.array([I[d,:t].sum() for t in range(t_obs+1)]) for d in range(n_districts)]) # Check shape assert S.value.shape == (n_districts, t_obs+1) # Susceptibles at time t, by age @deterministic def S_age(S=S, p=age_dist): p = np.append(p, 1-p.sum()) return np.array([rmultinomial(s[-1], p) for s in S]) assert S_age.value.shape == (n_districts, n_age_groups) # Transmission parameter β = Uniform('β', 1, 50, value=10) if spatial_weighting: θ = Exponential('θ', 1, value=0.01) @deterministic def Iw(I=I, θ=θ): # Distance-weighted infecteds return np.transpose([np.exp(-θ*distance_matrix.values).dot(I_t) for I_t in I.sum(2).T]) # Distance-weighted population Nw = Lambda('Nw', lambda θ=θ: np.exp(-θ*distance_matrix.values).dot(N.values)) else: Iw = Lambda('Iw', lambda I=I: I.sum(2)) Nw = N # Check shape assert Iw.value.shape == (n_districts, t_obs+1) # α = Exponential('α', 1, value=1) # Force of infection @deterministic def λ(β=β, I=Iw, S=S, N=Nw): #return np.array([β*((I_d+0.1)**α)*S_d for I_d, S_d in zip(I,S)]) # return np.array([β*(I_d**α)*S_d * np.exp(ϕ_d) for I_d, S_d, ϕ_d in zip(I,S,ϕ)]) return β * (I+0.01) * S / N[:, np.newaxis] # Check shape assert λ.value.shape == (n_districts, t_obs+1) # FOI in observation period λ_t = Lambda('λ_t', lambda λ=λ: λ[:, -1]) # Poisson likelihood for observed cases @potential def new_cases(I=I, λ=λ): x = I.sum(2) return negative_binomial_like(x[:,1:], λ[:, :-1], x[:,:-1]+1) # return poisson_like(I.sum(2)[:,1:], λ[:, :-1]) ''' Vaccination targets ''' @deterministic def vacc_5(S=S_age): # Vaccination of 15 and under p = [0.95] + [0]*(n_age_groups-1) return rbinomial(S.sum(0), p) # Proportion of susceptibles vaccinated pct_5 = Lambda('pct_5', lambda V=vacc_5, S=S_age: float(V.sum())/S.sum()) @deterministic def vacc_15(S=S_age): # Vaccination of 15 and under p = [0.95]*3 + [0]*(n_age_groups-3) return rbinomial(S.sum(0), p) # Proportion of susceptibles vaccinated pct_15 = Lambda('pct_15', lambda V=vacc_15, S=S_age: float(V.sum())/S.sum()) @deterministic def vacc_30(S=S_age): # Vaccination of 30 and under p = [0.95]*6 + [0]*(n_age_groups-6) return rbinomial(S.sum(0), p) # Proportion of 30 and under susceptibles vaccinated pct_30 = Lambda('pct_30', lambda V=vacc_30, S=S_age: float(V.sum())/S.sum()) @deterministic def vacc_adult(S=S_age): # Vaccination of adults under 30 (and young kids) p = [0.95, 0, 0, 0, 0.95, 0.95] + [0]*(n_age_groups-6) return rbinomial(S.sum(0), p) # Proportion of adults under 30 (and young kids) pct_adult = Lambda('pct_adult', lambda V=vacc_adult, S=S_age: float(V.sum())/S.sum()) return locals() # Run models for June 15 and July 15 observation points, both with and without clinical confirmation. # In[481]: db = 'ram' n_iterations = 50000 n_burn = 10000 # June 15, with lab confirmation # In[482]: model = measles_model # In[483]: model_june = MCMC(model('1997-06-15'), db=db, dbname='model_june') model_june.use_step_method(AdaptiveMetropolis, model_june.age_dist) model_june.use_step_method(AdaptiveMetropolis, [model_june.β, model_june.p_susceptible]) # In[484]: model_june.sample(n_iterations, n_burn) # July 15, with lab confirmation # In[485]: model_july = MCMC(model('1997-07-15'), db=db, dbname='model_july') model_july.use_step_method(AdaptiveMetropolis, model_july.age_dist) model_july.use_step_method(AdaptiveMetropolis, [model_july.β, model_july.p_susceptible]) # In[486]: model_july.sample(n_iterations, n_burn) # June 15, no lab confirmation # In[487]: model_june_noconf = MCMC(model('1997-06-15', confirmation=False), db=db, dbname='model_june_noconf') model_june_noconf.use_step_method(AdaptiveMetropolis, [model_june_noconf.β, model_june_noconf.p_susceptible]) model_june_noconf.use_step_method(AdaptiveMetropolis, model_june_noconf.age_dist) # In[488]: model_june_noconf.sample(n_iterations, n_burn) # July 15, no lab confirmation # In[489]: model_july_noconf = MCMC(model('1997-07-15', confirmation=False), db=db, dbname='model_july_noconf') model_july_noconf.use_step_method(AdaptiveMetropolis, [model_july_noconf.β, model_july_noconf.p_susceptible]) model_july_noconf.use_step_method(AdaptiveMetropolis, model_july_noconf.age_dist) # In[490]: model_july_noconf.sample(n_iterations, n_burn) # ## Summary of model output # # Distance weighting parameter for june model with confirmation # In[491]: Matplot.summary_plot(model_june.S_0) # In[492]: Matplot.plot(model_june.θ) # Lab confirmation rates, June model # In[493]: import seaborn as sb p_age = pd.DataFrame(model_june.p_age.trace(), columns=age_groups) f, axes = plt.subplots(figsize=(14,6)) sb.boxplot(data=p_age, linewidth=0.3, fliersize=0, ax=axes, color=sb.color_palette("coolwarm", 5)[0], order=age_group.categories) axes.set_ylabel('Confirmation rate') axes.set_xlabel('Age group') # Proportion of population susceptible, June model. # In[494]: Matplot.plot(model_july.β) # Proportion of population susceptible, June model with no confirmation correction # In[495]: Matplot.plot(model_june_noconf.p_susceptible) # Epidemic intensity estimates at June and July, per district. # In[496]: Matplot.summary_plot(model_june.λ_t) # In[497]: Matplot.summary_plot(model_july.λ_t) # Epidemic intensity for lab- versus clinical-confirmation models # In[498]: lam_june = model_june.λ.stats() fig, axes = plt.subplots(2, 1, sharey=True) axes[0].plot(lam_june['quantiles'][50].T, 'b-', alpha=0.4) axes[0].set_ylabel('Epidemic intensity') axes[0].set_xlabel('time (2-week periods)') axes[0].set_title('Lab confirmation') lam_june_noconf = model_june_noconf.λ.stats() axes[1].plot(lam_june_noconf['quantiles'][50].T, 'b-', alpha=0.4) axes[1].set_ylabel('Epidemic intensity') axes[1].set_xlabel('time (2-week periods)') axes[1].set_title('Clinical confirmation') # In[499]: S_age_june = pd.DataFrame(model_june.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index() S_age_june.columns = 'Age', 'Iteration', 'S' S_age_june['Confirmation'] = 'Lab' # In[500]: S_age_june = pd.DataFrame(model_june.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index() S_age_june.columns = 'Age', 'Iteration', 'S' S_age_june['Confirmation'] = 'Lab' S_age_june_noconf = pd.DataFrame(model_june_noconf.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index() S_age_june_noconf.columns = 'Age', 'Iteration', 'S' S_age_june_noconf['Confirmation'] = 'Clinical' S_age_june = pd.concat([S_age_june, S_age_june_noconf], ignore_index=True) # In[501]: S_age_july = pd.DataFrame(model_july.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index() S_age_july.columns = 'Age', 'Iteration', 'S' S_age_july['Confirmation'] = 'Lab' S_age_july_noconf = pd.DataFrame(model_july_noconf.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index() S_age_july_noconf.columns = 'Age', 'Iteration', 'S' S_age_july_noconf['Confirmation'] = 'Clinical' S_age_july = pd.concat([S_age_july, S_age_july_noconf], ignore_index=True) # Numbers of suscepibles in each age group, under lab vs clinical confirmation # In[502]: import seaborn as sb sb.set_context("talk", font_scale=0.8) sb.set_style("white") g = sb.factorplot("Age", "S", "Confirmation", S_age_june, kind="box", palette="hls", size=6, aspect=2, linewidth=0.3, fliersize=0, order=age_group.categories) g.despine(offset=10, trim=True) g.set_axis_labels("Age Group", "Susceptibles"); # In[503]: june_lam = pd.DataFrame(model_june.λ_t.trace()).unstack().reset_index() june_lam.columns = ('district', 'iteration', 'λ') june_lam['month'] = 'June' # In[504]: june_lam_noconf = pd.DataFrame(model_june_noconf.λ_t.trace()).unstack().reset_index() june_lam_noconf.columns = ('district', 'iteration', 'λ') june_lam_noconf['month'] = 'June' # In[505]: july_lam = pd.DataFrame(model_july.λ_t.trace()).unstack().reset_index() july_lam.columns = ('district', 'iteration', 'λ') july_lam['month'] = 'July' # In[506]: model_july.S.value.min() # In[507]: july_lam_noconf = pd.DataFrame(model_july_noconf.λ_t.trace()).unstack().reset_index() july_lam_noconf.columns = ('district', 'iteration', 'λ') july_lam_noconf['month'] = 'July' # In[508]: confirmed_lam = june_lam.append(july_lam, ignore_index=True) # In[509]: june_means = june_lam.groupby('district')['λ'].mean() june_means.sort(ascending=False) # In[510]: july_means = july_lam.groupby('district')['λ'].mean() july_means.sort(ascending=False) # In[511]: sorted_districts = june_means.index.values # Epidemic intensity by district in June and July (with lab confirmation), sorted by June means. # In[512]: sb.set_context("talk", font_scale=0.8) f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('June') ax_1.set_title(r'Epidemic intensity (λ) estimates, ordered by June means') sb.boxplot('district', 'λ', data=july_lam, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2) ax_2.set_xticks([]) ax_2.set_ylabel('July') f.tight_layout() # Epidemic intensity by district in June for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means. # In[513]: f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=june_means.index.values) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('Lab') ax_1.set_title(r'June epidemic intensity (λ) estimates, ordered by lab-confirmed means') sb.boxplot('district', 'λ', data=june_lam_noconf, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=june_means.index.values) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_2.set_xticks([]) ax_2.set_ylabel('Clinical') f.tight_layout() # Epidemic intensity by district in July for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means. # In[514]: july_means = july_lam.groupby('district')['λ'].mean() july_means.sort(ascending=False) # In[515]: f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True) sb.boxplot('district', 'λ', data=july_lam, ax=ax_1, linewidth=0.5, fliersize=0, color='r', order=july_means.index.values) # ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_1.set_xticks([]) ax_1.set_xlabel('') ax_1.set_ylabel('Lab') # ax_1.set_yticks(np.arange(13, step=2)) ax_1.set_title(r'July epidemic intensity (λ) estimates, ordered by lab-confirmed means') sb.boxplot('district', 'λ', data=july_lam_noconf, ax=ax_2, linewidth=0.5, fliersize=0, color='r', order=sorted_districts) # ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75) ax_2.set_xticks([]) ax_2.set_ylabel('Clinical') f.tight_layout() # In[516]: model_june.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[517]: june_coverage = pd.DataFrame({name: model_june.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) june_coverage['Month'] = 'June' june_coverage['Confirmation'] = 'Lab' # In[518]: june_noconf_coverage = pd.DataFrame({name: model_june_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) june_noconf_coverage['Month'] = 'June' june_noconf_coverage['Confirmation'] = 'Clinical' july_coverage = pd.DataFrame({name: model_july.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) july_coverage['Month'] = 'July' july_coverage['Confirmation'] = 'Lab' july_noconf_coverage = pd.DataFrame({name: model_july_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']}) july_noconf_coverage['Month'] = 'July' july_noconf_coverage['Confirmation'] = 'Clinical' # In[519]: coverage = pd.concat([june_coverage, june_noconf_coverage, july_coverage, july_noconf_coverage], ignore_index=True) # In[520]: sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box', row_order=['June', 'July'], order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True) # In[521]: sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box', row_order=['June', 'July'], order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True) # In[432]: axes = sb.boxplot(data=june_coverage, order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], color=sb.color_palette("coolwarm", 5)[0]) axes.set_xticklabels(['Under 5', 'Under 15', 'Under 30', 'Under 5 + 20-30']) axes.set_ylabel('% susceptibles vaccinated') sb.despine(offset=10, trim=True) # In[433]: model_june_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[434]: model_july.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[435]: model_july_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult']) # In[436]: Matplot.summary_plot(model_june.p_age) # ## Mapping spatial effects # In[366]: from mpl_toolkits.basemap import Basemap import geopandas as gpd lllat=-24 urlat=-23.3 lllon=-47 urlon=-46.3 SP_base = Basemap(ax=None, lon_0=(urlon + lllon) / 2, lat_0=(urlat + lllat) / 2, llcrnrlat=lllat, urcrnrlat=urlat, llcrnrlon=lllon, urcrnrlon=urlon, resolution='i', epsg='4326') # In[367]: SP_dist = gpd.GeoDataFrame.from_file('Sao Paulo/Brazil_full/BRA_adm3.shp').to_crs({'proj': 'longlat', 'ellps': 'WGS84', 'datum': 'WGS84'}) # In[368]: SP_dist['DIST_NAME'] = [trans.trans(_).upper() for _ in SP_dist.NAME_3] # In[369]: λ_june = pd.Series(model_june.λ_t.stats()['mean'], index=sp_districts) # In[370]: λ_june # In[371]: SP_dist_merged = SP_dist.merge(pd.DataFrame(λ_june, columns=['λ']), left_on='DIST_NAME', right_index=True) # In[372]: measles_onset_conf = measles_data[CONFIRMED].groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0).sum() # In[373]: measles_onset_conf # In[374]: _rates = measles_onset_conf/sp_pop.sum(1) # In[375]: SP_dist_conf = SP_dist.merge(pd.DataFrame(_rates, columns=['rate']), left_on='DIST_NAME', right_index=True) # Estimated expected value for infecteds, by district # In[376]: from matplotlib.pyplot import cm map_fig = plt.figure(figsize=(16,12)) map_ax = plt.gca() SP_base.drawcoastlines() SP_base.drawrivers() SP_dist_merged.plot(column='λ', colormap=cm.Reds, axes=map_ax) # Observed confirmed cases, by district # In[377]: map_fig = plt.figure(figsize=(16,12)) map_ax = plt.gca() SP_base.drawcoastlines() SP_base.drawrivers() SP_dist_conf.plot(column='rate', colormap=cm.Reds, axes=map_ax)