#!/usr/bin/env python
# coding: utf-8

# # Disease Outbreak Response Decision-making Under Uncertainty: A retrospective analysis of measles in Sao Paulo

# In[15]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import numpy.ma as ma
from datetime import datetime
import matplotlib.pyplot as plt
pd.set_option('max_columns', 20)
pd.set_option('max_rows', 25)

from IPython.core.display import HTML
def css_styling():
    styles = open("styles/custom.css", "r").read()
    return HTML(styles)
css_styling()


# In[16]:


data_dir = "data/"


# Import outbreak data

# In[17]:


measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0)
measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION)
measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH)
measles_data.ONSET = pd.to_datetime(measles_data.ONSET)


# In[18]:


measles_data = measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}})


# Sao Paulo population by district

# In[19]:


sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0)


# In[20]:


_names = sp_pop.index.values
_names[_names=='BRASILANDIA'] = 'BRAZILANDIA'
sp_pop.set_index(_names, inplace = True)


# In[21]:


sp_pop.head()


# Plot of cumulative cases by district

# In[22]:


measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0)
measles_onset_dist.cumsum().plot(legend=False, grid=False)


# In[23]:


total_district_cases = measles_onset_dist.sum()


# Top 5 districts by number of cases

# In[24]:


totals = measles_onset_dist.sum()
totals.sort(ascending=False)
totals[:5]


# Age distribution of cases, by confirmation status

# In[25]:


by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"])
counts_by_cause = by_conclusion.size().unstack().fillna(0)
ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5))


# ## Chain Binomial Transmission Model
# 
# As a baseline for comparison, we can fit a model to all the clinically-confirmed cases, regardless of lab confirmation status. For this, we will use a simple chain binomial model, which will be fit using MCMC.
# 
# This model fits the series of 2-week infection totals as a set of Binomial models:
# 
# \\[Pr(I_{t+1} | S_t, p_t) = \text{Bin}(S_t, p_t) \\]
# 
# Where the binomial probability is modeled as:
# 
# \\[p_t = 1 - \exp\(-\lambda_t\)\\]
# 
# \\[\lambda_t = \frac{\beta_t I_t}{N}\\]
# 
# We will assume here that the transmission rate is constant over time (and across districts):
# 
# \\[\beta_t = \beta_0\\]
# 
# which allows the effective reproductive number to be calculated as:
# 
# \\[R_t = \frac{\beta S_t}{N}\\]
# 
# ### Confirmation Sub-model
# 
# Rather than assume all clinical cases are true cases, we can adjust the model to account for lab confirmation probability. This is done by including a sub-model that estimates age group-specific probabilities of confirmation, and using these probabilities to estimate the number of lab-confirmed cases. These estimates are then plugged into the model in place of the clinically-confirmed cases.
# 
# We specified a structured confirmation model to retrospectively determine the age group-specific probabilities of lab confirmation for measles, conditional on clinical diagnosis. Individual lab confirmation events $c_i$ were modeled as Bernoulli random variables, with the probability of confirmation being allowed to vary by age group:
# 
# $$c_i \sim \text{Bernoulli}(p_{a(i)})$$
# 
# where $a(i)$ denotes the appropriate age group for the individual indexed by i. There were 16 age groups, the first 15 of which were 5-year age intervals $[0,5), [5, 10), \ldots , [70, 75)$, with the 16th interval including all individuals 75 years and older.
# 
# Since the age interval choices were arbitrary, and the confirmation probabilities of adjacent groups likely correlated, we modeled the correlation structure directly, using a multivariate logit-normal model. Specifically, we allowed first-order autocorrelation among the age groups, whereby the variance-covariance matrix retained a tridiagonal structure. 
# 
# $$\begin{aligned}
# \Sigma = \left[{
# \begin{array}{c}
#   {\sigma^2} & {\sigma^2 \rho} & 0& \ldots & {0} & {0}  \\
#   {\sigma^2 \rho} & {\sigma^2} &  \sigma^2 \rho & \ldots & {0}  & {0} \\
#   {0} & \sigma^2 \rho & {\sigma^2} & \ldots & {0} & {0} \\
#   \vdots & \vdots & \vdots &  & \vdots & \vdots\\
#   {0} & {0} & 0 & \ldots &  {\sigma^2} & \sigma^2 \rho  \\
# {0} & {0} & 0 & \ldots & \sigma^2 \rho &  {\sigma^2} 
# \end{array}
# }\right]
# \end{aligned}$$
# 
# From this, the confirmation probabilities were specified as multivariate normal on the inverse-logit scale.
# 
# $$ \text{logit}(p_a) = \{a\} \sim N(\mu, \Sigma)$$
# 
# Priors for the confirmation sub-model were specified by:
# 
# $$\begin{aligned}
# \mu_i &\sim N(0, 100) \\
# \sigma &\sim \text{HalfCauchy}(25) \\
# \rho &\sim U(-1, 1)
# \end{aligned}$$

# Age classes are defined in 5-year intervals.

# In[26]:


age_classes = [0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,100]
measles_data.dropna(subset=['AGE'], inplace=True)
measles_data['AGE_GROUP'] = pd.cut(measles_data.AGE, age_classes, right=False)


# Lab-checked observations are extracted for use in estimating lab confirmation probability.

# In[27]:


CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED'
CLINICAL = measles_data.CONCLUSION == 'CLINICAL'
DISCARDED = measles_data.CONCLUSION == 'DISCARDED'


# In[28]:


lab_subset = measles_data[(CONFIRMED | DISCARDED) & measles_data.YEAR_AGE.notnull() & measles_data.COUNTY.notnull()].copy()
lab_subset.loc[lab_subset.YEAR_AGE > 75, 'YEAR_AGE'] = 75
age = lab_subset.YEAR_AGE.astype(int).values
ages = lab_subset.YEAR_AGE.astype(int).unique()
counties = lab_subset.COUNTY.unique()
y = (lab_subset.CONCLUSION=='CONFIRMED').values


# Proportion of lab-confirmed cases older than 20 years

# In[29]:


(measles_data[CONFIRMED].AGE>20).mean()


# Extract cases by age and time.

# In[30]:


age_group = pd.cut(age, age_classes, right=False)
age_index = np.array([age_group.categories.tolist().index(i) for i in age_group])


# In[31]:


# Get index from full crosstabulation to use as index for each district
dates_index = measles_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().index


# In[32]:


unique_districts = measles_data.DISTRICT.dropna().unique()


# In[33]:


N = sp_pop.ix[unique_districts, 'Total'].dropna()


# In[34]:


sp_districts = N.index.values
len(sp_districts)


# In[35]:


all_district_data = []
all_confirmed_cases = []
for d in sp_districts:
    
    # All bi-weekly unconfirmed and confirmed cases
    district_data = measles_data[measles_data.DISTRICT==d]
    district_counts_2w = district_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).resample('2W', how='sum')
    all_district_data.append(district_counts_2w)
    
    # All confirmed cases, by district
    confirmed_data = district_data[district_data.CONCLUSION!='CONFIRMED']
    confirmed_counts = confirmed_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).sum()
    all_confirmed_cases.append(confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique()).fillna(0))


# Time series of cases by district, summarized in 2-week intervals

# In[36]:


# Sum over ages for susceptibles
sp_cases_2w = [dist.sum(1) for dist in all_district_data]


# In[37]:


# Ensure the age groups are ordered
I_obs = np.array([dist.reindex_axis(measles_data['AGE_GROUP'].unique(), 
                            axis=1).fillna(0).values.astype(int) for dist in all_district_data])


# In[38]:


measles_data['AGE_GROUP'].unique()


# Check shape of data frame
# 
# - 93 districts, 28 bi-monthly intervals, 16 age groups

# In[39]:


assert I_obs.shape == (93, 28, 16)


# ### Spatial distance between districts

# In[40]:


import geopandas as gpd

shp = gpd.GeoDataFrame.from_file("Sao Paulo/Brazil_full/BRA_adm3.shp")


# In[41]:


district_names = N.index.unique()


# In[42]:


import trans
shp['district_name'] = shp.NAME_3.apply(
    lambda x: trans.trans(x).upper())


# In[43]:


sp_shp = shp[shp.NAME_2=='São Paulo'].set_index('district_name')


# In[45]:


centroids = sp_shp.geometry.centroid


# In[46]:


distance_matrix = pd.concat([sp_shp.geometry.distance(o) for o in sp_shp.geometry],
                     axis=1)
distance_matrix.columns = sp_shp.index


# In[47]:


min_x, min_y = sp_shp.bounds.min()[:2]
max_x, max_y = sp_shp.bounds.max()[2:]


# In[48]:


assert (distance_matrix.index == centroids.index).all()


# In[49]:


centroid_xy = np.array([[c.x, c.y] for c in sp_shp.geometry.centroid])


# Here is an arbitrary distance metric for an arbitrary district, as an example.

# In[50]:


_beta = -100
np.exp(_beta*distance_matrix).values.round(2)[0]


# ### Spatial decision model
# 
# We attempt to estimate $R_t$ for a truncated subset of the data, to simulate a decision-maker's information during (rather than after) an outbreak. This essentially involves turning part of the time series into missing data, and running the model.
# 
# This is an example of creating a mask for data not observed by the decision date.

# In[51]:


np.array([np.resize(all_district_data[0].index > '1997-06-15', I_obs[0].T.shape).T]*len(all_district_data))


# In[52]:


from pymc import MCMC, Matplot
from pymc import (Uniform, DiscreteUniform, Beta, Lambda, Binomial, Normal, Poisson, 
                  NegativeBinomial, observed, negative_binomial_like, Lognormal, Exponential, binomial_like,
                  stochastic, potential, invlogit, TruncatedNormal, Binomial, Gamma)
from pymc import (HalfCauchy, deterministic, MvNormalCov, Bernoulli, potential, Uninformative, Multinomial,
                  rmultinomial, rbinomial, AdaptiveMetropolis)

from pymc.gp import *
from pymc.gp.cov_funs import matern


# In[59]:


def measles_model(obs_date, confirmation=True, spatially_weighted=True, all_traces=False):
    
    ### Confirmation sub-model
    
    if confirmation:

        # Specify priors on age-specific means
        age_classes = np.unique(age_index)

        mu = Normal("mu", mu=0, tau=0.0001, value=[0]*len(age_classes))
        sig = HalfCauchy('sig', 0, 25, value=1)
        var = sig**2
        cor = Uniform('cor', -1, 1, value=0)


        # Build variance-covariance matrix with first-order correlation among age classes
        @deterministic
        def Sigma(var=var, cor=cor):
            I = np.eye(len(age_classes))*var
            E = np.diag(np.ones(len(age_classes)-1), k=-1)*var*cor
            return I + E + E.T


        # Age-specific probabilities of confirmation as multivariate normal random variables
        beta_age = MvNormalCov("beta_age", mu=mu, C=Sigma, value=[1]*len(age_classes))
        p_age = Lambda('p_age', lambda t=beta_age: invlogit(t))

        @deterministic(trace=False)
        def p_confirm(beta=beta_age):
            return invlogit(beta[age_index])


        # Confirmation likelihood
        lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, value=y, observed=True)


    '''
    Missing data sub-model 
    
    We treat observations later than the decision date as missing data. This is 
    implemented as a `masked_array` in NumPy, which requires a boolean mask to identify missing values.
    '''
    missing_mask = all_district_data[0].index > obs_date
    district_mask = np.resize(missing_mask, I_obs[0].T.shape).T
    
    # Index to the observation period
    current_index = (~missing_mask).sum()-1

    I_obs_masked = ma.masked_array(I_obs,
                                   mask=np.array([district_mask]*len(all_district_data)),
                                   fill_value=1)


    # Index for observation date, used to index out values of interest from the model.
    t_obs = (~missing_mask).sum() - 1


    # Imputed infecteds
    I_imp = DiscreteUniform('I_imp', 0, 2000, value=I_obs_masked, observed=True)

    ### Chain binomial model for disease dynamics

    if confirmation:
        
        # Confirmed cases
        @stochastic(trace=all_traces, dtype=int)
        def I(value=(I_imp.value*0.7).astype(int), n=I_imp, p=p_age):
            return np.sum([np.sum([binomial_like(vj[:,i], nj[:,i], p[i]) for i in range(len(p))]).sum(0) 
                                    for vj, nj in zip(value, n)])

    else:
        
        I = I_imp


    # Infecteds at time $t_{obs}$
    It = Lambda('It', lambda I=I: I.sum(0)[t_obs])


    # Calculate susceptibles from total number of infections
    @deterministic(trace=all_traces)
    def S(I=I):
        return np.array([Ij.sum() - np.array([Ij[:i].sum() for i in range(len(Ij))]) for Ij in I])


    # Total infecteds until time t_obs, by age
    I_total = Lambda('I_total', lambda I=I: I[:,:t_obs].sum(0).sum(0))
    # Age distribution of infecteds
    age_dist = Lambda('age_dist', lambda I=I_total: I/I.sum())
    
    # Susceptibles at time t
    S_t = Lambda('S_t', lambda S=S: S[:, t_obs])
    
    # Susceptibles at time t, by age
    @deterministic
    def S_age(S=S_t, p=age_dist):
        return np.array([rmultinomial(si, p) for si in S])  
    
    @deterministic
    def vacc_5(S=S_age):
        # Vaccination of 15 and under
        p = [0.95] + [0]*15
        return rbinomial(S[current_index], p)
    
    # Proportion of susceptibles vaccinated
    pct_5 = Lambda('pct_5', lambda V=vacc_5, S=S_age: float(V.sum())/S[current_index].sum())
    
    @deterministic
    def vacc_15(S=S_age):
        # Vaccination of 15 and under
        p = [0.95]*3 + [0]*13
        return rbinomial(S[current_index], p)
    
    # Proportion of susceptibles vaccinated
    pct_15 = Lambda('pct_15', lambda V=vacc_15, S=S_age: float(V.sum())/S[current_index].sum())
    
    @deterministic
    def vacc_30(S=S_age):
        # Vaccination of 30 and under
        p = [0.95]*6 + [0]*10
        return rbinomial(S[current_index], p)
    
    # Proportion of 30 and under susceptibles vaccinated
    pct_30 = Lambda('pct_30', lambda V=vacc_30, S=S_age: float(V.sum())/S[current_index].sum())
    
    @deterministic
    def vacc_adult(S=S_age):
        # Vaccination of adults under 30 (and young kids)
        p = [0.95, 0, 0, 0, 0.95, 0.95] + [0]*10
        return rbinomial(S[current_index], p)
    
    # Proportion of adults under 30 (and young kids)
    pct_adult = Lambda('pct_adult', lambda V=vacc_adult, S=S_age: float(V.sum())/S[current_index].sum())
    
    # Transmission parameter
    beta = Gamma('beta', 1, 0.1, value=10) 


    '''
    Calculation of the effective reproduction number depends on whether we believe that the districts 
    are independent or not. If not (`spatially_weighted = True`), both the number of susceptibles and the 
    denominator population are calculated as a distance-weighted average of all the districts in Sao Paulo.
    '''
    if spatially_weighted:

        geo_mesh = np.transpose([np.linspace(min_x, max_x),
                                np.linspace(min_y, max_y)])

        # Vague prior for the overall mean
        m = Uninformative('m', value=0)

        def constant(x, val):
            return np.zeros(x.shape[:-1],dtype=float) + val

        @deterministic
        def M(m=m):
            return Mean(constant, val=m)

        # ==================
        # = The covariance =
        # ==================

        # Vague priors for the amplitude and scale
        amp = Exponential('amp', 1e-5, value=1)
        scale = Exponential('scale',1e-5, value=0.1)
        @deterministic
        def C(amp=amp, scale=scale):
            return FullRankCovariance(exponential.euclidean, amp = amp, scale = scale)

        # ===================
        # = The GP submodel =
        # ===================
        zeta = GPSubmodel('zeta', M, C, geo_mesh)

        z_eval = Lambda('z_eval', lambda z=zeta.f(centroid_xy): z)


    if spatially_weighted:
        alpha = Exponential('alpha', 1, value=0.0001)
        Rt = Lambda('Rt', lambda B=beta, S=S, z=z_eval, a=alpha: ((B * (1 + a*z) * S.T) / N.values).T)
    else:
        Rt = Lambda('Rt', lambda B=beta, S=S: ((B * S).T / N.values).T)


    # Effective reproduction number at time of observation, and implied vaccination target
    Rt_obs = Lambda('Rt_obs', lambda r=Rt: r[:, t_obs])
    vaccination_target = Lambda('vaccination_target', lambda r=Rt_obs: np.maximum(1-1./r, 0))


    # Force of infection, assuming mass action transmission
    if spatially_weighted:
        lam = Lambda('lam', lambda B=beta, I=I, z=z_eval, a=alpha: 
                     np.array([(B * (1 + zj) * Ij) / nj for Ij,nj,zj in zip(I, N.values, a*z)]), 
                     trace=False)
    else:
        lam = Lambda('lam', lambda B=beta, I=I: np.array([(B * Ij) / nj for Ij,nj in zip(I, N.values)]), 
                     trace=False)


    # 2-week infection probabilities
    p = Lambda('p', lambda lam=lam: 1. - np.exp(-lam) + 1e-6, trace=False)


    # Binomial likelihood for observed cases
    @potential
    def new_cases(p=p, I=I, S=S): 
        return np.sum([[binomial_like(i, s, pt) for pt,i,s in zip(pj[:t_obs], Ij[:t_obs], Sj[:t_obs])] 
                                                   for pj,Ij,Sj in zip(p, I, S)])

    return locals()


# Run models for June 15 and July 15 observation points, both with and without clinical confirmation.

# In[60]:


db = 'txt'
n_iterations = 50000
n_burn = 40000


# June 15, with lab confirmation

# In[61]:


model_june = MCMC(measles_model('1997-06-15', spatially_weighted=False), 
                         db=db, dbname='model_june')


# In[62]:


model_june.sample(n_iterations, n_burn)


# July 15, with lab confirmation

# In[59]:


model_july = MCMC(measles_model('1997-07-15', spatially_weighted=False), 
                         db=db, dbname='model_july')


# In[60]:


model_july.sample(n_iterations, n_burn)


# June 15, no lab confirmation

# In[53]:


model_june_noconf = MCMC(measles_model('1997-06-15', 
                                       spatially_weighted=False, 
                                       confirmation=False), 
                         db=db, dbname='model_june_noconf')


# In[54]:


model_june_noconf.sample(n_iterations, n_burn)


# July 15, no lab confirmation

# In[55]:


model_july_noconf = MCMC(measles_model('1997-07-15', spatially_weighted=False, 
                                       confirmation=False), 
                         db=db, dbname='model_july_noconf')


# In[56]:


model_july_noconf.sample(50000, 40000)


# ## Summary of model output

# In[63]:


Rt_june = model_june.Rt.stats()


# In[64]:


plt.plot(Rt_june['quantiles'][50].T, 'b-', alpha=0.4)
plt.ylabel('R(t)')
plt.xlabel('time (2-week periods)')


# In[65]:


Matplot.summary_plot(model_june.vacc_30)


# In[66]:


Matplot.plot(model_june.pct_15)


# In[67]:


Matplot.plot(model_june.pct_30)


# In[121]:


june_Rt = pd.DataFrame(model_june.Rt_obs.trace()).unstack().reset_index()
june_Rt.columns = ('district', 'iteration', 'Rt')
june_Rt['month'] = 'June'


# In[204]:


june_Rt_noconf = pd.DataFrame(model_june_noconf.Rt_obs.trace()).unstack().reset_index()
june_Rt_noconf.columns = ('district', 'iteration', 'Rt')
june_Rt_noconf['month'] = 'June'


# In[122]:


july_Rt = pd.DataFrame(model_july.Rt_obs.trace()).unstack().reset_index()
july_Rt.columns = ('district', 'iteration', 'Rt')
july_Rt['month'] = 'July'


# In[101]:


confirmed_Rt = june_Rt.append(july_Rt, ignore_index=True)


# In[158]:


june_means = june_Rt.groupby('district')['Rt'].mean()
june_means.sort(ascending=False)


# In[159]:


sorted_districts = june_means.index.values


# In[203]:


import seaborn as sb

sb.set_context("talk", font_scale=1.1)
f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True)

sb.boxplot('district', 'Rt', data=june_Rt, ax=ax_1, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
ax_1.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2)
ax_1.set_xticks([])
ax_1.set_xlabel('')
ax_1.set_ylabel('June')
ax_1.set_ylim(0, 6)
ax_1.set_yticks(range(7))
ax_1.set_title(r'$R_t$ estimates, ordered by June means')

sb.boxplot('district', 'Rt', data=july_Rt, ax=ax_2, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
ax_2.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2)
ax_2.set_xticks([])
ax_2.set_ylabel('July')

f.tight_layout()


# In[217]:


sb.set_context("talk", font_scale=1.1)
f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True)

sb.boxplot('district', 'Rt', data=june_Rt, ax=ax_1, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_1.set_xticks([])
ax_1.set_xlabel('')
ax_1.set_ylabel('Lab')
ax_1.set_yticks(np.arange(13, step=2))
ax_1.set_title(r'June $R_t$ estimates, ordered by confirmed means')

sb.boxplot('district', 'Rt', data=june_Rt_noconf, ax=ax_2, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_2.set_xticks([])
ax_2.set_ylabel('Clinical')

f.tight_layout()


# In[66]:


model_june.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[224]:


june_coverage = pd.DataFrame({name: model_june.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})


# In[233]:


axes = sb.boxplot(june_coverage, order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'])
axes.set_xticklabels(['Under 5', 'Under 15', 'Under 30', 'Under 5 + 20-30'])
plt.ylabel('% susceptibles vaccinated')
sb.despine(offset=10, trim=True);


# In[67]:


model_june_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[68]:


model_july.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[69]:


model_july_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[70]:


Rt_july = model_july.Rt.stats()


# In[71]:


plt.plot(Rt_july['quantiles'][50].T, 'b-', alpha=0.4)
plt.ylabel('R(t)')
plt.xlabel('time (2-week periods)')


# In[72]:


Rt_june_noconf = model_june_noconf.Rt.stats()


# In[73]:


plt.plot(Rt_june_noconf['quantiles'][50].T, 'b-', alpha=0.4)
plt.ylabel('R(t)')
plt.xlabel('time (2-week periods)')


# In[74]:


Matplot.summary_plot(model_june.p_age)


# In[75]:


june_age_dist = model_june.age_dist.trace()[:]
june_age_dist.shape


# In[76]:


june_age_dist = pd.DataFrame(june_age_dist, columns=measles_data['AGE_GROUP'].unique())


# In[154]:


plt.figure(figsize=(8,12))
Matplot.summary_plot(model_july.vaccination_target)


# In[155]:


plt.figure(figsize=(8,12))
Matplot.summary_plot(model_july.vaccination_target)


# In[157]:


Matplot.plot(model_june.alpha)


# ## Mapping spatial effects

# In[240]:


from mpl_toolkits.basemap import Basemap
import geopandas as gpd

lllat=-24
urlat=-23.3
lllon=-47
urlon=-46.3

SP_base = Basemap(ax=None, lon_0=(urlon + lllon) / 2, lat_0=(urlat + lllat) / 2,
        llcrnrlat=lllat, urcrnrlat=urlat, llcrnrlon=lllon, urcrnrlon=urlon, 
                  resolution='i',
                 epsg='4326')


# In[241]:


SP_dist = gpd.GeoDataFrame.from_file('Sao Paulo/Brazil_full/BRA_adm3.shp').to_crs({'proj': 'longlat', 
                                                                               'ellps': 'WGS84', 
                                                                               'datum': 'WGS84'})


# In[242]:


SP_dist['DIST_NAME'] = [trans.trans(_).upper() for _ in SP_dist.NAME_3]


# In[243]:


SP_dist.head()


# In[244]:


SP_dist.NAME_3.unique()


# In[245]:


district_names


# In[307]:


Rt_june = pd.Series(model_june.Rt_obs.stats()['mean'], index=sp_districts)


# In[308]:


Rt_june


# In[311]:


SP_dist_merged = SP_dist.merge(pd.DataFrame(Rt_june, columns=['Rt']), left_on='DIST_NAME', right_index=True)


# In[319]:


SP_dist_merged = SP_dist.merge(foo, left_on='DIST_NAME', right_index=True)


# In[276]:


measles_onset_conf = measles_data[CONFIRMED].groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0).sum()


# In[296]:


measles_onset_conf


# In[298]:


_rates = measles_onset_conf/sp_pop.sum(1)


# In[299]:


SP_dist_conf = SP_dist.merge(pd.DataFrame(_rates, columns=['rate']), left_on='DIST_NAME', right_index=True)


# In[320]:


from matplotlib.pyplot import cm

map_fig = plt.figure(figsize=(16,12))
map_ax = plt.gca()
SP_base.drawcoastlines()
SP_base.drawrivers()

SP_dist_merged.plot(column='cases', colormap=cm.Reds, axes=map_ax)


# In[ ]: