#!/usr/bin/env python
# coding: utf-8

# # Disease Outbreak Response Decision-making Under Uncertainty: A retrospective analysis of measles in Sao Paulo

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import numpy.ma as ma
from datetime import datetime
import matplotlib.pyplot as plt
import pdb

from IPython.core.display import HTML
def css_styling():
    styles = open("styles/custom.css", "r").read()
    return HTML(styles)
css_styling()


# In[2]:


data_dir = "data/"


# Import outbreak data

# In[3]:


measles_data = pd.read_csv(data_dir+"measles.csv", index_col=0)
measles_data.NOTIFICATION = pd.to_datetime(measles_data.NOTIFICATION)
measles_data.BIRTH = pd.to_datetime(measles_data.BIRTH)
measles_data.ONSET = pd.to_datetime(measles_data.ONSET)


# In[4]:


measles_data = measles_data.replace({'DISTRICT': {'BRASILANDIA':'BRAZILANDIA'}})


# Sao Paulo population by district

# In[5]:


sp_pop = pd.read_csv(data_dir+'sp_pop.csv', index_col=0)


# In[6]:


_names = sp_pop.index.values
_names[_names=='BRASILANDIA'] = 'BRAZILANDIA'
sp_pop.set_index(_names, inplace = True)


# In[7]:


sp_pop.head()


# Plot of cumulative cases by district

# In[8]:


measles_onset_dist = measles_data.groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0)
measles_onset_dist.cumsum().plot(legend=False, grid=False)


# In[9]:


total_district_cases = measles_onset_dist.sum()


# Top 5 districts by number of cases

# In[10]:


totals = measles_onset_dist.sum()
totals.sort(ascending=False)
totals[:5]


# Age distribution of cases, by confirmation status

# In[11]:


by_conclusion = measles_data.groupby(["YEAR_AGE", "CONCLUSION"])
counts_by_cause = by_conclusion.size().unstack().fillna(0)
ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5))


# ## Stochastic Disease Transmission Model
# 
# As a baseline for comparison, we can fit a model to all the clinically-confirmed cases, regardless of lab confirmation status. For this, we will use a simple SIR disease model, which will be fit using MCMC.
# 
# This model fits the series of 2-week infection totals in each district $i$ as a set of Poisson models:
# 
# \\[Pr(I(t)_{i} | \lambda(t)_i) = \text{Poisson}(\lambda(t)_i) \\]
# 
# Where the outbreak intensity is modeled as:
# 
# \\[\lambda(t)_i = \beta [I^{(w)}(t-1)_i]^{\alpha} S(t-1)_i\\]
# 
# \\[\alpha \sim \text{Exp}(1)\\]
# 
# We will assume here that the transmission rate is constant over time (and across districts):
# 
# \\[\beta \sim \text{Gamma}(1, 0.1)\\]
# 
# To account for the influence of infected individuals from neighboring districts on new infections, the outbreak intensity was modeled using a spatial-weighted average of infecteds across districts, where populations were weighted as an exponential function of the distance between district centroids:
# 
# \\[w_{d} = \text{exp}(-\theta d)\\]
# 
# \\[\theta \sim \text{Exp}(1)\\]
# 
# ### Confirmation Sub-model
# 
# Rather than assume all clinical cases are true cases, we can adjust the model to account for lab confirmation probability. This is done by including a sub-model that estimates age group-specific probabilities of confirmation, and using these probabilities to estimate the number of lab-confirmed cases. These estimates are then plugged into the model in place of the clinically-confirmed cases.
# 
# We specified a structured confirmation model to retrospectively determine the age group-specific probabilities of lab confirmation for measles, conditional on clinical diagnosis. Individual lab confirmation events $c_i$ were modeled as Bernoulli random variables, with the probability of confirmation being allowed to vary by age group:
# 
# $$c_i \sim \text{Bernoulli}(p_{a(i)})$$
# 
# where $a(i)$ denotes the appropriate age group for the individual indexed by i. There were 16 age groups, the first 15 of which were 5-year age intervals $[0,5), [5, 10), \ldots , [70, 75)$, with the 16th interval including all individuals 75 years and older.
# 
# Since the age interval choices were arbitrary, and the confirmation probabilities of adjacent groups likely correlated, we modeled the correlation structure directly, using a multivariate logit-normal model. Specifically, we allowed first-order autocorrelation among the age groups, whereby the variance-covariance matrix retained a tridiagonal structure. 
# 
# $$\begin{aligned}
# \Sigma = \left[{
# \begin{array}{c}
#   {\sigma^2} & {\sigma^2 \rho} & 0& \ldots & {0} & {0}  \\
#   {\sigma^2 \rho} & {\sigma^2} &  \sigma^2 \rho & \ldots & {0}  & {0} \\
#   {0} & \sigma^2 \rho & {\sigma^2} & \ldots & {0} & {0} \\
#   \vdots & \vdots & \vdots &  & \vdots & \vdots\\
#   {0} & {0} & 0 & \ldots &  {\sigma^2} & \sigma^2 \rho  \\
# {0} & {0} & 0 & \ldots & \sigma^2 \rho &  {\sigma^2} 
# \end{array}
# }\right]
# \end{aligned}$$
# 
# From this, the confirmation probabilities were specified as multivariate normal on the inverse-logit scale.
# 
# $$ \text{logit}(p_a) = \{a\} \sim N(\mu, \Sigma)$$
# 
# Priors for the confirmation sub-model were specified by:
# 
# $$\begin{aligned}
# \mu_i &\sim N(0, 100) \\
# \sigma &\sim \text{HalfCauchy}(25) \\
# \rho &\sim U(-1, 1)
# \end{aligned}$$

# Age classes are defined in 5-year intervals.

# In[19]:


age_classes = [0,5,10,15,20,25,30,35,40,100]
measles_data.dropna(subset=['YEAR_AGE'], inplace=True)
measles_data['YEAR_AGE'] = measles_data.YEAR_AGE.astype(int)
measles_data['AGE_GROUP'] = pd.cut(measles_data.AGE, age_classes, right=False)


# Lab-checked observations are extracted for use in estimating lab confirmation probability.

# In[20]:


CONFIRMED = measles_data.CONCLUSION == 'CONFIRMED'
CLINICAL = measles_data.CONCLUSION == 'CLINICAL'
DISCARDED = measles_data.CONCLUSION == 'DISCARDED'


# Extract confirmed and clinical subset, with no missing county information.

# In[21]:


lab_subset = measles_data[(CONFIRMED | CLINICAL) & measles_data.COUNTY.notnull()].copy()


# In[22]:


age = lab_subset.YEAR_AGE.values
ages = lab_subset.YEAR_AGE.unique()
counties = lab_subset.COUNTY.unique()
y = (lab_subset.CONCLUSION=='CONFIRMED').values


# In[23]:


_lab_subset = lab_subset.replace({"CONCLUSION": {"CLINICAL": "UNCONFIRMED"}})
by_conclusion = _lab_subset.groupby(["YEAR_AGE", "CONCLUSION"])
counts_by_cause = by_conclusion.size().unstack().fillna(0)
ax = counts_by_cause.plot(kind='bar', stacked=True, xlim=(0,50), figsize=(15,5), grid=False)


# In[24]:


lab_subset.shape


# In[25]:


y.sum()


# Proportion of lab-confirmed cases older than 20 years

# In[26]:


(measles_data[CONFIRMED].YEAR_AGE>20).mean()


# In[27]:


age_classes


# In[28]:


#Extract cases by age and time.

age_group = pd.cut(age, age_classes, right=False)
age_index = np.array([age_group.categories.tolist().index(i) for i in age_group])


# In[29]:


age_group.categories


# In[30]:


# Get index from full crosstabulation to use as index for each district
dates_index = measles_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().index


# In[31]:


unique_districts = measles_data.DISTRICT.dropna().unique()


# In[32]:


excludes = ['BOM RETIRO']


# In[33]:


N = sp_pop.ix[unique_districts, 'Total'].dropna()
N = N.drop(excludes)


# In[34]:


sp_districts = N.index.values
len(sp_districts)


# Compile bi-weekly confirmed and unconfirmed data by Sao Paulo district

# In[35]:


all_district_data = []
all_confirmed_cases = []
for d in sp_districts:

    # All bi-weekly unconfirmed and confirmed cases
    district_data = lab_subset[lab_subset.DISTRICT==d]
    district_counts_2w = district_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).resample('2W', how='sum')
    all_district_data.append(district_counts_2w)

    # All confirmed cases, by district
    confirmed_data = district_data[district_data.CONCLUSION=='CONFIRMED']
    confirmed_counts = confirmed_data.groupby(
        ['ONSET', 'AGE_GROUP']).size().unstack().reindex(dates_index).fillna(0).sum()
    all_confirmed_cases.append(confirmed_counts.reindex_axis(measles_data['AGE_GROUP'].unique()).fillna(0))


# Time series of cases by district, summarized in 2-week intervals

# In[36]:


# Sum over ages for susceptibles
sp_cases_2w = [dist.sum(1) for dist in all_district_data]
len(sp_cases_2w)


# In[37]:


# Ensure the age groups are ordered
I_obs = np.array([dist.reindex_axis(measles_data['AGE_GROUP'].unique(), 
                            axis=1).fillna(0).values.astype(int) for dist in all_district_data])


# In[38]:


I_obs.max()


# In[39]:


I_obs.sum()


# In[40]:


age_groups = np.sort(measles_data['AGE_GROUP'].unique())
age_groups


# Check shape of data frame
# 
# - 92 districts, 28 bi-monthly intervals, 9 age groups

# In[42]:


assert I_obs.shape == (92, 28, len(age_groups))


# ### Spatial distance between districts

# In[43]:


import geopandas as gpd

shp = gpd.GeoDataFrame.from_file("Sao Paulo/Brazil_full/BRA_adm3.shp")


# In[44]:


district_names = N.index.unique()


# In[45]:


import trans
shp['district_name'] = shp.NAME_3.apply(
    lambda x: trans.trans(x).upper())


# In[46]:


sp_shp = shp[shp.NAME_2=='São Paulo'].set_index('district_name')


# In[47]:


centroids = sp_shp.geometry.centroid


# In[48]:


distance_matrix = pd.concat([sp_shp.geometry.distance(o) for o in sp_shp.geometry],
                     axis=1)
distance_matrix.columns = sp_shp.index


# In[49]:


assert (distance_matrix.index == centroids.index).all()


# In[50]:


distance_matrix = distance_matrix.ix[sp_districts, sp_districts]


# In[51]:


assert not distance_matrix.isnull().values.sum()


# In[52]:


min_x, min_y = sp_shp.bounds.min()[:2]
max_x, max_y = sp_shp.bounds.max()[2:]


# In[53]:


centroid_xy = np.array([[c.x, c.y] for c in sp_shp.geometry.centroid])


# Here is an arbitrary distance metric for an arbitrary district, as an example.

# In[54]:


_beta = -1
np.exp(_beta*distance_matrix).values.round(2)[0]


# Specifying a neighborhood, based on shared borders. This can be used to develop a conditional autoregressive (CAR) model.

# In[55]:


neighbors = np.array([sp_shp.ix[district_names].geometry.touches(v).values 
                      for i, v in sp_shp.ix[district_names].geometry.iteritems()])


# In[56]:


lon = centroids.ix[district_names].apply(lambda x: x.x)
lat = centroids.ix[district_names].apply(lambda x: x.y)

coords = pd.DataFrame({'x': lon.values-lon.mean(),
              'y': lat.values-lat.mean()}, index=lon.index)


# Prior distribution on susceptible proportion:
# 
# $$p_s \sim \text{Beta}(2, 100)$$

# In[57]:


from pymc import rbeta
plt.hist(rbeta(2, 100, 10000))


# In[58]:


obs_date = '1997-06-15'
obs_index = all_district_data[0].index <= obs_date
I_obs_t = np.array([I_dist[obs_index] for I_dist in I_obs])


# In[59]:


np.sum(I_obs_t, (0,1)) / I_obs_t.sum()


# In[60]:


I_obs_t.sum((0,1))


# In[150]:


get_ipython().run_line_magic('pinfo', 'negative_binomial_like')


# In[250]:


N.shape


# In[480]:


from pymc import MCMC, Matplot
from pymc import (Uniform, DiscreteUniform, Beta, Lambda, Binomial, Normal, 
                  Poisson, NegativeBinomial, observed, negative_binomial_like, poisson_like,
                  Lognormal, Exponential, binomial_like, stochastic, potential, 
                  invlogit, TruncatedNormal, Binomial, Gamma, HalfCauchy, normal_like,
                  deterministic, MvNormalCov, Bernoulli, potential, Uninformative, 
                  Multinomial, rmultinomial, rbinomial, AdaptiveMetropolis,
                  Dirichlet, multinomial_like)

def measles_model(obs_date, confirmation=True, spatial_weighting=False, all_traces=False):
    
    n_districts, n_periods, n_age_groups = I_obs.shape
    
    ### Confirmation sub-model
    
    if confirmation:

        # Specify priors on age-specific means
        age_classes = np.unique(age_index)

        mu = Normal("mu", mu=0, tau=0.0001, value=[0]*len(age_classes))
        sig = HalfCauchy('sig', 0, 25, value=1)
        var = sig**2
        cor = Uniform('cor', -1, 1, value=0)

        # Build variance-covariance matrix with first-order correlation 
        # among age classes
        @deterministic
        def Sigma(var=var, cor=cor):
            I = np.eye(len(age_classes))*var
            E = np.diag(np.ones(len(age_classes)-1), k=-1)*var*cor
            return I + E + E.T

        # Age-specific probabilities of confirmation as multivariate normal 
        # random variables
        beta_age = MvNormalCov("beta_age", mu=mu, C=Sigma, 
                        value=[1]*len(age_classes))
        p_age = Lambda('p_age', lambda t=beta_age: invlogit(t))

        @deterministic(trace=False)
        def p_confirm(beta=beta_age):
            return invlogit(beta[age_index])


        # Confirmation likelihood
        lab_confirmed = Bernoulli('lab_confirmed', p=p_confirm, value=y, 
                                observed=True)


    '''
    Truncate data at observation period
    '''
    obs_index = all_district_data[0].index <= obs_date
    I_obs_t = np.array([I_dist[obs_index] for I_dist in I_obs])  
                        

    # Index for observation date, used to index out values of interest 
    # from the model.
    t_obs = obs_index.sum() - 1
    
    if confirmation:
        
        @stochastic(trace=all_traces, dtype=int)
        def I(value=(I_obs_t).astype(int), n=I_obs_t, p=p_age):
            # Binomial confirmation process: confirm by age, then re-combine
            
            return np.sum([[binomial_like(value[d,:,a], n[d,:,a], p[a]) 
                            for a in range(n_age_groups)] 
                                for d in range(n_districts)])

        age_dist_init = np.sum(I.value, (0,1))/I.value.sum()
    else:
        
        I = I_obs_t
        age_dist_init = np.sum(I, (0,1))/I.sum()
        
    assert I.shape == (n_districts, t_obs +1, n_age_groups)
        
        
    # Calcuate age distribution from observed distribution of infecteds to date
    age_dist = Dirichlet('age_dist', np.ones(n_age_groups), value=age_dist_init[:-1]/age_dist_init.sum())
    @potential
    def age_dist_like(p=age_dist, I=I):
        p = np.append(p, 1-p.sum())
        return sum([multinomial_like(I_dist.sum(0), I_dist.sum(), p) for I_dist in I])

    # Weakly-informative prior on proportion susceptible being 
    # between 0 and 0.07
    p_susceptible = Beta('p_susceptible', 2, 100, value=0.09)
    
    # Estimated total initial susceptibles by district
    S_0 = Binomial('S_0', n=N.values.astype(int), p=p_susceptible)

    @deterministic(trace=all_traces)
    def S(I=I, S_0=S_0):
        # Calculate susceptibles from total number of infections
        return np.array([S_0[d] - np.array([I[d,:t].sum() 
                        for t in range(t_obs+1)])
                            for d in range(n_districts)])
    
    # Check shape
    assert S.value.shape == (n_districts, t_obs+1)
    
    # Susceptibles at time t, by age
    @deterministic
    def S_age(S=S, p=age_dist):
        p = np.append(p, 1-p.sum())
        return np.array([rmultinomial(s[-1], p) for s in S])
    
    assert S_age.value.shape == (n_districts, n_age_groups)
    
    # Transmission parameter
    β = Uniform('β', 1, 50, value=10)         

    if spatial_weighting:

        θ = Exponential('θ', 1, value=0.01)
        @deterministic
        def Iw(I=I, θ=θ): 
            # Distance-weighted infecteds
            return np.transpose([np.exp(-θ*distance_matrix.values).dot(I_t) for I_t in I.sum(2).T])
        
        # Distance-weighted population
        Nw = Lambda('Nw', lambda θ=θ: np.exp(-θ*distance_matrix.values).dot(N.values))
        
    else:
        
        Iw = Lambda('Iw', lambda I=I: I.sum(2))
        Nw = N
    
    # Check shape
    assert Iw.value.shape == (n_districts, t_obs+1)
    
#     α = Exponential('α', 1, value=1)
    
    # Force of infection
    @deterministic
    def λ(β=β, I=Iw, S=S, N=Nw): 
        #return np.array([β*((I_d+0.1)**α)*S_d for I_d, S_d in zip(I,S)])
#         return np.array([β*(I_d**α)*S_d * np.exp(ϕ_d) for I_d, S_d, ϕ_d in zip(I,S,ϕ)])
        
         return β * (I+0.01) * S / N[:, np.newaxis]
    
    # Check shape
    assert λ.value.shape == (n_districts, t_obs+1)
    
    # FOI in observation period
    λ_t = Lambda('λ_t', lambda λ=λ: λ[:, -1])
    
    # Poisson likelihood for observed cases
    @potential
    def new_cases(I=I, λ=λ): 
        x = I.sum(2)
        return negative_binomial_like(x[:,1:], λ[:, :-1], x[:,:-1]+1)
#         return poisson_like(I.sum(2)[:,1:], λ[:, :-1])
    
    
    '''
    Vaccination targets
    '''
    
    @deterministic
    def vacc_5(S=S_age):
        # Vaccination of 15 and under
        p = [0.95] + [0]*(n_age_groups-1)
        return rbinomial(S.sum(0), p)
    
    # Proportion of susceptibles vaccinated
    pct_5 = Lambda('pct_5', 
                lambda V=vacc_5, S=S_age: float(V.sum())/S.sum())


    @deterministic
    def vacc_15(S=S_age):
        # Vaccination of 15 and under
        p = [0.95]*3 + [0]*(n_age_groups-3)
        return rbinomial(S.sum(0), p)
    
    # Proportion of susceptibles vaccinated
    pct_15 = Lambda('pct_15', 
            lambda V=vacc_15, S=S_age: float(V.sum())/S.sum())
    
    @deterministic
    def vacc_30(S=S_age):
        # Vaccination of 30 and under
        p = [0.95]*6 + [0]*(n_age_groups-6)
        return rbinomial(S.sum(0), p)
    
    # Proportion of 30 and under susceptibles vaccinated
    pct_30 = Lambda('pct_30', 
            lambda V=vacc_30, S=S_age: float(V.sum())/S.sum())
    
    @deterministic
    def vacc_adult(S=S_age):
        # Vaccination of adults under 30 (and young kids)
        p = [0.95, 0, 0, 0, 0.95, 0.95] + [0]*(n_age_groups-6)
        return rbinomial(S.sum(0), p)
    
    # Proportion of adults under 30 (and young kids)
    pct_adult = Lambda('pct_adult', 
            lambda V=vacc_adult, S=S_age: float(V.sum())/S.sum())

    return locals()


# Run models for June 15 and July 15 observation points, both with and without clinical confirmation.

# In[481]:


db = 'ram'
n_iterations = 50000
n_burn = 10000


# June 15, with lab confirmation

# In[482]:


model = measles_model


# In[483]:


model_june = MCMC(model('1997-06-15'), db=db, dbname='model_june')
model_june.use_step_method(AdaptiveMetropolis, model_june.age_dist)
model_june.use_step_method(AdaptiveMetropolis, [model_june.β, model_june.p_susceptible])


# In[484]:


model_june.sample(n_iterations, n_burn)


# July 15, with lab confirmation

# In[485]:


model_july = MCMC(model('1997-07-15'), db=db, dbname='model_july')
model_july.use_step_method(AdaptiveMetropolis, model_july.age_dist)
model_july.use_step_method(AdaptiveMetropolis, [model_july.β, model_july.p_susceptible])


# In[486]:


model_july.sample(n_iterations, n_burn)


# June 15, no lab confirmation

# In[487]:


model_june_noconf = MCMC(model('1997-06-15', 
                                       confirmation=False), 
                         db=db, dbname='model_june_noconf')
model_june_noconf.use_step_method(AdaptiveMetropolis, [model_june_noconf.β, model_june_noconf.p_susceptible])
model_june_noconf.use_step_method(AdaptiveMetropolis, model_june_noconf.age_dist)


# In[488]:


model_june_noconf.sample(n_iterations, n_burn)


# July 15, no lab confirmation

# In[489]:


model_july_noconf = MCMC(model('1997-07-15', 
                                       confirmation=False), 
                         db=db, dbname='model_july_noconf')
model_july_noconf.use_step_method(AdaptiveMetropolis, [model_july_noconf.β, model_july_noconf.p_susceptible])
model_july_noconf.use_step_method(AdaptiveMetropolis, model_july_noconf.age_dist)


# In[490]:


model_july_noconf.sample(n_iterations, n_burn)


# ## Summary of model output
# 
# Distance weighting parameter for june model with confirmation

# In[491]:


Matplot.summary_plot(model_june.S_0)


# In[492]:


Matplot.plot(model_june.θ)


# Lab confirmation rates, June model

# In[493]:


import seaborn as sb

p_age = pd.DataFrame(model_june.p_age.trace(), columns=age_groups)

f, axes = plt.subplots(figsize=(14,6))
sb.boxplot(data=p_age, linewidth=0.3, fliersize=0, ax=axes,
          color=sb.color_palette("coolwarm", 5)[0],
          order=age_group.categories)
axes.set_ylabel('Confirmation rate')
axes.set_xlabel('Age group')


# Proportion of population susceptible, June model.

# In[494]:


Matplot.plot(model_july.β)


# Proportion of population susceptible, June model with no confirmation correction

# In[495]:


Matplot.plot(model_june_noconf.p_susceptible)


# Epidemic intensity estimates at June and July, per district.

# In[496]:


Matplot.summary_plot(model_june.λ_t)


# In[497]:


Matplot.summary_plot(model_july.λ_t)


# Epidemic intensity for lab- versus clinical-confirmation models

# In[498]:


lam_june = model_june.λ.stats()

fig, axes = plt.subplots(2, 1, sharey=True)

axes[0].plot(lam_june['quantiles'][50].T, 'b-', alpha=0.4)
axes[0].set_ylabel('Epidemic intensity')
axes[0].set_xlabel('time (2-week periods)')
axes[0].set_title('Lab confirmation')

lam_june_noconf = model_june_noconf.λ.stats()

axes[1].plot(lam_june_noconf['quantiles'][50].T, 'b-', alpha=0.4)
axes[1].set_ylabel('Epidemic intensity')
axes[1].set_xlabel('time (2-week periods)')
axes[1].set_title('Clinical confirmation')


# In[499]:


S_age_june = pd.DataFrame(model_june.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index()
S_age_june.columns = 'Age', 'Iteration', 'S'
S_age_june['Confirmation'] = 'Lab'


# In[500]:


S_age_june = pd.DataFrame(model_june.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index()
S_age_june.columns = 'Age', 'Iteration', 'S'
S_age_june['Confirmation'] = 'Lab'

S_age_june_noconf = pd.DataFrame(model_june_noconf.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index()
S_age_june_noconf.columns = 'Age', 'Iteration', 'S'
S_age_june_noconf['Confirmation'] = 'Clinical'

S_age_june = pd.concat([S_age_june, S_age_june_noconf], ignore_index=True)


# In[501]:


S_age_july = pd.DataFrame(model_july.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index()
S_age_july.columns = 'Age', 'Iteration', 'S'
S_age_july['Confirmation'] = 'Lab'

S_age_july_noconf = pd.DataFrame(model_july_noconf.S_age.trace()[:, -1], columns=age_groups).unstack().reset_index()
S_age_july_noconf.columns = 'Age', 'Iteration', 'S'
S_age_july_noconf['Confirmation'] = 'Clinical'

S_age_july = pd.concat([S_age_july, S_age_july_noconf], ignore_index=True)


# Numbers of suscepibles in each age group, under lab vs clinical confirmation

# In[502]:


import seaborn as sb
sb.set_context("talk", font_scale=0.8)
sb.set_style("white")

g = sb.factorplot("Age", "S", "Confirmation", S_age_june, kind="box",
                   palette="hls", size=6, aspect=2, linewidth=0.3, fliersize=0, 
                  order=age_group.categories)
g.despine(offset=10, trim=True)
g.set_axis_labels("Age Group", "Susceptibles");


# In[503]:


june_lam = pd.DataFrame(model_june.λ_t.trace()).unstack().reset_index()
june_lam.columns = ('district', 'iteration', 'λ')
june_lam['month'] = 'June'


# In[504]:


june_lam_noconf = pd.DataFrame(model_june_noconf.λ_t.trace()).unstack().reset_index()
june_lam_noconf.columns = ('district', 'iteration', 'λ')
june_lam_noconf['month'] = 'June'


# In[505]:


july_lam = pd.DataFrame(model_july.λ_t.trace()).unstack().reset_index()
july_lam.columns = ('district', 'iteration', 'λ')
july_lam['month'] = 'July'


# In[506]:


model_july.S.value.min()


# In[507]:


july_lam_noconf = pd.DataFrame(model_july_noconf.λ_t.trace()).unstack().reset_index()
july_lam_noconf.columns = ('district', 'iteration', 'λ')
july_lam_noconf['month'] = 'July'


# In[508]:


confirmed_lam = june_lam.append(july_lam, ignore_index=True)


# In[509]:


june_means = june_lam.groupby('district')['λ'].mean()
june_means.sort(ascending=False)


# In[510]:


july_means = july_lam.groupby('district')['λ'].mean()
july_means.sort(ascending=False)


# In[511]:


sorted_districts = june_means.index.values


# Epidemic intensity by district in June and July (with lab confirmation), sorted by June means.

# In[512]:


sb.set_context("talk", font_scale=0.8)

f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True)

sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
# ax_1.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2)
ax_1.set_xticks([])
ax_1.set_xlabel('')
ax_1.set_ylabel('June')
ax_1.set_title(r'Epidemic intensity (λ) estimates, ordered by June means')

sb.boxplot('district', 'λ', data=july_lam, ax=ax_2, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
# ax_2.hlines(1, xmin=0, xmax=93, linestyles='dashed', linewidth=0.2)
ax_2.set_xticks([])
ax_2.set_ylabel('July')

f.tight_layout()


# Epidemic intensity by district in June for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means.

# In[513]:


f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True)

sb.boxplot('district', 'λ', data=june_lam, ax=ax_1, linewidth=0.5, 
           fliersize=0, color='r', order=june_means.index.values)
# ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_1.set_xticks([])
ax_1.set_xlabel('')
ax_1.set_ylabel('Lab')
ax_1.set_title(r'June epidemic intensity (λ) estimates, ordered by lab-confirmed means')

sb.boxplot('district', 'λ', data=june_lam_noconf, ax=ax_2, linewidth=0.5, 
           fliersize=0, color='r', order=june_means.index.values)
# ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_2.set_xticks([])
ax_2.set_ylabel('Clinical')

f.tight_layout()


# Epidemic intensity by district in July for lab-confirmed and clinical-confirmed, sorted by lab-confirmed means.

# In[514]:


july_means = july_lam.groupby('district')['λ'].mean()
july_means.sort(ascending=False)


# In[515]:


f, (ax_1, ax_2) = plt.subplots(2, 1, figsize=(12,6), sharey=True, sharex=True)

sb.boxplot('district', 'λ', data=july_lam, ax=ax_1, linewidth=0.5, 
           fliersize=0, color='r', order=july_means.index.values)
# ax_1.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_1.set_xticks([])
ax_1.set_xlabel('')
ax_1.set_ylabel('Lab')
# ax_1.set_yticks(np.arange(13, step=2))
ax_1.set_title(r'July epidemic intensity (λ) estimates, ordered by lab-confirmed means')

sb.boxplot('district', 'λ', data=july_lam_noconf, ax=ax_2, linewidth=0.5, 
           fliersize=0, color='r', order=sorted_districts)
# ax_2.hlines(1, xmin=0, xmax=93, linestyles='dotted', linewidth=0.75)
ax_2.set_xticks([])
ax_2.set_ylabel('Clinical')

f.tight_layout()


# In[516]:


model_june.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[517]:


june_coverage = pd.DataFrame({name: model_june.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
june_coverage['Month'] = 'June'
june_coverage['Confirmation'] = 'Lab'


# In[518]:


june_noconf_coverage = pd.DataFrame({name: model_june_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
june_noconf_coverage['Month'] = 'June'
june_noconf_coverage['Confirmation'] = 'Clinical'

july_coverage = pd.DataFrame({name: model_july.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
july_coverage['Month'] = 'July'
july_coverage['Confirmation'] = 'Lab'

july_noconf_coverage = pd.DataFrame({name: model_july_noconf.trace(name)[:] for name in ['pct_5', 'pct_15', 'pct_30', 'pct_adult']})
july_noconf_coverage['Month'] = 'July'
july_noconf_coverage['Confirmation'] = 'Clinical'


# In[519]:


coverage = pd.concat([june_coverage, june_noconf_coverage, july_coverage, july_noconf_coverage], 
                     ignore_index=True)


# In[520]:


sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box',
              row_order=['June', 'July'],
              order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'],
               palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True)


# In[521]:


sb.factorplot(row="Month", col="Confirmation", data=coverage, kind='box',
              row_order=['June', 'July'],
              order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'],
               palette="YlGnBu_d", linewidth=0.7, fliersize=0, aspect=1.25).despine(left=True)


# In[432]:


axes = sb.boxplot(data=june_coverage, order=['pct_5', 'pct_15', 'pct_30', 'pct_adult'], 
                  color=sb.color_palette("coolwarm", 5)[0])
axes.set_xticklabels(['Under 5', 'Under 15', 'Under 30', 'Under 5 + 20-30'])
axes.set_ylabel('% susceptibles vaccinated')
sb.despine(offset=10, trim=True)


# In[433]:


model_june_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[434]:


model_july.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[435]:


model_july_noconf.summary(['pct_5', 'pct_15', 'pct_30', 'pct_adult'])


# In[436]:


Matplot.summary_plot(model_june.p_age)


# ## Mapping spatial effects

# In[366]:


from mpl_toolkits.basemap import Basemap
import geopandas as gpd

lllat=-24
urlat=-23.3
lllon=-47
urlon=-46.3

SP_base = Basemap(ax=None, lon_0=(urlon + lllon) / 2, lat_0=(urlat + lllat) / 2,
        llcrnrlat=lllat, urcrnrlat=urlat, llcrnrlon=lllon, urcrnrlon=urlon, 
                  resolution='i',
                 epsg='4326')


# In[367]:


SP_dist = gpd.GeoDataFrame.from_file('Sao Paulo/Brazil_full/BRA_adm3.shp').to_crs({'proj': 'longlat', 
                                                                               'ellps': 'WGS84', 
                                                                               'datum': 'WGS84'})


# In[368]:


SP_dist['DIST_NAME'] = [trans.trans(_).upper() for _ in SP_dist.NAME_3]


# In[369]:


λ_june = pd.Series(model_june.λ_t.stats()['mean'], index=sp_districts)


# In[370]:


λ_june


# In[371]:


SP_dist_merged = SP_dist.merge(pd.DataFrame(λ_june, columns=['λ']), left_on='DIST_NAME', right_index=True)


# In[372]:


measles_onset_conf = measles_data[CONFIRMED].groupby(['DISTRICT','ONSET']).size().unstack(level=0).fillna(0).sum()


# In[373]:


measles_onset_conf


# In[374]:


_rates = measles_onset_conf/sp_pop.sum(1)


# In[375]:


SP_dist_conf = SP_dist.merge(pd.DataFrame(_rates, columns=['rate']), left_on='DIST_NAME', right_index=True)


# Estimated expected value for infecteds, by district

# In[376]:


from matplotlib.pyplot import cm

map_fig = plt.figure(figsize=(16,12))
map_ax = plt.gca()
SP_base.drawcoastlines()
SP_base.drawrivers()

SP_dist_merged.plot(column='λ', colormap=cm.Reds, axes=map_ax)


# Observed confirmed cases, by district

# In[377]:


map_fig = plt.figure(figsize=(16,12))
map_ax = plt.gca()
SP_base.drawcoastlines()
SP_base.drawrivers()

SP_dist_conf.plot(column='rate', colormap=cm.Reds, axes=map_ax)