Manatee Proportions of Mortality Model

In [36]:
# Import modules
%matplotlib inline
from pymc import Lambda, Multinomial, Dirichlet, deterministic, MCMC, Matplot, Uniform, potential, AdaptiveMetropolis
from pymc import multinomial_like, extend_dirichlet
import numpy as np
import pandas as pd

Cause of death data were extracted Dec 3, 2012. Data were processed as shown below. In particular, adults and subadults were combined into a single age class for the purposes of this analysis.

In [2]:
# Import data
mdata = pd.read_csv("data/FracAnal_1993_2010_Chris_20121203.csv")
        
# FL regions
regions = ['ATL', 'USJ', 'NW', 'SW']
# Causes of death
causes = ['watercraft', 'wcs', 'entanglement', 'cold', 'redtide', 'other']
# Age classes
# classes = ('Calves', 'Subadults', 'Adults')
classes = ['Calves', 'Adults']

# Combine adult and subadults
for r in regions:
    for c in causes:
        mdata[c][(mdata['age']=='Adults') & (mdata['area']==r)] += (mdata[c][(mdata['age']=='Subadults') & (mdata['area']==r)]).tolist()
    
data = mdata[mdata['age']!='Subadults']

Here is a sample of the dataset:

In [3]:
data.head()
Out[3]:
   year  watercraft  wcs  entanglement  cold  redtide  other  undetermined area     age
0  1993           5    0             0     0        0      4             4  ATL  Calves
1  1994           3    0             0     0        0      4             2  ATL  Calves
2  1995           1    0             1     0        0      4            10  ATL  Calves
3  1996           7    0             0     5        0      6             7  ATL  Calves
4  1997           7    0             0     3        0      8            15  ATL  Calves

In order to avoid the complexities of dealing with the recent cold year, analyses were conducted from 2001 to 2009, inclusive. Recovery rates were taken as fixed, extracted from Runge et al.'s incidental take paper. Additionally, we modeled red tide years in the Southwest region differently than all the other regions and years; we included a red tide effect parameter that accounts for the increased red tide activity in this region during 2003, 2005 and 2006.

In [4]:
# Constants
JUVENILE, ADULT = 0, 1
STARTYEAR, ENDYEAR = 2001, 2009
YEARS = ENDYEAR - STARTYEAR + 1

# Recovery rates from incidental take paper
recovery = dict(zip(regions, (0.8625, 0.8242, 0.4069, 0.5837)))

# Strong red tide years
tide_years = (2003, 2005, 2006)

# Uninformative priors for unknown proportions
prior0 = {'Calves':[np.ones(len(causes))]*2, 'Adults':[np.ones(len(causes))]*2}
In [5]:
def generate_model(age_class='Adults', prior=prior0):

    # Containers for variables
    U = []
    p = []
    pi = []
    X = []
    X_like = []
    Z = []
    undetermined = []
    theta = []

    # Red tide effect factor
    a = Uniform('a', lower=0, upper=2)

    # Loop over regions
    for area in regions:
    
        # Upper St. John never gets red tide
        k = len(causes) - 1*(area == 'USJ')
        
        if area == 'USJ':
                        
            undetermined_priors = r_[prior[age_class][0][:4], prior[age_class][0][-1]]
            
        elif area == 'SW':
            
            undetermined_priors = prior[age_class][1]
            
        else:

            undetermined_priors = prior[age_class][0]
            
    
        # Undetermined deaths by latent cause modeled as multinomial
        # with Dirichlet prior on probabilities
        p_i = Dirichlet('p_%s' % area, theta=undetermined_priors, value=np.ones(k-1)/k)
        p.append(p_i)
            
        # Proportions of mortality for region i
        pi_i = Dirichlet('pi_%s' % area, theta=np.ones(k))
        pi.append(pi_i)

        # Loop over years
        for year in range(STARTYEAR, ENDYEAR+1):

            # Subset data by year and age
            cdata = data[(data["area"]==area) & (data['year']==year) & (data['age']==age_class)]

            # Undetermined mortality
            undetermined_i = cdata['undetermined']
            undetermined.append(undetermined_i)
        
            # Upper St. John never gets red tide
            if area == 'USJ':
                # Remove red tide as a cause
                Z_i = cdata[causes[0:4] + [causes[-1]]].values[0]
            else:
                Z_i = cdata[causes].values[0]
        
        
            # Undetermined deaths by latent cause modeled as multinomial
            # with Dirichlet prior on probabilities
            U_i = Multinomial('U_%s%i' % (area,year), n=undetermined_i, p=p_i)
            U.append(U_i)

            # Total deaths by cause, corrected for recovery rate
            X_i = Lambda('X_%s%i' % (area,year), lambda u=U_i, z=Z_i: ((z + u)/recovery[area]).astype(int))
            X.append(X_i)

            # A red tide year ...
            if (year in tide_years) and (area == 'SW'):

                @deterministic
                def theta_i(p=pi_i, a=a):
                    """Adjust proportions in red tide years"""
                    theta = p.copy()
                    theta[4] += a
                    return theta/(1+a) 
                theta_i.__name__ += '_%s%i' % (area,year)
                theta.append(theta_i)

            # Normal year ...
            else:
            
                # No adjustment to proportions
                theta_i = pi_i
        
            @potential
            def X_like_i(x=X_i, p=theta_i):
                "The likelihood of total deaths by cause"
                return multinomial_like(x, n=np.sum(x), p=extend_dirichlet(p))
            X_like.append(X_like_i)
            
    return locals()

Each model was run for 200K iterations, with half discarded as burn-in, and the remaining samples thinned by a factor iof 10.

In [6]:
iterations = 200000
burnin = 100000
thin = 10

Adult model

In [7]:
adult_model = MCMC(generate_model('Adults'))
adult_model.sample(iterations, burn=burnin, thin=thin)
[****************100%******************]  200000 of 200000 complete

Proportions of mortality

In [8]:
[p.summary() for p in adult_model.pi]
pi_ATL:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.58             0.042            0.004            [ 0.497  0.651]
	0.01             0.004            0.0              [ 0.003  0.018]
	0.011            0.004            0.0              [ 0.004  0.02 ]
	0.111            0.039            0.004            [ 0.051  0.183]
	0.081            0.028            0.003            [ 0.032  0.136]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.497            0.553           0.579          0.612         0.651
	0.004            0.007           0.009          0.012         0.02
	0.004            0.008           0.01           0.013         0.022
	0.053            0.077           0.104          0.145         0.187
	0.029            0.059           0.078          0.104         0.134
	

pi_USJ:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.523            0.143            0.013            [ 0.281  0.787]
	0.069            0.086            0.008            [ 0.     0.269]
	0.043            0.032            0.001            [ 0.001  0.103]
	0.274            0.137            0.013            [ 0.059  0.536]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.279            0.408           0.521          0.633         0.786
	0.006            0.024           0.044          0.072         0.357
	0.005            0.02            0.036          0.058         0.123
	0.07             0.158           0.257          0.375         0.55
	

pi_NW:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.556            0.078            0.006            [ 0.4    0.696]
	0.011            0.012            0.0              [ 0.     0.035]
	0.033            0.021            0.001            [ 0.002  0.072]
	0.202            0.076            0.007            [ 0.088  0.375]
	0.021            0.015            0.001            [ 0.     0.052]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.4              0.501           0.559          0.614         0.696
	0.0              0.003           0.008          0.016         0.044
	0.007            0.019           0.029          0.043         0.085
	0.094            0.147           0.186          0.244         0.385
	0.002            0.01            0.018          0.029         0.061
	

pi_SW:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.462            0.017            0.001            [ 0.431  0.496]
	0.031            0.009            0.001            [ 0.017  0.051]
	0.008            0.003            0.0              [ 0.002  0.014]
	0.077            0.012            0.001            [ 0.055  0.1  ]
	0.326            0.023            0.002            [ 0.28   0.368]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.432            0.451           0.462          0.473         0.497
	0.018            0.025           0.03           0.036         0.054
	0.003            0.006           0.008          0.01          0.016
	0.055            0.069           0.077          0.085         0.102
	0.28             0.31            0.326          0.342         0.368
	
Out[8]:
[None, None, None, None]

Calf model

In [9]:
calf_model = MCMC(generate_model('Calves'))
calf_model.sample(iterations, burn=burnin, thin=thin)
[****************100%******************]  200000 of 200000 complete

Proportions of mortality

In [10]:
[p.summary() for p in calf_model.pi]
pi_ATL:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.177            0.029            0.003            [ 0.12   0.223]
	0.004            0.003            0.0                [ 0.    0.01]
	0.015            0.009            0.001            [ 0.003  0.034]
	0.503            0.042            0.004            [ 0.428  0.578]
	0.021            0.014            0.001            [ 0.005  0.053]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.122            0.154           0.179          0.201         0.226
	0.0              0.002           0.003          0.005         0.012
	0.003            0.007           0.013          0.02          0.035
	0.424            0.471           0.504          0.536         0.575
	0.006            0.012           0.017          0.026         0.061
	

pi_USJ:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.533            0.17             0.012            [ 0.215  0.829]
	0.065            0.063            0.003              [ 0.    0.19]
	0.093            0.099            0.008            [ 0.     0.313]
	0.232            0.144            0.011            [ 0.025  0.535]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.218            0.401           0.537          0.668         0.834
	0.002            0.019           0.045          0.091         0.239
	0.002            0.023           0.056          0.126         0.37
	0.043            0.124           0.196          0.31          0.578
	

pi_NW:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.443            0.096            0.01             [ 0.219  0.567]
	0.019            0.017            0.001            [ 0.     0.052]
	0.019            0.019            0.001            [ 0.     0.059]
	0.25             0.054            0.005            [ 0.151  0.361]
	0.019            0.02             0.001            [ 0.     0.059]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.207            0.397           0.469          0.516         0.561
	0.001            0.006           0.014          0.028         0.06
	0.0              0.005           0.013          0.027         0.071
	0.155            0.212           0.239          0.274         0.37
	0.0              0.005           0.013          0.026         0.073
	

pi_SW:
 
	Mean             SD               MC Error        95% HPD interval
	------------------------------------------------------------------
	0.214            0.012            0.001            [ 0.187  0.231]
	0.003            0.002            0.0              [ 0.     0.008]
	0.005            0.003            0.0                [ 0.    0.01]
	0.457            0.033            0.003            [ 0.385  0.515]
	0.184            0.033            0.003            [ 0.125  0.26 ]
	
	
	Posterior quantiles:
	
	2.5             25              50              75             97.5
	 |---------------|===============|===============|---------------|
	0.187            0.207           0.216          0.224         0.231
	0.0              0.002           0.003          0.004         0.009
	0.001            0.003           0.004          0.006         0.012
	0.387            0.436           0.459          0.479         0.518
	0.125            0.161           0.181          0.203         0.26
	
Out[10]:
[None, None, None, None]