# Import modules %matplotlib inline from pymc import Lambda, Multinomial, Dirichlet, deterministic, MCMC, Matplot, Uniform, potential, AdaptiveMetropolis from pymc import multinomial_like, extend_dirichlet import numpy as np import pandas as pd # Import data mdata = pd.read_csv("data/FracAnal_1993_2010_Chris_20121203.csv") # FL regions regions = ['ATL', 'USJ', 'NW', 'SW'] # Causes of death causes = ['watercraft', 'wcs', 'entanglement', 'cold', 'redtide', 'other'] # Age classes # classes = ('Calves', 'Subadults', 'Adults') classes = ['Calves', 'Adults'] # Combine adult and subadults for r in regions: for c in causes: mdata[c][(mdata['age']=='Adults') & (mdata['area']==r)] += (mdata[c][(mdata['age']=='Subadults') & (mdata['area']==r)]).tolist() data = mdata[mdata['age']!='Subadults'] data.head() # Constants JUVENILE, ADULT = 0, 1 STARTYEAR, ENDYEAR = 2001, 2009 YEARS = ENDYEAR - STARTYEAR + 1 # Recovery rates from incidental take paper recovery = dict(zip(regions, (0.8625, 0.8242, 0.4069, 0.5837))) # Strong red tide years tide_years = (2003, 2005, 2006) # Uninformative priors for unknown proportions prior0 = {'Calves':[np.ones(len(causes))]*2, 'Adults':[np.ones(len(causes))]*2} def generate_model(age_class='Adults', prior=prior0): # Containers for variables U = [] p = [] pi = [] X = [] X_like = [] Z = [] undetermined = [] theta = [] # Red tide effect factor a = Uniform('a', lower=0, upper=2) # Loop over regions for area in regions: # Upper St. John never gets red tide k = len(causes) - 1*(area == 'USJ') if area == 'USJ': undetermined_priors = r_[prior[age_class][0][:4], prior[age_class][0][-1]] elif area == 'SW': undetermined_priors = prior[age_class][1] else: undetermined_priors = prior[age_class][0] # Undetermined deaths by latent cause modeled as multinomial # with Dirichlet prior on probabilities p_i = Dirichlet('p_%s' % area, theta=undetermined_priors, value=np.ones(k-1)/k) p.append(p_i) # Proportions of mortality for region i pi_i = Dirichlet('pi_%s' % area, theta=np.ones(k)) pi.append(pi_i) # Loop over years for year in range(STARTYEAR, ENDYEAR+1): # Subset data by year and age cdata = data[(data["area"]==area) & (data['year']==year) & (data['age']==age_class)] # Undetermined mortality undetermined_i = cdata['undetermined'] undetermined.append(undetermined_i) # Upper St. John never gets red tide if area == 'USJ': # Remove red tide as a cause Z_i = cdata[causes[0:4] + [causes[-1]]].values[0] else: Z_i = cdata[causes].values[0] # Undetermined deaths by latent cause modeled as multinomial # with Dirichlet prior on probabilities U_i = Multinomial('U_%s%i' % (area,year), n=undetermined_i, p=p_i) U.append(U_i) # Total deaths by cause, corrected for recovery rate X_i = Lambda('X_%s%i' % (area,year), lambda u=U_i, z=Z_i: ((z + u)/recovery[area]).astype(int)) X.append(X_i) # A red tide year ... if (year in tide_years) and (area == 'SW'): @deterministic def theta_i(p=pi_i, a=a): """Adjust proportions in red tide years""" theta = p.copy() theta[4] += a return theta/(1+a) theta_i.__name__ += '_%s%i' % (area,year) theta.append(theta_i) # Normal year ... else: # No adjustment to proportions theta_i = pi_i @potential def X_like_i(x=X_i, p=theta_i): "The likelihood of total deaths by cause" return multinomial_like(x, n=np.sum(x), p=extend_dirichlet(p)) X_like.append(X_like_i) return locals() iterations = 200000 burnin = 100000 thin = 10 adult_model = MCMC(generate_model('Adults')) adult_model.sample(iterations, burn=burnin, thin=thin) [p.summary() for p in adult_model.pi] calf_model = MCMC(generate_model('Calves')) calf_model.sample(iterations, burn=burnin, thin=thin) [p.summary() for p in calf_model.pi]