Author(s): Paul Miles | Date Created: June 18, 2018
This is a simplified lake algae dynamics model. We consider phytoplankton $A$, zooplankton $Z$ and nutrition $P$ (eg. phosphorus) available for $A$ in the water. The system is affected by the water outflow/inflow $Q$, incoming phosphorus load $P_{in}$ and temperature $T$. It is described as a simple predator - pray dynamics between $A$ and $Z$. The growth of $A$ is limited by the availability of $P$ and it depends on the water temperature $T$. The inflow/outflow $Q$ affects both $A$ and $P$, but not $Z$.
# import required packages
import numpy as np
import scipy.io as sio
from scipy.integrate import odeint
from pymcmcstat.MCMC import MCMC
from pymcmcstat.plotting import MCMCPlotting
import matplotlib.pyplot as plt
np.seterr(over='ignore');
The data is saved in a .mat
file as the original example comes from the Matlab. We extract the necessary data as follows
# load Algae data
algaedata = sio.loadmat('algaedata.mat')
# extract dictionary contents
adata = algaedata['data']
tx = adata['xdata'][0][0]
ty = adata['ydata'][0][0]
xlbls = adata['xlabels'][0][0][0]
ylbls = adata['ylabels'][0][0][0]
The model function requires evaluation of a system of ODEs. The solution implemented below is non-optimal. It is intended to serve as an example of how to couple and ODE system type problem with pymcmcstat. Optimizing your ODE system solver is left to the user's discretion.
def algaess(theta, data):
# sum-of-squares function for algae example
ndp, nbatch = data.shape[0]
time = data.ydata[0][:, 0]
ydata = data.ydata[0][:, 1:4]
xdata = data.user_defined_object[0]
# last 3 parameters are the initial states
y0 = np.array(theta[-3:])
# evaluate model
tmodel, ymodel = algaefun(time, theta, y0, xdata)
res = ymodel - ydata
ss = (res**2).sum(axis=0)
return ss
def algaefun(time, theta, y0, xdata):
"""
Evaluate Ordinary Differential Equation
"""
sol = odeint(algaesys, y0, time, args=(theta, xdata))
return time, sol
def algaesys(y, t, theta, xdata):
"""
Model System
"""
A = y[0]
Z = y[1]
P = y[2]
# control variables are assumed to be saved at each time unit interval
idx = int(np.ceil(t)) - 1
if idx >= 120:
idx = 119
QpV = xdata[idx, 1]
T = xdata[idx, 2]
Pin = xdata[idx, 3]
# model parameters
mumax = theta[0]
rhoa = theta[1]
rhoz = theta[2]
k = theta[3]
alpha = theta[4]
th = theta[5]
mu = mumax*(th**(T-20))*P*((k+P)**(-1))
dotA = (mu - rhoa - QpV - alpha*Z)*A
dotZ = alpha*Z*A - rhoz*Z
dotP = -QpV*(P-Pin) + (rhoa-mu)*A + rhoz*Z
ydot = np.array([dotA, dotZ, dotP])
return ydot
# initialize MCMC object
mcstat = MCMC()
# initialize data structure
mcstat.data.add_data_set(x=tx[:, 0],
y=ty[:, 0:4],
user_defined_object=tx)
# initialize parameter array
#theta = [0.5, 0.03, 0.1, 10, 0.02, 1.14, 0.77, 1.3, 10]
# add model parameters
mcstat.parameters.add_model_parameter(name='mumax', theta0=0.5, minimum=0)
mcstat.parameters.add_model_parameter(name='rhoa', theta0=0.03, minimum=0)
mcstat.parameters.add_model_parameter(name='rhoz', theta0=0.1, minimum=0)
mcstat.parameters.add_model_parameter(name='k', theta0=10, minimum=0)
mcstat.parameters.add_model_parameter(name='alpha', theta0=0.02, minimum=0)
mcstat.parameters.add_model_parameter(name='th', theta0=1.14, minimum=0,
maximum=np.inf, prior_mu=0.14,
prior_sigma=0.2)
# initial values for the model states
mcstat.parameters.add_model_parameter(name='A0', theta0=0.77, minimum=0,
maximum=np.inf, prior_mu=0.77,
prior_sigma=2)
mcstat.parameters.add_model_parameter(name='Z0', theta0=1.3, minimum=0,
maximum=np.inf, prior_mu=1.3,
prior_sigma=2)
mcstat.parameters.add_model_parameter(name='P0', theta0=10, minimum=0,
maximum=np.inf, prior_mu=10,
prior_sigma=2)
# Generate options
mcstat.simulation_options.define_simulation_options(
nsimu=1.0e3, updatesigma=True)
# Define model object:
mcstat.model_settings.define_model_settings(
sos_function=algaess,
sigma2=0.01**2,
S20=np.array([1, 1, 2]),
N0=np.array([4, 4, 4]))
The code takes some time to run, so here we simply check to make sure the data structure can be processed using our sum-of-squares function. Note, we have separate sum-of-squares for each quantity of interest and there will be a separate error variance for each as well.
# check model evaluation
theta = [0.5, 0.03, 0.1, 10, 0.02, 1.14, 0.77, 1.3, 10]
ss = algaess(theta, mcstat.data)
print('ss = {}'.format(ss))
ss = [ 930.44890289 521.2441601 1278.11736803]
# Run simulation
mcstat.run_simulation()
# Rerun starting from results of previous run
mcstat.simulation_options.nsimu = int(5.0e3)
mcstat.run_simulation(use_previous_results=True)
Sampling these parameters: name start [ min, max] N( mu, sigma^2) mumax: 0.50 [ 0.00e+00, inf] N( 0.00e+00, inf) rhoa: 0.03 [ 0.00e+00, inf] N( 0.00e+00, inf) rhoz: 0.10 [ 0.00e+00, inf] N( 0.00e+00, inf) k: 10.00 [ 0.00e+00, inf] N( 0.00e+00, inf) alpha: 0.02 [ 0.00e+00, inf] N( 0.00e+00, inf) th: 1.14 [ 0.00e+00, inf] N( 0.14, 0.20^2) A0: 0.77 [ 0.00e+00, inf] N( 0.77, 2.00^2) Z0: 1.30 [ 0.00e+00, inf] N( 1.30, 2.00^2) P0: 10.00 [ 0.00e+00, inf] N( 10.00, 2.00^2) [-----------------100%-----------------] 1001 of 1000 complete in 119.1 sec Sampling these parameters: name start [ min, max] N( mu, sigma^2) mumax: 0.39 [ 0.00e+00, inf] N( 0.00e+00, inf) rhoa: 0.03 [ 0.00e+00, inf] N( 0.00e+00, inf) rhoz: 0.11 [ 0.00e+00, inf] N( 0.00e+00, inf) k: 10.44 [ 0.00e+00, inf] N( 0.00e+00, inf) alpha: 0.02 [ 0.00e+00, inf] N( 0.00e+00, inf) th: 1.00 [ 0.00e+00, inf] N( 0.14, 0.20^2) A0: 1.30 [ 0.00e+00, inf] N( 0.77, 2.00^2) Z0: 2.09 [ 0.00e+00, inf] N( 1.30, 2.00^2) P0: 9.64 [ 0.00e+00, inf] N( 10.00, 2.00^2) [-----------------100%-----------------] 5000 of 5000 complete in 522.5 sec
# extract info from results
results = mcstat.simulation_results.results
chain = results['chain']
s2chain = results['s2chain']
names = results['names'] # parameter names
# display chain stats
mcstat.chainstats(chain, results)
mcpl = MCMCPlotting
# plot chain panel
mcpl.plot_chain_panel(chain, names, figsizeinches=[7, 6])
# plot density panel
mcpl.plot_density_panel(chain, names, figsizeinches=[7, 6])
# pairwise correlation
mcpl.plot_pairwise_correlation_panel(chain, names, figsizeinches=[7, 6])
--------------------- name : mean std MC_err tau geweke mumax : 0.5500 0.1332 0.0266 653.2883 0.7246 rhoa : 0.0664 0.0351 0.0063 497.2180 0.1512 rhoz : 0.1003 0.0059 0.0009 238.1233 0.9029 k : 12.0906 3.5540 0.6679 624.3892 0.6571 alpha : 0.0237 0.0013 0.0002 199.3897 0.9008 th : 1.0069 0.0157 0.0024 240.8882 0.9587 A0 : 1.0016 0.3543 0.0319 44.6822 0.7331 Z0 : 1.8666 0.4907 0.0435 74.6226 0.8759 P0 : 8.9193 0.9699 0.0972 75.9392 0.9266 ---------------------
def predmodelfun(data, theta):
obj = data.user_defined_object[0]
time = obj[:, 0]
xdata = obj
# last 3 parameters are the initial states
y0 = np.array(theta[-3:])
# evaluate model
tmodel, ymodel = algaefun(time, theta, y0, xdata)
return ymodel
mcstat.PI.setup_prediction_interval_calculation(
results=results,
data=mcstat.data,
modelfunction=predmodelfun)
mcstat.PI.generate_prediction_intervals(
nsample=500,
calc_pred_int=True,
waitbar=True)
Generating credible/prediction intervals: [-----------------100%-----------------] 500 of 500 complete in 29.8 sec Interval generation complete
# plot prediction intervals
fighandle, axhandle = mcstat.PI.plot_prediction_intervals(
adddata=False,
addlegend=False,
figsizeinches=[7.5, 8])
for ii in range(3):
axhandle[ii].plot(mcstat.data.ydata[0][:, 0],
mcstat.data.ydata[0][:, ii + 1],
'ko', mfc='none', label='data')
axhandle[ii].set_ylabel('')
axhandle[ii].set_title(ylbls[ii + 1][0])
axhandle[ii].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
axhandle[-1].set_xlabel('days');
plt.savefig('algae_ci.png',
format='png',
dpi=500,
bbox_inches='tight')