Notebook

Algae Example¶

Author(s): Paul Miles | Date Created: June 18, 2018

This is a simplified lake algae dynamics model. We consider phytoplankton $A$, zooplankton $Z$ and nutrition $P$ (eg. phosphorus) available for $A$ in the water. The system is affected by the water outflow/inflow $Q$, incoming phosphorus load $P_{in}$ and temperature $T$. It is described as a simple predator - pray dynamics between $A$ and $Z$. The growth of $A$ is limited by the availability of $P$ and it depends on the water temperature $T$. The inflow/outflow $Q$ affects both $A$ and $P$, but not $Z$.

In [1]:

# import required packages
import numpy as np
import scipy.io as sio
from scipy.integrate import odeint
from pymcmcstat.MCMC import MCMC
from pymcmcstat.plotting import MCMCPlotting
import matplotlib.pyplot as plt
np.seterr(over='ignore');

The data is saved in a .mat file as the original example comes from the Matlab. We extract the necessary data as follows

In [2]:

# load Algae data
algaedata = sio.loadmat('algaedata.mat')
# extract dictionary contents
adata = algaedata['data']
tx = adata['xdata'][0][0]
ty = adata['ydata'][0][0]
xlbls = adata['xlabels'][0][0][0]
ylbls = adata['ylabels'][0][0][0]

Define Sum-of-Squares and Model Functions¶

The model function requires evaluation of a system of ODEs. The solution implemented below is non-optimal. It is intended to serve as an example of how to couple and ODE system type problem with pymcmcstat. Optimizing your ODE system solver is left to the user's discretion.

In [3]:

def algaess(theta, data):
    # sum-of-squares function for algae example
    ndp, nbatch = data.shape[0]
    time = data.ydata[0][:, 0]
    ydata = data.ydata[0][:, 1:4]
    xdata = data.user_defined_object[0]
    # last 3 parameters are the initial states
    y0 = np.array(theta[-3:])
    # evaluate model
    tmodel, ymodel = algaefun(time, theta, y0, xdata)
    res = ymodel - ydata
    ss = (res**2).sum(axis=0)
    return ss    

def algaefun(time, theta, y0, xdata):
    """
    Evaluate Ordinary Differential Equation
    """
    sol = odeint(algaesys, y0, time, args=(theta, xdata))
    return time, sol
    
def algaesys(y, t, theta, xdata):
    """
    Model System
    """
    A = y[0]
    Z = y[1]
    P = y[2]
    # control variables are assumed to be saved at each time unit interval
    idx = int(np.ceil(t)) - 1    
    if idx >= 120:
        idx = 119
    QpV = xdata[idx, 1]
    T = xdata[idx, 2]
    Pin = xdata[idx, 3]
    # model parameters
    mumax = theta[0]
    rhoa = theta[1]
    rhoz = theta[2]
    k = theta[3]
    alpha = theta[4]
    th = theta[5]
    mu = mumax*(th**(T-20))*P*((k+P)**(-1))
    dotA = (mu - rhoa - QpV - alpha*Z)*A
    dotZ = alpha*Z*A - rhoz*Z
    dotP = -QpV*(P-Pin) + (rhoa-mu)*A + rhoz*Z
    ydot = np.array([dotA, dotZ, dotP])
    return ydot

Initialize MCMC Object and Setup Simulation¶

Define data structure
Assign parameters and define constraints
Set simulation options and model settings

In [8]:

# initialize MCMC object
mcstat = MCMC()
# initialize data structure 
mcstat.data.add_data_set(x=tx[:, 0],
                         y=ty[:, 0:4],
                         user_defined_object=tx)
# initialize parameter array
#theta = [0.5, 0.03, 0.1, 10, 0.02, 1.14, 0.77, 1.3, 10]
# add model parameters
mcstat.parameters.add_model_parameter(name='mumax', theta0=0.5, minimum=0)
mcstat.parameters.add_model_parameter(name='rhoa', theta0=0.03, minimum=0)
mcstat.parameters.add_model_parameter(name='rhoz', theta0=0.1, minimum=0)
mcstat.parameters.add_model_parameter(name='k', theta0=10, minimum=0)
mcstat.parameters.add_model_parameter(name='alpha', theta0=0.02, minimum=0)
mcstat.parameters.add_model_parameter(name='th', theta0=1.14, minimum=0,
                                      maximum=np.inf, prior_mu=0.14,
                                      prior_sigma=0.2)
# initial values for the model states
mcstat.parameters.add_model_parameter(name='A0', theta0=0.77, minimum=0,
                                      maximum=np.inf, prior_mu=0.77,
                                      prior_sigma=2)
mcstat.parameters.add_model_parameter(name='Z0', theta0=1.3, minimum=0,
                                      maximum=np.inf, prior_mu=1.3,
                                      prior_sigma=2)
mcstat.parameters.add_model_parameter(name='P0', theta0=10, minimum=0,
                                      maximum=np.inf, prior_mu=10,
                                      prior_sigma=2)

# Generate options
mcstat.simulation_options.define_simulation_options(
    nsimu=1.0e3, updatesigma=True)
# Define model object:
mcstat.model_settings.define_model_settings(
    sos_function=algaess,
    sigma2=0.01**2,
    S20=np.array([1, 1, 2]),
    N0=np.array([4, 4, 4]))

The code takes some time to run, so here we simply check to make sure the data structure can be processed using our sum-of-squares function. Note, we have separate sum-of-squares for each quantity of interest and there will be a separate error variance for each as well.

In [9]:

# check model evaluation
theta = [0.5, 0.03, 0.1, 10, 0.02, 1.14, 0.77, 1.3, 10]
ss = algaess(theta, mcstat.data)
print('ss = {}'.format(ss))

ss = [ 930.44890289  521.2441601  1278.11736803]

Run simulation¶

We run an initialize sequence of 1000, then restart and perform another 5000

In [10]:

# Run simulation
mcstat.run_simulation()
# Rerun starting from results of previous run
mcstat.simulation_options.nsimu = int(5.0e3)
mcstat.run_simulation(use_previous_results=True)

Sampling these parameters:
      name      start [      min,       max] N(       mu,   sigma^2)
     mumax:      0.50 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
      rhoa:      0.03 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
      rhoz:      0.10 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
         k:     10.00 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
     alpha:      0.02 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
        th:      1.14 [ 0.00e+00,       inf] N(     0.14,     0.20^2)
        A0:      0.77 [ 0.00e+00,       inf] N(     0.77,     2.00^2)
        Z0:      1.30 [ 0.00e+00,       inf] N(     1.30,     2.00^2)
        P0:     10.00 [ 0.00e+00,       inf] N(    10.00,     2.00^2)
 [-----------------100%-----------------] 1001 of 1000 complete in 119.1 sec
Sampling these parameters:
      name      start [      min,       max] N(       mu,   sigma^2)
     mumax:      0.39 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
      rhoa:      0.03 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
      rhoz:      0.11 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
         k:     10.44 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
     alpha:      0.02 [ 0.00e+00,       inf] N( 0.00e+00,      inf)
        th:      1.00 [ 0.00e+00,       inf] N(     0.14,     0.20^2)
        A0:      1.30 [ 0.00e+00,       inf] N(     0.77,     2.00^2)
        Z0:      2.09 [ 0.00e+00,       inf] N(     1.30,     2.00^2)
        P0:      9.64 [ 0.00e+00,       inf] N(    10.00,     2.00^2)
 [-----------------100%-----------------] 5000 of 5000 complete in 522.5 sec

Extract results and plot chain diagnostics¶

chain panel
density panel
pairwise correlation panel

In [12]:

# extract info from results
results = mcstat.simulation_results.results
chain = results['chain']
s2chain = results['s2chain']
names = results['names'] # parameter names

# display chain stats
mcstat.chainstats(chain, results)

mcpl = MCMCPlotting
# plot chain panel
mcpl.plot_chain_panel(chain, names, figsizeinches=[7, 6])
# plot density panel
mcpl.plot_density_panel(chain, names, figsizeinches=[7, 6])
# pairwise correlation
mcpl.plot_pairwise_correlation_panel(chain, names, figsizeinches=[7, 6])

---------------------
name      :       mean        std     MC_err        tau     geweke
mumax     :     0.5500     0.1332     0.0266   653.2883     0.7246
rhoa      :     0.0664     0.0351     0.0063   497.2180     0.1512
rhoz      :     0.1003     0.0059     0.0009   238.1233     0.9029
k         :    12.0906     3.5540     0.6679   624.3892     0.6571
alpha     :     0.0237     0.0013     0.0002   199.3897     0.9008
th        :     1.0069     0.0157     0.0024   240.8882     0.9587
A0        :     1.0016     0.3543     0.0319    44.6822     0.7331
Z0        :     1.8666     0.4907     0.0435    74.6226     0.8759
P0        :     8.9193     0.9699     0.0972    75.9392     0.9266
---------------------

Out[12]:

Generate prediction/credible intervals for each quantity of interest¶

Generate intervals¶

In [13]:

def predmodelfun(data, theta):
    obj = data.user_defined_object[0]
    time = obj[:, 0]
    xdata = obj
    # last 3 parameters are the initial states
    y0 = np.array(theta[-3:])
    # evaluate model    
    tmodel, ymodel = algaefun(time, theta, y0, xdata)
    return ymodel

mcstat.PI.setup_prediction_interval_calculation(
    results=results,
    data=mcstat.data,
    modelfunction=predmodelfun)
mcstat.PI.generate_prediction_intervals(
    nsample=500,
    calc_pred_int=True,
    waitbar=True)

Generating credible/prediction intervals:

 [-----------------100%-----------------] 500 of 500 complete in 29.8 sec
Interval generation complete

Plot intervals¶

In [14]:

# plot prediction intervals
fighandle, axhandle = mcstat.PI.plot_prediction_intervals(
    adddata=False,
    addlegend=False,
    figsizeinches=[7.5, 8])
for ii in range(3):
    axhandle[ii].plot(mcstat.data.ydata[0][:, 0],
                      mcstat.data.ydata[0][:, ii + 1],
                      'ko', mfc='none', label='data')
    axhandle[ii].set_ylabel('')
    axhandle[ii].set_title(ylbls[ii + 1][0])
    axhandle[ii].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
axhandle[-1].set_xlabel('days');
plt.savefig('algae_ci.png',
            format='png',
            dpi=500,
            bbox_inches='tight')