State space model in Edward variational bayes

In [1]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
import numpy as np
/Users/apple/Library/Python/3.6/lib/python/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

Nile dataset

In [3]:
data=pd.read_csv("https://raw.githubusercontent.com/statsmodels/statsmodels/master/statsmodels/datasets/nile/nile.csv",index_col='year',)
In [4]:
data.plot()
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1176326d8>
In [5]:
y=data["volume"]
T = 1000
N = len(y)
T = 1000
In [28]:
def plotseries(y,mu_samples,title,ax=0):
    #axis=0 stan, 1 pymc
    mu_lower, mu_upper = np.percentile(mu_samples, q=[2.5, 97.5], axis=ax)
    pred = mu_samples.mean(axis=ax)
    plt.plot(list(range(len(y)+1)), list(y)+[None], color='black')
    plt.plot(list(range(1, len(y)+1)), pred)
    plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
    plt.title(title)
    plt.show()

Edward

In [7]:
import tensorflow as tf
import edward as ed
from edward.models import Normal, InverseGamma, Empirical

print(ed.__version__)
/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
/Users/apple/Library/Python/3.6/lib/python/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
1.3.5
In [18]:
def genq():
#    return Normal( loc=tf.Variable(tf.random_normal([])),  scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))
    return Normal( loc=tf.Variable(tf.random_normal([])),  scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))
In [53]:
def nile_st_vb(y,N,T):
    muZero = Normal(loc=0.0, scale=1.0)
    sigmaW = InverseGamma(concentration=1.0, rate=1.0)
 
    mu = [0]*N
    mu[0] = Normal(loc=muZero, scale=sigmaW)
    for n in range(1, N):
        mu[n] = Normal(loc=mu[n-1], scale=sigmaW)
 
    sigmaV = InverseGamma(concentration=1.0, rate=1.0)
    y_pre   = Normal(loc=tf.stack(mu), scale=sigmaV)
    
    qmuZero = genq()
    qsigmaW = genq()
    qmu = [ genq() for n in range(N) ]
    qsigmaV = genq()
 
    latent_vars = {m: qm for m, qm in zip(mu, qmu)}
    latent_vars[muZero] =qmuZero
    latent_vars[sigmaW]= qsigmaW
    latent_vars[sigmaV] = qsigmaV
    
    return ed.KLqp(latent_vars, data={y_pre: y}) ,qmu
In [25]:
inference,qmu=nile_st_vb(y,N,T)
/usr/local/lib/python3.6/site-packages/edward/util/random_variables.py:52: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  not np.issubdtype(value.dtype, np.float) and \
/usr/local/lib/python3.6/site-packages/edward/util/random_variables.py:53: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.
  not np.issubdtype(value.dtype, np.int) and \
In [26]:
inference.run(n_iter=T)
1000/1000 [100%] ██████████████████████████████ Elapsed: 41s | Loss: 8264293.000
In [27]:
qmu_sample=np.array([ _qmu.sample(1000).eval()  for _qmu in qmu])
In [29]:
qmu_sample.shape
Out[29]:
(100, 1000)
In [30]:
plotseries(y,qmu_sample,"Edward VB"+ed.__version__,1)
In [59]:
def simpleplot(qmu_sample,ax=1):
    mu_lower, mu_upper = np.percentile(qmu_sample, q=[2.5, 97.5], axis=ax)
    pred = qmu_sample.mean(axis=ax)
    plt.plot(list(range(1, len(y)+1)), pred)
    plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
    plt.show()
In [31]:
mu_lower, mu_upper = np.percentile(qmu_sample, q=[2.5, 97.5], axis=1)
pred = qmu_sample.mean(axis=1)
plt.plot(list(range(1, len(y)+1)), pred)
plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
plt.show()

Articifial data

In [33]:
import math
In [54]:
sy=[100*(math.sin(i*10/N)+1) for i in range(N)]
In [51]:
sy.plot()
Out[51]:
<matplotlib.axes._subplots.AxesSubplot at 0x15f911b38>
In [55]:
inference_sin,qmu_sin=nile_st_vb(sy,N,T)
In [56]:
inference_sin.run(n_iter=T)
1000/1000 [100%] ██████████████████████████████ Elapsed: 51s | Loss: 20429.459
In [57]:
qmu_sin_sample=np.array([ _qmu.sample(1000).eval()  for _qmu in qmu_sin])
plotseries(sy,qmu_sin_sample,"Edward VB"+ed.__version__,1)
In [60]:
simpleplot(qmu_sin_sample,1)