State space model in Edward variational bayes¶

In [1]:

import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.api as sm
import numpy as np

/Users/apple/Library/Python/3.6/lib/python/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

Nile dataset¶

In [3]:

data=pd.read_csv("https://raw.githubusercontent.com/statsmodels/statsmodels/master/statsmodels/datasets/nile/nile.csv",index_col='year',)

In [4]:

data.plot()

Out[4]:

<matplotlib.axes._subplots.AxesSubplot at 0x1176326d8>

In [5]:

y=data["volume"]
T = 1000
N = len(y)
T = 1000

In [28]:

def plotseries(y,mu_samples,title,ax=0):
    #axis=0 stan, 1 pymc
    mu_lower, mu_upper = np.percentile(mu_samples, q=[2.5, 97.5], axis=ax)
    pred = mu_samples.mean(axis=ax)
    plt.plot(list(range(len(y)+1)), list(y)+[None], color='black')
    plt.plot(list(range(1, len(y)+1)), pred)
    plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
    plt.title(title)
    plt.show()

Edward¶

In [7]:

import tensorflow as tf
import edward as ed
from edward.models import Normal, InverseGamma, Empirical

print(ed.__version__)

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
/Users/apple/Library/Python/3.6/lib/python/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters

1.3.5

In [18]:

def genq():
#    return Normal( loc=tf.Variable(tf.random_normal([])),  scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))
    return Normal( loc=tf.Variable(tf.random_normal([])),  scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

In [53]:

def nile_st_vb(y,N,T):
    muZero = Normal(loc=0.0, scale=1.0)
    sigmaW = InverseGamma(concentration=1.0, rate=1.0)
 
    mu = [0]*N
    mu[0] = Normal(loc=muZero, scale=sigmaW)
    for n in range(1, N):
        mu[n] = Normal(loc=mu[n-1], scale=sigmaW)
 
    sigmaV = InverseGamma(concentration=1.0, rate=1.0)
    y_pre   = Normal(loc=tf.stack(mu), scale=sigmaV)
    
    qmuZero = genq()
    qsigmaW = genq()
    qmu = [ genq() for n in range(N) ]
    qsigmaV = genq()
 
    latent_vars = {m: qm for m, qm in zip(mu, qmu)}
    latent_vars[muZero] =qmuZero
    latent_vars[sigmaW]= qsigmaW
    latent_vars[sigmaV] = qsigmaV
    
    return ed.KLqp(latent_vars, data={y_pre: y}) ,qmu

In [25]:

inference,qmu=nile_st_vb(y,N,T)

/usr/local/lib/python3.6/site-packages/edward/util/random_variables.py:52: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  not np.issubdtype(value.dtype, np.float) and \
/usr/local/lib/python3.6/site-packages/edward/util/random_variables.py:53: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.
  not np.issubdtype(value.dtype, np.int) and \

In [26]:

inference.run(n_iter=T)

1000/1000 [100%] ██████████████████████████████ Elapsed: 41s | Loss: 8264293.000

In [27]:

qmu_sample=np.array([ _qmu.sample(1000).eval()  for _qmu in qmu])

In [29]:

qmu_sample.shape

Out[29]:

(100, 1000)

In [30]:

plotseries(y,qmu_sample,"Edward VB"+ed.__version__,1)

In [59]:

def simpleplot(qmu_sample,ax=1):
    mu_lower, mu_upper = np.percentile(qmu_sample, q=[2.5, 97.5], axis=ax)
    pred = qmu_sample.mean(axis=ax)
    plt.plot(list(range(1, len(y)+1)), pred)
    plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
    plt.show()

In [31]:

mu_lower, mu_upper = np.percentile(qmu_sample, q=[2.5, 97.5], axis=1)
pred = qmu_sample.mean(axis=1)
plt.plot(list(range(1, len(y)+1)), pred)
plt.fill_between(list(range(1, len(y)+1)), mu_lower, mu_upper, alpha=0.3)
plt.show()

Articifial data¶

In [33]:

import math

In [54]:

sy=[100*(math.sin(i*10/N)+1) for i in range(N)]

In [51]:

sy.plot()

Out[51]:

<matplotlib.axes._subplots.AxesSubplot at 0x15f911b38>

In [55]:

inference_sin,qmu_sin=nile_st_vb(sy,N,T)

In [56]:

inference_sin.run(n_iter=T)

1000/1000 [100%] ██████████████████████████████ Elapsed: 51s | Loss: 20429.459

In [57]:

qmu_sin_sample=np.array([ _qmu.sample(1000).eval()  for _qmu in qmu_sin])
plotseries(sy,qmu_sin_sample,"Edward VB"+ed.__version__,1)

In [60]:

simpleplot(qmu_sin_sample,1)

In [ ]: