#!/usr/bin/env python
# coding: utf-8

# # Generalized Linear Mixed‐effects Model Playground
# # or the many ways to perform GLMM in python
# # A comparison among:
# - [StatsModels](https://github.com/statsmodels/statsmodels)
# - [Theano](https://github.com/Theano/Theano)
# - [PyMC3](https://github.com/pymc-devs/pymc3) (based on Theano)
# - [TensorFlow](https://github.com/tensorflow/tensorflow)
# - [Stan](https://github.com/stan-dev/stan) and [pyStan](https://github.com/stan-dev/pystan)
# - [Keras](https://github.com/fchollet/keras) with TensorFlow backend

# In[1]:


import numpy as np
#import pandas as pd
import seaborn as sns
from theano import tensor as tt

get_ipython().run_line_magic('pylab', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
get_ipython().run_line_magic('qtconsole', '--colors=linux')
get_ipython().run_line_magic('load_ext', 'watermark')


# In[2]:


get_ipython().run_line_magic('watermark', '-v -m -p numpy,pandas,seaborn,statsmodels,theano,pymc3,tensorflow,keras,matplotlib')


# # Prepare data and quick visualization

# In[3]:


# load data
import pandas as pd

Tbl_beh = pd.read_csv('./behavioral_data.txt', delimiter='\t')
Tbl_beh["subj"] = Tbl_beh["subj"].astype('category')

#%% visualize data
# Draw a nested violin plot and split the violins for easier comparison
sns.set(style="ticks", palette="muted", color_codes=True)
plt.figure()
Tbl_beh['cbcond'] = pd.Series(Tbl_beh['chimera'] + '-' + Tbl_beh['identity'],
                              index=Tbl_beh.index)
sns.violinplot(y="cbcond", x="rt", hue="group", data=Tbl_beh,
               split=True, inner="quart", palette={"cp": "b", "cg": "y"})
sns.despine(left=True)


# `Tbl_beh` is the raw data table.

# In[4]:


Tbl_beh.head(5)


# In[5]:


#%% Compute the conditional means of the dataset
condi_sel = ['subj', 'chimera', 'identity', 'orientation']
tblmean = pd.DataFrame({'Nt': Tbl_beh.groupby(condi_sel).size()}).reset_index()
tblmean["subj"] = tblmean["subj"].astype('category')
tblmean['rt'] = np.asarray(Tbl_beh.groupby(condi_sel)['rt'].mean())
tblmean['ACC'] = np.asarray(Tbl_beh.groupby(condi_sel)['acc'].mean())
tblmean['group'] = np.asarray(Tbl_beh.groupby(condi_sel)['group'].all())


# `tblmean` is the summary data table.
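
# The modeling sections below all fit the same random-intercept model to `rt` in each
# framework. As a worked equation (the formula itself appears in the StatsModels and brms
# calls further down):
#
# $$ rt_i = \mathbf{x}_i^\top \boldsymbol{\beta} + b_{subj(i)} + \epsilon_i, \qquad
#    b_{subj} \sim \mathcal{N}(0, \sigma_b^2), \qquad \epsilon_i \sim \mathcal{N}(0, \sigma^2) $$
#
# with fixed effects for `group*orientation*identity` (all main effects and interactions)
# and a random intercept for each `subj`.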
# In[6]:


tblmean['cbcond'] = pd.Series(tblmean['chimera'] + '-' + tblmean['identity'],
                              index=tblmean.index)

## boxplot + scatter plot to show accuracy
#ax = sns.boxplot(y="cbcond", x="acc", data=tbltest,
#                 whis=np.inf, color="c")
## Add in points to show each observation
#sns.stripplot(y="cbcond", x="acc", data=tbltest,
#              jitter=True, size=3, color=".3", linewidth=0)

plt.figure()
g1 = sns.violinplot(y="cbcond", x="rt", hue="group", data=tblmean,
                    split=True, inner="quart", palette={"cp": "b", "cg": "y"})
g1.set(xlim=(0, 3000))  # limit the reaction-time axis
sns.despine(trim=True)


# In[7]:


tblmean.head(5)


# Here I use the mean table as the data: it is smaller and therefore much faster to fit.

# In[8]:


tbltest = tblmean
tbltest.shape


# # Using StatsModels to perform a linear mixed model of reaction time

# In[9]:


import statsmodels.formula.api as smf
from patsy import dmatrices

formula = "rt ~ group*orientation*identity"
#formula = "rt ~ -1 + cbcond"
md = smf.mixedlm(formula, tbltest, groups=tbltest["subj"])
mdf = md.fit()
print(mdf.summary())

fe_params = pd.DataFrame(mdf.fe_params, columns=['LMM'])
random_effects = pd.DataFrame(mdf.random_effects)
random_effects = random_effects.transpose()
random_effects = random_effects.rename(index=str, columns={'groups': 'LMM'})

#%% Generate design matrices for later use
Y, X = dmatrices(formula, data=tbltest, return_type='matrix')
Terms = X.design_info.column_names
_, Z = dmatrices('rt ~ -1+subj', data=tbltest, return_type='matrix')
X = np.asarray(X)  # fixed-effect design matrix
Z = np.asarray(Z)  # random-effect design matrix
Y = np.asarray(Y).flatten()
nfixed = np.shape(X)
nrandm = np.shape(Z)


# A helper function for plotting

# In[10]:


#%% plotting function
def plotfitted(fe_params=fe_params, random_effects=random_effects, X=X, Z=Z, Y=Y):
    plt.figure(figsize=(18, 9))
    ax1 = plt.subplot2grid((2, 2), (0, 0))
    ax2 = plt.subplot2grid((2, 2), (0, 1))
    ax3 = plt.subplot2grid((2, 2), (1, 0), colspan=2)

    fe_params.plot(ax=ax1)
    random_effects.plot(ax=ax2)
    ax3.plot(Y.flatten(), 'o', color='k', label='Observed', alpha=.25)
    for iname in fe_params.columns:
        fitted = np.dot(X, fe_params[iname]) + np.dot(Z, random_effects[iname]).flatten()
        print("The MSE of " + iname + " is " + str(np.mean(np.square(Y.flatten() - fitted))))
        ax3.plot(fitted, lw=1, label=iname, alpha=.5)
    ax3.legend(loc=0)
    #plt.ylim([0,5])
    plt.show()

plotfitted(fe_params=fe_params, random_effects=random_effects, X=X, Z=Z, Y=Y)


# # Using PyMC3

# In[12]:


beta0 = np.linalg.lstsq(X, Y)  # ordinary least-squares solution (not used below)
fixedpred = np.argmax(X, axis=1)
randmpred = np.argmax(Z, axis=1)
con = tt.constant(fixedpred)
sbj = tt.constant(randmpred)

import pymc3 as pm

with pm.Model() as glmm1:
    # Fixed effects
    beta = pm.Normal('beta', mu=0., sd=100., shape=(nfixed[1]))
    # Random effects
    s = pm.HalfCauchy('s', 10.)
    b = pm.Normal('b', mu=0., sd=s, shape=(nrandm[1]))
    eps = pm.HalfCauchy('eps', 5.)
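
    # The linear predictor below combines the patsy design matrices directly,
    # mu_est = X*beta + Z*b, i.e. the same fixed-effect plus random-intercept structure
    # as the StatsModels fit above; `eps` is the residual SD of the Normal likelihood on rt.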
    #mu_est = pm.Deterministic('mu_est', beta[con] + b[sbj])
    mu_est = pm.Deterministic('mu_est', tt.dot(X, beta) + tt.dot(Z, b))
    RT = pm.Normal('RT', mu_est, eps, observed=Y)

    trace = pm.sample(1000, tune=1000)

pm.traceplot(trace, varnames=['beta', 'b', 's'])
# plt.show()

fixed_pymc = pm.summary(trace, varnames=['beta'])
randm_pymc = pm.summary(trace, varnames=['b'])
fe_params['PyMC'] = pd.Series(np.asarray(fixed_pymc['mean']), index=fe_params.index)
random_effects['PyMC'] = pd.Series(np.asarray(randm_pymc['mean']), index=random_effects.index)
print(fe_params)


# # Using Theano

# This is a non-probabilistic model; nonetheless, uncertainty about the parameter estimates
# could be obtained using dropout (http://mlg.eng.cam.ac.uk/yarin/blog_3d801aa532c1ce.html).

# In[13]:


Y, X = dmatrices(formula, data=tbltest, return_type='matrix')
Terms = X.design_info.column_names
_, Z = dmatrices('rt ~ -1+subj', data=tbltest, return_type='matrix')
X = np.asarray(X)  # fixed-effect design matrix
Z = np.asarray(Z)  # random-effect design matrix
Y = np.asarray(Y).flatten()
nfixed = np.shape(X)
nrandm = np.shape(Z)

X = X.astype(np.float32)
Z = Z.astype(np.float32)
Y = Y.astype(np.float32)

import theano
theano.config.compute_test_value = 'ignore'


def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)


def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))


def model(X, beta, Z, b):
    return tt.sum(tt.dot(X, beta) + tt.dot(Z, b), axis=1)


def sgd(cost, params, lr=0.001):
    grads = tt.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        updates.append([p, p - g * lr])
    return updates


Xtf = tt.fmatrix()
Ztf = tt.fmatrix()
y = tt.vector()

tbeta = init_weights([nfixed[1], 1])
tb = init_weights([nrandm[1], 1])
eps = init_weights([0])  # not used

y_ = model(Xtf, tbeta, Ztf, tb)

cost = tt.mean(tt.sqr(y - y_))
params = [tbeta, tb]
updates = sgd(cost, params)

train = theano.function(inputs=[Xtf, Ztf, y], outputs=cost, updates=updates,
                        allow_input_downcast=True)

for i in range(100000):
    # mini-batch SGD: train on a random half of the rows at each step
    sel = np.random.randint(0, nfixed[0], size=int(nfixed[0] / 2))
    batch_xs, batch_zs, batch_ys = X[sel, :], Z[sel, :], Y[sel]
    train(batch_xs, batch_zs, batch_ys)

outdf = pd.DataFrame(data=tbeta.get_value(), columns=['beta'], index=Terms)
fe_params['theano'] = pd.Series(tbeta.get_value().flatten(), index=fe_params.index)
random_effects['theano'] = pd.Series(tb.get_value().flatten(), index=random_effects.index)
print(fe_params)


# # Using TensorFlow

# In[14]:


Y, X = dmatrices(formula, data=tbltest, return_type='matrix')
Terms = X.design_info.column_names
_, Z = dmatrices('rt ~ -1+subj', data=tbltest, return_type='matrix')
X = np.asarray(X)  # fixed-effect design matrix
Z = np.asarray(Z)  # random-effect design matrix
Y = np.asarray(Y)
nfixed = np.shape(X)
nrandm = np.shape(Z)

X = X.astype(np.float32)
Z = Z.astype(np.float32)
Y = Y.astype(np.float32)

import tensorflow as tf


def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def model(X, beta, Z, b):
    y_pred = tf.matmul(X, beta) + tf.matmul(Z, b)
    #randcoef = tf.matmul(Z, b)
    #Xnew = tf.transpose(X) * tf.transpose(randcoef)
    #y_pred = tf.matmul(tf.transpose(Xnew), beta)
    return y_pred

# notice we use the same linear model as in the Theano section above;
# the cost below is simply the squared error between prediction and observed rt
Xtf = tf.placeholder("float32", [None, nfixed[1]])  # create symbolic variables
Ztf = tf.placeholder("float32", [None, nrandm[1]])
y = tf.placeholder("float32", [None, 1])

beta = init_weights([nfixed[1], 1])
b = init_weights([nrandm[1], 1])
eps = init_weights([0])  # only referenced in the commented-out variant below
y_ = model(Xtf, beta, Ztf, b)
# y_ = tf.nn.softmax(model(Xtf, beta) + model(Ztf, b) + eps)
# cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_), reduction_indices=[1]))
cost = tf.square(y - y_)  # use squared error for the cost function
train_step = tf.train.GradientDescentOptimizer(0.0001).minimize(cost)

init = tf.global_variables_initializer()
# Add ops to save and restore all the variables.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    for i in range(2000):
        sel = np.random.randint(0, nfixed[0], size=int(nfixed[0] / 2))
        batch_xs, batch_zs, batch_ys = X[sel, :], Z[sel, :], Y[sel]
        sess.run(train_step, feed_dict={Xtf: batch_xs, Ztf: batch_zs, y: batch_ys})
        accuracy = tf.reduce_mean(tf.cast(cost, tf.float32))  # despite the name, this is the MSE
        if i % 1000 == 0:
            print(i, sess.run(accuracy, feed_dict={Xtf: X, Ztf: Z, y: Y}))

    betaend = sess.run(beta)
    bend = sess.run(b)

fe_params['tf'] = pd.Series(betaend.flatten(), index=fe_params.index)
random_effects['tf'] = pd.Series(bend.flatten(), index=random_effects.index)
print(fe_params)


# # PyMC3 again
# a model parameterized similarly to brms

# In[21]:


Y, X = dmatrices(formula, data=tbltest, return_type='matrix')
Terms = X.design_info.column_names
_, Z = dmatrices('rt ~ -1+subj', data=tbltest, return_type='matrix')
X = np.asarray(X)  # fixed-effect design matrix
Z = np.asarray(Z)  # random-effect design matrix
Y = np.asarray(Y).flatten()
nfixed = np.shape(X)
nrandm = np.shape(Z)

beta0 = np.linalg.lstsq(X, Y)  # ordinary least-squares solution (not used below)
fixedpred = np.argmax(X, axis=1)
randmpred = np.argmax(Z, axis=1)
con = tt.constant(fixedpred)
sbj = tt.constant(randmpred)

X1 = X[0:, 1:]
X_mean = np.mean(X1, axis=0)  # column means of X before centering
X_cent = X1 - X_mean

with pm.Model() as glmm2:
    # temporary intercept (on the centered design matrix)
    temp_Intercept = pm.StudentT('temp_Intercept', nu=3, mu=0, sd=186)
    # Fixed effects
    beta = pm.Normal('beta', mu=0, sd=100, shape=(nfixed[1] - 1))
    # Random effects
    # group-specific standard deviation
    s = pm.HalfStudentT('sd_1', nu=3, sd=186)
    b = pm.Normal('b', mu=0, sd=1, shape=(nrandm[1]))
    r_1 = pm.Deterministic('r_1', s * b)

    sigma = pm.HalfStudentT('sigma', nu=3, sd=186)
    # compute linear predictor
    mu_est = tt.dot(X_cent, beta) + temp_Intercept + r_1[sbj]
    RT = pm.Normal('RT', mu_est, sigma, observed=Y)

    b_Intercept = pm.Deterministic("b_Intercept", temp_Intercept - tt.sum(X_mean * beta))

    trace2 = pm.sample(1000, tune=1000)

pm.traceplot(trace2, varnames=['beta', 'b_Intercept', 'r_1'])
# plt.show()


# In[23]:


df_summary = pm.summary(trace2, varnames=['beta'])
df_stmp = pm.summary(trace2, varnames=['b_Intercept'])  #['temp_Intercept']
df_new = df_stmp.append(df_summary, ignore_index=True)
df_new.index = Terms

randm_pymc = pm.summary(trace2, varnames=['r_1'])
fe_params['PyMC2'] = pd.Series(np.asarray(df_new['mean']), index=fe_params.index)
random_effects['PyMC2'] = pd.Series(np.asarray(randm_pymc['mean']), index=random_effects.index)
print(fe_params)


# # Using PyStan (model constructed with brm in R)
#
# ```r
# library(lme4)
# library(brms)
# ```
# ```
# Tbl_beh <- read.csv("behavioral_data.txt", sep = "\t")
# Tbl_beh$subj <- factor(Tbl_beh$subj)
# Tbl_beh$trial <- factor(Tbl_beh$trial)
# stanmodel <- make_stancode(rt ~ group*orientation*identity + (1|subj),
#                            data = Tbl_beh, family = "normal")
# standata <- make_standata(rt ~ group*orientation*identity + (1|subj),
#                           data = Tbl_beh, family = "normal")
# bayesfit <- brm(rt ~ group*orientation*identity + (1|subj),
#                 data = Tbl_beh, family = "normal")
# bayesfit$model
# summary(bayesfit, waic = TRUE)
# ```
# ```
# lmefit <- lmer(rt ~ group*orientation*identity + (1|subj),
#                data = Tbl_beh)
# summary(lmefit)
# ```

# In[30]:


stan_datadict = {}
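
# The dictionary entries below match the `data` block of the brms-generated Stan model
# further down: N/Y are the observations, K/X the centered fixed-effect design matrix
# (intercept column dropped), X_means the column means needed to recover the intercept,
# J_1 the 1-based subject index, N_1 the number of subjects, and Z_1 a vector of ones
# for the random intercept.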
stan_datadict['N'] = Y.shape[0]
stan_datadict['Y'] = Y
stan_datadict['K'] = X1.shape[1]
stan_datadict['X'] = X_cent
stan_datadict['X_means'] = X_mean
stan_datadict['J_1'] = randmpred + 1
stan_datadict['N_1'] = max(randmpred + 1)
stan_datadict['K_1'] = 1
stan_datadict['Z_1'] = np.ones(randmpred.shape)
stan_datadict['prior_only'] = 0
#stan_datadict.items()


# In[31]:


stan_mdlspec_lmm = """
// This Stan code was generated with the R package 'brms'.
// We recommend generating the data with the 'make_standata' function.
functions {
}
data {
  int<lower=1> N;        // total number of observations
  vector[N] Y;           // response variable
  int<lower=1> K;        // number of population-level effects
  matrix[N, K] X;        // centered population-level design matrix
  vector[K] X_means;     // column means of X before centering
  // data for group-specific effects of subj
  int<lower=1> J_1[N];
  int<lower=1> N_1;
  int<lower=1> K_1;
  vector[N] Z_1;
  int prior_only;        // should the likelihood be ignored?
}
transformed data {
}
parameters {
  vector[K] b;           // population-level effects
  real temp_Intercept;   // temporary Intercept
  real<lower=0> sd_1;    // group-specific standard deviation
  vector[N_1] z_1;       // unscaled group-specific effects
  real<lower=0> sigma;   // residual SD
}
transformed parameters {
  // group-specific effects
  vector[N_1] r_1;
  vector[N] eta;         // linear predictor
  r_1 <- sd_1 * (z_1);
  // compute linear predictor
  eta <- X * b + temp_Intercept;
  for (n in 1:N) {
    eta[n] <- eta[n] + r_1[J_1[n]] * Z_1[n];
  }
}
model {
  // prior specifications
  sd_1 ~ student_t(3, 0, 186);
  z_1 ~ normal(0, 1);
  sigma ~ student_t(3, 0, 186);
  // likelihood contribution
  if (!prior_only) {
    Y ~ normal(eta, sigma);
  }
}
generated quantities {
  real b_Intercept;      // population-level intercept
  b_Intercept <- temp_Intercept - dot_product(X_means, b);
}
"""


# In[32]:


import pystan

stan_fit_lmm = pystan.stan(model_code=stan_mdlspec_lmm, data=stan_datadict,
                           iter=3000, warmup=1000, chains=4, n_jobs=2, verbose=False)
stan_fit_lmm.plot(pars=['b_Intercept', 'b', 'z_1'])
plt.show()


# In[33]:


Intercept = stan_fit_lmm.extract(permuted=True)['b_Intercept']
betatemp = stan_fit_lmm.extract(permuted=True)['b']
btemp = stan_fit_lmm.extract(permuted=True)['z_1']
sdtemp = stan_fit_lmm.extract(permuted=True)['sd_1']
betastan = np.hstack([Intercept.mean(axis=0), betatemp.mean(axis=0)])
bstan = btemp.mean(axis=0) * sdtemp.mean(axis=0)

fe_params['Stan'] = pd.Series(betastan, index=fe_params.index)
random_effects['Stan'] = pd.Series(bstan, index=random_effects.index)
print(fe_params)


# # Using Keras

# In[34]:


import keras
from keras.models import Model
from keras.layers import Input, Add, Dense
from keras import backend as K

K.clear_session()

nb_epoch = 1000

fixedpred = np.argmax(X, axis=1)
randmpred = np.argmax(Z, axis=1)

Xinput = Input(batch_shape=(None, nfixed[1] - 1))
fixed_keras = Dense(1, input_dim=nfixed[1] - 1, name='fixedEffect')(Xinput)
Zinput = Input(batch_shape=(None, nrandm[1]))
randm_keras = Dense(1, input_dim=nrandm[1], use_bias=False, name='randomEffect')(Zinput)
merged = keras.layers.add([fixed_keras, randm_keras])

model = Model([Xinput, Zinput], merged)
model.compile(loss='mean_squared_error', optimizer='sgd')

from keras.callbacks import TensorBoard
# train the model
model.fit([X[:, 1:], Z], Y.flatten(),
          epochs=nb_epoch,
          batch_size=100,
          verbose=0,
          shuffle=True,
          )  # callbacks=[TensorBoard(log_dir='/tmp/xinyi_spot_invert')])

Ypredict = model.predict([X[:, 1:], Z])

betakeras = np.hstack((model.get_weights()[1], model.get_weights()[0].flatten()))
bkeras = model.get_weights()[2].flatten()

fe_params['Keras'] = pd.Series(betakeras, index=fe_params.index)
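# In the hstack above, model.get_weights()[1] is the bias of the fixedEffect layer (playing
# the role of the intercept) and model.get_weights()[0] is its kernel (the remaining
# fixed-effect coefficients); model.get_weights()[2], already extracted as bkeras, holds the
# per-subject random intercepts stored next.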
random_effects['Keras'] = pd.Series(bkeras, index=random_effects.index)
print(fe_params)


# # Display estimation

# In[35]:


plotfitted(fe_params=fe_params, random_effects=random_effects, X=X, Z=Z, Y=Y)


# It is important to note that, although the fitted parameters (i.e., the regression
# coefficients) are not identical across the different approaches, the model predictions
# are highly similar (they at least pass the eyeball test).
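
# As a quick numeric complement to the eyeball test above (a minimal sketch using only the
# objects already defined in this notebook), compare each implementation against the
# StatsModels LMM fit: the correlation of the fixed-effect estimates and the MSE of the
# fitted values.

# In[ ]:


for iname in fe_params.columns:
    # fitted values from this implementation's fixed and random effects
    fitted = np.dot(X, fe_params[iname]) + np.dot(Z, random_effects[iname])
    # agreement of the fixed-effect estimates with the StatsModels LMM reference
    corr_beta = np.corrcoef(fe_params['LMM'], fe_params[iname])[0, 1]
    mse = np.mean(np.square(Y.flatten() - np.asarray(fitted).flatten()))
    print("%8s  corr(beta, LMM) = %.3f   MSE = %.1f" % (iname, corr_beta, mse))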