Multi-way ANOVA

Some notes on who to run a multi-way ANOVA in Python. Primary sources include Ben Bolker's book and the Statsmodels docs.

In [1]:
from urllib2 import urlopen

import numpy as np

import statsmodels.api as sm

import pandas

import matplotlib.pyplot as plt

from statsmodels.formula.api import ols

from statsmodels.graphics.api import interaction_plot, abline_plot

from statsmodels.stats.anova import anova_lm

url = 'http://stats191.stanford.edu/data/kidney.table'
kidney_table = pandas.read_table(url, delimiter=" *")

Days = kidney_table['Days']
Duration = kidney_table
In [2]:
kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', kidney_table).fit()
anova_lm(kidney_lm)
Out[2]:
df sum_sq mean_sq F PR(>F)
C(Duration) 1 2.339693 2.339693 4.358293 0.041562
C(Weight) 2 16.971291 8.485645 15.806745 0.000004
C(Duration):C(Weight) 2 0.635658 0.317829 0.592040 0.556748
Residual 54 28.989198 0.536837 NaN NaN
In [3]:
#This is the style of the examples in the docs, but data=rehab_table doesn't work
#just rehab_table as the second argument does
url = 'http://stats191.stanford.edu/data/rehab.csv'
rehab_table = pandas.read_table(url, delimiter=",")
rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-49381bc9f535> in <module>()
      3 url = 'http://stats191.stanford.edu/data/rehab.csv'
      4 rehab_table = pandas.read_table(url, delimiter=",")
----> 5 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()

TypeError: from_formula() takes at least 3 arguments (2 given)