Some notes on how to run a multi-way ANOVA in Python. Primary sources include Ben Bolker's book and the Statsmodels docs.
from urllib2 import urlopen
import numpy as np
import statsmodels.api as sm
import pandas
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols
from statsmodels.graphics.api import interaction_plot, abline_plot
from statsmodels.stats.anova import anova_lm
# Two-way ANOVA on the kidney data: log(Days + 1) is modeled by the Duration
# and Weight factors and their interaction.
url = 'http://stats191.stanford.edu/data/kidney.table'
# Split columns on runs of whitespace. The earlier " *" pattern can match the
# empty string, and regex splitting on Python 3.7+ treats an empty match as a
# split point between every character; r"\s+" needs at least one whitespace
# character, so it behaves the same on old and new interpreters.
kidney_table = pandas.read_table(url, sep=r"\s+")
Days = kidney_table['Days']
# Select the Duration column (this name used to be bound to the whole table).
Duration = kidney_table['Duration']
# C(...) marks a term as categorical; '*' expands to both main effects plus
# their interaction. The data frame is passed positionally as the second
# argument.
kidney_lm = ols('np.log(Days+1) ~ C(Duration) * C(Weight)', kidney_table).fit()
# Bare expression so the notebook displays the ANOVA table for the fitted model.
anova_lm(kidney_lm)
| | df | sum_sq | mean_sq | F | PR(>F) |
|---|---|---|---|---|---|
| C(Duration) | 1 | 2.339693 | 2.339693 | 4.358293 | 0.041562 |
| C(Weight) | 2 | 16.971291 | 8.485645 | 15.806745 | 0.000004 |
| C(Duration):C(Weight) | 2 | 0.635658 | 0.317829 | 0.592040 | 0.556748 |
| Residual | 54 | 28.989198 | 0.536837 | NaN | NaN |
# One-way model of Time by the categorical Fitness factor on the rehab data.
# This is the style of the examples in the docs, but passing data=rehab_table
# as a keyword doesn't work here: the call below raises
# "TypeError: from_formula() takes at least 3 arguments (2 given)".
# Passing rehab_table as the second positional argument does work.
url = 'http://stats191.stanford.edu/data/rehab.csv'
rehab_table = pandas.read_table(url, delimiter=",")
rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-49381bc9f535> in <module>()
      3 url = 'http://stats191.stanford.edu/data/rehab.csv'
      4 rehab_table = pandas.read_table(url, delimiter=",")
----> 5 rehab_lm = ols('Time ~ C(Fitness)', data=rehab_table).fit()

TypeError: from_formula() takes at least 3 arguments (2 given)