# Walkthrough of basic statistics with scipy, pandas and statsmodels:
# descriptive statistics, t-tests, distribution fitting, simple and multiple
# linear regression, and a one-way ANOVA.
#
# The script was written as a notebook: bare expressions such as
# `data.describe()` only show a value when evaluated interactively.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.anova import anova_lm

# Equivalent of the `%matplotlib inline` magic, written as plain Python so the
# file also parses outside IPython, where `get_ipython` is not defined.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# ---------------------------------------------------------------------------
# Descriptive statistics
# ---------------------------------------------------------------------------
x = [1, 2, 3, 4, 5]
stats.describe(x)

data = pd.DataFrame(
    [[1, 2, 3.5], [2, 2.4, 3.1], [3, 1.8, 2.5]],
    columns=['a', 'b', 'c'],
)
print(data)
data.describe()

# ---------------------------------------------------------------------------
# t-tests
# ---------------------------------------------------------------------------
x1 = np.random.randn(100, 1)
x2 = np.random.randn(100, 1)

# One-sample tests. The message is built from the value actually tested, so
# the text and the computation cannot drift apart.
for popmean in (0, 1):
    tstat, pval = stats.ttest_1samp(x1, popmean)
    print("Comparison of the mean of x1 to %s.\nT-statistic = %s; P-value = %s."
          % (popmean, tstat, pval))

# Two-sample test on independent samples.
tstat, pval = stats.ttest_ind(x1, x2)
print("Comparison of the means of x1 and x2.\nT-statistic = %s; P-value = %s."
      % (tstat, pval))

# ---------------------------------------------------------------------------
# Sampling from a normal distribution and fitting it back
# ---------------------------------------------------------------------------
u, sigma = 4, 2
random_numbers = stats.norm.rvs(u, sigma, size=50)
random_numbers
stats.norm.fit(random_numbers)

# ---------------------------------------------------------------------------
# Simple linear regression: y = 3 + 0.5 * x + noise
# ---------------------------------------------------------------------------
# generate some data
x = np.arange(20)
y = 3 + 0.5 * x + 2 * np.random.randn(20)
# Built from a dict rather than zip(): zip() is a one-shot iterator on
# Python 3, while a dict of columns works on both major versions.
data = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])

# plot the data
plt.plot(data['x'], data['y'], 'bo')
plt.show()

results = smf.ols('y ~ x', data).fit()
print(results.summary())

# Look the coefficients up by name instead of relying on their order.
intercept = results.params['Intercept']
slope = results.params['x']
r2 = results.rsquared
print("%s %s %s" % (slope, intercept, r2))

# Overlay the fitted line on the scatter plot. plt.hold(True) is not needed:
# successive plot calls draw on the same axes by default, and hold() was
# removed in Matplotlib 3.0.
plt.plot(data['x'], data['y'], 'bo')
line_x = np.array([x.min(), x.max()])
line_y = intercept + slope * line_x
plt.plot(line_x, line_y, 'r-')
plt.show()

# ---------------------------------------------------------------------------
# Multiple regression: y = 3 + 0.5 * x + 1.5 * z + noise
# ---------------------------------------------------------------------------
# generate some data
x = np.random.randn(50)
z = np.random.randn(50)
noise = np.random.randn(50)
y = 3 + 0.5 * x + 1.5 * z + noise
data = pd.DataFrame({'x': x, 'y': y, 'z': z}, columns=['x', 'y', 'z'])

# Additive model.
results = smf.ols('y ~ x + z', data).fit()
print(results.summary())

# Model with an interaction term.
results = smf.ols('y ~ x + z + x*z', data).fit()
print(results.summary())

# Model with a transformed regressor.
# NOTE(review): z is drawn from a standard normal, so np.log10(z) is NaN for
# every negative z. Confirm how the formula backend treats those rows, or use
# a strictly positive regressor for this demonstration.
results = smf.ols('y ~ x + np.log10(z)', data).fit()
print(results.summary())

# ---------------------------------------------------------------------------
# One-way ANOVA on the rehab data set
# ---------------------------------------------------------------------------
url = 'http://stats191.stanford.edu/data/rehab.csv'
rehab_table = pd.read_csv(url)
rehab_table

# C(...) marks Fitness as a categorical predictor in the formula.
results = smf.ols('Time ~ C(Fitness)', rehab_table).fit()
print(results.summary())

anova_lm(results)