#!/usr/bin/env python
# coding: utf-8
"""Regression of child height on parent heights (Jupyter notebook export).

The helper functions below only need NumPy and a table object exposing
``column(label)``.  The analysis itself (loading ``galton.csv`` and fitting
the models) runs only when the file is executed as a script, so the helpers
can be imported without the notebook-only dependencies.
"""

import numpy as np


# In[3]:


def standard_units(any_numbers):
    """Convert any array of numbers to standard units.

    Each value is expressed as its distance from the mean, measured in
    standard deviations (``np.std``, i.e. the population SD).

    Args:
        any_numbers: array-like of numbers.

    Returns:
        NumPy array with the same number of elements as the input.  If all
        values are equal the SD is zero and the result is NaN.
    """
    values = np.asarray(any_numbers)
    return (values - np.mean(values)) / np.std(values)


def correlation(t, x, y):
    """Return the correlation coefficient between columns ``x`` and ``y``.

    Computed as the mean of the products of the two columns in standard
    units.

    Args:
        t: table-like object with a ``column(label)`` method.
        x: label of the first column.
        y: label of the second column.
    """
    return np.mean(standard_units(t.column(x)) * standard_units(t.column(y)))


def slope(table, x, y):
    """Return the slope of the regression line predicting ``y`` from ``x``.

    The slope is ``r * SD(y) / SD(x)``, where ``r`` is ``correlation``.
    """
    r = correlation(table, x, y)
    return r * np.std(table.column(y)) / np.std(table.column(x))


def intercept(table, x, y):
    """Return the intercept of the regression line predicting ``y`` from ``x``.

    The intercept is ``mean(y) - slope * mean(x)``.
    """
    a = slope(table, x, y)
    return np.mean(table.column(y)) - a * np.mean(table.column(x))


# In[7]:


def galton_multiple_mse(a, b, c):
    """Return the mean squared error of ``a*father + b*mother + c``.

    The prediction is compared with the ``childHeight`` column.  This reads
    the module-level ``galton`` table, which is only loaded when the file is
    run as a script.  The three-positional-argument signature is kept as is
    because the function is handed directly to ``minimize``.
    """
    y = galton.column('childHeight')
    fitted = a * galton.column('father') + b * galton.column('mother') + c
    return np.mean((y - fitted) ** 2)


if __name__ == "__main__":
    # In[2]: notebook setup.
    # The wildcard import is kept from the notebook; Table and minimize used
    # below come from it.
    from datascience import *  # noqa: F401,F403

    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        # get_ipython is only defined inside IPython/Jupyter; under plain
        # Python there is no inline backend to select, so skip the magic.
        pass

    import matplotlib.pyplot as plots
    plots.style.use('fivethirtyeight')

    # In[5], In[6]:
    galton = Table.read_table('galton.csv')
    # Notebook cells display a bare expression; a script has to print it.
    print(galton)

    # In[10]:
    print(correlation(galton, 'father', 'mother'))

    # In[13], In[14]:
    # Despite its name, best_quad holds the minimizer's (a, b, c) for the
    # *linear* model a*father + b*mother + c.
    best_quad = minimize(galton_multiple_mse)
    print(best_quad)

    # In[11], In[12]:
    print(correlation(galton, 'father', 'childHeight'))
    print(correlation(galton, 'mother', 'childHeight'))

    # In[15]:
    print(best_quad)

    # In[17], In[18]: single-predictor regression on midparentHeight, for
    # comparison with the two-predictor fit above.
    print(intercept(galton, 'midparentHeight', 'childHeight'))
    print(slope(galton, 'midparentHeight', 'childHeight'))

    # In[19], In[20]: combine the father and mother coefficients.
    # NOTE(review): 1.08 is presumably the factor applied to mothers' heights
    # when midparentHeight was built -- confirm against the data set.
    print(best_quad.item(0) + best_quad.item(1))
    print(best_quad.item(0) + best_quad.item(1) / 1.08)