In [48]:
import sys
import numpy as np
import pylab as pb
In [49]:
# Load the data: slicing with 0:1 and 1:2 (rather than 0 and 1)
# ensures x and y come back as column vectors.
olympics = np.genfromtxt('olympicMarathonTimes.csv', delimiter=',')
x = olympics[:, 0:1]
y = olympics[:, 1:2]
In [50]:
print(x)
print(y)
[[ 1896.]
 [ 1900.]
 [ 1904.]
 [ 1908.]
 [ 1912.]
 [ 1920.]
 [ 1924.]
 [ 1928.]
 [ 1932.]
 [ 1936.]
 [ 1948.]
 [ 1952.]
 [ 1956.]
 [ 1960.]
 [ 1964.]
 [ 1968.]
 [ 1972.]
 [ 1976.]
 [ 1980.]
 [ 1984.]
 [ 1988.]
 [ 1992.]
 [ 1996.]
 [ 2000.]
 [ 2004.]
 [ 2008.]
 [ 2012.]]
[[ 4.47083333]
 [ 4.46472926]
 [ 5.22208333]
 [ 4.15467867]
 [ 3.90331675]
 [ 3.56951267]
 [ 3.82454477]
 [ 3.62483707]
 [ 3.59284275]
 [ 3.53880792]
 [ 3.67010309]
 [ 3.39029111]
 [ 3.43642612]
 [ 3.20583007]
 [ 3.13275665]
 [ 3.32819844]
 [ 3.13583758]
 [ 3.0789588 ]
 [ 3.10581822]
 [ 3.06552909]
 [ 3.09357349]
 [ 3.16111704]
 [ 3.14255244]
 [ 3.08527867]
 [ 3.10265829]
 [ 2.99877553]
 [ 3.03392977]]
In [51]:
pb.plot(x, y, 'rx')
Out[51]:
[<matplotlib.lines.Line2D at 0x5205310>]
In [52]:
# Initial guesses for the slope m and offset c.
m = -0.4
c = 80
In [53]:
# An iterative solution for the slope m and offset c:
# coordinate descent, alternately solving for each parameter
# with the other held fixed. Notice how slowly it converges!
xTest = np.linspace(1890, 2020, 130)[:, None]
for i in np.arange(100000):
    m = ((y - c)*x).sum()/(x*x).sum()
    c = (y - m*x).sum()/y.shape[0]
pb.plot(xTest, m*xTest + c, 'b-')
pb.plot(x, y, 'rx')
Out[53]:
[<matplotlib.lines.Line2D at 0x520d5d0>]
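The fixed iteration count above is wasteful. As a sketch, the same updates can be run with a simple stopping rule instead; the tolerance values here are arbitrary choices, not tuned.

In [ ]:
# Sketch: the same coordinate descent, stopping once the updates
# no longer change m and c appreciably. Tolerances are assumptions.
m, c = -0.4, 80.
for i in np.arange(100000):
    m_new = ((y - c)*x).sum()/(x*x).sum()
    c_new = (y - m_new*x).sum()/y.shape[0]
    if abs(m_new - m) < 1e-12 and abs(c_new - c) < 1e-9:
        break
    m, c = m_new, c_new
print(i)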
In [54]:
print(m)
print(c)
-0.0129806477465
28.8952457368
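The same straight-line fit is also available in closed form: stack a column of ones with x and solve the normal equations directly. A minimal sketch follows; at convergence its result should agree with the iterative m and c above.

In [ ]:
# Sketch: closed-form least squares for the straight line.
# w1[0] is the offset c, w1[1] the slope m.
Phi1 = np.hstack([np.ones(x.shape), x])
w1 = np.linalg.solve(np.dot(Phi1.T, Phi1), np.dot(Phi1.T, y))
print(w1)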
In [55]:
# Build the design matrix for a quadratic model: columns 1, x, x**2.
Phi = np.hstack([np.ones(x.shape), x, x**2])
In [56]:
# The least-squares solution for the quadratic basis model,
# found by solving the normal equations Phi^T Phi w = Phi^T y.
w = np.linalg.solve(np.dot(Phi.T, Phi), np.dot(Phi.T, y))
print(w)
[[  6.43641952e+02]
 [ -6.42502986e-01]
 [  1.61109703e-04]]
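As a check on the normal-equations solve, np.linalg.lstsq fits the same model without forming Phi.T Phi explicitly, which is numerically safer when the basis is poorly conditioned (raw years give very large x and x**2 values). A sketch:

In [ ]:
# Sketch: the same least-squares weights via lstsq, avoiding the
# explicit Phi^T Phi product.
w_check = np.linalg.lstsq(Phi, y)[0]
print(w_check)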
In [57]:
# Evaluate the quadratic basis on a grid of test inputs for plotting.
xTest = np.linspace(1890, 2020, 130)[:, None]
PhiTest = np.hstack([np.ones(xTest.shape), xTest, xTest**2])
In [58]:
# Predict by multiplying the test basis by the learned weights.
fTest = np.dot(PhiTest, w)
In [59]:
pb.plot(x, y, 'rx')
pb.plot(xTest, fTest, '-') 
Out[59]:
[<matplotlib.lines.Line2D at 0x501a190>]
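To put a number on the quadratic fit, a sketch of its sum-of-squares training error (the quantity the least-squares solution minimises):

In [ ]:
# Sketch: sum-of-squares error of the quadratic fit at the training inputs.
f = np.dot(Phi, w)
print(((y - f)**2).sum())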