In [48]:
import sys
import numpy as np
import pylab as pb

In [49]:
# Load the data: the 0:1 and 1:2 slices ensure we get column vectors
# for x (the year of the Games) and y (the winning pace in minutes per kilometre).
olympics = np.genfromtxt('olympicMarathonTimes.csv', delimiter=',')
x = olympics[:, 0:1]
y = olympics[:, 1:2]

In [50]:
print(x)
print(y)

[[ 1896.]
[ 1900.]
[ 1904.]
[ 1908.]
[ 1912.]
[ 1920.]
[ 1924.]
[ 1928.]
[ 1932.]
[ 1936.]
[ 1948.]
[ 1952.]
[ 1956.]
[ 1960.]
[ 1964.]
[ 1968.]
[ 1972.]
[ 1976.]
[ 1980.]
[ 1984.]
[ 1988.]
[ 1992.]
[ 1996.]
[ 2000.]
[ 2004.]
[ 2008.]
[ 2012.]]
[[ 4.47083333]
[ 4.46472926]
[ 5.22208333]
[ 4.15467867]
[ 3.90331675]
[ 3.56951267]
[ 3.82454477]
[ 3.62483707]
[ 3.59284275]
[ 3.53880792]
[ 3.67010309]
[ 3.39029111]
[ 3.43642612]
[ 3.20583007]
[ 3.13275665]
[ 3.32819844]
[ 3.13583758]
[ 3.0789588 ]
[ 3.10581822]
[ 3.06552909]
[ 3.09357349]
[ 3.16111704]
[ 3.14255244]
[ 3.08527867]
[ 3.10265829]
[ 2.99877553]
[ 3.03392977]]
In [51]:
pb.plot(x, y, 'rx')

Out[51]:
[<matplotlib.lines.Line2D at 0x5205310>]
In [52]:
m = -0.4
c = 80

In [53]:
# An iterative solution for finding the gradient and bias:
# each update sets one parameter to its least squares optimum
# while the other is held fixed (coordinate descent).
# Notice how slow it is!
xTest = np.linspace(1890, 2020, 130)[:, None]
for i in range(100000):
    m = ((y - c)*x).sum()/(x*x).sum()
    c = (y - m*x).sum()/y.shape[0]
pb.plot(xTest, m*xTest + c, 'b-')
pb.plot(x, y, 'rx')

Out[53]:
[<matplotlib.lines.Line2D at 0x520d5d0>]
In [54]:
print(m)
print(c)

-0.0129806477465
28.8952457368
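As a quick check (an addition to the transcript, not part of the original notebook), the same m and c can be recovered in a single step by solving the 2x2 normal equations for the basis [1, x]; the names Phi1 and w1 are introduced here:

# Direct (non-iterative) least squares check; Phi1 and w1 are our names.
Phi1 = np.hstack([np.ones(x.shape), x])
w1 = np.linalg.solve(np.dot(Phi1.T, Phi1), np.dot(Phi1.T, y))
print(w1)  # w1[0] should match c and w1[1] should match m above
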
In [55]:
Phi = np.hstack([np.ones(x.shape), x, x**2])
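
Each row of Phi holds the basis [1, x, x**2] evaluated at one Olympic year. A small sanity check on its shape (assuming the 27 Games listed above):

print(Phi.shape)  # expect (27, 3): one row per Games, one column per basis function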

In [61]:
# The multivariate linear regression solution: solve the
# normal equations Phi.T Phi w = Phi.T y for the weights w.
w = np.linalg.solve(np.dot(Phi.T, Phi), np.dot(Phi.T, y))
print(w)

[[  6.43641952e+02]
[ -6.42502986e-01]
[  1.61109703e-04]]
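To quantify the fit (an addition, not part of the original notebook; E is a name introduced here), one can compute the residual sum of squares of the quadratic model on the training data:

# Residual sum of squares for the quadratic fit; E is our name.
E = ((y - np.dot(Phi, w))**2).sum()
print(E)
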
In [57]:
xTest = np.linspace(1890, 2020, 130)[:, None]
PhiTest = np.hstack([np.ones(xTest.shape), xTest, xTest**2])

In [58]:
fTest = np.dot(PhiTest, w)

In [59]:
pb.plot(x, y, 'rx')
pb.plot(xTest, fTest, '-')

Out[59]:
[<matplotlib.lines.Line2D at 0x501a190>]
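
As a usage sketch (hypothetical, not in the original notebook; x2016 and Phi2016 are names introduced here), the fitted quadratic model can be evaluated at a single year, e.g. the then-future 2016 Games:

# Hypothetical prediction at 2016 with the fitted quadratic model.
x2016 = np.asarray([[2016.]])
Phi2016 = np.hstack([np.ones(x2016.shape), x2016, x2016**2])
print(np.dot(Phi2016, w))  # predicted winning pace in minutes per kilometre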