Linear Regression Derived

Cost function

$$ C = \sum_i (y_i - mx_i - c)^2 $$

Calculate the partial derivative with respect to $m$

$$ \begin{align} \frac{\partial C}{\partial m} &= \sum_i 2(y_i - m x_i -c)(-x_i) \\ &= -2 \sum_i x_i (y_i - m x_i -c) \\ \end{align} $$

Set derivative to zero

$$ \begin{align} & \frac{\partial C}{\partial m} = 0 \\ \Rightarrow & -2 \sum_i x_i (y_i - m x_i -c) = 0 \\ \Rightarrow & \sum_i (x_i y_i - m x_i x_i - x_i c) = 0 \\ \Rightarrow & \sum_i x_i y_i - \sum_i m x_i x_i - \sum_i x_i c = 0 \\ \Rightarrow & \sum_i x_i y_i - \sum_i x_i c = \sum_i m x_i x_i \\ \Rightarrow & \sum_i x_i y_i - c \sum_i x_i = m \sum_i x_i x_i \\ \Rightarrow & m = \frac{\sum_i x_i y_i - c \sum_i x_i}{\sum_i x_i x_i} \end{align} $$

Calculate the partial derivative with respect to $c$

$$ \begin{align} \frac{\partial C}{\partial c} &= \sum_i 2(y_i - m x_i -c)(-1) \\ &= -2 \sum_i (y_i - m x_i -c) \\ \end{align} $$

Set the derivative to zero

$$ \begin{align} & \frac{\partial C}{\partial c} = 0 \\ \Rightarrow & -2 \sum_i (y_i - m x_i - c) = 0 \\ \Rightarrow & \sum_i (y_i - m x_i - c) = 0 \\ \Rightarrow & \sum_i y_i - \sum_i m x_i - \sum_i c = 0 \\ \Rightarrow & \sum_i y_i - m \sum_i x_i - c \sum_i 1 = 0 \\ \Rightarrow & \sum_i y_i - m \sum_i x_i = c \sum_i 1 \\ \Rightarrow & c = \frac{\sum_i y_i - m \sum_i x_i}{\sum_i 1} \\ \Rightarrow & c = \frac{\sum_i y_i}{\sum_i 1} - m \frac{\sum_i x_i}{\sum_i 1} \\ \Rightarrow & c = \bar{y} - m \bar{x} \\ \end{align} $$

Combine the estimates

$$ \begin{align} & m = \frac{\sum_i x_i y_i - c \sum_i x_i}{\sum_i x_i x_i} \\ & c = \bar{y} - m \bar{x} \\ & \Rightarrow m = \frac{\sum_i x_i y_i - (\bar{y} - m \bar{x}) \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i + m \bar{x} \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i}{\sum_i x_i x_i} + m \frac{\bar{x} \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m - m \frac{\bar{x} \sum_i x_i}{\sum_i x_i x_i} = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m \left( 1 - \frac{\bar{x} \sum_i x_i}{\sum_i x_i x_i} \right) = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m \left( \frac{\sum_i x_i x_i - \bar{x} \sum_i x_i}{\sum_i x_i x_i} \right) = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i}{\sum_i x_i x_i} \\ & \Rightarrow m \left( \sum_i x_i x_i - \bar{x} \sum_i x_i \right) = \sum_i x_i y_i - \bar{y} \sum_i x_i \\ & \Rightarrow m = \frac{\sum_i x_i y_i - \bar{y} \sum_i x_i}{\sum_i x_i x_i - \bar{x} \sum_i x_i} \\ \end{align} $$

End

In [8]:
import numpy as np

# Synthetic data: points on the line d = 5w + 10 at w = 1, 2, ..., 15,
# perturbed by Gaussian noise (mean 0, standard deviation 5).
# NOTE(review): the generator is not seeded, so the fitted values change on every run.
w = np.arange(1.0, 16.0, 1.0)
noise = np.random.normal(loc=0.0, scale=5.0, size=w.size)
d = 5.0 * w + 10.0 + noise

# Method 1: centre both variables on their means, then take the ratio of the
# summed cross-products to the summed squared deviations.
w_avg, d_avg = w.mean(), d.mean()
w_zero, d_zero = w - w_avg, d - d_avg

m = (w_zero * d_zero).sum() / (w_zero * w_zero).sum()
c = d_avg - m * w_avg

print("m is %8.6f and c is %6.6f." % (m, c))


# Method 2: the closed-form slope written with raw (uncentred) sums.
# The aliases below give the data the x/y names used in the formula.
x = w
y = d
x_avg = w_avg
y_avg = d_avg

slope_numerator = np.sum(x * y) - y_avg * np.sum(x)
slope_denominator = np.sum(x * x) - x_avg * np.sum(x)
m2 = slope_numerator / slope_denominator
c2 = y_avg - m2 * x_avg
m2, c2
m is 4.728824 and c is 12.756653.
Out[8]:
(4.7288241778626832, 12.756652779225512)
In [13]:
# Three algebraically equivalent expressions for the least-squares slope,
# evaluated on the x, y, x_avg, y_avg defined in the previous cell.
n_obs = len(x)
sum_x = np.sum(x)
sum_y = np.sum(y)
sum_xy = np.sum(x * y)
sum_xx = np.sum(x * x)

# q1: closed form using raw sums and the means.
q1 = (sum_xy - y_avg * sum_x) / (sum_xx - x_avg * sum_x)

# q2: sum of (x - x_avg) * (y - y_avg) over sum of (x - x_avg)^2, multiplied out term by term.
q2_top = sum_xy - y_avg * sum_x - x_avg * sum_y + n_obs * x_avg * y_avg
q2_bottom = sum_xx - x_avg * sum_x - x_avg * sum_x + n_obs * x_avg * x_avg
q2 = q2_top / q2_bottom

# q3: the same expansion after collapsing the mean terms into n * mean * mean.
q3 = (sum_xy - (n_obs * x_avg * y_avg)) / (sum_xx - n_obs * x_avg * x_avg)

q1, q2, q3
Out[13]:
(4.7288241778626832, 4.7288241778626832, 4.7288241778626832)
In [ ]: