In [2]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

def basic_linear_reg(x, y):
# linear reg on 2 axes
length = len(x)
sum_x = sum(x)
sum_y = sum(y)
sum_xsq = sum(map(lambda a: a * a, x))
sum_xy = sum(map(lambda a, b: a * b, x, y))

a = (sum_xy - sum_x * sum_y / length) / (sum_xsq - sum(x) ** 2 / length)
b = (sum_y - a * sum_x) / length
return [a, b]

x = np.random.uniform(0, 100, 1000)
y = np.log(x) + np.random.normal(0, 0.3, 1000)

plt.figure(1)
plt.scatter(x, y, label="log(x) with some noise")
plt.plot(np.arange(1, 100), np.log(np.arange(1, 100)), c="chartreuse", label="log(x) true function")
plt.xlabel("x")
plt.ylabel("f(x) = log(x)")
plt.legend(loc="best")
plt.title("A Basic Log Function")

plt.figure(2)
reg = basic_linear_reg(x, y)
r_x, r_y = zip(*((i, i*reg[0] + reg[1]) for i in range(100)))
plt.plot(r_x, r_y, c='g', label="linear regression")
plt.scatter(x, y - (x * reg[0] + reg[1]), c='red', label="residuals")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")

plt.figure(3)
r_x, r_y = zip(*((i, i*reg[0] + reg[1]) for i in range(100)))
plt.plot(r_x, r_y, c='g', label="linear regression")
plt.scatter(x, y - (x * reg[0] + reg[1]), c='red', label="residuals")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")

Out[2]:
<matplotlib.legend.Legend at 0x10aa6a190>
In [64]:
%matplotlib inline

from sklearn.ensemble import RandomForestRegressor
from sklearn.learning_curve import learning_curve

rfg = RandomForestRegressor()
r_x = np.array(r_x)
rfg.fit(x.reshape(-1, 1), y)

plt.plot(r_x, rfg.predict(r_x.reshape(-1, 1)), c="yellow", label = "random forest fit")
plt.plot(np.arange(1, 100), np.log(np.arange(1, 100)), c="chartreuse", label="log(x) true function")
plt.scatter(x, y, label="log(x) with some noise")
plt.xlabel("x")
plt.ylabel("y")

plt.figure(2)
plt.plot(r_x, rfg.predict(r_x.reshape(-1, 1)), c="green", label = "random forest fit")
plt.scatter(x, y - rfg.predict(x.reshape(-1, 1)), c="r", label = "residual")

Out[64]:
<matplotlib.collections.PathCollection at 0x10dc52fd0>