#!/usr/bin/env python
# coding: utf-8

# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')

import numpy as np
import matplotlib.pyplot as plt


def basic_linear_reg(x, y):
    """Ordinary least-squares fit of the line y = a*x + b.

    Parameters
    ----------
    x, y : equal-length sequences of numbers.

    Returns
    -------
    [a, b] : slope and intercept of the least-squares line.
    """
    length = len(x)
    sum_x = sum(x)
    sum_y = sum(y)
    sum_xsq = sum(v * v for v in x)
    sum_xy = sum(a * b for a, b in zip(x, y))
    # Closed-form OLS slope/intercept; reuse sum_x instead of
    # recomputing sum(x) a second time as the original did.
    a = (sum_xy - sum_x * sum_y / length) / (sum_xsq - sum_x ** 2 / length)
    b = (sum_y - a * sum_x) / length
    return [a, b]


# Noisy sample of log(x) on (0, 100).
x = np.random.uniform(0, 100, 1000)
y = np.log(x) + np.random.normal(0, 0.3, 1000)

plt.figure(1)
plt.scatter(x, y, label="log(x) with some noise")
plt.plot(np.arange(1, 100), np.log(np.arange(1, 100)),
         c="chartreuse", label="log(x) true function")
plt.xlabel("x")
plt.ylabel("f(x) = log(x)")
plt.legend(loc="best")
plt.title("A Basic Log Function")

# Linear fit and its residuals.
plt.figure(2)
reg = basic_linear_reg(x, y)
# Build the regression line directly as arrays; the original built a
# tuple via zip(*...) and later had to wrap it in np.array().
r_x = np.arange(100)
r_y = r_x * reg[0] + reg[1]
plt.plot(r_x, r_y, c='g', label="linear regression")
plt.scatter(x, y - (x * reg[0] + reg[1]), c='red', label="residuals")
plt.xlabel("x")
plt.ylabel("y")
plt.legend(loc="best")
# NOTE: the original repeated this exact figure verbatim as figure(3);
# the copy-paste duplicate added no information and was removed.

# In[64]:

get_ipython().run_line_magic('matplotlib', 'inline')

from sklearn.ensemble import RandomForestRegressor
# BUG FIX: sklearn.learning_curve was removed in scikit-learn 0.20;
# learning_curve now lives in sklearn.model_selection.
from sklearn.model_selection import learning_curve

# Random-forest fit of the same noisy log data.
rfg = RandomForestRegressor()
rfg.fit(x.reshape(-1, 1), y)

plt.plot(r_x, rfg.predict(r_x.reshape(-1, 1)), c="yellow",
         label="random forest fit")
plt.plot(np.arange(1, 100), np.log(np.arange(1, 100)),
         c="chartreuse", label="log(x) true function")
plt.scatter(x, y, label="log(x) with some noise")
plt.xlabel("x")
plt.ylabel("y")

# NOTE(review): figure(2) is reused here, matching the original; in a
# plain-script run this draws onto the earlier residual figure — confirm
# that is intended (each notebook cell normally opens fresh figures).
plt.figure(2)
plt.plot(r_x, rfg.predict(r_x.reshape(-1, 1)), c="green",
         label="random forest fit")
plt.scatter(x, y - rfg.predict(x.reshape(-1, 1)), c="r", label="residual")