#!/usr/bin/env python
# coding: utf-8

# # Linear regression

# In[1]:

import numpy as np
from numpy.linalg import pinv

# Design matrix: one training example per row; the leading 1 in each row is
# the intercept (bias) feature.
X = np.array([
    [1, 2104, 5, 2104, 1, 45],
    [1, 1416, 3, 2104, 2, 40],
    [1, 1534, 3, 2104, 2, 30],
    [1,  852, 2, 2104, 1, 36],
])

# Target values (one per training example), kept as a column vector.
y = np.array([
    [460],
    [232],
    [315],
    [178],
])

# Normal equation: theta = (X^T X)^+ X^T y.
# The pseudo-inverse is used so this works even when X^T X is singular.
XT = X.T
pinv(XT @ X) @ XT @ y

# ### Noninvertibility
#
# Q: How do we calculate the inverse of $X^TX$ if it is non-invertible
# (singular/degenerate)?
#
# A: `pinv` will do it for you (pseudo-inverse).

# In[2]:

# The zero matrix has no ordinary inverse, but its pseudo-inverse is
# well defined (and is itself the zero matrix).
A = np.zeros((2, 2))
Inverse = pinv(A)
Inverse

# Q: When would it be non-invertible?
#
# A: A few causes:
#
# 1. Redundant features (linearly dependent)
#    - Example: $x_1$ = size in feet squared and $x_2$ = size in meters squared
# 1. Too many features (example: $m \leq n$)
#    - Example: $m=10$ (10 training set items) but $n=100$ (100 features)
#    - To solve: delete some features or use **regularization**