In [1]:

import scipy,pylab,numpy
from numpy import *
from scipy import *
from pylab import *
from urllib import urlopen
from gzip import GzipFile
from scipy.spatial import distance
from scipy.spatial.distance import cdist

Linear Classification Problem¶

Let's start by generating a simple linear classification problem. That is, a problem in which the optimal decision boundary is known to be linear. We're also generating a problem in which the samples are perfectly linearly separable. This is the exception rather than the rule.

In [1]:

data = random_sample((1000,2))
labels = (data[:,0]*0.7+data[:,1]*0.4>0.5)
sum(labels)

Out[1]:

In [2]:

d0 = data[labels==False]
d1 = data[labels]
figure(figsize=(6,6)); xlim((0,1)); ylim((0,1))
plot(d0[:,0],d0[:,1],"bo")
plot(d1[:,0],d1[:,1],"ro")
print len(d0),len(d1)

452 548

As before, we augment the vectors to get rid of the inhomogeneous part.

In [3]:

augmented = concatenate([ones((len(data),1)),data],axis=1)
augmented[:3,:]

Out[3]:

array([[ 1.        ,  0.35265835,  0.82892312],
       [ 1.        ,  0.09204218,  0.87157948],
       [ 1.        ,  0.83583516,  0.77753499]])

It's convenient to use labels of (-1,1) instead of (0,1); that way, the weight vector we get out of least squares is comparable to the weight vector we're getting out of the perceptron learning algorithm.

In [4]:

labels = 2.0*labels-1.0

Least Square Solution¶

We now want to find a vector $a$ such that $a \cdot x_i \approx c_i$ for all $i$.

We can write this as a matrix and vector equation if we put the measurements into the rows of a matrix $X = {x_1\choose x_N}$ and $c = {c_1\choose c_N}$

$$X \cdot a^T = c$$

In order to solve this, we take the pseudo-inverse of $X$:

$$a^T = X^\dagger \cdot c$$

The pseudo-inverse is a generalization of the inverse matrix for non-rectangular and/or non-full-rank matrices.

In [5]:

a = dot(linalg.pinv(augmented),labels)
d,a0,a1 = a
print d,a0,a1

-1.81688247454 2.62589047604 1.2659712297

In [6]:

d0 = data[labels<=0]
d1 = data[labels>0]
figure(figsize=(6,6)); xlim((0,1)); ylim((0,1))
plot(d0[:,0],d0[:,1],"bo")
plot(d1[:,0],d1[:,1],"ro")
plot([0,-d/a0],[-d/a1,0],"g")
savefig("tmp.png")
print len(d0),len(d1)

452 548

In [7]:

xs = linspace(0.0,1.0,100)[:,newaxis]
ys = linspace(0.0,1.0,100)[newaxis,:]
xs = xs + 0*ys
ys = ys + 0*xs
image = xs*a0+ys*a1+d
imshow(image.T[::-1]); savefig("temp.png")

In [ ]: