Credits -- This IPython Notebook is based on the excellent CS231n tutorial and accompanying IPython Notebook from Stanford University.
# Import the required modules
%pylab inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
Populating the interactive namespace from numpy and matplotlib
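Note: `%pylab inline` pulls NumPy and matplotlib into the interactive namespace (as the message above shows). If you want to run this code outside IPython, the explicit equivalent is roughly the following (a sketch, not part of the original notebook):
# Explicit imports equivalent to the %pylab magic (assumed setup)
import numpy as np
import matplotlib.pyplot as plt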
# Generate the training set
np.random.seed(0)
N = 100 # number of points per class
D = 2 # dimensionality
K = 3 # number of classes
X = np.zeros((N*K,D))
y = np.zeros(N*K, dtype='uint8')
for j in range(K):
    ix = range(N*j, N*(j+1))
    r = np.linspace(0.0, 1, N) # radius
    t = np.linspace(j*4, (j+1)*4, N) + np.random.randn(N)*0.2 # theta
    X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    y[ix] = j
fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.xlim([-1,1])
plt.ylim([-1,1])
[Figure: scatter plot of the three-class spiral training data, colored by label]
# Initialize the weights and biases
W = 0.01 * np.random.randn(D,K)
b = np.zeros((1, K))
# Number of examples
num_examples = X.shape[0]
# alpha is the learning rate
alpha = 1e-0
# lambda is the regularization coefficient.
# NOTE: since lambda is a reserved word in Python,
# we use the name lambdaa instead.
lambdaa = 1e-3
The class scores are a linear function of the input:

$f = XW + b$

where $X$ has shape (num_examples, $D$) and $W$ has shape ($D$, $K$), so the scores $f$ have shape (num_examples, $K$). The softmax function converts the scores into normalized probabilities, and the loss for example $i$ is the negative log probability assigned to its correct class $y_i$:

$p_k = \frac{e^{f_k}}{ \sum_j e^{f_j} } \hspace{1in} L_i =-\log\left(p_{y_i}\right)$

Differentiating this loss with respect to the scores gives a remarkably simple gradient:
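To make the formulas concrete, here is a tiny numerical check on one hypothetical score vector (illustrative values only, not from the training set):
# Sanity check of the softmax + cross-entropy formulas (hypothetical scores)
f = np.array([3.0, 1.0, 0.2]) # unnormalized class scores for one example
p = np.exp(f)/np.sum(np.exp(f)) # softmax: p is approx [0.836, 0.113, 0.051]
print(-np.log(p[0])) # loss if the true class is 0: approx 0.18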
$\frac{\partial L_i }{ \partial f_k } = p_k - \mathbb{1}(y_i = k)$
Equivalently: $\frac{\partial L_i }{ \partial f_k } = p_k$ when $y_i \neq k$, and $p_k - 1$ when $y_i = k$.
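This gradient expression is easy to verify numerically on a single example (a sketch with hypothetical values; `y_i` and `L` are names introduced here, not from the original):
# Numerical check of dL_i/df_k = p_k - 1(y_i = k) on one example
f = np.array([1.0, -2.0, 0.5]) # hypothetical scores for one example
y_i = 0                        # assumed true class
def L(f):
    p = np.exp(f)/np.sum(np.exp(f))
    return -np.log(p[y_i])
p = np.exp(f)/np.sum(np.exp(f))
analytic = p.copy()
analytic[y_i] -= 1             # p_k - 1(y_i = k)
eps = 1e-5
numeric = np.array([(L(f + eps*np.eye(3)[k]) - L(f - eps*np.eye(3)[k]))/(2*eps)
                    for k in range(3)])
print(analytic, numeric)       # the two should agree to ~1e-9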
for i in range(300):
    # Calculate the scores
    scores = np.dot(X, W) + b
    # Calculate the normalized probabilities
    exp_scores = np.exp(scores)
    probs = exp_scores/np.sum(exp_scores, axis=1, keepdims=True)
    # Negative log probabilities (the correct-class entries give the loss)
    log_probs = -np.log(probs)
    # Calculate the data loss
    data_loss = np.sum(log_probs[range(num_examples), y])/num_examples
    # Calculate the regularization loss
    reg_loss = .5*lambdaa*np.sum(W*W)
    # Get the total loss
    loss = data_loss + reg_loss
    # Print the total loss every 30 iterations
    if i % 30 == 0:
        print("Loss at iteration {} is {}".format(i, loss))
    # Calculate the gradients for backpropagation
    dscores = probs
    dscores[range(num_examples), y] -= 1
    dscores /= num_examples
    dW = np.dot(X.T, dscores)
    db = np.sum(dscores, axis=0, keepdims=True)
    # Regularize the weight gradient
    dW += lambdaa * W
    # No need to regularize the bias gradient
    # Update the weights and biases
    W -= alpha*dW
    b -= alpha*db
Loss at iteration 0 is 1.09691944027
Loss at iteration 30 is 0.822352268534
Loss at iteration 60 is 0.794682543764
Loss at iteration 90 is 0.78872623605
Loss at iteration 120 is 0.787049195401
Loss at iteration 150 is 0.786513941749
Loss at iteration 180 is 0.786331349242
Loss at iteration 210 is 0.786266671779
Loss at iteration 240 is 0.786243250465
Loss at iteration 270 is 0.786234655744
# evaluate training set accuracy
scores = np.dot(X, W) + b
predicted_class = np.argmax(scores, axis=1)
print("training accuracy: {:.2f}".format(np.mean(predicted_class == y)))
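Beyond the single accuracy number, a per-class confusion matrix is a quick extra sanity check (a sketch; `conf_mat` is a name introduced here, not from the original):
# Confusion matrix: rows are true classes, columns are predicted classes
conf_mat = np.zeros((K, K), dtype=int)
for true_c, pred_c in zip(y, predicted_class):
    conf_mat[true_c, pred_c] += 1
print(conf_mat) # diagonal entries are the correctly classified counts per class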
# plot the resulting classifier
h = 0.02
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
Z = np.dot(np.c_[xx.ravel(), yy.ravel()], W) + b
Z = np.argmax(Z, axis=1)
Z = Z.reshape(xx.shape)
fig = plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
#fig.savefig('spiral_linear.png')
[Figure: learned linear decision boundaries overlaid on the spiral data]