## Week 5: Introduction to Neural Networks¶

### Perceptron learning rule¶

This week, we will start working with neural networks. For each of the exercises below you can use the method of your choice, but you should display the final decision boundary of your classifier.

#### Exercise 1.¶

As a first exercise, load the binary dataset below and code a few steps of the perceptron learning rule.
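Recall the (batch) perceptron update used below: with augmented inputs $\tilde{x}_i = (1, x_i, y_i)^\top$ and targets $t_i \in \{-1, +1\}$, each step moves $\tilde{\beta}$ in the direction of the currently misclassified points,

$$\tilde{\beta}^{(k+1)} = \tilde{\beta}^{(k)} + \eta \sum_{i \in \mathcal{M}_k} t_i\, \tilde{x}_i, \qquad \mathcal{M}_k = \{\, i : t_i\, \tilde{\beta}^{(k)\top} \tilde{x}_i < 0 \,\},$$

which is a gradient step on the perceptron criterion restricted to the misclassified set $\mathcal{M}_k$.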

In [84]:
import numpy as np
import scipy.io as sio
from numpy import linalg as LA

# Load the two binary classes (the .mat filenames are assumed to match the variable names)
data1 = sio.loadmat('perceptron_data_class1.mat')
data2 = sio.loadmat('perceptron_data_class2.mat')
data1 = data1['perceptron_data_class1']
data2 = data2['perceptron_data_class2']

# We first build the matrix of features (here [1, x, y])

sz1 = np.shape(data1)
sz2 = np.shape(data2)

targetsClass1 = np.ones((sz1[0],))
targetsClass2 = -1 * np.ones((sz2[0],))

total_targets = np.hstack((targetsClass1, targetsClass2))
total_data = np.vstack((data1, data2))

# precomputing the products x_i * t_i
product_yiti = np.multiply(total_data, (np.ones((2, 1)) * total_targets).T)

total_Xtilde = np.hstack((np.ones((np.shape(total_data)[0], 1)), total_data))

# Then we initialize beta_tilde (here I chose a random Gaussian beta_tilde but any other choice is also possible)

sigma = 1
beta = np.random.normal(0, sigma, (2,))
beta0 = np.random.normal(0, sigma, 1)

beta_tilde_init = np.hstack((beta0, beta))
betaTotal = beta_tilde_init

# Initialization of the max number of iterations and the learning rate
eta = .01
iter_num = 1
max_iter = 200

while iter_num < max_iter:

    # We start by looking for the misclassified points (in the case of the perceptron,
    # the misclassified points are the points for which the product t_i * beta_tilde^T x_i is negative)

    sign = np.sign(np.multiply(np.matmul(betaTotal, total_Xtilde.T), total_targets))
    ind_misclassified = np.where(sign < 0)
    misclassified_targets = total_targets[sign < 0]
    # we then extract the misclassified products $x_it_i$
    misclassified_yiti = product_yiti[sign < 0, :]

    # now summing each of the misclassified vectors [1, x_i, y_i]*t_i, we get the gradient
    gradient = np.sum(total_Xtilde[sign < 0, :] * misclassified_targets[:, None], axis=0)
    print(gradient)
    print(LA.norm(gradient))
    print(np.sum(misclassified_yiti, axis=0))

    # updating beta_tilde with learning rate eta
    betaTotal = betaTotal + eta * gradient

    iter_num += 1
    print(iter_num)

[ 22.         372.75345622 793.64963504]
877.1025494880358
[372.75345622 793.64963504]
2
[ 22.         372.75345622 793.64963504]
877.1025494880358
[372.75345622 793.64963504]
3
...
(output truncated: at every iteration the three printed lines are the gradient with respect to [beta0, beta1, beta2], its norm, and its [x, y] part; the norm decreases as fewer points are misclassified, and once every point is correctly classified the gradient stays at [0. 0. 0.] for the remaining iterations)
...
[0. 0. 0.]
0.0
[0. 0.]
200
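Note that since this dataset is linearly separable, the loop does not need to run for a fixed number of iterations: one can stop as soon as no point is misclassified. A minimal early-stopping variant of the loop above (same variables, same update) could look like this:

In [ ]:
# Re-run the perceptron updates from the initial weights, stopping once nothing is misclassified
iter_num = 1
betaTotal = beta_tilde_init.copy()

while iter_num < max_iter:
    # sign of t_i * beta_tilde^T x_i for every point
    sign = np.sign(np.matmul(betaTotal, total_Xtilde.T) * total_targets)
    misclassified = sign < 0
    if not np.any(misclassified):
        print('converged after', iter_num, 'iterations')
        break
    gradient = np.sum(total_Xtilde[misclassified, :] * total_targets[misclassified, None], axis=0)
    betaTotal = betaTotal + eta * gradient
    iter_num += 1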

In [86]:
import numpy as np

# we now plot the classification

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))

tmp = np.array([xx.ravel(), yy.ravel()]).T
tmp1 = np.ones((np.shape(tmp)[0],1))

phi_tilde = np.hstack((tmp1, tmp))

import matplotlib.pyplot as plt

C = np.array([ 'Red','Blue'])

Z = np.matmul(betaTotal, phi_tilde.T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, np.sign(Z), alpha=0.3, colors=C)

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()
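As a quick sanity check (not required by the exercise), one can also verify that the learned perceptron classifies every training point correctly:

In [ ]:
# fraction of training points falling on the correct side of the learned boundary
predictions = np.sign(np.matmul(betaTotal, total_Xtilde.T))
print('training accuracy:', np.mean(predictions == total_targets))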


#### Exercise 2.¶

2a. Load the data below. Using the neural_network module from scikit-learn and its MLPClassifier model, learn a classifier for this dataset using

• One hidden layer with a linear activation function and
  • One neuron
  • Two neurons
• One hidden layer with a non-linear activation function (take ReLU, for example, or a binary step) and
  • One neuron
  • Two neurons

How many neurons and hidden layers do you need to learn the distribution of the data? Do you have an idea why?

Try increasing the number of neurons and hidden layers. Then try different values of the learning rate.

In [160]:
## 1) This is the solution for the one neuron exercise
import scipy.io as sio

# Load the two classes (the .mat filenames are assumed to match the variable names)
data1 = sio.loadmat('neural_net_class1.mat')
data2 = sio.loadmat('neural_net_class2.mat')
data1 = data1['neural_net_class1']
data2 = data2['neural_net_class2']

from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt
import numpy as np

sz1 = np.shape(data1)
sz2 = np.shape(data2)
targetsClass1 = np.ones((sz1[0],))
targetsClass2 = -1 * np.ones((sz2[0],))

total_targets = np.hstack((targetsClass1, targetsClass2))

total_data = np.vstack((data1, data2))

my_classifier = MLPClassifier(hidden_layer_sizes=(1,), activation='identity')

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap
# plot the decision surface

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))
Z = my_classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()

## 2) Adding a couple more neurons won't change the output a lot.

(printed shapes: class 1 has 172 points, class 2 has 195 points, 367 in total)
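For the two-neuron case asked in the exercise, the code is identical up to hidden_layer_sizes; since the identity activation keeps the model linear, the boundary is still a straight line and the plot barely changes. A short sketch reusing the variables defined above:

In [ ]:
# Same setup with two hidden neurons and a linear (identity) activation:
# a composition of linear maps is still linear, so the decision boundary remains a line.
my_classifier_2 = MLPClassifier(hidden_layer_sizes=(2,), activation='identity')
my_classifier_2.fit(total_data, total_targets)

Z2 = my_classifier_2.predict(np.array([xx.ravel(), yy.ravel()]).T).reshape(xx.shape)
plt.contourf(xx, yy, Z2, alpha=0.2, cmap=cmap)
plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()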

In [164]:
## 2) Using a non-linear activation function

my_classifier = MLPClassifier(hidden_layer_sizes = (100,), activation = 'relu')

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap
# plot the decision surface

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))
Z = my_classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()

In [149]:
## 3) changing the learning rate
## Try various values for the learning rate between .0001 and 1. What do you observe ?

my_classifier = MLPClassifier(hidden_layer_sizes = (20,20,20), activation = 'relu', learning_rate  = 'constant', learning_rate_init =.001, max_iter=20000)

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap
# plot the decision surface

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))
Z = my_classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()
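To answer the question in the comment above, one way to compare learning rates is to fit the same architecture several times and look at the final training loss and accuracy. A quick sketch reusing total_data and total_targets (the particular values tried are just an example):

In [ ]:
# Sweep over a few learning rates and compare the final training loss / accuracy
for lr in [0.0001, 0.001, 0.01, 0.1, 1.0]:
    clf = MLPClassifier(hidden_layer_sizes=(20, 20, 20), activation='relu',
                        learning_rate_init=lr, max_iter=20000)
    clf.fit(total_data, total_targets)
    print('learning rate %.4f: loss = %.4f, training accuracy = %.3f'
          % (lr, clf.loss_, clf.score(total_data, total_targets)))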


2b. Keep the dataset from above. Try to change the initialization of the training algorithm. Plot the resulting classifier for a couple of different initializations. What do you see?

Do it for a small network first. Then repeat those experiments for larger architectures, i.e. increase the number of neurons and the number of layers. What do you see when you change the initialization?

In [9]:
# Although it is a little more tricky, it is possible to use object-oriented programming and inheritance to modify
# the _init_coef method from the MLPClassifier class in scikit-learn. As an illustration, in the example below,
# we override the built-in initialization of the MLP class and replace the initial weights by zeros.
# (Note: _init_coef is a private method, so its exact signature may differ between scikit-learn versions.)

import numpy as np
from sklearn.neural_network import MLPClassifier

class MLPClassifierOverride(MLPClassifier):

    def _init_coef(self, fan_in, fan_out):

        if self.activation == 'logistic':
            init_bound = np.sqrt(2. / (fan_in + fan_out))
        elif self.activation in ('identity', 'tanh', 'relu'):
            init_bound = np.sqrt(6. / (fan_in + fan_out))
        else:
            raise ValueError("Unknown activation function %s" % self.activation)

        # same shapes as the default (random uniform) initialization, but filled with zeros
        coef_init = np.zeros(np.shape(self._random_state.uniform(-init_bound, init_bound,
                                                                  (fan_in, fan_out))))
        intercept_init = np.zeros(np.shape(self._random_state.uniform(-init_bound, init_bound,
                                                                       fan_out)))
        return coef_init, intercept_init

import scipy.io as sio

# Reload the same dataset as in 2a (the .mat filenames are assumed to match the variable names)
data1 = sio.loadmat('neural_net_class1.mat')
data2 = sio.loadmat('neural_net_class2.mat')
data1 = data1['neural_net_class1']
data2 = data2['neural_net_class2']

from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt
import numpy as np

sz1 = np.shape(data1)
sz2 = np.shape(data2)
targetsClass1 = np.ones((sz1[0],))
targetsClass2 = -1 * np.ones((sz2[0],))

total_targets = np.hstack((targetsClass1, targetsClass2))

total_data = np.vstack((data1, data2))

# use the overridden class so that the weights actually start from zero
my_classifier = MLPClassifierOverride(hidden_layer_sizes=(100,), activation='relu', learning_rate='constant', learning_rate_init=.001)

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap
## plot the decision surface

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))
Z = my_classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()
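To actually compare several random initializations, as asked in 2b, it is simpler to keep the default initializer and just vary random_state. A small sketch reusing total_data, total_targets, xx, yy and cmap from above (the two architectures and the seeds are only an example):

In [ ]:
# Fit the same data with a few different random initializations, for a small and a larger network,
# and plot the resulting decision boundaries side by side.
fig, axes = plt.subplots(2, 3, figsize=(12, 7))
for row, layers in enumerate([(2,), (50, 50)]):
    for col, seed in enumerate([0, 1, 2]):
        clf = MLPClassifier(hidden_layer_sizes=layers, activation='relu',
                            max_iter=5000, random_state=seed)
        clf.fit(total_data, total_targets)
        Z = clf.predict(np.array([xx.ravel(), yy.ravel()]).T).reshape(xx.shape)
        ax = axes[row, col]
        ax.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
        ax.scatter(data1[:, 0], data1[:, 1], facecolor='blue', s=10)
        ax.scatter(data2[:, 0], data2[:, 1], facecolor='red', s=10)
        ax.set_title('layers=%s, random_state=%d' % (layers, seed))
plt.show()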


#### Exercise 3.¶

3a. Load the data below. Try to build the best neural network you can for this dataset. Split the data between a training set and a test set and evaluate the models you built. What is the best validation error you can get?

In [54]:
# The spiral is relatively hard because of its strong non-linearity. However, as shown below,
# it remains of course possible to fit it with a neural network provided that (1) the network is sufficiently over-parametrized
# (i.e. it has a sufficiently large number of layers and of neurons in each layer) and (2) one takes a sufficiently
# small learning rate with a (batch) gradient descent algorithm (taking all the samples into account, thus avoiding any randomness in the iterations).
# To avoid a disproportionately complex classifier, it is also good to set the regularization parameter to some relatively
# large constant; here I choose alpha = .1

import scipy.io as sio

# Load the two spiral classes (the .mat filenames are assumed to match the variable names)
data1 = sio.loadmat('neural_net_ex2_class1.mat')
data2 = sio.loadmat('neural_net_ex2_class2.mat')
data1 = data1['neural_net_ex2_class1']
data2 = data2['neural_net_ex2_class2']

from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt
import numpy as np

sz1 = np.shape(data1)
sz2 = np.shape(data2)
targetsClass1 = np.ones((sz1[0],))
targetsClass2 = -1 * np.ones((sz2[0],))

total_targets = np.hstack((targetsClass1, targetsClass2))
total_data = np.vstack((data1, data2))

from sklearn.neural_network import MLPClassifier

## We first try a simple MLP with tanh activation and without any additional features

my_classifier = MLPClassifier(hidden_layer_sizes = (100,100), activation = 'tanh', max_iter=40000, solver = 'lbfgs', alpha = .1)

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap
# plot the decision surface

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(0,50,100),
np.linspace(0,50,100))
Z = my_classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)

plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data2[:,0], data2[:,1], facecolor='red')
plt.show()


3b. With the same dataset, add additional features to your model, e.g. $\sin(x), \sin(y)$ or other monomials. Can you improve your classifier ?

In [ ]:
# Even when using neural networks, it can be interesting to add additional features. Not because the network could not
# learn the classifier otherwise (the universal approximation theorem tells us that a sufficiently large multilayer
# perceptron can approximate any decision boundary), but because adding a few features such as sin(x), cos(x) or x^2, y^2
# may lead to a simpler architecture. Random initialization, however, can make the learning tricky.
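The empty cell above could be filled with something along these lines; a sketch, assuming the spiral data from 3a (total_data, total_targets, data1, data2, xx, yy, cmap) is still in memory, with a hypothetical augment helper and an illustrative (20, 20) architecture. Whether it actually improves the boundary depends on the architecture and the initialization, as the comment above notes.

In [ ]:
def augment(X):
    # append sin, cos and squared versions of both coordinates to the raw [x, y] features
    return np.hstack((X, np.sin(X), np.cos(X), X ** 2))

total_data_aug = augment(total_data)

clf_aug = MLPClassifier(hidden_layer_sizes=(20, 20), activation='tanh',
                        max_iter=40000, solver='lbfgs', alpha=.1)
clf_aug.fit(total_data_aug, total_targets)

# evaluate the classifier on the same grid as before, augmented in the same way
grid = np.array([xx.ravel(), yy.ravel()]).T
Z = clf_aug.predict(augment(grid)).reshape(xx.shape)

plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.scatter(data1[:, 0], data1[:, 1], facecolor='blue')
plt.scatter(data2[:, 0], data2[:, 1], facecolor='red')
plt.show()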


Why is the spiral example so difficult to learn?

In [42]:
## 1) Once again, the difficulty for the spiral comes from the strong non-linearity of the data. Such a non-linearity
# requires a sufficiently large architecture and some relatively good intuition on the way neural networks can capture a
# distribution. However, as shown below on yet another example, neural networks are perfectly able to recover a
# relatively regular boundary even on such a strongly non-linear dataset.

import numpy as np
import scipy.io as sio

# Load the three spiral point sets (the .mat filenames are assumed to match the variable names)
data1 = sio.loadmat('pointsSpiralClass1_1.mat')
data2 = sio.loadmat('pointsSpiralClass1_2.mat')
data3 = sio.loadmat('pointsSpiralClass2_1.mat')
data1 = data1['pointsSpiralClass1_1']
data2 = data2['pointsSpiralClass1_2']
data3 = data3['pointsSpiralClass2_1']

data1 = np.vstack((data1,data2))
print(np.shape(data1))

sz1 = np.shape(data1)
sz2 = np.shape(data3)

targets_class1 = np.ones((sz1[0],))
targets_class2 = -1*np.ones((sz2[0],))

total_data = np.vstack((data1, data3))
total_targets = np.hstack((targets_class1, targets_class2))

from sklearn.neural_network import MLPClassifier

## We first try a simple MLP with tanh activation and without any additional features

my_classifier = MLPClassifier(hidden_layer_sizes = (100,100), activation = 'tanh', max_iter=10000, batch_size=1000,learning_rate_init=0.001)

my_classifier.fit(total_data, total_targets)

from matplotlib.colors import ListedColormap

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
cmap = ListedColormap(colors[:2])

xx, yy = np.meshgrid(np.linspace(1,14,100),
np.linspace(-2,12,100))

preprocessed = np.array([xx.ravel(), yy.ravel()]).T

# from sklearn.preprocessing import PolynomialFeatures
# poly = PolynomialFeatures(2)
# preprocessed = poly.fit_transform(preprocessed)
# preprocessed = preprocessed[:,1:]
# preprocessed = np.hstack((preprocessed,np.sin(preprocessed)) )

Z = my_classifier.predict(preprocessed)
Z = Z.reshape(xx.shape)

import matplotlib.pyplot as plt
plt.contourf(xx, yy, Z, alpha=0.2, cmap=cmap)
plt.scatter(data1[:,0], data1[:,1], facecolor='blue')
plt.scatter(data3[:,0], data3[:,1], facecolor='red')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.show()

(557, 2)