Help with A3 Assignment

This notebook contains code that we developed during the lecture in response to students' questions about Assignment A3.

In [2]:
import numpy as np
In [32]:
class NeuralNetwork():
    
    def make_weights(self):
        # Allocate one flat vector of all 22 weights, then make the layer
        # matrices W1 and W2 as reshaped views into it (not copies).
        self.all_weights = np.zeros(22)
        W1 = self.all_weights[:10].reshape(2, 5)  # first 10 weights, shape 2 x 5
        W2 = self.all_weights[10:].reshape(6, 2)  # last 12 weights, shape 6 x 2
        self.Ws = [W1, W2]
        
    def initialize_weights(self):
        # Assign in place with W[:] so the view into all_weights is preserved.
        # W1[:] = np.random.uniform(-1, 1, size=(2, 5)) / np.sqrt(2)
        W = self.Ws[0]
        W[:] = np.random.uniform(-1, 1, size=(2, 5)) / np.sqrt(2)
In [33]:
nnet = NeuralNetwork()
In [34]:
nnet.make_weights()
In [35]:
nnet.all_weights
Out[35]:
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.])
In [36]:
nnet.Ws
Out[36]:
[array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])]
In [37]:
nnet.Ws[0][1, 1] = 20
In [38]:
nnet.Ws
Out[38]:
[array([[ 0.,  0.,  0.,  0.,  0.],
        [ 0., 20.,  0.,  0.,  0.]]),
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])]
In [39]:
nnet.all_weights
Out[39]:
array([ 0.,  0.,  0.,  0.,  0.,  0., 20.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
In [40]:
nnet.initialize_weights()
In [41]:
nnet.all_weights
Out[41]:
array([-0.1535123 ,  0.31137131,  0.44323087,  0.4013435 , -0.36696818,
       -0.3703361 , -0.30197005, -0.22070137, -0.53431622,  0.49782598,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ])
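
A key point in the cells above: W1 and W2 are views into all_weights, not copies, which is why assigning through Ws[0][1, 1] or W[:] also changes all_weights. A quick way to confirm the sharing (just illustrative, not part of the assignment):

In [ ]:
# Both layer matrices share memory with all_weights, so in-place assignment
# through them updates the flat weight vector too.
print(np.shares_memory(nnet.all_weights, nnet.Ws[0]))   # True
print(np.shares_memory(nnet.all_weights, nnet.Ws[1]))   # True
# Rebinding the name instead, as in  W = np.random.uniform(...),  would create
# a new array and break the link, which is why initialize_weights uses  W[:] = ...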

Questions about error_trace values

In [ ]:
import optimizers as opt

# def train(self, X, T, .....) function

    # deal with standardization, resulting in Xst, Tst
    
    optimizer = opt.Optimizers(self.all_weights)
    # assuming   def mse(self, X, T):
    # assuming   def backward(self, X, T):
    
    def error_convert(mse_st):
        # Unstandardize the error  mean((Tst - Yst)**2)
        # by doing  mean((T - Y)**2)
        # Since Tst = (T - Tmean) / Tstd
        #  then   T = Tst * Tstd + Tmean
        # So unstandardize mse_st by  mean((Tst * Tstd + Tmean  -  (Yst * Tstd + Tmean)) ** 2)
        #                         by  mean((Tst * Tstd + Tmean  -  Yst * Tstd - Tmean) ** 2)
        #                         by  mean((Tst * Tstd -  Yst * Tstd) ** 2)
        #                         by  mean( ((Tst -  Yst) * Tstd) ** 2)
        #                         by  mean((Tst - Yst)**2 * Tstd**2)
        #  Now, with sqrt          sqrt(mean((Tst - Yst)**2 * Tstd**2))
        #               or          sqrt(mean((Tst - Yst)**2)) * Tstd
        
        return np.sqrt(mse_st) * self.stand_params['Tstd']
        
    if method == 'sgd':
        optimizer = opt.Optimizers(self.all_weights).sgd
    elif method == 'adam':
        ...
    
    self.error_trace = optimizer(self.mse, self.backward, [Xst, Tst], 1000, 0.01, 
                                 error_convert_f=error_convert)
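
As a quick numeric check of the algebra in the comments above, here is a tiny standalone example with made-up T and Y values (not assignment data), showing that np.sqrt(mse_st) * Tstd equals the RMSE computed in the original units:

In [ ]:
# Check:  sqrt(mean((Tst - Yst)**2)) * Tstd  ==  sqrt(mean((T - Y)**2))
T = np.array([[3.7], [4.3], [7.0]])
Y = np.array([[3.5], [4.8], [6.5]])
Tmean, Tstd = T.mean(axis=0), T.std(axis=0)
Tst = (T - Tmean) / Tstd
Yst = (Y - Tmean) / Tstd          # network output, in standardized units
mse_st = np.mean((Tst - Yst)**2)
print(np.sqrt(mse_st) * Tstd, np.sqrt(np.mean((T - Y)**2)))   # the two values agree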

Does use() call forward() or not?

Answer: It doesn't have to. forward() deals with standardized variables; use() does not. But use() can call forward() after standardizing X, then unstandardize the output Y before returning it.

You, or your user, just have to know the NeuralNetwork constructor and the train and use functions, and need to know nothing about standardization.
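
A possible sketch of use(), assuming train() saved the standardization parameters in self.stand_params. Only 'Tstd' appears in the code above; 'Xmeans', 'Xstds', and 'Tmean' are assumed names here, and yours may differ:

In [ ]:
# Sketch of use().  forward() is assumed to return the final-layer output Yst.
def use(self, X):
    # standardize the inputs the same way train() did
    Xst = (X - self.stand_params['Xmeans']) / self.stand_params['Xstds']
    # forward() works entirely in standardized units
    Yst = self.forward(Xst)
    # unstandardize the output before returning it
    return Yst * self.stand_params['Tstd'] + self.stand_params['Tmean']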

In [43]:
!head qsar_aquatic_toxicity.csv
In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [45]:
!wc qsar_aquatic_toxicity.csv
  546   546 22903 qsar_aquatic_toxicity.csv
In [46]:
datadf = pd.read_csv('qsar_aquatic_toxicity.csv')
In [47]:
datadf
Out[47]:
0;0;0;2.419;1.225;0.667;0;0;3.740
0 0;0;0;2.638;1.401;0.632;0;0;4.330
1 9.23;11;0;5.799;2.93;0.486;0;0;7.019
2 9.23;11;0;5.453;2.887;0.495;0;0;6.723
3 9.23;11;0;4.068;2.758;0.695;0;0;5.979
4 215.34;327.629;3;0.189;4.677;1.333;0;4;6.064
... ...
540 24.06;35.776;2;3.326;2.837;0.849;2;0;4.651
541 9.23;11;0;3.275;2.727;0.874;0;0;3.953
542 0;0;0;5.165;3.111;0.732;0;0;6.219
543 13.14;9.507;0;2.859;2.614;0.827;0;0;4.995
544 0;0;0;2.255;1.8;0.917;0;0;2.480

545 rows × 1 columns

In [48]:
datadf = pd.read_csv('qsar_aquatic_toxicity.csv', delimiter=';')
datadf
Out[48]:
0 0.1 0.2 2.419 1.225 0.667 0.3 0.4 3.740
0 0.00 0.000 0 2.638 1.401 0.632 0 0 4.330
1 9.23 11.000 0 5.799 2.930 0.486 0 0 7.019
2 9.23 11.000 0 5.453 2.887 0.495 0 0 6.723
3 9.23 11.000 0 4.068 2.758 0.695 0 0 5.979
4 215.34 327.629 3 0.189 4.677 1.333 0 4 6.064
... ... ... ... ... ... ... ... ... ...
540 24.06 35.776 2 3.326 2.837 0.849 2 0 4.651
541 9.23 11.000 0 3.275 2.727 0.874 0 0 3.953
542 0.00 0.000 0 5.165 3.111 0.732 0 0 6.219
543 13.14 9.507 0 2.859 2.614 0.827 0 0 4.995
544 0.00 0.000 0 2.255 1.800 0.917 0 0 2.480

545 rows × 9 columns

In [49]:
datadf = pd.read_csv('qsar_aquatic_toxicity.csv', delimiter=';', header=None)
datadf
Out[49]:
0 1 2 3 4 5 6 7 8
0 0.00 0.000 0 2.419 1.225 0.667 0 0 3.740
1 0.00 0.000 0 2.638 1.401 0.632 0 0 4.330
2 9.23 11.000 0 5.799 2.930 0.486 0 0 7.019
3 9.23 11.000 0 5.453 2.887 0.495 0 0 6.723
4 9.23 11.000 0 4.068 2.758 0.695 0 0 5.979
... ... ... ... ... ... ... ... ... ...
541 24.06 35.776 2 3.326 2.837 0.849 2 0 4.651
542 9.23 11.000 0 3.275 2.727 0.874 0 0 3.953
543 0.00 0.000 0 5.165 3.111 0.732 0 0 6.219
544 13.14 9.507 0 2.859 2.614 0.827 0 0 4.995
545 0.00 0.000 0 2.255 1.800 0.917 0 0 2.480

546 rows × 9 columns

In [50]:
names = ['TPSA', 'SAacc', 'H-050', 'MLOGP', 'RDCHI', 'GATS1p', 'nN', 'C-040', 'LC50']
names
Out[50]:
['TPSA', 'SAacc', 'H-050', 'MLOGP', 'RDCHI', 'GATS1p', 'nN', 'C-040', 'LC50']
In [51]:
datadf.columns
Out[51]:
Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int64')
In [52]:
datadf.columns = names
datadf.columns
Out[52]:
Index(['TPSA', 'SAacc', 'H-050', 'MLOGP', 'RDCHI', 'GATS1p', 'nN', 'C-040',
       'LC50'],
      dtype='object')
In [53]:
datadf
Out[53]:
TPSA SAacc H-050 MLOGP RDCHI GATS1p nN C-040 LC50
0 0.00 0.000 0 2.419 1.225 0.667 0 0 3.740
1 0.00 0.000 0 2.638 1.401 0.632 0 0 4.330
2 9.23 11.000 0 5.799 2.930 0.486 0 0 7.019
3 9.23 11.000 0 5.453 2.887 0.495 0 0 6.723
4 9.23 11.000 0 4.068 2.758 0.695 0 0 5.979
... ... ... ... ... ... ... ... ... ...
541 24.06 35.776 2 3.326 2.837 0.849 2 0 4.651
542 9.23 11.000 0 3.275 2.727 0.874 0 0 3.953
543 0.00 0.000 0 5.165 3.111 0.732 0 0 6.219
544 13.14 9.507 0 2.859 2.614 0.827 0 0 4.995
545 0.00 0.000 0 2.255 1.800 0.917 0 0 2.480

546 rows × 9 columns

In [54]:
data = datadf.values
data
Out[54]:
array([[ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  3.74 ],
       [ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  4.33 ],
       [ 9.23 , 11.   ,  0.   , ...,  0.   ,  0.   ,  7.019],
       ...,
       [ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  6.219],
       [13.14 ,  9.507,  0.   , ...,  0.   ,  0.   ,  4.995],
       [ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  2.48 ]])
In [55]:
data.dtype
Out[55]:
dtype('float64')
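
For this data, the first eight columns are the input features and the last column (LC50) is the target. A typical split into X and T arrays, as a sketch:

In [ ]:
# Inputs X: first eight columns.  Target T: LC50, kept two-dimensional.
X = data[:, :-1]     # shape (546, 8)
T = data[:, -1:]     # shape (546, 1)
X.shape, T.shape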
In [58]:
plt.plot(data[:, 0:3])
Out[58]:
[<matplotlib.lines.Line2D at 0x7f6e3f6fc400>,
 <matplotlib.lines.Line2D at 0x7f6e3f6fc4f0>,
 <matplotlib.lines.Line2D at 0x7f6e3f6fc5b0>]
In [62]:
datadf.describe()
Out[62]:
TPSA SAacc H-050 MLOGP RDCHI GATS1p nN C-040 LC50
count 546.000000 546.000000 546.000000 546.000000 546.000000 546.000000 546.000000 546.000000 546.000000
mean 48.472930 58.869018 0.937729 2.313493 2.492299 1.046264 1.003663 0.353480 4.658421
std 46.763983 68.166554 1.618632 1.741797 0.811004 0.403677 1.397240 0.806827 1.665215
min 0.000000 0.000000 0.000000 -6.446000 1.000000 0.281000 0.000000 0.000000 0.122000
25% 15.790000 11.000000 0.000000 1.232500 1.975000 0.737000 0.000000 0.000000 3.601500
50% 40.460000 42.683000 0.000000 2.273500 2.344000 1.020500 1.000000 0.000000 4.516000
75% 70.022500 77.492750 1.000000 3.392750 2.911000 1.266500 2.000000 0.000000 5.607500
max 347.320000 571.952000 18.000000 9.148000 6.439000 2.500000 11.000000 11.000000 10.047000
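
The mean and std rows of describe() are the same quantities needed as standardization parameters in train(). Computed directly with numpy, as a sketch (numpy's std defaults to ddof=0 while pandas uses ddof=1, so the std values differ slightly):

In [ ]:
# Standardization parameters computed from the X and T arrays defined above.
Xmeans, Xstds = X.mean(axis=0), X.std(axis=0)
Tmean, Tstd = T.mean(axis=0), T.std(axis=0)
Xst = (X - Xmeans) / Xstds
Tst = (T - Tmean) / Tstd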
In [64]:
plt.plot(data[:, 0] , data[:, -1], 'o')
Out[64]:
[<matplotlib.lines.Line2D at 0x7f6e3f543190>]
In [67]:
pd.plotting.scatter_matrix(datadf, figsize=(15, 15), marker='o', hist_kwds={'bins': 15},
                          s=10, alpha=0.8);
In [68]:
import seaborn as sns
sns.pairplot(datadf);