%load_ext autoreload
%autoreload 2
import gzip
import pickle
import os
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import MinMaxScaler
from vanilla_neural_nets.neural_network.training_batch_generator import MiniBatchGenerator
from vanilla_neural_nets.neural_network.optimization_algorithm import GradientDescent
from vanilla_neural_nets.neural_network.activation_function import SigmoidActivationFunction
from vanilla_neural_nets.neural_network.loss_function import MeanSquaredError, BinaryCrossEntropyLoss
from vanilla_neural_nets.neural_network.parameter_initialization import GaussianBiasInitializer, GaussianWeightInitializer
from vanilla_neural_nets.neural_network.network import VanillaNeuralNetwork
from vanilla_neural_nets.neural_network.data_object import HoldoutData
# Load MNIST and one-hot encode the labels: `pd.get_dummies` yields one
# indicator column per distinct label value.
# NOTE(review): `fetch_mldata` was deprecated in scikit-learn 0.20 and removed
# in 0.22; on newer versions this call has to be ported to `fetch_openml`.
mnist = fetch_mldata('MNIST original', data_home='.')
mnist.target = np.array(pd.get_dummies(mnist.target))

# Flag every 7th row, then shuffle the flags in place so that the same number
# of rows (about 1/7 of the data) is held out, but at random positions.
# NOTE: the shuffle is unseeded, so the train/holdout split (and therefore the
# reported accuracies) will differ from run to run.
holdout_set_mask = np.arange(len(mnist.data)) % 7 == 0
np.random.shuffle(holdout_set_mask)

X = mnist.data[~holdout_set_mask].astype(float)
y = mnist.target[~holdout_set_mask].astype(float)
X_holdout = mnist.data[holdout_set_mask].astype(float)
y_holdout = mnist.target[holdout_set_mask].astype(float)

# Fit the scaler on the training rows only and reuse it for the holdout rows.
# Fitting a second scaler on the holdout set would map the same raw pixel
# value to different inputs at training and evaluation time.
feature_scaler = MinMaxScaler()
X = feature_scaler.fit_transform(X)
# Clip to the training range: a pixel that is constant in the training rows
# gets a unit scale factor, so an unseen non-zero value there would otherwise
# pass through unscaled.
X_holdout = np.clip(feature_scaler.transform(X_holdout), 0., 1.)
# --- Hyperparameters shared by the experiments below ---
HIDDEN_LAYER_SIZE = 50
LEARNING_RATE = 3.0
N_EPOCHS = 10
TRAINING_BATCH_SIZE = 10
RANDOM_STATE = 123

# --- Parameter initializers ---
# Weights and biases use the same standard deviation and the same seed value.
GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION = 1.0
GAUSSIAN_WEIGHT_INITIALIZER = GaussianWeightInitializer(
    standard_deviation=GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION,
    random_state=RANDOM_STATE,
)
GAUSSIAN_BIAS_INITIALIZER = GaussianBiasInitializer(
    standard_deviation=GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION,
    random_state=RANDOM_STATE,
)

# --- Architecture ---
# Input width comes from the feature matrix, output width from the one-hot
# targets, with two equally sized hidden layers in between.
LAYER_SIZES = [X.shape[1]] + [HIDDEN_LAYER_SIZE] * 2 + [y.shape[1]]
# Experiment 1: network configured with the MeanSquaredError loss class.
# The holdout split is handed to the network at construction time; the
# per-epoch accuracy lines recorded after this cell are presumably computed
# on it -- confirm against VanillaNeuralNetwork.
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=MeanSquaredError,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    weight_initializer=GAUSSIAN_WEIGHT_INITIALIZER,
    bias_initializer=GAUSSIAN_BIAS_INITIALIZER,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout),
)

# Train on the non-holdout rows.
vanilla_neural_net.fit(X, y)
# Epoch: 0 | Accuracy: 0.9132
# Epoch: 1 | Accuracy: 0.9247
# Epoch: 2 | Accuracy: 0.9315
# Epoch: 3 | Accuracy: 0.9438
# Epoch: 4 | Accuracy: 0.9463
# Epoch: 5 | Accuracy: 0.9454
# Epoch: 6 | Accuracy: 0.9494
# Epoch: 7 | Accuracy: 0.9514
# Epoch: 8 | Accuracy: 0.9527
# Epoch: 9 | Accuracy: 0.9553
# Experiment 2: same architecture and settings, but with the
# BinaryCrossEntropyLoss class and a smaller learning rate.
# Rebinding LEARNING_RATE here also changes it for any later cell.
LEARNING_RATE = 0.5

# NOTE(review): the initializer objects are the same instances passed to the
# previous network; whether they carry random state between uses is not
# visible here -- confirm if identical starting weights are expected.
vanilla_neural_net = VanillaNeuralNetwork(
    layer_sizes=LAYER_SIZES,
    training_batch_generator_class=MiniBatchGenerator,
    loss_function_class=BinaryCrossEntropyLoss,
    activation_function_class=SigmoidActivationFunction,
    optimization_algorithm_class=GradientDescent,
    learning_rate=LEARNING_RATE,
    n_epochs=N_EPOCHS,
    training_batch_size=TRAINING_BATCH_SIZE,
    random_state=RANDOM_STATE,
    weight_initializer=GAUSSIAN_WEIGHT_INITIALIZER,
    bias_initializer=GAUSSIAN_BIAS_INITIALIZER,
    holdout_data=HoldoutData(X=X_holdout, y=y_holdout),
)

# Train on the non-holdout rows; this replaces the previously fitted network.
vanilla_neural_net.fit(X, y)
# Epoch: 0 | Accuracy: 0.9222
# Epoch: 1 | Accuracy: 0.9317
# Epoch: 2 | Accuracy: 0.9355
# Epoch: 3 | Accuracy: 0.9484
# Epoch: 4 | Accuracy: 0.9493
# Epoch: 5 | Accuracy: 0.9548
# Epoch: 6 | Accuracy: 0.9551
# Epoch: 7 | Accuracy: 0.9575
# Epoch: 8 | Accuracy: 0.9579
# Epoch: 9 | Accuracy: 0.9591