#!/usr/bin/env python
# coding: utf-8

# Notebook-exported script: load MNIST, build a reproducible train/holdout
# split, min-max scale the pixels, and define training hyper-parameters.

# In[1]:

# Guard the notebook magics so the exported script also runs outside an
# IPython kernel, where get_ipython() is undefined.
try:
    get_ipython().run_line_magic('load_ext', 'autoreload')
    get_ipython().run_line_magic('autoreload', '2')
except NameError:
    pass

# In[6]:

import gzip
import pickle
import os

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import MinMaxScaler

from vanilla_neural_nets.neural_network.training_batch_generator import MiniBatchGenerator
from vanilla_neural_nets.neural_network.optimization_algorithm import GradientDescent
from vanilla_neural_nets.neural_network.activation_function import SigmoidActivationFunction
from vanilla_neural_nets.neural_network.loss_function import MeanSquaredError, BinaryCrossEntropyLoss
from vanilla_neural_nets.neural_network.parameter_initialization import GaussianBiasInitializer, GaussianWeightInitializer
from vanilla_neural_nets.neural_network.network import VanillaNeuralNetwork
from vanilla_neural_nets.neural_network.data_object import HoldoutData


# # Load MNIST data

# In[7]:

# BUG FIX: fetch_mldata was removed from scikit-learn (mldata.org is gone).
# fetch_openml('mnist_784') serves the same 70,000 x 784 MNIST images;
# as_frame=False keeps .data/.target as numpy arrays, matching the old API.
mnist = fetch_openml('mnist_784', as_frame=False, data_home='.')
# One-hot encode the digit labels: 70,000 x 10 indicator matrix.
mnist.target = np.array(pd.get_dummies(mnist.target))


# # Separate and scale train and holdout data

# In[8]:

# Reserve every 7th example (~14%) for holdout, then shuffle which rows the
# mask hits. BUG FIX: the shuffle was previously unseeded, so the split
# changed on every run despite RANDOM_STATE being pinned elsewhere.
holdout_set_mask = np.array([i % 7 == 0 for i in range(len(mnist.data))])
np.random.RandomState(123).shuffle(holdout_set_mask)

X = mnist.data[~holdout_set_mask].astype(float)
y = mnist.target[~holdout_set_mask].astype(float)
X_holdout = mnist.data[holdout_set_mask].astype(float)
y_holdout = mnist.target[holdout_set_mask].astype(float)

# In[9]:

# BUG FIX: fit the scaler on the training set only and apply the SAME
# transform to the holdout set. Fitting a second scaler on the holdout data
# both leaks holdout statistics and scales the two sets inconsistently.
scaler = MinMaxScaler().fit(X)
X = scaler.transform(X)
X_holdout = scaler.transform(X_holdout)


# # Initialize hyper-parameters

# In[10]:

HIDDEN_LAYER_SIZE = 50
LEARNING_RATE = 3.
N_EPOCHS = 10
TRAINING_BATCH_SIZE = 10
RANDOM_STATE = 123

# In[11]:

# NOTE(review): "INITIALIZATER" is a typo for "INITIALIZER"; the misspelled
# name is referenced later in this file, so it is kept for compatibility.
GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION = 1.
GAUSSIAN_WEIGHT_INITIALIZER = GaussianWeightInitializer(
    standard_deviation=GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION,
    random_state=RANDOM_STATE
)
GAUSSIAN_BIAS_INITIALIZER = GaussianBiasInitializer(
    standard_deviation=GAUSSIAN_INITIALIZATER_STANDARD_DEVIATION,
    random_state=RANDOM_STATE
)

# In[12]:

# Architecture: 784 input pixels -> two equal hidden layers -> one output
# unit per digit class.
LAYER_SIZES = [X.shape[1], HIDDEN_LAYER_SIZE, HIDDEN_LAYER_SIZE, y.shape[1]]


def _build_network(loss_function_class, learning_rate):
    """Construct a fresh network; only the loss class and learning rate vary
    between the two training runs below — everything else is shared."""
    return VanillaNeuralNetwork(
        layer_sizes=LAYER_SIZES,
        training_batch_generator_class=MiniBatchGenerator,
        loss_function_class=loss_function_class,
        activation_function_class=SigmoidActivationFunction,
        optimization_algorithm_class=GradientDescent,
        learning_rate=learning_rate,
        n_epochs=N_EPOCHS,
        training_batch_size=TRAINING_BATCH_SIZE,
        random_state=RANDOM_STATE,
        weight_initializer=GAUSSIAN_WEIGHT_INITIALIZER,
        bias_initializer=GAUSSIAN_BIAS_INITIALIZER,
        holdout_data=HoldoutData(X=X_holdout, y=y_holdout)
    )


# # Train network

# ### with mean squared error

# In[13]:

vanilla_neural_net = _build_network(MeanSquaredError, LEARNING_RATE)

# In[11]:

vanilla_neural_net.fit(X, y)


# ### with cross entropy loss

# In[14]:

LEARNING_RATE = .5

# In[15]:

vanilla_neural_net = _build_network(BinaryCrossEntropyLoss, LEARNING_RATE)

# In[17]:

vanilla_neural_net.fit(X, y)