# Imports used in this section (numpy, matplotlib, and the two-layer net loss function)
import numpy as np
import matplotlib.pyplot as plt

from cs231n.classifiers.neural_net import two_layer_net

# Create some toy data to check your implementations
input_size = 4
hidden_size = 10
num_classes = 3
num_inputs = 5

def init_toy_model():
    model = {}
    model['W1'] = np.linspace(-0.2, 0.6, num=input_size*hidden_size).reshape(input_size, hidden_size)
    model['b1'] = np.linspace(-0.3, 0.7, num=hidden_size)
    model['W2'] = np.linspace(-0.4, 0.1, num=hidden_size*num_classes).reshape(hidden_size, num_classes)
    model['b2'] = np.linspace(-0.5, 0.9, num=num_classes)
    return model

def init_toy_data():
    X = np.linspace(-0.2, 0.5, num=num_inputs*input_size).reshape(num_inputs, input_size)
    y = np.array([0, 1, 2, 2, 1])
    return X, y

model = init_toy_model()
X, y = init_toy_data()

from cs231n.classifier_trainer import ClassifierTrainer

# Vanilla SGD
model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss.
# Notice that we're using sample_batches=False, so we're performing full-batch
# gradient descent (no sampled batches of data).
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                               model, two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1, momentum=0.0, learning_rate_decay=1,
                                               update='sgd', sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
print 'Final loss with vanilla SGD: %f' % (loss_history[-1], )

# SGD with momentum
model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss; again full-batch gradient descent.
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                               model, two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1, momentum=0.9, learning_rate_decay=1,
                                               update='momentum', sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
correct_loss = 0.494394  # expected final loss
print 'Final loss with momentum SGD: %f' % (loss_history[-1], )

# RMSProp
model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss; again full-batch gradient descent.
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                               model, two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1, momentum=0.9, learning_rate_decay=1,
                                               update='rmsprop', sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
correct_loss = 0.439368  # expected final loss
print 'Final loss with RMSProp: %f' % (loss_history[-1], )

# RMSProp + momentum
model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss; again full-batch gradient descent.
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                               model, two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1, momentum=0.9, learning_rate_decay=1,
                                               update='rmsprop+momentum', sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
correct_loss = 0.439368  # expected final loss
print 'Final loss with RMSProp+momentum: %f' % (loss_history[-1], )

# Adagrad
model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss; again full-batch gradient descent.
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                               model, two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1, momentum=0.9, learning_rate_decay=1,
                                               update='adagrad', sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
correct_loss = 0.439368  # expected final loss
print 'Final loss with Adagrad: %f' % (loss_history[-1], )
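# For reference while implementing the update rules in classifier_trainer.py,
# the sketch below shows the standard per-parameter updates these names usually
# refer to. This is not the assignment's own code; the variable names v, cache,
# decay_rate and eps are illustrative assumptions. Vanilla 'sgd' is simply
# x -= learning_rate * dx.

def momentum_step(x, dx, v, learning_rate, mu=0.9):
    # Accumulate a velocity in the (negative) gradient direction, then step along it.
    v = mu * v - learning_rate * dx
    return x + v, v

def rmsprop_step(x, dx, cache, learning_rate, decay_rate=0.99, eps=1e-8):
    # Keep a leaky running average of squared gradients to scale each parameter's step.
    cache = decay_rate * cache + (1 - decay_rate) * dx**2
    return x - learning_rate * dx / (np.sqrt(cache) + eps), cache

def adagrad_step(x, dx, cache, learning_rate, eps=1e-8):
    # Like RMSProp, but the squared-gradient cache only grows, so the effective step shrinks over time.
    cache = cache + dx**2
    return x - learning_rate * dx / (np.sqrt(cache) + eps), cache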
from cs231n.data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape

# Train on CIFAR-10 with vanilla SGD
from cs231n.classifiers.neural_net import init_two_layer_model

model = init_two_layer_model(32*32*3, 50, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
best_model1, loss_history1, train_acc1, val_acc1 = trainer.train(
    X_train, y_train, X_val, y_val,
    model, two_layer_net,
    num_epochs=20, reg=1.0,
    update='sgd', momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)

# Train with SGD + momentum
model = init_two_layer_model(32*32*3, 50, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
best_model2, loss_history2, train_acc2, val_acc2 = trainer.train(
    X_train, y_train, X_val, y_val,
    model, two_layer_net,
    num_epochs=20, reg=1.0,
    update='momentum', momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)

# Train with RMSProp
model = init_two_layer_model(32*32*3, 50, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
best_model3, loss_history3, train_acc3, val_acc3 = trainer.train(
    X_train, y_train, X_val, y_val,
    model, two_layer_net,
    num_epochs=20, reg=1.0,
    update='rmsprop', momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)

# Train with RMSProp + momentum
model = init_two_layer_model(32*32*3, 50, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
best_model4, loss_history4, train_acc4, val_acc4 = trainer.train(
    X_train, y_train, X_val, y_val,
    model, two_layer_net,
    num_epochs=20, reg=1.0,
    update='rmsprop+momentum', momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)

# Train with Adagrad (note the larger learning rate)
model = init_two_layer_model(32*32*3, 50, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
best_model5, loss_history5, train_acc5, val_acc5 = trainer.train(
    X_train, y_train, X_val, y_val,
    model, two_layer_net,
    num_epochs=20, reg=1.0,
    update='adagrad', momentum=0.9, learning_rate_decay=0.95,
    learning_rate=0.01, verbose=True)
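# Optional convenience cell (not part of the assignment): overlay the five
# training loss histories on one set of axes to compare convergence speed
# before looking at each run separately below.
plt.figure()
for hist, name in zip([loss_history1, loss_history2, loss_history3, loss_history4, loss_history5],
                      ['sgd', 'momentum', 'rmsprop', 'rmsprop+momentum', 'adagrad']):
    plt.plot(hist, label=name)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Loss history for each update rule')
plt.legend(loc='upper right')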
# Plot the loss function and train / validation accuracies for vanilla SGD
plt.subplot(2, 1, 1)
plt.plot(loss_history1)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc1)
plt.plot(val_acc1)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')

# Plot the loss function and train / validation accuracies for SGD + momentum
plt.subplot(2, 1, 1)
plt.plot(loss_history2)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc2)
plt.plot(val_acc2)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')

# Plot the loss function and train / validation accuracies for RMSProp
plt.subplot(2, 1, 1)
plt.plot(loss_history3)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc3)
plt.plot(val_acc3)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')

# Plot the loss function and train / validation accuracies for RMSProp + momentum
plt.subplot(2, 1, 1)
plt.plot(loss_history4)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc4)
plt.plot(val_acc4)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')

# Plot the loss function and train / validation accuracies for Adagrad
plt.subplot(2, 1, 1)
plt.plot(loss_history5[100:])
plt.title('Loss history -- after 100th iteration')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.subplot(2, 1, 2)
plt.plot(train_acc5)
plt.plot(val_acc5)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')

# Inspect the first few loss values of the Adagrad run
loss_history5[0:50]
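# Optional convenience cell (not part of the assignment): compare the per-epoch
# validation accuracies of the five runs on a single plot.
plt.figure()
for acc, name in zip([val_acc1, val_acc2, val_acc3, val_acc4, val_acc5],
                     ['sgd', 'momentum', 'rmsprop', 'rmsprop+momentum', 'adagrad']):
    plt.plot(acc, label=name)
plt.xlabel('Epoch')
plt.ylabel('Validation accuracy')
plt.legend(loc='lower right')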