from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import os
os.chdir("/content/drive/My Drive/Colab Notebooks/Optimization project")
os.getcwd()
file_path = "./MNIST"
try:
os.stat(file_path)
except:
os.mkdir(file_path)
from sug import *
import torch
from torch.optim import Optimizer
import math
import copy
class SUG(Optimizer):
def __init__(self, params, l_0, d_0=0, prob=1., eps=1e-4, momentum=0, dampening=0,
weight_decay=0, nesterov=False):
if l_0 < 0.0:
raise ValueError("Invalid Lipsitz constant of gradient: {}".format(l_0))
if d_0 < 0.0:
raise ValueError("Invalid disperion of gradient: {}".format(d_0))
if momentum < 0.0:
raise ValueError("Invalid momentum value: {}".format(momentum))
if weight_decay < 0.0:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
defaults = dict(L=l_0, momentum=momentum, dampening=dampening,
weight_decay=weight_decay, nesterov=nesterov)
if nesterov and (momentum <= 0 or dampening != 0):
raise ValueError("Nesterov momentum requires a momentum and zero dampening")
self.Lips = l_0
self.prev_Lips = l_0
self.D_0 = d_0
self.eps = eps
self.prob = prob
self.start_param = params
self.upd_sq_grad_norm = None
self.sq_grad_norm = None
self.loss = torch.tensor(0.)
self.cur_loss = 0
self.closure = None
super(SUG, self).__init__(params, defaults)
def __setstate__(self, state):
super(SUG, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('nesterov', False)
def comp_batch_size(self):
"""Returns optimal batch size for given d_0, eps and l_0;
"""
return math.ceil(2 * self.D_0 * self.eps / self.prev_Lips)
def step(self, loss, closure):
"""Performs a single optimization step.
Arguments:
loss : current loss
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
self.start_params = []
self.loss = loss
self.sq_grad_norm = 0
self.cur_loss = loss
self.closure = closure
for gr_idx, group in enumerate(self.param_groups):
weight_decay = group['weight_decay']
momentum = group['momentum']
dampening = group['dampening']
nesterov = group['nesterov']
self.start_params.append([])
for p_idx, p in enumerate(group['params']):
self.start_params[gr_idx].append([p.data.clone()])
if p.grad is None:
continue
self.start_params[gr_idx][p_idx].append(p.grad.data.clone())
d_p = self.start_params[gr_idx][p_idx][1]
p_ = self.start_params[gr_idx][p_idx][0]
if weight_decay != 0:
d_p.add_(weight_decay, p.data)
self.cur_loss += weight_decay * torch.sum(p * p).item()
self.sq_grad_norm += torch.sum(d_p * d_p).item()
if momentum != 0:
param_state = self.state[p]
if 'momentum_buffer' not in param_state:
buf = param_state['momentum_buffer'] = torch.zeros_like(p.data)
buf.mul_(momentum).add_(d_p)
else:
buf = param_state['momentum_buffer']
buf.mul_(momentum).add_(1 - dampening, d_p)
if nesterov:
d_p = d_p.add(momentum, buf)
else:
d_p = buf
self.start_params[gr_idx][p_idx][1] = d_p
i = 0
self.Lips = max(self.prev_Lips / 2, 0.1)
difference = -1
while difference < 0 or i == 0:
if (i > 0):
self.Lips = max(self.Lips * 2, 0.1)
for gr_idx, group in enumerate(self.param_groups):
for p_idx, p in enumerate(group['params']):
if p.grad is None:
continue
start_param_val = self.start_params[gr_idx][p_idx][0]
start_param_grad = self.start_params[gr_idx][p_idx][1]
p.data = start_param_val - 1/(2*self.Lips) * start_param_grad
difference, upd_loss = self.stop_criteria()
i += 1
self.prev_Lips = self.Lips
return self.Lips, i
def stop_criteria(self):
"""Checks if the Lipsitz constant of gradient is appropriate
<g(x_k), w_k - x_k> + 2L_k / 2 ||x_k - w_k||^2 = - 1 / (2L_k)||g(x_k)||^2 + 1 / (4L_k)||g(x_k)||^2 = -1 / (4L_k)||g(x_k)||^2
"""
upd_loss = self.closure()
major = self.cur_loss - 1 / (4 * self.Lips) * self.sq_grad_norm
return major - upd_loss - self.l2_reg() + self.eps / 10, upd_loss
def get_lipsitz_const(self):
"""Returns current Lipsitz constant of the gradient of the loss function
"""
return self.Lips
def get_sq_grad(self):
"""Returns the current second norm of the gradient of the loss function
calculated by the formula
||f'(p_1,...,p_n)||_2^2 ~ \sum\limits_{i=1}^n ((df/dp_i) * (df/dp_i))(p1,...,p_n))
"""
self.upd_sq_grad_norm = 0
for gr_idx, group in enumerate(self.param_groups):
for p_idx, p in enumerate(group['params']):
if p.grad is None:
continue
self.upd_sq_grad_norm += torch.sum(p.grad.data * p.grad.data).item()
return self.upd_sq_grad_norm
def l2_reg(self):
"""Returns the current l2 regularization addiction
"""
self.upd_l2_reg = 0
for gr_idx, group in enumerate(self.param_groups):
weight_decay = group['weight_decay']
if weight_decay != 0:
for p_idx, p in enumerate(group['params']):
self.upd_l2_reg += weight_decay * torch.sum(p * p).item()
return self.upd_l2_reg
%matplotlib inline
import torch
from torch import nn
from torch import optim
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device
device(type='cuda', index=0)
batch_size = 512
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(root='./data', train=True,
download=True, transform=transform)
valid_dataset = torchvision.datasets.MNIST(root='/data', train=True,
download=True, transform=transform)
testset = torchvision.datasets.MNIST(root='./data', train=False,
download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
shuffle=False, num_workers=2)
0it [00:00, ?it/s]
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /data/MNIST/raw/train-images-idx3-ubyte.gz
9920512it [00:01, 8639858.13it/s]
Extracting /data/MNIST/raw/train-images-idx3-ubyte.gz
0%| | 0/28881 [00:00<?, ?it/s]
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /data/MNIST/raw/train-labels-idx1-ubyte.gz
32768it [00:00, 130050.77it/s] 0%| | 0/1648877 [00:00<?, ?it/s]
Extracting /data/MNIST/raw/train-labels-idx1-ubyte.gz Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /data/MNIST/raw/t10k-images-idx3-ubyte.gz
1654784it [00:00, 2357733.35it/s] 0it [00:00, ?it/s]
Extracting /data/MNIST/raw/t10k-images-idx3-ubyte.gz Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /data/MNIST/raw/t10k-labels-idx1-ubyte.gz
8192it [00:00, 49203.85it/s]
Extracting /data/MNIST/raw/t10k-labels-idx1-ubyte.gz Processing... Done!
valid_size=0.15
num_train = len(trainset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
np.random.seed(42)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
trainloader = torch.utils.data.DataLoader(trainset,
batch_size=batch_size, sampler=train_sampler,
num_workers=2)
validloader = torch.utils.data.DataLoader(valid_dataset,
batch_size=batch_size, sampler=valid_sampler,
num_workers=2)
def show_batch(batch):
im = torchvision.utils.make_grid(batch)
plt.imshow(np.transpose(im.numpy(), (1, 2, 0)))
dataiter = iter(trainloader)
images, labels = dataiter.next()
print('Labels: ', labels)
print('Batch shape: ', images.size())
show_batch(images)
class LR(nn.Module):
def __init__(self):
super(LR, self).__init__()
self.linear1 = nn.Linear(28*28, 10)
def forward(self, x):
batch_size = x.size(0)
y_pred = F.log_softmax(self.linear1(x.view(batch_size, -1)), -1)
return y_pred
class FC(nn.Module):
def __init__(self):
super(FC, self).__init__()
self.linear1 = nn.Linear(28*28, 256)
self.linear2 = nn.Linear(256, 10)
def forward(self, x):
batch_size = x.size(0)
h_relu = F.relu(self.linear1(x.view(batch_size, -1)))
y_pred = self.linear2(h_relu)
return y_pred
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
self.conv3 = nn.Conv2d(32,64, kernel_size=5)
self.fc1 = nn.Linear(3*3*64, 256)
self.fc2 = nn.Linear(256, 10)
def forward(self, x):
x = F.relu(self.conv1(x))
#x = F.dropout(x, p=0.5, training=self.training)
x = F.relu(F.max_pool2d(self.conv2(x), 2))
x = F.dropout(x, p=0.5, training=self.training)
x = F.relu(F.max_pool2d(self.conv3(x),2))
x = F.dropout(x, p=0.5, training=self.training)
x = x.view(-1,3*3*64 )
x = F.relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.log_softmax(x, dim=1)
def time_since(since):
s = time.time() - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
def model_step(model, optimizer, criterion, inputs, labels):
outputs = model(inputs)
loss = criterion(outputs, labels)
if model.training:
optimizer.zero_grad()
loss.backward(retain_graph=True)
if optimizer.__class__.__name__ != 'SUG':
optimizer.step()
else:
def closure():
optimizer.zero_grad()
upd_outputs = model(inputs)
upd_loss = criterion(upd_outputs, labels).item()
return upd_loss
optimizer.step(loss.item(), closure)
return loss.item()
def train(model, trainloader, criterion, optimizer, n_epochs=2, validloader=None, eps=1e-5, print_every=1):
tr_loss, val_loss, lips, times, grad, acc = ([] for i in range(6))
start_time = time.time()
model.to(device=device)
for ep in range(n_epochs):
model.train()
i = 0
for i, data in enumerate(trainloader):
inputs, labels = data
inputs, labels = Variable(inputs).to(device=device), Variable(labels).to(device=device)
#print(inputs.size())
tr_loss.append(model_step(model, optimizer, criterion, inputs, labels))
if optimizer.__class__.__name__ == 'SUG':
lips.append(optimizer.get_lipsitz_const())
grad.append(optimizer.get_sq_grad)
times.append(time_since(start_time))
if ep % print_every == 0:
print("Epoch {}, training loss {}, time passed {}".format(ep, sum(tr_loss[-i:]) / i, time_since(start_time)))
if validloader is None:
continue
model.zero_grad()
model.eval()
j = 0
count = 0
n_ex = 0
for j, data in enumerate(validloader):
inputs, labels = data
inputs, labels = inputs.to(device=device), labels.to(device=device)
outputs = model(inputs)
count += (torch.argmax(outputs, 1) == labels).float().sum().item()
n_ex += outputs.size(0)
val_loss.append(criterion(outputs, labels).item())
acc.append(count / n_ex)
if ep % print_every == 0:
print("Validation loss {}, validation accuracy {}".format(sum(val_loss[-j:]) / j, acc[-1]))
return tr_loss, times, val_loss, lips, grad, acc
print_every = 4
n_epochs = 10
tr_loss = {}
tr_loss['sgd'] = {}
val_loss = {}
val_loss['sgd'] = {}
lrs = [0.05, 0.01, 0.005]
criterion = nn.CrossEntropyLoss(reduction="mean")
torch.manual_seed(999)
<torch._C.Generator at 0x7f50374f9670>
def concat_states(state1, state2):
states = {
'epoch': state1['epoch'] + state2['epoch'],
'state_dict': state2['state_dict'],
'optimizer': state2['optimizer'],
'tr_loss' : state1['tr_loss'] + state2['tr_loss'],
'val_loss' : state1['val_loss'] + state2['val_loss'],
'lips' : state1['lips'] + state2['lips'],
'grad' : state1['grad'] + state2['grad'],
#'times' : state1['times'] + list(map(lambda x: x + state1['times'][-1],state2['times']))
'times' : state1['times'] + state2['times']
}
return states
n_epochs = 20
for lr in lrs:
model = LR()
print("SGD lr={}, momentum=0. :".format(lr))
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0., weight_decay=1e-3)
tr_loss['sgd'][lr], times, val_loss['sgd'][lr], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sgd'][lr],
'val_loss' : val_loss['sgd'][lr],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/LR_' + str(lr))
SGD lr=0.05, momentum=0. : Epoch 0, training loss 0.8053867536963839, time passed 0m 4s Validation loss 0.4970751264516045, validation accuracy 0.8594444444444445 Epoch 4, training loss 0.3556738823953301, time passed 0m 29s Validation loss 0.34582727095660043, validation accuracy 0.8994444444444445 Epoch 8, training loss 0.324508320201527, time passed 0m 55s Validation loss 0.32668687315548167, validation accuracy 0.9034444444444445 Epoch 12, training loss 0.31013485217335246, time passed 1m 17s Validation loss 0.3108014885117026, validation accuracy 0.9085555555555556 Epoch 16, training loss 0.3015503261727516, time passed 1m 40s Validation loss 0.3028887212276459, validation accuracy 0.9145555555555556 SGD lr=0.01, momentum=0. : Epoch 0, training loss 1.2965486091796798, time passed 0m 4s Validation loss 0.8687916503233069, validation accuracy 0.8221111111111111 Epoch 4, training loss 0.49756273416557695, time passed 0m 27s Validation loss 0.4874144634779762, validation accuracy 0.8711111111111111 Epoch 8, training loss 0.42324275109503007, time passed 0m 49s Validation loss 0.4176199418656966, validation accuracy 0.8856666666666667 Epoch 12, training loss 0.39122442795772744, time passed 1m 14s Validation loss 0.38835134050425363, validation accuracy 0.8913333333333333 Epoch 16, training loss 0.3713635543380121, time passed 1m 36s Validation loss 0.3647502099766451, validation accuracy 0.8964444444444445 SGD lr=0.005, momentum=0. : Epoch 0, training loss 1.6245008020689993, time passed 0m 4s Validation loss 1.1842015701181747, validation accuracy 0.7812222222222223 Epoch 4, training loss 0.6250864417866023, time passed 0m 27s Validation loss 0.6046268624417922, validation accuracy 0.8561111111111112 Epoch 8, training loss 0.506018156054044, time passed 0m 53s Validation loss 0.498588469098596, validation accuracy 0.87 Epoch 12, training loss 0.45563414421948517, time passed 1m 15s Validation loss 0.45264510898029103, validation accuracy 0.8791111111111111 Epoch 16, training loss 0.4259264917686732, time passed 1m 38s Validation loss 0.42056286334991455, validation accuracy 0.883
l_0 = 20
model = LR()
optimizer = SUG(model.parameters(), l_0=l_0, momentum=0., weight_decay=1e-3)
tr_loss['sug'], times, val_loss['sug'], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sug'],
'val_loss' : val_loss['sug'],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/LR_sug')
Epoch 0, training loss 0.8922390636771617, time passed 0m 4s Validation loss 0.5756662032183479, validation accuracy 0.8471111111111111 Epoch 4, training loss 0.36624673069125474, time passed 0m 29s Validation loss 0.35796097797505994, validation accuracy 0.8953333333333333 Epoch 8, training loss 0.3267748903746557, time passed 0m 54s Validation loss 0.32799020935507384, validation accuracy 0.906 Epoch 12, training loss 0.30954578579074205, time passed 1m 18s Validation loss 0.31454072805011973, validation accuracy 0.9114444444444444 Epoch 16, training loss 0.29919745840809564, time passed 1m 42s Validation loss 0.2986759543418884, validation accuracy 0.9141111111111111
n_epochs = 20
torch.manual_seed(999)
for lr in lrs:
model = FC()
print("SGD lr={}, momentum=0. :".format(lr))
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0., weight_decay=1e-3)
tr_loss['sgd'][lr], times, val_loss['sgd'][lr], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sgd'][lr],
'val_loss' : val_loss['sgd'][lr],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/FC_' + str(lr))
SGD lr=0.05, momentum=0. : Epoch 0, training loss 0.9827026613432952, time passed 0m 4s Validation loss 0.521831205662559, validation accuracy 0.8611111111111112 Epoch 4, training loss 0.32333635019533563, time passed 0m 27s Validation loss 0.33837247420759764, validation accuracy 0.8983333333333333 Epoch 8, training loss 0.2735136151313782, time passed 0m 52s Validation loss 0.27770407059613395, validation accuracy 0.9204444444444444 Epoch 12, training loss 0.2350455638435152, time passed 1m 14s Validation loss 0.2334472729879267, validation accuracy 0.9327777777777778 Epoch 16, training loss 0.20252520448029643, time passed 1m 37s Validation loss 0.21084140153492198, validation accuracy 0.9381111111111111 SGD lr=0.01, momentum=0. : Epoch 0, training loss 1.8455105502196032, time passed 0m 6s Validation loss 1.3969286049113554, validation accuracy 0.7461111111111111 Epoch 4, training loss 0.5331706323406913, time passed 0m 28s Validation loss 0.5103560914011562, validation accuracy 0.8694444444444445 Epoch 8, training loss 0.4116000948530255, time passed 0m 52s Validation loss 0.4016827555263744, validation accuracy 0.8874444444444445 Epoch 12, training loss 0.3670172348166957, time passed 1m 17s Validation loss 0.3593411305371453, validation accuracy 0.8961111111111111 Epoch 16, training loss 0.34106487396991614, time passed 1m 42s Validation loss 0.3356891782844768, validation accuracy 0.901 SGD lr=0.005, momentum=0. : Epoch 0, training loss 2.0741889308197328, time passed 0m 4s Validation loss 1.842525559313157, validation accuracy 0.6163333333333333 Epoch 4, training loss 0.8207536977950973, time passed 0m 27s Validation loss 0.7696297238854801, validation accuracy 0.832 Epoch 8, training loss 0.5540273183524006, time passed 0m 52s Validation loss 0.5414836915100322, validation accuracy 0.8632222222222222 Epoch 12, training loss 0.46390234430631, time passed 1m 14s Validation loss 0.4573543685324052, validation accuracy 0.8742222222222222 Epoch 16, training loss 0.4175319692703209, time passed 1m 36s Validation loss 0.4172443712458891, validation accuracy 0.8842222222222222
l_0 = 20
model = FC()
optimizer = SUG(model.parameters(), l_0=l_0, momentum=0., weight_decay=1e-3)
tr_loss['sug'], times, val_loss['sug'], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sug'],
'val_loss' : val_loss['sug'],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/FC_sug')
Epoch 0, training loss 0.841091928457973, time passed 0m 5s Validation loss 0.5276602085898904, validation accuracy 0.862 Epoch 4, training loss 0.3375983780080622, time passed 0m 31s Validation loss 0.32630446903845844, validation accuracy 0.9072222222222223 Epoch 8, training loss 0.2841122362649802, time passed 0m 55s Validation loss 0.28937043161953196, validation accuracy 0.914 Epoch 12, training loss 0.2409512424709821, time passed 1m 19s Validation loss 0.24036436747102177, validation accuracy 0.9277777777777778 Epoch 16, training loss 0.20427992352933594, time passed 1m 45s Validation loss 0.20693528213921716, validation accuracy 0.9405555555555556
torch.manual_seed(999)
for lr in lrs:
model = FC()
print("SGD lr={}, momentum=0.9 :".format(lr))
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-3)
tr_loss['sgd'][lr], times, val_loss['sgd'][lr], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sgd'][lr],
'val_loss' : val_loss['sgd'][lr],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/FC_' + str(lr)+'_0.9')
SGD lr=0.05, momentum=0.9 : Epoch 0, training loss 0.5381249714680393, time passed 0m 5s Validation loss 0.28269324670819673, validation accuracy 0.9155555555555556 Epoch 4, training loss 0.12210867298070831, time passed 0m 29s Validation loss 0.1238884250907337, validation accuracy 0.9644444444444444 Epoch 8, training loss 0.08087855562417194, time passed 0m 56s Validation loss 0.11202001571655273, validation accuracy 0.9683333333333334 Epoch 12, training loss 0.06308559685794994, time passed 1m 18s Validation loss 0.08321712078417048, validation accuracy 0.9768888888888889 Epoch 16, training loss 0.05535656740569105, time passed 1m 40s Validation loss 0.09248179460273069, validation accuracy 0.9728888888888889 SGD lr=0.01, momentum=0.9 : Epoch 0, training loss 0.8471114117689807, time passed 0m 4s Validation loss 0.39922235643162446, validation accuracy 0.886 Epoch 4, training loss 0.2757103128565682, time passed 0m 29s Validation loss 0.26469812410719257, validation accuracy 0.9244444444444444 Epoch 8, training loss 0.20710186073274323, time passed 0m 51s Validation loss 0.2069076071767246, validation accuracy 0.9395555555555556 Epoch 12, training loss 0.16193968215675064, time passed 1m 13s Validation loss 0.16249041434596567, validation accuracy 0.9536666666666667 Epoch 16, training loss 0.1327487767645807, time passed 1m 39s Validation loss 0.1429281318012406, validation accuracy 0.9581111111111111 SGD lr=0.005, momentum=0.9 : Epoch 0, training loss 1.1291484489585415, time passed 0m 4s Validation loss 0.5271653725820429, validation accuracy 0.8605555555555555 Epoch 4, training loss 0.32374356130156856, time passed 0m 26s Validation loss 0.31370861039442177, validation accuracy 0.909 Epoch 8, training loss 0.2752047236820664, time passed 0m 49s Validation loss 0.2723114639520645, validation accuracy 0.9206666666666666 Epoch 12, training loss 0.2382019865091401, time passed 1m 15s Validation loss 0.23456529834691217, validation accuracy 0.9323333333333333 Epoch 16, training loss 0.20731118155850303, time passed 1m 37s Validation loss 0.21120107173919678, validation accuracy 0.9417777777777778
torch.manual_seed(999)
l_0 = 20
model = FC()
optimizer = SUG(model.parameters(), l_0=l_0, momentum=0.9, weight_decay=1e-3)
tr_loss['sug'], times, val_loss['sug'], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sug'],
'val_loss' : val_loss['sug'],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/FC_sug_0.9')
Epoch 0, training loss 0.5774111350377401, time passed 0m 5s Validation loss 0.4330375124426449, validation accuracy 0.8753333333333333 Epoch 4, training loss 0.35169939471013617, time passed 0m 29s Validation loss 0.3514442969770992, validation accuracy 0.8977777777777778 Epoch 8, training loss 0.3251004270230881, time passed 0m 55s Validation loss 0.3274660198127522, validation accuracy 0.9045555555555556 Epoch 12, training loss 0.309954424398114, time passed 1m 20s Validation loss 0.3091518423136543, validation accuracy 0.9075555555555556 Epoch 16, training loss 0.2942978652438732, time passed 1m 47s Validation loss 0.2984069471850115, validation accuracy 0.9126666666666666
for lr in lrs:
model = CNN()
print("SGD lr={}, momentum=0. :".format(lr))
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0., weight_decay=1e-3)
tr_loss['sgd'][lr], times, val_loss['sgd'][lr], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sgd'][lr],
'val_loss' : val_loss['sgd'][lr],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/CNN_' + str(lr))
#n_epochs = 8
l_0 = 20
model = CNN()
optimizer = SUG(model.parameters(), l_0=l_0, momentum=0., weight_decay=1e-3)
tr_loss['sug'], times, val_loss['sug'], lips, grad, acc = train(model, trainloader, criterion, optimizer, n_epochs=n_epochs, print_every=print_every, validloader=validloader)
states = {
'epoch': n_epochs,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
'tr_loss' : tr_loss['sug'],
'val_loss' : val_loss['sug'],
'lips' : lips,
'grad' : grad,
'times' : times,
'acc' : acc
}
torch.save(states, './MNIST/CNN_sug')