### Define optimization methods

#### Generic optimizer

In [43]:
import sympy

def erase_buf(method):
    """Remove the optimizer state stashed on `method` as its `buf` attribute.

    The *_step optimizers persist state (velocity, cache, moments) across
    calls as a function attribute; `opt` calls this before a run so each
    experiment starts fresh.

    Returns the erased buffer value, or 0 if no buffer was present.
    (Fix: the original read the attribute *after* deleting it, so it
    always returned 0; reading before deleting makes the return value
    meaningful. Callers that ignore the return are unaffected.)
    """
    v = getattr(method, 'buf', 0)
    try:
        delattr(method, 'buf')
    except AttributeError:
        pass  # no buffer was set -- nothing to erase
    return v

def opt(method, function, starting_point, learning_rate=0.12, steps=10):
    """Drive one optimizer for a fixed number of steps.

    method         -- a *_step function (sgd_step, momentum_step, ...)
    function       -- sympy expression in symbols x, y to minimize
    starting_point -- (x0, y0) tuple
    Returns the list of visited points, starting_point included.
    """
    erase_buf(method)  # clear any state left over from a previous run
    x, y = sympy.symbols('x,y')
    grad_x = function.diff(x)
    grad_y = function.diff(y)
    trajectory = [starting_point]
    current = starting_point
    for _ in range(steps):
        current = method(function, grad_x, grad_y, current, learning_rate)
        trajectory.append(current)
    return trajectory


#### Optimizers

In [44]:
import sympy
import numpy as np
import math

def sgd_step(f, fx, fy, point, learning_rate=0.1):
    """One vanilla gradient-descent step: x <- x - lr * grad f(x)."""
    x, y = sympy.symbols('x,y')
    grad = np.array((fx.subs(x, point[0]).subs(y, point[1]),
                     fy.subs(x, point[0]).subs(y, point[1])))
    new_point = np.array(point) - learning_rate * grad
    return (new_point[0], new_point[1])

# SGD with Momentum
def momentum_step(f,fx,fy,point,learning_rate=0.1,mu=0.5):

x,y=sympy.symbols('x,y')
dx=np.array((fx.subs(x,point[0]).subs(y,point[1]),fy.subs(x,point[0]).subs(y,point[1])))
x=np.array(point) # not a good symbol, try to match stanford note, unfortunately symbol used earlier

v=mu*v-learning_rate*dx
x=x+v

# save buf
momentum_step.buf = v
return (x[0],x[1])

# Nesterov Momentum
def nag_step(f,fx,fy,point,learning_rate=0.1,mu=0.5):

x,y=sympy.symbols('x,y')

dx=np.array((fx.subs(x,point[0]+mu*v[0]).subs(y,point[1]+mu*v[1]),
fy.subs(x,point[0]+mu*v[0]).subs(y,point[1]+mu*v[1])))
#print (dx)
x=np.array(point) # not a good symbol, try to match stanford note, unfortunately symbol used earlier

# update
v = mu*v-learning_rate*dx
x=x+v

# save buf
nag_step.buf = v
return (x[0],x[1])

# Adagrad.
# NOTE(review): the `def` line of this function was lost in extraction --
# these statements reference `cache` and match the Adagrad update, and the
# main cell defines `ada_learning_rate`; the signature below is reconstructed
# to match the other *_step optimizers. Confirm against the original notebook.
def adagrad_step(f, fx, fy, point, learning_rate=0.1):
    """One Adagrad step: scale the gradient by the accumulated
    squared-gradient sum (monotonically shrinking effective rate)."""
    x, y = sympy.symbols('x,y')
    dx = np.array((fx.subs(x, point[0]).subs(y, point[1]),
                   fy.subs(x, point[0]).subs(y, point[1])))
    pos = np.array(point)

    # Accumulator persists across steps via a function attribute (see erase_buf);
    # it was previously read before assignment.
    cache = getattr(adagrad_step, 'buf', 0)
    cache += dx[0]**2 + dx[1]**2
    pos = pos - learning_rate*dx/(math.sqrt(cache)+1e-7)

    adagrad_step.buf = cache  # save accumulator for the next step
    return (pos[0], pos[1])

# RMSProp
def rmsprop_step(f, fx, fy, point, learning_rate=0.1, decay_rate=0.5):
    """One RMSProp step: a leaky average of squared gradients scales the
    update, so the effective rate adapts without shrinking forever.

    Fix: `cache` was read before ever being assigned (UnboundLocalError on
    the first call); it is now loaded from the function-attribute buffer,
    defaulting to 0 (erase_buf resets it between runs).
    """
    x, y = sympy.symbols('x,y')
    dx = np.array((fx.subs(x, point[0]).subs(y, point[1]),
                   fy.subs(x, point[0]).subs(y, point[1])))
    pos = np.array(point)

    cache = getattr(rmsprop_step, 'buf', 0)  # running average from previous step
    cache = decay_rate*cache + (1-decay_rate)*(dx[0]**2 + dx[1]**2)
    pos = pos - learning_rate*dx/(math.sqrt(cache)+1e-7)

    rmsprop_step.buf = cache  # save running average for the next step
    return (pos[0], pos[1])

# Adam.
# NOTE(review): the `def` line and the buffer load/save of this function were
# lost in extraction -- the statements unpack t/m/v from `buf` and match the
# Adam update with bias correction, and the main cell defines
# `adam_learning_rate`; the signature (beta1/beta2 defaults) is reconstructed
# to mirror newton_step's. Confirm against the original notebook.
def adam_step(f, fx, fy, point, learning_rate=0.1, beta1=0.5, beta2=0.5):
    """One Adam step: bias-corrected first/second gradient moments."""
    buf = getattr(adam_step, 'buf', np.zeros(4))  # packed state: [t, m_x, m_y, v]
    t = buf[0]
    m = buf[1:3]
    v = buf[3]

    x, y = sympy.symbols('x,y')
    dx = np.array((fx.subs(x, point[0]).subs(y, point[1]),
                   fy.subs(x, point[0]).subs(y, point[1])))
    pos = np.array(point)

    t = t+1
    m = beta1*m + (1-beta1)*dx                      # first moment (mean)
    v = beta2*v + (1-beta2)*(dx[0]**2 + dx[1]**2)   # second moment (uncentered)
    mb = m/(1-beta1**t)  # bias-corrected first moment
    vb = v/(1-beta2**t)  # bias-corrected second moment
    pos = pos - learning_rate*mb/(math.sqrt(vb)+1e-7)

    adam_step.buf = np.array([t, m[0], m[1], v])  # persist state for the next step
    return (pos[0], pos[1])

# Newton
def newton_step(f, fx, fy, point, learning_rate=0.1, beta1=0.5, beta2=0.5):
    """One full Newton step: x <- x - H^{-1} grad f(x).

    `learning_rate`, `beta1` and `beta2` are accepted only for interface
    compatibility with the other *_step optimizers; they are unused.
    """
    x, y = sympy.symbols('x,y')

    def value_at(expr):
        # Evaluate a sympy expression at the current point.
        return expr.subs(x, point[0]).subs(y, point[1])

    # Hessian of f evaluated at `point` (second partials of the gradient).
    H = np.zeros((2, 2))
    H[0, 0] = value_at(fx.diff(x))
    H[0, 1] = value_at(fx.diff(y))
    H[1, 0] = value_at(fy.diff(x))
    H[1, 1] = value_at(fy.diff(y))

    grad = np.array((value_at(fx), value_at(fy)))
    new_point = np.array(point) - np.linalg.inv(H).dot(grad)
    return (new_point[0], new_point[1])

In [60]:
import matplotlib.pyplot as plt
import string
import numpy as np
%matplotlib inline

def del_variable(var):
    """Remove name `var` from this module's global namespace, if present.

    Fix: the original used a bare `except` around `del globals()[var]`,
    which would silently swallow *any* error, not just a missing key.
    `dict.pop` with a default handles the missing-name case directly.
    """
    globals().pop(var, None)

def del_variables(varlist):
    """Remove every name in `varlist` from globals, ignoring missing ones."""
    for name in varlist:
        del_variable(name)

def form_legend_list(varlist):
    """Return the subset of names in `varlist` that exist as globals.

    Fix: the original built the list `li` but had no return statement,
    so it always returned None (and the main cell's
    `plt.legend(handles=legend_list)` then failed).
    """
    li = []
    for var in varlist:
        if var in globals():
            li.append(var)
    return li

def plot_contour(z, rang=(-10, 10, -10, 10), delta=0.05):
    """Draw a contour plot of the sympy expression `z` over a rectangle.

    z     -- sympy expression in symbols x, y
    rang  -- (xmin, xmax, ymin, ymax)
    delta -- grid spacing
    """
    xr = np.arange(rang[0], rang[1], delta)
    yr = np.arange(rang[2], rang[3], delta)

    [X, Y] = np.meshgrid(xr, yr)

    # Upper-casing the expression string maps its 'x'/'y' onto the meshgrid
    # arrays X/Y so eval vectorizes it over the grid.
    # Fix: `string.upper()` was removed in Python 3 -- use str.upper().
    cmd = str(z).upper()
    # NOTE(review): eval of an expression string is only safe because `z`
    # is a trusted sympy expression built in this notebook.
    Z = eval(cmd)

    plt.contour(X, Y, Z)


### Main

In [84]:
import sympy

x,y=sympy.symbols('x,y')
z=x**2 + 8*y**2 # objective: elongated quadratic bowl

sgd_learning_rate=0.12 #0.12
momentum_learning_rate=0.12 # more stable than sgd
nag_learning_rate=0.09 # less stable than sgd
rms_learning_rate=3 # tends to oscillate near the optimum
adam_learning_rate=4 # more stable than rmsprop

starting_point=(9,9)
num_step=20

# Run each optimizer and collect its trajectory of (x, y) points.
plist_sgd=opt(sgd_step,z,starting_point,sgd_learning_rate,num_step)
plist_momentum=opt(momentum_step,z,starting_point,momentum_learning_rate,num_step)
plist_nag=opt(nag_step,z,starting_point,nag_learning_rate,num_step)
plist_rmsprop=opt(rmsprop_step,z,starting_point,rms_learning_rate,num_step)
plist_newton=opt(newton_step,z,starting_point,0,num_step) # learning rate (0) is a dummy

# Overlay each trajectory on the contour plot of the objective.
plot_contour(z)
sgd,=plt.plot(np.array(plist_sgd).T[0],np.array(plist_sgd).T[1],label='sgd')
momentum,=plt.plot(np.array(plist_momentum).T[0],np.array(plist_momentum).T[1],label='momentum')
nag,=plt.plot(np.array(plist_nag).T[0],np.array(plist_nag).T[1],label='nag')
rmsprop,=plt.plot(np.array(plist_rmsprop).T[0],np.array(plist_rmsprop).T[1],label='rmsprop')
newton,=plt.plot(np.array(plist_newton).T[0],np.array(plist_newton).T[1],label='newton')

# Fix: `legend_list` was never defined (NameError); pass the Line2D handles directly.
plt.legend(bbox_to_anchor=(0.4, 1),handles=[sgd,momentum,nag,rmsprop,newton])

Out[84]:
<matplotlib.legend.Legend at 0x7f457875afd0>
In [85]:
# Second experiment: same optimizers on a bowl that is quartic in y.
# NOTE(review): this cell appears truncated in the extraction -- the rmsprop
# and newton trajectories are computed but their plot/legend lines are not
# visible here (the recorded output is a Legend object), and
# `ada_learning_rate`/`adam_learning_rate` are set but unused in the visible
# lines. Confirm against the original notebook.
import sympy

x,y=sympy.symbols('x,y')
z=x**2 + 4*y**4 # define function (quartic in y)

sgd_learning_rate=0.001 #0.12
momentum_learning_rate=0.001 # more stable than sgd
nag_learning_rate=0.001 # less stable than sgd
ada_learning_rate=10 # very stable (converges very slowly)
rms_learning_rate=10 # oscillates near the optimum
adam_learning_rate=5 # even more stable than rmsprop

starting_point=(9,9)
num_step=20

# Run each optimizer and collect its trajectory of (x, y) points.
plist_sgd=opt(sgd_step,z,starting_point,sgd_learning_rate,num_step)
plist_momentum=opt(momentum_step,z,starting_point,momentum_learning_rate,num_step)
plist_nag=opt(nag_step,z,starting_point,nag_learning_rate,num_step)
plist_rmsprop=opt(rmsprop_step,z,starting_point,rms_learning_rate,num_step)
plist_newton=opt(newton_step,z,starting_point,0,num_step) # learning rate (0) is a dummy

# Overlay each trajectory on the contour plot of the objective.
plot_contour(z)
sgd,=plt.plot(np.array(plist_sgd).T[0],np.array(plist_sgd).T[1],label='sgd')
momentum,=plt.plot(np.array(plist_momentum).T[0],np.array(plist_momentum).T[1],label='momentum')
nag,=plt.plot(np.array(plist_nag).T[0],np.array(plist_nag).T[1],label='nag')

<matplotlib.legend.Legend at 0x7f4578616f90>