tf.train.Optimizer

This notebook uses TensorFlow, NOT numpy, in order to compare against the results of my optimizer, which is made with numpy only.
optimizer.tf.version.plot.ipynb is implemented in order to compare the optimization paths of the various optimizers at a glance.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LogNorm
from matplotlib import animation
from IPython.display import HTML
from itertools import zip_longest
import tensorflow as tf
tf.enable_eager_execution()
def f(x, y):
    """Evaluate the Beale test function at ``(x, y)``.

    The function is a sum of three squared residuals, so it is never
    negative; it is exactly zero at ``(3, 0.5)``.

    Only arithmetic operators are used, so any operands that support them
    work: plain floats, NumPy arrays (evaluated elementwise), or TensorFlow
    variables.

    Args:
        x: First coordinate(s).
        y: Second coordinate(s).

    Returns:
        The function value, of whatever type the arithmetic on ``x`` and
        ``y`` produces.
    """
    return (
        (1.5 - x + x*y)**2
        + (2.25 - x + x*y**2)**2
        + (2.625 - x + x*y**3)**2
    )
# The point (3, 0.5), where f evaluates to zero, kept both as a flat vector
# and as a 2x1 column vector.
minima = np.array((3.0, 0.5))
minima_ = minima[:, np.newaxis]

print("minima (1x2 row vector shape): {}".format(minima))
print("minima (2x1 column vector shape):", minima_, sep="\n")
minima (1x2 row vector shape): [3. 0.5] minima (2x1 column vector shape): [[3. ] [0.5]]
# Display names for the optimizers under comparison; `optimizers` below is
# listed in the same order so the two can be zipped together.
opt_names = ['sgd', 'momentum', 'nesterov', 'adagrad', 'rmsprop', 'adam']

# Plain gradient descent.
sgd = tf.train.GradientDescentOptimizer(learning_rate=0.01)

# Momentum, without and with the Nesterov variant.
momentum = tf.train.MomentumOptimizer(learning_rate=0.005, momentum=0.9, use_nesterov=False)
nesterov = tf.train.MomentumOptimizer(learning_rate=0.005, momentum=0.9, use_nesterov=True)

# Adagrad, RMSProp and Adam.
adagrad = tf.train.AdagradOptimizer(learning_rate=0.1)
rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.1, decay=0.9, epsilon=1e-10)
adam = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8)

optimizers = [sgd, momentum, nesterov, adagrad, rmsprop, adam]
# Run every optimizer from the same starting point and record the (x, y)
# positions it visits. Each finished run contributes one 2 x n_steps array
# to `all_pathes`.
all_pathes = []

x_init = 0.8
y_init = 1.6
max_steps = 100

for opt, name in zip(optimizers, opt_names):
    x = tf.get_variable('x', dtype=tf.float32, initializer=tf.constant(x_init))
    y = tf.get_variable('y', dtype=tf.float32, initializer=tf.constant(y_init))

    x_history, y_history = [], []
    z_prev = 0.0

    for step in range(max_steps):
        # Record the forward pass so z can be differentiated w.r.t. x and y.
        with tf.GradientTape() as g:
            z = f(x, y)

        # Positions are logged before the update is applied, so each path
        # begins at the initial point.
        x_history.append(x.numpy())
        y_history.append(y.numpy())

        dz_dx, dz_dy = g.gradient(z, [x, y])
        opt.apply_gradients(zip([dz_dx, dz_dy], [x, y]))

        # Stop early once z changes by less than 1e-6 between steps.
        z_now = z.numpy()
        if np.abs(z_prev - z_now) < 1e-6:
            break
        z_prev = z_now

    print("{} training done!".format(name))
    print(" steps: {} z: {:.6f} x: {:.5f} y: {:.5f}".format(step+1, f(x, y).numpy(), x.numpy(), y.numpy()))

    x_history = np.array(x_history)
    y_history = np.array(y_history)
    # Pair the coordinates up as (n, 2), then transpose to 2 x n:
    # row 0 holds x, row 1 holds y.
    path = np.stack((x_history, y_history), axis=1).T
    all_pathes.append(path)
sgd training done! steps: 100 z: 0.098490 x: 2.44081 y: 0.32173 momentum training done! steps: 100 z: 0.008520 x: 3.25669 y: 0.55341 nesterov training done! steps: 100 z: 0.004242 x: 3.17920 y: 0.54142 adagrad training done! steps: 100 z: 10.390488 x: 0.59005 y: 0.68061 rmsprop training done! steps: 100 z: 0.015740 x: 2.79879 y: 0.46634 adam training done! steps: 100 z: 0.096163 x: 2.44514 y: 0.33627
# Sample f on a regular grid over [-margin, margin] x [-margin, margin];
# the plots below draw their contours from these values.
number_of_points = 50
margin = 4.5
x_min, x_max = 0. - margin, 0. + margin
y_min, y_max = 0. - margin, 0. + margin

x_points = np.linspace(x_min, x_max, number_of_points)
y_points = np.linspace(y_min, y_max, number_of_points)
x_mesh, y_mesh = np.meshgrid(x_points, y_points)

# f uses only elementwise arithmetic, so it can be applied to the whole mesh.
z = f(x_mesh, y_mesh)
# Static overview: contours of f on a log scale, the point `minima` as a red
# star, and one arrow trail per optimizer.
fig, ax = plt.subplots(figsize=(10, 8))

ax.contour(x_mesh, y_mesh, z, levels=np.logspace(-0.5, 5, 25), norm=LogNorm(), cmap=plt.cm.jet)
ax.plot(*minima, 'r*', markersize=20)

colors = ['red', 'green', 'blue', 'orange', 'purple', 'magenta']
for name, path, color in zip(opt_names, all_pathes, colors):
    # One arrow per step: it starts at each recorded point and spans the
    # displacement to the next one, drawn in data coordinates.
    ax.quiver(path[0,:-1], path[1,:-1], path[0,1:]-path[0,:-1], path[1,1:]-path[1,:-1],
              scale_units='xy', angles='xy', scale=1, color=color, lw=3)
    # Empty line carrying this optimizer's label and color for the legend.
    ax.plot([], [], color=color, label=name, lw=3)

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim((x_min, x_max))
ax.set_ylim((y_min, y_max))

ax.legend(loc='upper left', prop={'size': 15})

# `quality` only ever applied to JPEG output, and newer Matplotlib releases
# reject the keyword, so it is not passed for this PNG file.
plt.savefig('allplot.png', dpi=150)
plt.show()
class TrajectoryAnimation(animation.FuncAnimation):
    """Animate several 2-D trajectories on one set of axes.

    Each path is indexed as a 2-D array whose columns are successive points;
    its rows are unpacked as the arguments of ``set_data``. Every path gets a
    line that grows frame by frame and a round marker at its newest point.

    Args:
        *paths: The trajectories to animate.
        labels: Legend labels, matched to ``paths`` by position. Missing
            entries become ``None``.
        colors: Line colors, matched to ``paths`` by position. Missing
            entries become ``None``.
        fig: Figure to animate. Derived from ``ax`` when omitted, or newly
            created when both are omitted.
        ax: Axes to draw on. Defaults to ``fig.gca()`` when only ``fig`` is
            given.
        frames: Number of frames. Defaults to the column count of the longest
            path.
        interval, repeat_delay, blit, **kwargs: Forwarded unchanged to
            ``animation.FuncAnimation``.
    """

    # The defaults for `labels` and `colors` are immutable tuples rather than
    # lists, so no mutable object is shared between calls.
    def __init__(self, *paths, labels=(), colors=(), fig=None, ax=None, frames=None,
                 interval=60, repeat_delay=5, blit=True, **kwargs):
        # Resolve whichever of fig / ax the caller did not supply.
        if fig is None:
            if ax is None:
                fig, ax = plt.subplots()
            else:
                fig = ax.get_figure()
        elif ax is None:
            ax = fig.gca()

        self.fig = fig
        self.ax = ax
        self.paths = paths

        if frames is None:
            frames = max(path.shape[1] for path in paths)

        # zip_longest pads missing labels / colors with None.
        self.lines = [ax.plot([], [], label=label, color=color, lw=3)[0]
                      for _, label, color in zip_longest(paths, labels, colors)]
        # Each marker reuses the color of its line.
        self.points = [ax.plot([], [], 'o', color=line.get_color())[0]
                       for line in self.lines]

        super().__init__(fig, self.animate, init_func=self.init_anim,
                         frames=frames, interval=interval, blit=blit,
                         repeat_delay=repeat_delay, **kwargs)

    def init_anim(self):
        """Clear every line and marker; return the artists to redraw."""
        for line, point in zip(self.lines, self.points):
            line.set_data([], [])
            point.set_data([], [])
        return self.lines + self.points

    def animate(self, i):
        """Draw frame ``i``: the first ``i`` points of each path plus a marker.

        Returns the list of line and marker artists.
        """
        for line, point, path in zip(self.lines, self.points, self.paths):
            # Clamp to this path's own length so that a path shorter than the
            # animation keeps its marker on its final point instead of
            # getting an empty slice (which would hide the marker).
            end = min(i, path.shape[1])
            line.set_data(*path[:, :end])
            point.set_data(*path[:, max(end - 1, 0):end])
        return self.lines + self.points
# Fresh figure for the animated version: the same log-scale contours and red
# star at `minima`, with the trajectories drawn by TrajectoryAnimation.
fig, ax = plt.subplots(figsize=(10, 8))

ax.contour(x_mesh, y_mesh, z, levels=np.logspace(-0.5, 5, 25), norm=LogNorm(), cmap=plt.cm.jet)
ax.plot(*minima, 'r*', markersize=20)

# Axis labels and limits in a single call.
ax.set(xlabel='x', ylabel='y', xlim=(x_min, x_max), ylim=(y_min, y_max))

path_animation = TrajectoryAnimation(*all_pathes, labels=opt_names, colors=colors, ax=ax)

# The legend is built after the animation has added its labelled lines.
ax.legend(loc='upper left', prop={'size': 15})
<matplotlib.legend.Legend at 0x106e34208>
# Render the animation to an HTML5 video and wrap it in an IPython HTML
# display object so the notebook shows it inline.
HTML(path_animation.to_html5_video())
mp4 file
path_animation.save('allplot.animation.mp4')
# if you want to convert mp4 to gif
# !ffmpeg -i allplot.animation.mp4 -s 1500x1200 allplot.gif