Notebook

Optimize the Beale Function using `tf.train.Optimizer`¶

This notebook is implemented with TensorFlow, NOT NumPy, so that its results can be compared against those of my own optimizers, which are written using only NumPy.
I've checked that my code is OK. ^o^
This notebook, named optimizer.tf.version.plot.ipynb, is written to compare the optimization paths of the various optimizers at a glance.

In [1]:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LogNorm
from matplotlib import animation
from IPython.display import HTML

from itertools import zip_longest

import tensorflow as tf
tf.enable_eager_execution()

Define the Beale function¶

In [2]:

def f(x, y):
  """Evaluate the Beale function at (x, y).

  The function is the sum of three squared residuals and uses only
  elementwise arithmetic, so scalars and array-like operands that
  support `-`, `+`, `*` and `**` are handled the same way.
  """
  residual_1 = 1.5 - x + x*y
  residual_2 = 2.25 - x + x*y**2
  residual_3 = 2.625 - x + x*y**3
  return residual_1**2 + residual_2**2 + residual_3**2

In [3]:

# Point where the Beale function reaches its minimum value of 0: (x, y) = (3, 0.5)
minima = np.array([3., .5])
# Same point viewed as a 2x1 column vector
minima_ = minima[:, np.newaxis]

print("minima (1x2 row vector shape): {}".format(minima))
print("minima (2x1 column vector shape):")
print(minima_)

minima (1x2 row vector shape): [3.  0.5]
minima (2x1 column vector shape):
[[3. ]
 [0.5]]

Create various optimizers¶

In [4]:

# One instance of each optimizer under comparison, each with its own learning rate
sgd = tf.train.GradientDescentOptimizer(0.01)
momentum = tf.train.MomentumOptimizer(0.005, momentum=0.9, use_nesterov=False)
nesterov = tf.train.MomentumOptimizer(0.005, momentum=0.9, use_nesterov=True)
adagrad = tf.train.AdagradOptimizer(0.1)
rmsprop = tf.train.RMSPropOptimizer(learning_rate=0.1, decay=0.9, epsilon=1e-10)
adam = tf.train.AdamOptimizer(learning_rate=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8)

# Keep each name next to its optimizer, then split into the two parallel
# lists that the training and plotting cells iterate over.
named_optimizers = [
    ('sgd', sgd),
    ('momentum', momentum),
    ('nesterov', nesterov),
    ('adagrad', adagrad),
    ('rmsprop', rmsprop),
    ('adam', adam),
]
optimizers = [optimizer for _, optimizer in named_optimizers]
opt_names = [label for label, _ in named_optimizers]

Training¶

In [5]:

# Every optimizer starts from the same point and gets the same step budget,
# so the recorded trajectories are directly comparable.
x_init = 0.8
y_init = 1.6
max_steps = 100
tolerance = 1e-6  # stop once z changes by less than this between two steps

all_pathes = []
for opt, name in zip(optimizers, opt_names):
  # Fresh variables for each optimizer, reset to the common starting point
  x = tf.get_variable('x', dtype=tf.float32, initializer=tf.constant(x_init))
  y = tf.get_variable('y', dtype=tf.float32, initializer=tf.constant(y_init))

  x_history = []
  y_history = []
  # Start at infinity so the very first step can never look like convergence
  z_prev = np.inf
  for step in range(max_steps):
    with tf.GradientTape() as g:
      z = f(x, y)
    # Record the position *before* this step's update is applied
    x_history.append(x.numpy())
    y_history.append(y.numpy())
    dz_dx, dz_dy = g.gradient(z, [x, y])
    opt.apply_gradients(zip([dz_dx, dz_dy], [x, y]))

    z_value = z.numpy()
    if np.abs(z_prev - z_value) < tolerance:
      break
    z_prev = z_value

  # Also record the position reached by the last update, so the stored path
  # ends at the same (x, y) that is reported below.
  x_history.append(x.numpy())
  y_history.append(y.numpy())

  print("{} training done!".format(name))
  print("  steps: {}  z: {:.6f}  x: {:.5f}  y: {:.5f}".format(step+1, f(x, y).numpy(), x.numpy(), y.numpy()))

  # Path layout: row 0 holds the x values, row 1 the y values -> shape (2, n_points)
  path = np.array([x_history, y_history])
  all_pathes.append(path)

sgd training done!
  steps: 100  z: 0.098490  x: 2.44081  y: 0.32173
momentum training done!
  steps: 100  z: 0.008520  x: 3.25669  y: 0.55341
nesterov training done!
  steps: 100  z: 0.004242  x: 3.17920  y: 0.54142
adagrad training done!
  steps: 100  z: 10.390488  x: 0.59005  y: 0.68061
rmsprop training done!
  steps: 100  z: 0.015740  x: 2.79879  y: 0.46634
adam training done!
  steps: 100  z: 0.096163  x: 2.44514  y: 0.33627

Static contour plot with optimization path¶

In [6]:

# Build the square (x, y) grid, centred on the origin, on which the Beale
# function is evaluated for the contour plots below.
number_of_points = 50
margin = 4.5
x_min = 0. - margin
x_max = 0. + margin
y_min = 0. - margin
y_max = 0. + margin
x_points = np.linspace(x_min, x_max, number_of_points)
y_points = np.linspace(y_min, y_max, number_of_points)
x_mesh, y_mesh = np.meshgrid(x_points, y_points)
# f uses only elementwise arithmetic, so it can be evaluated on the whole
# grid in one vectorized call instead of looping over rows in Python.
z = f(x_mesh, y_mesh)

In [7]:

fig, ax = plt.subplots(figsize=(10, 8))

# Log-spaced contour levels of the Beale function, with the minimum marked by a red star
ax.contour(x_mesh, y_mesh, z, levels=np.logspace(-0.5, 5, 25), norm=LogNorm(), cmap=plt.cm.jet)
ax.plot(*minima, 'r*', markersize=20)

# One colour per optimizer; the animation cell reuses this list
colors = ['red', 'green', 'blue', 'orange', 'purple', 'magenta']
for name, path, color in zip(opt_names, all_pathes, colors):
  # Draw each step as an arrow from one recorded point to the next
  ax.quiver(path[0,:-1], path[1,:-1], path[0,1:]-path[0,:-1], path[1,1:]-path[1,:-1],
            scale_units='xy', angles='xy', scale=1, color=color, lw=3)
  # quiver does not produce a line legend entry, so add an empty line that carries the label
  ax.plot([], [], color=color, label=name, lw=3)

ax.set_xlabel('x')
ax.set_ylabel('y')

ax.set_xlim((x_min, x_max))
ax.set_ylim((y_min, y_max))

ax.legend(loc='upper left', prop={'size': 15})

# `quality` is a JPEG-only option: it has no effect on PNG output and newer
# matplotlib releases no longer accept it, so it is not passed here.
fig.savefig('allplot.png', dpi=150)
plt.show()

Animating contour plot with optimization path¶

In [8]:

class TrajectoryAnimation(animation.FuncAnimation):
  """Animate several 2D trajectories on one axes.

  Each positional argument is a path: a 2-row array whose first row holds
  the x coordinates and whose second row holds the y coordinates, one
  column per recorded point. Every path is drawn as a growing line with a
  marker at its most recent point.

  Args:
    *paths: the trajectories to animate.
    labels: optional legend labels, matched to the paths by position.
    colors: optional line colours, matched to the paths by position.
    fig: figure to animate; derived from `ax`, or newly created, if omitted.
    ax: axes to draw on; derived from `fig`, or newly created, if omitted.
    frames: number of frames; defaults to the length of the longest path.
    interval, repeat_delay, blit, **kwargs: forwarded to `FuncAnimation`.
  """

  def __init__(self, *paths, labels=None, colors=None, fig=None, ax=None, frames=None,
               interval=60, repeat_delay=5, blit=True, **kwargs):
    # Use None rather than mutable list defaults; treat None as "nothing given"
    labels = [] if labels is None else labels
    colors = [] if colors is None else colors

    if fig is None:
      if ax is None:
        fig, ax = plt.subplots()
      else:
        fig = ax.get_figure()
    elif ax is None:
      ax = fig.gca()

    self.fig = fig
    self.ax = ax

    self.paths = paths

    if frames is None:
      frames = max(path.shape[1] for path in paths)

    # zip_longest pads missing labels/colours with None, which matplotlib
    # treats as "no label" / "next colour in the cycle".
    self.lines = [ax.plot([], [], label=label, color=color, lw=3)[0]
                  for _, label, color in zip_longest(paths, labels, colors)]
    # Each marker takes the colour of the line it follows
    self.points = [ax.plot([], [], 'o', color=line.get_color())[0]
                   for line in self.lines]

    super(TrajectoryAnimation, self).__init__(fig, self.animate, init_func=self.init_anim,
                                              frames=frames, interval=interval, blit=blit,
                                              repeat_delay=repeat_delay, **kwargs)

  def init_anim(self):
    """Clear every line and marker; returns the artists to redraw."""
    for line, point in zip(self.lines, self.points):
      line.set_data([], [])
      point.set_data([], [])
    return self.lines + self.points

  def animate(self, i):
    """Draw frame `i`: each path up to and including its point number `i`.

    The index is clamped to the length of each path, so the last frame shows
    the final point and the marker of a shorter path stays on its last point
    instead of disappearing.
    """
    for line, point, path in zip(self.lines, self.points, self.paths):
      end = min(i + 1, path.shape[1])
      line.set_data(*path[:, :end])
      point.set_data(*path[:, end-1:end])
    return self.lines + self.points

In [9]:

fig, ax = plt.subplots(figsize=(10, 8))

# Background: log-spaced contours of the Beale function plus a red star at the minimum
contour_levels = np.logspace(-0.5, 5, 25)
ax.contour(x_mesh, y_mesh, z, levels=contour_levels, norm=LogNorm(), cmap=plt.cm.jet)
ax.plot(*minima, 'r*', markersize=20)

# Axis labels and the same viewing window as the grid
ax.set(xlabel='x', ylabel='y', xlim=(x_min, x_max), ylim=(y_min, y_max))

# Attach one animated line and marker per optimizer path to this axes
path_animation = TrajectoryAnimation(*all_pathes, labels=opt_names, colors=colors, ax=ax)

ax.legend(loc='upper left', prop={'size': 15})

Out[9]:

<matplotlib.legend.Legend at 0x106e34208>

In [10]:

# Encode the animation as an HTML5 <video> snippet and display it inline
video_markup = path_animation.to_html5_video()
HTML(video_markup)

Out[10]:

Save to `mp4` file¶

In [11]:

# Write the animation to disk as an mp4 file
mp4_path = 'allplot.animation.mp4'
path_animation.save(mp4_path)
# To convert the mp4 to a gif afterwards:
# !ffmpeg -i allplot.animation.mp4 -s 1500x1200 allplot.gif

Optimize the Beale Function using tf.train.Optimizer¶

Define the Beale function¶

Create various optimizers¶

Training¶

Static contour plot with optimization path¶

Animating contour plot with optimization path¶

Save to mp4 file¶

Optimize the Beale Function using `tf.train.Optimizer`¶

Save to `mp4` file¶