# Author: Joseph Salmon <joseph.salmon@telecom-paristech.fr>
# Mathurin Massias <mathurin.massias@gmail.com>
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils import check_random_state
# dirty local imports:
import sys
sys.path.append("./../../../")
from share_code.utils import my_saving_display, make_huber, make_abs, make_median, make_bisquare
sns.set_palette("colorblind")
%matplotlib notebook
dirname = "../prebuiltimages/"
imageformat = ".pdf" # should be .pdf or .png
# some colors I'll use
brown = (0.64, 0.16, 0.16)
purple = (148. / 255, 0, 211. / 255)
plt.close("all")
saving = False # True
color_blind_list = sns.color_palette("GnBu_d", 8)
c1 = color_blind_list[0]
c2 = color_blind_list[1]
c3 = color_blind_list[2]
def gradient_descent(func, func_prime, steps, theta_init, n_iter=10):
if not isinstance(steps, np.ndarray):
steps = np.repeat(steps, n_iter)
theta_init = np.atleast_1d(theta_init).astype(np.float64)
all_thetas = np.zeros([n_iter, theta_init.shape[0]])
all_objs = np.zeros([n_iter])
theta = theta_init.copy()
for it in range(n_iter):
theta -= steps[it] * func_prime(theta)
all_thetas[it] = theta
all_objs[it] = func(theta)
return all_thetas, all_objs
def weiszfeld(X, theta_init=3., n_iter=10):
"""
Compute the median of points in X with Weiszfeld algorithm.
X : array, shape (n_samples, n_features)
Array of points of which the median must be computed
"""
if X.ndim == 1: # hopefully it's not a single data point being passed
X = X[:, None]
theta_init = np.atleast_1d(theta_init).astype(np.float64)
if theta_init.shape[0] != X.shape[1]:
raise ValueError("theta_init has shape %s, expected %s" %
(theta_init.shape[0], X.shape[1]))
theta = theta_init.copy()
all_thetas = np.zeros([n_iter, theta_init.shape[0]])
all_thetas[0] = theta_init
all_objs = np.zeros([n_iter])
all_objs[0] = np.linalg.norm(X - theta[None, :], axis=1).sum()
for it in range(1, n_iter):
w = 1. / (np.linalg.norm(X - theta[None, :], axis=1) + 1e-16)
theta = np.sum(X * w[:, None], axis=0) / np.sum(w)
all_thetas[it] = theta
all_objs[it] = np.linalg.norm(X - theta[None, :], axis=1).sum()
return all_thetas, all_objs
def mm_visu(func, func_prime, theta, x, step):
nabla = func_prime(theta)
z = 1. / (2. * step) * (theta - step * nabla - x) ** 2 + \
func(theta) - 1. / 2. * step * nabla ** 2
return z
def min_gd(func, func_prime, theta, x, step):
nabla = func_prime(theta)
z = func(theta) - 1. / 2. * step * nabla ** 2
return z
def my_plot(func, func_prime, x, theta_init, n_iter, step, saving):
iterates_list,_ = gradient_descent(
func, func_prime, step, theta_init=theta_init, n_iter=n_iter)
ylims = [-2, 5]
xlims = [-5, 5]
mm = False,
name = r'$|\cdot|$,'
figname = "abs_step{:.1f}".format(step).replace(".", "pt")
fig1, ax = plt.subplots(figsize=(7, 7))
ax.set_xlim(*xlims)
ax.set_ylim(*ylims)
ax.plot(x, func(x), c=c3, lw=1.7, label=name)
ax.set_title(r"Gradient descent for $|\cdot|$ function; stepsize: $\alpha={:.1f}$".format(step))
my_saving_display(fig1, dirname, figname, imageformat, saving)
plt.legend(loc='lower left')
myarray = np.asarray(iterates_list[:0 + 1])
ax.plot(myarray, func(myarray), 'o')
my_saving_display(fig1, dirname, figname + "_with_init", imageformat, saving)
plt.legend(loc='lower left')
for index_to_plot in range(n_iter - 1):
fig1, ax = plt.subplots(figsize=(7, 7))
ax.set_title(r"Gradient descent for $|\cdot|$ function; stepsize: $\alpha={:.1f}$".format(step))
ax.set_xlim(*xlims)
ax.set_ylim(*ylims)
if mm:
z = mm_visu(func, func_prime,
iterates_list[index_to_plot], x, step=step)
ax.plot(x, z, c='purple', lw=1.1,
label=r"$g(\theta^{:1}, \cdot)$".format(index_to_plot))
minima = min_gd(func, func_prime,
iterates_list[index_to_plot], x, step=step)
ax.plot(iterates_list[index_to_plot + 1], minima, 'o', c='purple')
ax.plot(x, func(x), c=c3, lw=1.7, label=name)
myarray = np.asarray(iterates_list[:index_to_plot + 1])
ax.plot(myarray, func(myarray), 'o')
plt.legend(loc='lower left')
my_saving_display(fig1, dirname, figname + str(index_to_plot), imageformat, saving)
saving = True # True
x = np.linspace(-5, 5, 501)
theta_init = 3
n_iter = 5
step = 1.5
func, func_prime = make_abs()
my_plot(func, func_prime, x, theta_init, n_iter, step, saving)
rng = check_random_state(24)
n_samples, n_features = 21, 2
X = rng.normal(0, 1, size=(n_samples, n_features))
n_iter = 50
theta_init = np.mean(X, axis=0)
if X.shape[1] == 2:
plt.scatter(X[:, 0], X[:, 1])
from scipy.optimize import minimize
def cost_median(theta, X):
return np.linalg.norm(X - theta[None, :], axis=1).sum()
# numerical minimization of the cost function, starting with the mean of the dataset:
opt_result = minimize(lambda theta: cost_median(theta, X), x0=theta_init, method='BFGS')
true_median = opt_result["x"]
cost_min = cost_median(true_median, X)
print("scipy's solution: ", true_median)
print("scipy's ojective: ", cost_min)
weiszfeld_iterates, weiszfeld_objs = weiszfeld(X, theta_init=theta_init, n_iter=n_iter)
print("our solution: ", weiszfeld_iterates[-1])
print("last objective: ", weiszfeld_objs[-1])
n_iter = 100
sqrt_decay_step = 0.5 / np.sqrt(np.arange(n_iter) + 1)
func, func_prime = make_median(X)
_, list_gd_val10pt0_decay = gradient_descent(
func, func_prime, 10./ X.shape[0] * sqrt_decay_step,
theta_init=theta_init, n_iter=n_iter)
_, list_gd_val0pt01 = gradient_descent(
func, func_prime, 0.5 / X.shape[0],
theta_init=theta_init, n_iter=n_iter)
list_gd_val10pt0_decay[-10:]
fig1=plt.figure(figsize=(7,7))
plt.semilogy(list_gd_val10pt0_decay - cost_min,
label=r"GD $\alpha=\frac{1}{\sqrt{t}}$")
plt.semilogy(list_gd_val0pt01 - cost_min,
label=r"GD $\alpha=cst$")
plt.semilogy(weiszfeld_objs - cost_min, label="Weiszfeld")
plt.xlabel("Number of epochs")
plt.ylabel(r"$f(\theta) -f(\theta^{\star})$")
plt.legend()
figname = "WeiszfeldVsGD"
my_saving_display(fig1, dirname, figname, imageformat, saving)
def cost_bisquare(theta, X, threshold):
z = 1 - (1 - (np.linalg.norm(X - theta[None, :],axis=1) / threshold)**2)**3
z = np.minimum(1, z)
return np.sum(z)
rng = check_random_state(24)
n_samples, n_features = 10, 1
X = rng.normal(0, 1, size=(n_samples, n_features))
n_iter = 100
threshold = 0.505
# theta_init = np.mean(X, axis=0)
theta_init = -0.2*np.ones(X.shape[1])
print("Initial solution: ", theta_init)
print("Initial objective:", cost_bisquare(theta_init, X, threshold))
# numerical minimization of the cost function, starting with the mean of the dataset:
opt_result = minimize(lambda theta: cost_bisquare(theta, X,threshold), x0=theta_init, method='BFGS')
scipy_sol = opt_result["x"]
cost_min = cost_bisquare(scipy_sol, X, threshold)
print("scipy's solution: ", scipy_sol)
print("scipy's ojective: ", cost_min)
func, func_prime, func_weight = make_bisquare(X, threshold)
# In 1D the lipschitz constant is 6 as the $\rho''(x)
list_gd_iterate, list_gd_val = gradient_descent(
func, func_prime, 1 / (6 * X.shape[0]),
theta_init=theta_init, n_iter=n_iter)
print("GD's solution: ", list_gd_iterate[-1])
print("GD's objective: ", list_gd_val[-1])
def mm_bisquare(X, func_weight, theta_init, n_iter=10):
"""
Compute the M-estimator (biweights) of points in X with MM algorithm.
X : array, shape (n_samples, n_features)
Array of points of which the median must be computed
"""
if X.ndim == 1: # hopefully it's not a single data point being passed
X = X[:, None]
theta_init = np.atleast_1d(theta_init).astype(np.float64)
if theta_init.shape[0] != X.shape[1]:
raise ValueError("theta_init has shape %s, expected %s" %
(theta_init.shape[0], X.shape[1]))
theta = theta_init.copy()
all_thetas = np.zeros([n_iter, theta_init.shape[0]])
all_thetas[0] = theta_init
all_objs = np.zeros([n_iter])
all_objs[0] = cost_bisquare(theta, X, threshold)
for it in range(1, n_iter):
w = func_weight(theta)
if np.linalg.norm(w)>1e-12:
theta = np.sum(X * w[:, None], axis=0) / np.sum(w)
else:
theta = np.zeros(theta_init.shape)
all_thetas[it] = theta
all_objs[it] = cost_bisquare(theta, X, threshold)
return all_thetas, all_objs
mm_iterates, mm_objs = mm_bisquare(X, func_weight, theta_init=theta_init, n_iter=n_iter)
print("our solution: ", mm_iterates[-1])
print("last objective: ", mm_objs[-1])
fig1=plt.figure(figsize=(7,7))
plt.semilogy(list_gd_val - cost_min,
label=r"GD $\alpha=cst$")
plt.semilogy(mm_objs - cost_min, label="MM")
plt.xlabel("Number of epochs")
plt.ylabel(r"$f(\theta) -f(\theta^{\star})$")
plt.legend()
figname = "MMalgVsGD"
my_saving_display(fig1, dirname, figname, imageformat, saving)
cost_x = np.zeros(x.shape)
if n_features == 1:
for i, valx in enumerate(x):
theta1 = np.array([valx])
cost_x[i] = cost_bisquare(theta1, X, threshold)
fig1 = plt.figure()
plt.title("Objective function")
plt.plot(x, cost_x, lw=1.1)
plt.scatter(theta_init, cost_bisquare(
theta_init, X, threshold), s=21, label='init')
plt.scatter(mm_iterates[-1], cost_bisquare(
mm_iterates[-1], X, threshold), s=41, label='MM', marker='d')
plt.scatter(scipy_sol, cost_bisquare(
scipy_sol, X, threshold), s=21, label='BFGS', marker='x')
plt.scatter(list_gd_iterate[-1], cost_bisquare(
list_gd_iterate[-1], X, threshold), s=41, label='GD', marker='+')
plt.legend()
figname = "Bisquare1DLandscape"
my_saving_display(fig1, dirname, figname, imageformat, saving)
plt.figure()
plt.title("Concavity of $f$ for the bi-square case")
y = 1 - (1 - np.abs(x / threshold))**3
y[np.abs(x) > threshold] = 1
plt.plot(x, y)
# TODO movie for evolution of the objective function w.r.t. to threshold parameter
# (to show interest of homotopy methods)