#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Authors: Joseph Salmon
#          Mathurin Massias
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.utils import check_random_state

# dirty local imports:
import sys
sys.path.append("./../../../")
from share_code.utils import (my_saving_display, make_huber, make_abs,
                              make_median, make_bisquare)

sns.set_palette("colorblind")

get_ipython().run_line_magic('matplotlib', 'notebook')


# In[ ]:


dirname = "../prebuiltimages/"
imageformat = ".pdf"  # should be .pdf or .png

# some colors I'll use
brown = (0.64, 0.16, 0.16)
purple = (148. / 255, 0, 211. / 255)
plt.close("all")

saving = False  # True


# In[ ]:


color_blind_list = sns.color_palette("GnBu_d", 8)
c1 = color_blind_list[0]
c2 = color_blind_list[1]
c3 = color_blind_list[2]


# In[ ]:


def gradient_descent(func, func_prime, steps, theta_init, n_iter=10):
    """Run gradient descent and record the iterates and objective values."""
    if not isinstance(steps, np.ndarray):
        steps = np.repeat(steps, n_iter)
    theta_init = np.atleast_1d(theta_init).astype(np.float64)
    all_thetas = np.zeros([n_iter, theta_init.shape[0]])
    all_objs = np.zeros([n_iter])
    theta = theta_init.copy()
    for it in range(n_iter):
        theta -= steps[it] * func_prime(theta)
        all_thetas[it] = theta
        all_objs[it] = func(theta)
    return all_thetas, all_objs


def weiszfeld(X, theta_init=3., n_iter=10):
    """Compute the geometric median of the points in X with Weiszfeld's algorithm.

    X : array, shape (n_samples, n_features)
        Array of points of which the median must be computed.
    """
    if X.ndim == 1:
        # hopefully it's not a single data point being passed
        X = X[:, None]
    theta_init = np.atleast_1d(theta_init).astype(np.float64)
    if theta_init.shape[0] != X.shape[1]:
        raise ValueError("theta_init has shape %s, expected %s" %
                         (theta_init.shape[0], X.shape[1]))
    theta = theta_init.copy()
    all_thetas = np.zeros([n_iter, theta_init.shape[0]])
    all_thetas[0] = theta_init
    all_objs = np.zeros([n_iter])
    all_objs[0] = np.linalg.norm(X - theta[None, :], axis=1).sum()
    for it in range(1, n_iter):
        w = 1. / (np.linalg.norm(X - theta[None, :], axis=1) + 1e-16)
        theta = np.sum(X * w[:, None], axis=0) / np.sum(w)
        all_thetas[it] = theta
        all_objs[it] = np.linalg.norm(X - theta[None, :], axis=1).sum()
    return all_thetas, all_objs


def mm_visu(func, func_prime, theta, x, step):
    """Quadratic majorizer of func at theta, evaluated on the grid x."""
    nabla = func_prime(theta)
    z = 1. / (2. * step) * (theta - step * nabla - x) ** 2 + \
        func(theta) - 1. / 2. * step * nabla ** 2
    return z


def min_gd(func, func_prime, theta, x, step):
    """Minimum value of the quadratic majorizer of func at theta."""
    nabla = func_prime(theta)
    z = func(theta) - 1. / 2. * step * nabla ** 2
    return z
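# In[ ]:


# Quick sanity check of the `gradient_descent` helper (this cell is not in the
# original notebook; the names `quad`, `quad_prime`, `thetas_check` and
# `objs_check` are ours). On f(theta) = theta^2 / 2 with a constant step of 1,
# a single gradient step lands exactly on the minimizer 0.

def quad(theta):
    return 0.5 * np.sum(theta ** 2)


def quad_prime(theta):
    return theta


thetas_check, objs_check = gradient_descent(quad, quad_prime, steps=1.,
                                            theta_init=3., n_iter=5)
print(thetas_check.ravel())  # expected: zeros from the first iterate on
print(objs_check)            # expected: zeros as well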
# In[ ]:


def my_plot(func, func_prime, x, theta_init, n_iter, step, saving):
    """Plot and save the gradient descent iterates for the absolute value function."""
    iterates_list, _ = gradient_descent(
        func, func_prime, step, theta_init=theta_init, n_iter=n_iter)
    ylims = [-2, 5]
    xlims = [-5, 5]
    mm = False
    name = r'$|\cdot|$'
    figname = "abs_step{:.1f}".format(step).replace(".", "pt")

    fig1, ax = plt.subplots(figsize=(7, 7))
    ax.set_xlim(*xlims)
    ax.set_ylim(*ylims)
    ax.plot(x, func(x), c=c3, lw=1.7, label=name)
    ax.set_title(r"Gradient descent for $|\cdot|$ function; stepsize: $\alpha={:.1f}$".format(step))
    my_saving_display(fig1, dirname, figname, imageformat, saving)
    plt.legend(loc='lower left')

    myarray = np.asarray(iterates_list[:0 + 1])
    ax.plot(myarray, func(myarray), 'o')
    my_saving_display(fig1, dirname, figname + "_with_init", imageformat,
                      saving)
    plt.legend(loc='lower left')

    for index_to_plot in range(n_iter - 1):
        fig1, ax = plt.subplots(figsize=(7, 7))
        ax.set_title(r"Gradient descent for $|\cdot|$ function; stepsize: $\alpha={:.1f}$".format(step))
        ax.set_xlim(*xlims)
        ax.set_ylim(*ylims)
        if mm:
            z = mm_visu(func, func_prime, iterates_list[index_to_plot], x,
                        step=step)
            ax.plot(x, z, c='purple', lw=1.1,
                    label=r"$g(\theta^{:1}, \cdot)$".format(index_to_plot))
            minima = min_gd(func, func_prime, iterates_list[index_to_plot], x,
                            step=step)
            ax.plot(iterates_list[index_to_plot + 1], minima, 'o', c='purple')
        ax.plot(x, func(x), c=c3, lw=1.7, label=name)
        myarray = np.asarray(iterates_list[:index_to_plot + 1])
        ax.plot(myarray, func(myarray), 'o')
        plt.legend(loc='lower left')
        my_saving_display(fig1, dirname, figname + str(index_to_plot),
                          imageformat, saving)


# # Geometric Median case

# In[ ]:


saving = True  # True

x = np.linspace(-5, 5, 501)
theta_init = 3
n_iter = 5
step = 1.5

func, func_prime = make_abs()
my_plot(func, func_prime, x, theta_init, n_iter, step, saving)


# In[ ]:


rng = check_random_state(24)
n_samples, n_features = 21, 2
X = rng.normal(0, 1, size=(n_samples, n_features))
n_iter = 50
theta_init = np.mean(X, axis=0)


# In[ ]:


if X.shape[1] == 2:
    plt.scatter(X[:, 0], X[:, 1])


# In[ ]:


from scipy.optimize import minimize


def cost_median(theta, X):
    """Sum of Euclidean distances from theta to the rows of X."""
    return np.linalg.norm(X - theta[None, :], axis=1).sum()


# numerical minimization of the cost function, starting with the mean of the dataset:
opt_result = minimize(lambda theta: cost_median(theta, X), x0=theta_init,
                      method='BFGS')
true_median = opt_result["x"]
cost_min = cost_median(true_median, X)
print("scipy's solution: ", true_median)
print("scipy's objective: ", cost_min)


# In[ ]:


weiszfeld_iterates, weiszfeld_objs = weiszfeld(X, theta_init=theta_init,
                                               n_iter=n_iter)
print("our solution: ", weiszfeld_iterates[-1])
print("last objective: ", weiszfeld_objs[-1])


# In[ ]:


n_iter = 100
sqrt_decay_step = 0.5 / np.sqrt(np.arange(n_iter) + 1)
func, func_prime = make_median(X)

_, list_gd_val10pt0_decay = gradient_descent(
    func, func_prime, 10. / X.shape[0] * sqrt_decay_step,
    theta_init=theta_init, n_iter=n_iter)
_, list_gd_val0pt01 = gradient_descent(
    func, func_prime, 0.5 / X.shape[0], theta_init=theta_init, n_iter=n_iter)
list_gd_val10pt0_decay[-10:]


# In[ ]:


fig1 = plt.figure(figsize=(7, 7))
plt.semilogy(list_gd_val10pt0_decay - cost_min,
             label=r"GD $\alpha=\frac{1}{\sqrt{t}}$")
plt.semilogy(list_gd_val0pt01 - cost_min, label=r"GD $\alpha=cst$")
plt.semilogy(weiszfeld_objs - cost_min, label="Weiszfeld")
plt.xlabel("Number of epochs")
plt.ylabel(r"$f(\theta) - f(\theta^{\star})$")
plt.legend()
figname = "WeiszfeldVsGD"
my_saving_display(fig1, dirname, figname, imageformat, saving)
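# In[ ]:


# Optional check (this cell is not in the original notebook): a single Weiszfeld
# update is the weighted mean of the samples, with weights 1 / ||x_i - theta||.
# The names `w_check` and `theta_one_step` are ours; X and theta_init are reused
# from above.

w_check = 1. / (np.linalg.norm(X - theta_init[None, :], axis=1) + 1e-16)
theta_one_step = np.sum(X * w_check[:, None], axis=0) / np.sum(w_check)
print("manual one-step update:    ", theta_one_step)
print("weiszfeld's second iterate:",
      weiszfeld(X, theta_init=theta_init, n_iter=2)[0][1])  # should match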
# # Biweight case

# In[ ]:


def cost_bisquare(theta, X, threshold):
    """Tukey's biweight (bisquare) loss, summed over the rows of X."""
    z = 1 - (1 - (np.linalg.norm(X - theta[None, :], axis=1) / threshold) ** 2) ** 3
    z = np.minimum(1, z)
    return np.sum(z)


# In[ ]:


rng = check_random_state(24)
n_samples, n_features = 10, 1
X = rng.normal(0, 1, size=(n_samples, n_features))
n_iter = 100
threshold = 0.505


# In[ ]:


# theta_init = np.mean(X, axis=0)
theta_init = -0.2 * np.ones(X.shape[1])
print("Initial solution: ", theta_init)
print("Initial objective:", cost_bisquare(theta_init, X, threshold))


# In[ ]:


# numerical minimization of the cost function, starting from theta_init:
opt_result = minimize(lambda theta: cost_bisquare(theta, X, threshold),
                      x0=theta_init, method='BFGS')
scipy_sol = opt_result["x"]
cost_min = cost_bisquare(scipy_sol, X, threshold)
print("scipy's solution: ", scipy_sol)
print("scipy's objective: ", cost_min)


# In[ ]:


func, func_prime, func_weight = make_bisquare(X, threshold)

# In 1D the Lipschitz constant of the gradient is 6, since $\rho''(x) \leq 6$
list_gd_iterate, list_gd_val = gradient_descent(
    func, func_prime, 1 / (6 * X.shape[0]), theta_init=theta_init,
    n_iter=n_iter)
print("GD's solution: ", list_gd_iterate[-1])
print("GD's objective: ", list_gd_val[-1])


# In[ ]:


def mm_bisquare(X, func_weight, theta_init, n_iter=10):
    """Compute the M-estimator (biweight) of the points in X with an MM algorithm.

    X : array, shape (n_samples, n_features)
        Array of points of which the M-estimator must be computed.
    """
    if X.ndim == 1:
        # hopefully it's not a single data point being passed
        X = X[:, None]
    theta_init = np.atleast_1d(theta_init).astype(np.float64)
    if theta_init.shape[0] != X.shape[1]:
        raise ValueError("theta_init has shape %s, expected %s" %
                         (theta_init.shape[0], X.shape[1]))
    theta = theta_init.copy()
    all_thetas = np.zeros([n_iter, theta_init.shape[0]])
    all_thetas[0] = theta_init
    all_objs = np.zeros([n_iter])
    all_objs[0] = cost_bisquare(theta, X, threshold)
    for it in range(1, n_iter):
        w = func_weight(theta)
        if np.linalg.norm(w) > 1e-12:
            theta = np.sum(X * w[:, None], axis=0) / np.sum(w)
        else:
            theta = np.zeros(theta_init.shape)
        all_thetas[it] = theta
        all_objs[it] = cost_bisquare(theta, X, threshold)
    return all_thetas, all_objs


# In[ ]:


mm_iterates, mm_objs = mm_bisquare(X, func_weight, theta_init=theta_init,
                                   n_iter=n_iter)
print("our solution: ", mm_iterates[-1])
print("last objective: ", mm_objs[-1])


# In[ ]:


fig1 = plt.figure(figsize=(7, 7))
plt.semilogy(list_gd_val - cost_min, label=r"GD $\alpha=cst$")
plt.semilogy(mm_objs - cost_min, label="MM")
plt.xlabel("Number of epochs")
plt.ylabel(r"$f(\theta) - f(\theta^{\star})$")
plt.legend()
figname = "MMalgVsGD"
my_saving_display(fig1, dirname, figname, imageformat, saving)


# In[ ]:


cost_x = np.zeros(x.shape)
if n_features == 1:
    for i, valx in enumerate(x):
        theta1 = np.array([valx])
        cost_x[i] = cost_bisquare(theta1, X, threshold)

fig1 = plt.figure()
plt.title("Objective function")
plt.plot(x, cost_x, lw=1.1)
plt.scatter(theta_init, cost_bisquare(theta_init, X, threshold), s=21,
            label='init')
plt.scatter(mm_iterates[-1], cost_bisquare(mm_iterates[-1], X, threshold),
            s=41, label='MM', marker='d')
plt.scatter(scipy_sol, cost_bisquare(scipy_sol, X, threshold), s=21,
            label='BFGS', marker='x')
plt.scatter(list_gd_iterate[-1],
            cost_bisquare(list_gd_iterate[-1], X, threshold), s=41,
            label='GD', marker='+')
plt.legend()
figname = "Bisquare1DLandscape"
my_saving_display(fig1, dirname, figname, imageformat, saving)
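# In[ ]:


# Optional check (this cell is not in the original notebook): an MM scheme should
# yield a non-increasing sequence of objective values; `mm_objs` is reused from
# above, and the tolerance only guards against rounding noise.

print("MM objective non-increasing:", np.all(np.diff(mm_objs) <= 1e-12))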
# In[ ]:


plt.figure()
plt.title("Concavity of $f$ for the bi-square case")
y = 1 - (1 - np.abs(x / threshold)) ** 3
y[np.abs(x) > threshold] = 1
plt.plot(x, y)


# In[ ]:


# TODO: movie for the evolution of the objective function w.r.t. the threshold
# parameter (to show the interest of homotopy methods)


# In[ ]:
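# In[ ]:


# A possible static starting point for the TODO above (this cell is not in the
# original notebook): overlay the 1D objective landscape for a few threshold
# values instead of a movie. The names `thresholds_demo` and `cost_grid` are
# ours; x, X, n_features and cost_bisquare are reused from above.

if n_features == 1:
    fig1 = plt.figure()
    plt.title("Objective landscape for several thresholds")
    thresholds_demo = [0.25, 0.505, 1., 2.]
    for thr in thresholds_demo:
        cost_grid = np.array([cost_bisquare(np.array([valx]), X, thr)
                              for valx in x])
        plt.plot(x, cost_grid, lw=1.1, label="threshold $= {:.3g}$".format(thr))
    plt.legend()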