#!/usr/bin/env python
# coding: utf-8

# In[27]:

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import random
from scipy import stats
from scipy.optimize import fmin
import mpld3

mpld3.enable_notebook()
figsize = (8, 5)
get_ipython().run_line_magic('matplotlib', 'inline')
#figsize = (12, 8)


# # Quantile estimation via stochastic gradient descent (Robbins-Monro)
#
# Let $(X_n)_n$ be a sequence of i.i.d. real-valued random variables distributed like $X$. Let $(\gamma_n)_n$ be a non-increasing sequence of positive numbers (the step sizes). Finally, let $H:\mathbb{R}^2\to\mathbb{R}$ be a function. We consider the Robbins-Monro algorithm
# \begin{equation*}
# x_{k+1}=x_k-\gamma_{k+1}H(x_k,X_{k+1}),
# \end{equation*}
# where $x_0\in\mathbb{R}$ is the starting point of the algorithm.
#
# Let $p\in(0,1)$. For the quantile estimation problem, we consider the function
# $$H(x,X)=I(X\leq x)-p.$$
# In this particular case, the RM algorithm reads
# \begin{equation*}
# x_{k+1}=x_k-\gamma_{k+1}\big(I(X_{k+1}\leq x_k)-p\big), \qquad x_0\in\mathbb{R}.
# \end{equation*}
# We denote by $q_p$ the quantile of order $p$ of $X$:
# \begin{equation*}
# q_p = \min\big\{x\in\mathbb{R}:F(x)\geq p\big\} \mbox{ where } F(x)=\mathbb{P}(X\leq x).
# \end{equation*}
# When $X$ has a cdf $F$ which is continuous and strictly increasing on some interval, $q_p$ is the unique solution of $F(x)=p$.
#
# The pseudo-code of the algorithm stopped at step $N$ is:
#
#     1:   Choose initial guess $x_0$
#     2:   for $k=0,1,\ldots,N-1$ do
#     3:       $s_k=I(X_{k+1}\leq x_k)-p$
#     4:       $x_{k+1} = x_k - \gamma_{k+1} s_k$
#     5:   end for
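# The pseudo-code translates almost line-for-line into Python. Below is a minimal sketch for a general order $p$ (the helper name rm_quantile is ours, not part of the code that follows), tested on the $0.9$-quantile of a standard normal, whose true value norm.ppf(0.9) $\approx 1.2816$ is known.

# In[ ]:

from scipy.stats import norm

def rm_quantile(sample, p, x0=0.0, power_step=1.0):
    # Robbins-Monro iterates for the p-quantile, step gamma_k = 1/k**power_step
    x = x0
    for k, X in enumerate(sample, start=1):
        gamma = 1. / k**power_step
        x = x - gamma * ((X <= x) - p)  # s_k = I(X_{k+1} <= x_k) - p
    return x

sample = np.random.randn(100000)
# a = 0.7 satisfies sum(gamma_k) = inf and sum(gamma_k**2) < inf;
# the plain 1/k step can be slow here because f(q_p) is small
print('RM estimate  :', rm_quantile(sample, 0.9, power_step=0.7))
print('true quantile:', norm.ppf(0.9))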
#
# Thanks to SGD, we can make progress right away and keep improving the estimate as we go through the data set. Stochastic gradient descent is therefore often preferred to (batch) gradient descent, or here to the empirical quantile, when dealing with large data sets.
#
# Unlike gradient descent, stochastic gradient descent tends to oscillate near a minimum rather than approach it monotonically, and it may never actually converge to the minimum. One way around this is to slowly decrease the step size $\gamma_k$, e.g. like $1/k$, as the algorithm runs.

# ### Simulating various densities

# From now on, we focus on median estimation (that is, quantile estimation for $p=1/2$). We consider three types of density: one whose cdf has a plateau on $[-1,1]$ centered at $0$, one whose density vanishes at the median, and one whose density equals $1$ at the median.

# In[28]:

# Samplers for the three densities described above. (The exact original
# definitions were lost in this export; the forms below are assumptions,
# chosen to match the descriptions and the results discussed below.)

def S1(n):
    # density 1/2 on [-2,-1] and [1,2]: the cdf has a plateau on [-1,1]
    signs = np.random.choice([-1, 1], size=n)
    return signs * np.random.uniform(1, 2, size=n)

def S2(n):
    # density |x| on [-1,1]: f(0)=0 at the median, but no plateau
    signs = np.random.choice([-1, 1], size=n)
    return signs * np.sqrt(np.random.uniform(0, 1, size=n))

def S3(n):
    # uniform density on [-0.5, 0.5]: f(0)=1 at the median
    return np.random.uniform(-0.5, 0.5, size=n)


# In[29]:

def RM(theta_0, horizon, data_nb, power_step=0.5):
    # Robbins-Monro iterates for the median (p = 1/2), step gamma_k = 1/k**power_step
    # (default power 1/2, consistent with the discussion of data set 2 below)
    samplers = {1: S1, 2: S2, 3: S3}
    data = samplers[data_nb](horizon)
    theta = theta_0
    path = []
    for i in range(horizon):
        gamma_k = 1./(i+1)**power_step
        if theta > data[i]:
            s_k = 0.5   # I(X <= x) - p = 1 - p when p = 1/2 (median)
        else:
            s_k = -0.5
        theta = theta - gamma_k*s_k
        #print(data[i], theta)
        path.append(theta)
    return path


# Now we plot the path towards the quantile, which here is the median ($=0$ for data_nb $=2$ or $3$, but the whole interval $[-1,1]$ for data_nb $=1$).

# ### Density $f_1$: cdf plateau on $[-1,1]$ around the median

# In[32]:

horizon = 10000
theta_0 = 2
path = RM(theta_0, horizon, 1)
print('x{} = {} -- median = plateau [-1,1]'.format(horizon, path[horizon-1]))
#print(len(iterations), len(path))
plt.plot(range(horizon), path)
plt.xlabel("iterations")
plt.ylabel("RM")
plt.show()


# Distribution of the RM iterate $x_n$ (1000 simulations of $x_{5000}$)

# In[33]:

list_theta_limit = list()
horizon = 5000
theta_0 = -2
for i in range(1000):
    path = RM(theta_0, horizon, 1)
    list_theta_limit.append(path[horizon-1])
plt.hist(list_theta_limit)
plt.show()


# For data set 1, it looks like the limit of the RM algorithm is either uniformly distributed over the plateau, or distributed with a law that depends on the starting point (whether theta_0 is $>0$ or $<0$); it is not clear yet which. Moreover, the limiting law really seems to depend on the step size, and in particular on how fast the series $\sum_k \gamma_k$ goes to $\infty$.

# ### Density $f_2$: no plateau at the median, but $f(q_{1/2})=0$

# For data set 2 (for which $f(q_p)=0$ but there is no plateau), the step size $\gamma_k=1/k$ does not lead to convergence of the algorithm; it does work for $\gamma_k=1/\sqrt{k}$.
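# A quick numerical check of this claim (a sketch reusing the RM helper above): run many trajectories on data set 2 with $a=1$ and $a=1/2$ and compare how far the final iterates sit from the median $0$.

# In[ ]:

final_slow, final_fast = [], []
for _ in range(200):
    final_slow.append(RM(2, 5000, 2, power_step=1)[-1])    # gamma_k = 1/k
    final_fast.append(RM(2, 5000, 2, power_step=0.5)[-1])  # gamma_k = 1/sqrt(k)
print('mean |x_n| with gamma_k = 1/k      :', np.mean(np.abs(final_slow)))
print('mean |x_n| with gamma_k = 1/sqrt(k):', np.mean(np.abs(final_fast)))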
# In[34]:

horizon = 10000
theta_0 = 2
path = RM(theta_0, horizon, 2)
print('x{} = {} -- median = 0'.format(horizon, path[horizon-1]))
#print(len(iterations), len(path))
plt.plot(range(horizon), path)
plt.xlabel("iterations")
plt.ylabel("RM")
plt.plot(range(horizon), np.zeros(horizon), color='r')
plt.show()


# Distribution of the RM iterate $x_n$ (1000 simulations of $x_{5000}$) -- when $f(q_p)=0$

# In[35]:

list_theta_limit = list()
horizon = 5000
theta_0 = -2
for i in range(1000):
    path = RM(theta_0, horizon, 2)
    list_theta_limit.append(path[horizon-1])
plt.hist(list_theta_limit)
plt.show()


# ### Density $f_3$: median $=0$ and $f(0)=1$

# In[36]:

horizon = 10000
theta_0 = 2
path = RM(theta_0, horizon, 3, power_step=1)
print('x{} = {} -- median = 0'.format(horizon, path[horizon-1]))
#print(len(iterations), len(path))
plt.plot(range(horizon), path)
plt.xlabel("iterations")
plt.ylabel("RM")
plt.plot(range(horizon), np.zeros(horizon), color='r')
plt.show()


# Distribution of the RM iterate $x_n$ (1000 simulations of $x_{5000}$): when $f(q_p)>1/2$,
# $$\sqrt{n}(x_n-q_p)\stackrel{d}{\rightarrow} \mathcal{N}(0,\sigma^2) \mbox{ where } \sigma^2=\frac{p(1-p)}{2f(q_p)-1}.$$
# Here $p=1/2$, $q_p=0$, $f(q_p)=1$, so $\sigma^2=1/4$ and
# $$x_n\approx \mathcal{N}\big(0,1/(4n)\big).$$

# In[37]:

from scipy.stats import norm

list_theta_limit = list()
horizon = 5000
theta_0 = -2
nb_paths = 1000
for i in range(nb_paths):
    path = RM(theta_0, horizon, 3, power_step=1)
    list_theta_limit.append(path[horizon-1])
#### Gaussian limit
Fx = 0
var = 1/4.
x = np.linspace(min(list_theta_limit), max(list_theta_limit), 1000)
y = norm.pdf(x, loc=Fx, scale=np.sqrt(var/float(horizon)))
#### Plot
f, ax = plt.subplots(1, 1, figsize=figsize)
ax.hist(list_theta_limit, density=True, bins=50)
ax.plot(x, y)
plt.show()


# ## Comparison of the estimators $x_n$ (RM) and the empirical quantile $\widehat q_{n,p}$ for density $f_3$

# ### Limit law of the empirical quantile
# $$\sqrt{n}(\widehat q_{n,p}-q_p)\stackrel{d}{\rightarrow}\mathcal{N}(0,\sigma^2)
# \mbox{ where } \sigma^2= \frac{p(1-p)}{f(q_p)^2}.$$
#
# Here $p=1/2$, $q_p=0$, $f(q_p)=1$ and $\sigma^2=1/4$.

# In[19]:

from scipy.stats import norm

# parameters
nb_paths, horizon = 1000, 500
print('horizon = {}, number of paths = {}'.format(horizon, nb_paths))
# data
Hatq = list()
for j in range(nb_paths):
    data0 = S3(horizon)
    Hatq.append(np.median(data0))
# Gaussian limit
Fx = 0
var = 1/4.
x = np.linspace(min(Hatq), max(Hatq), 1000)
y = norm.pdf(x, loc=Fx, scale=np.sqrt(var/float(horizon)))
# plot
f, ax = plt.subplots(1, 1, figsize=figsize)
ax.hist(Hatq, bins=50, density=True)
ax.plot(x, y, 'r')
#ax.set_title('x = {}, var = {}'.format(x, var))
plt.show()


# ### Comparing the speed of convergence of $\widehat q_{n,p}$ and $x_n$

# Here, for the median of $f_3$, both $\widehat q_{n,p}$ and $x_n$ converge towards $q_p=0$ at speed $1/\sqrt{n}$, with the same asymptotic variance $1/4$.
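# Before comparing single trajectories, here is a quick sanity check of the two central limit theorems above (a sketch reusing the S3 and RM helpers): the empirical variance of $\sqrt{n}\,x_n$ and of $\sqrt{n}\,\widehat q_{n,1/2}$ should both be close to $1/4$.

# In[ ]:

n, reps = 2000, 500
rm_final = np.array([RM(1, n, 3, power_step=1)[-1] for _ in range(reps)])
emp_med = np.array([np.median(S3(n)) for _ in range(reps)])
print('empirical variance of sqrt(n)*x_n     :', n * rm_final.var())
print('empirical variance of sqrt(n)*hat q_n :', n * emp_med.var())
print('theoretical variance = 0.25 for both')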
# In[20]:

horizon = 10000
data = S3(horizon)
power_step = 1  # the limit law for power_step = 1/2 is not classical

####### RM
theta = 1
path_RM = list()
for i in range(horizon):
    gamma_k = 1./(i+1)**power_step  # step size 1/k for power_step = 1
    if theta > data[i]:
        s_k = 0.5  # 1-p when p = 1/2 for the median
    else:
        s_k = -0.5
    theta = theta - gamma_k*s_k
    #print(data[i], theta)
    path_RM.append(theta)

####### empirical quantile
path_quant = []
for i in range(horizon):
    path_quant.append(np.median(data[0:i+1]))  # median of the first i+1 points

######## plot
print('horizon = {} \n quantile = {}\n RM = {}\n theoretical median = 0'.format(horizon, path_quant[horizon-1], path_RM[horizon-1]))
plt.plot(range(horizon), path_quant, color='b', label='quantile')
plt.plot(range(horizon), path_RM, color='g', label='RM')
plt.xlabel("iterations")
plt.ylabel("RM and empirical quantile")
plt.plot(range(horizon), np.zeros(horizon), color='r')
plt.legend(loc=2)
plt.show()


# Warm start for RM: start from the empirical median $\widehat q_{n,1/2}$ of a first small batch ($n=20$); see the sketch at the end of this notebook.
#
# Open question: is there a non-asymptotic convergence result for Robbins-Monro?

# # Study of the Robbins-Monro algorithm for density $f_3$ (uniform over $[-0.5, 0.5]$), depending on the power $a$ in the step size:
#
# $$x_{k+1} = x_k - \gamma_{k+1}\big(I(X_{k+1}\leq x_k)-p\big) \mbox{ for } \gamma_k=\frac{1}{k^a} \mbox{ and } a\in[1/2,1].$$
#
# Asymptotic normality of the RM algorithm is known only when the step sizes satisfy
# $$\sum_k \gamma_k = +\infty \mbox{ and } \sum_k \gamma_k^2<\infty,$$
# that is, here, for $a\in(1/2,1]$ (the choice $a=1/2$ violates the second condition).

# In[21]:

from ipywidgets import interact  # IPython.html.widgets was deprecated in favor of ipywidgets

horizon = 5000
theta_0 = -2
nb_paths = 1000

@interact(a=(0.5, 1.0))
def generate(a):
    list_theta_limit = list()
    for i in range(nb_paths):
        path = RM(theta_0, horizon, 3, power_step=a)
        list_theta_limit.append(path[horizon-1])
    #### Gaussian limit (the N(0, 1/(4n)) overlay is the limit law for a = 1)
    Fx = 0
    var = 1/4.
    x = np.linspace(min(list_theta_limit), max(list_theta_limit), 1000)
    y = norm.pdf(x, loc=Fx, scale=np.sqrt(var/float(horizon)))
    #### Plot
    f, ax = plt.subplots(1, 1, figsize=figsize)
    ax.hist(list_theta_limit, density=True, bins=50)
    ax.plot(x, y)
    plt.show()


# In[ ]:
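# Sketch of the warm start mentioned above: initialize RM at the empirical median of a small first batch. (The helper name RM_warm_start and the default batch size n0 = 20 are illustrative choices, not part of the original code; the warm-up batch is drawn separately from the data that RM consumes.)

# In[ ]:

def RM_warm_start(horizon, data_nb=3, power_step=1, n0=20):
    # draw a small warm-up batch and start RM from its empirical median
    samplers = {1: S1, 2: S2, 3: S3}
    warmup = samplers[data_nb](n0)
    theta_0 = np.median(warmup)
    return RM(theta_0, horizon, data_nb, power_step=power_step)

path = RM_warm_start(5000)
print('warm-started x_5000 =', path[-1], '(theoretical median = 0)')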