#!/usr/bin/env python
# coding: utf-8

# # Foundations of Computational Economics #31
#
# by Fedor Iskhakov, ANU

# ## Function approximation in Python
#
# [https://youtu.be/liNputEfcXQ](https://youtu.be/liNputEfcXQ)
#
# Description: How to approximate functions which are only defined on a grid of points. Spline and polynomial interpolation.

# ### Interpolation problem
#
# - $ f(x) $ is a function of interest, hard to compute
# - We have data on the values of $ f(x) $ at $ n $ points
#   $ (x_1,\dots,x_n) $
#
# $$
# f(x_1), f(x_2), \dots f(x_n)
# $$
#
# - Need to find the approximate value of the function $ f(x) $ at
#   arbitrary points $ x \in [x_1,x_n] $

# #### Approaches
#
# 1. *Piece-wise* approach (connect the dots)
#
#   - Which functional form to use for the connections?
#   - What are the advantages and disadvantages?
#
# 1. Use a *similar* function $ s(x) $ to represent $ f(x) $
#    between the data points
#
#   - Which simpler function?
#   - What data should be used?
#   - How to control the accuracy of the approximation?

# #### Distinction between function approximation (interpolation) and curve fitting
#
# - Function approximation and interpolation refer to situations
#   where the **data** on function values is matched **exactly**
#   - The approximation curve passes through the points of the data
# - Curve fitting refers to the statistical problem where the data has
#   **noise**, and the task is to find an approximation of the central
#   tendency in the data
#   - Linear and non-linear regression models, econometrics
#   - The model is *over-identified* (there is more data than needed to
#     exactly identify the regression function)

# #### Extrapolation
#
# Extrapolation is computing the approximated function outside of the
# original data interval
#
# **Should be avoided in general**
#
# - Exact *only* when the theoretical properties of the extrapolated function
#   are known
# - Can be used with extreme caution, based on an analysis of the model
# - Always try to introduce wider bounds for the grid instead

# ### Spline interpolation
#
# Spline = curve composed of independent pieces
#
# **Definition** A function $ s(x) $ on $ [a,b] $ is a spline of
# order $ n $ ( = degree $ n-1 $) iff
#
# - $ s $ is $ C^{n-2} $ on $ [a,b] $ (has continuous derivatives
#   up to order $ n-2 $),
# - given *knot* points $ a=x_0 < x_1 < \dots < x_m = b $, $ s(x) $ is a
#   polynomial of degree $ n-1 $ on each subinterval $ [x_i,x_{i+1}] $,
#   $ i=0,\dots,m-1 $

# Set up a test function, the two interpolation grids and a plotting helper.
# The specific test function, the number of nodes and the random seed below
# are illustrative choices: any smooth function on $ [-5,10] $ would do.

# In[1]:

import numpy as np
import matplotlib.pyplot as plt

func = lambda x: np.exp(-x**2/20)*np.sin(x)  # test function (illustrative choice)
np.random.seed(2025)                         # arbitrary seed
x = np.sort(np.random.uniform(-5,10,11))     # irregular (random) grid of nodes on [-5,10]
xr = np.linspace(-5,10,11)                   # regular grid with the same number of nodes

# In[2]:

def plot1(ifunc,fdata=None,color='b',label=None,extrapolation=False):
    '''Plot the true function, the interpolation data and the approximation ifunc'''
    if fdata is None:
        fdata = (x,func(x))                      # default: data on the random grid
    xd = np.linspace(-5,10,1000)                 # dense grid for plotting lines
    plt.plot(xd,func(xd),color='grey')           # true function
    if fdata[0] is not None:
        plt.scatter(fdata[0],fdata[1],color='r') # interpolation data
    if extrapolation:
        xdi = xd                                 # plot the approximation on the whole interval
    else:
        xdi = xd[np.logical_and(xd>=fdata[0][0],xd<=fdata[0][-1])]  # only between the data points
    if ifunc:
        plt.plot(xdi,ifunc(xdi),color=color,label=label)
        if label:
            plt.legend()
    elif label:
        plt.title(label)

# In[3]:

plot1(None,label='True function')

# In[4]:

from scipy import interpolate  # interpolation routines

fi = interpolate.interp1d(x,func(x))  # returns the interpolation function
plot1(fi,label='interp1d')

# In[5]:

help(interpolate.interp1d)

# In[6]:

fi = interpolate.interp1d(x,func(x),kind='linear')
plot1(fi,label='Linear')

# In[7]:

for knd, clr in ('previous','m'),('next','b'),('nearest','g'):
    fi = interpolate.interp1d(x,func(x),kind=knd)
    plot1(fi,label=knd,color=clr)
plt.show()

# In[8]:

for knd, clr in ('slinear','m'),('quadratic','b'),('cubic','g'):
    fi = interpolate.interp1d(x,func(x),kind=knd)
    plot1(fi,color=clr,label=knd)

# In[9]:

# Approximation errors
# x = np.sort(np.random.uniform(-5,10,11))  # generate new data
for knd, clr in ('slinear','m'),('quadratic','b'),('cubic','g'):
    fi = interpolate.interp1d(x,func(x),kind=knd,bounds_error=False)
    xd = np.linspace(-5,10,1000)
    erd = np.abs(func(xd)-fi(xd))
    plt.plot(xd,erd,color=clr)
    print('Max error with %s splines is %1.5e'%(knd,np.nanmax(erd)))

# In[10]:

# Approximation errors for regular grid
for knd, clr in ('slinear','m'),('quadratic','b'),('cubic','g'):
    fi = interpolate.interp1d(xr,func(xr),kind=knd,bounds_error=False)
    xd = np.linspace(-5,10,1000)
    erd = np.abs(func(xd)-fi(xd))
    plt.plot(xd,erd,color=clr)
    print('Max error with %s splines is %1.5e'%(knd,np.nanmax(erd)))

# #### Accuracy of the interpolation
#
# How to reduce approximation errors?
#
# - Number of nodes (more is better)
# - Location of nodes (regular is better)
# - Interpolation type (match the shape of the function of interest)
#
# *In economic models we usually can control all of these*
# (the first two points are illustrated in the cell below)
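# A small experiment (not part of the original lecture): recompute the maximum
# cubic spline error as the number of nodes grows, on a regular and on a random
# grid, reusing `func` and `interpolate` from above.

# In[ ]:

xd = np.linspace(-5,10,1000)  # dense grid where the error is measured
for n in (5, 11, 21, 41):
    grids = {'regular': np.linspace(-5,10,n),
             'random': np.sort(np.random.uniform(-5,10,n))}
    for name, xg in grids.items():
        fi = interpolate.interp1d(xg,func(xg),kind='cubic',bounds_error=False)
        err = np.nanmax(np.abs(func(xd)-fi(xd)))
        print('n=%3d  %7s grid: max cubic spline error is %1.5e'%(n,name,err))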
# ### Polynomial approximation/interpolation
#
# Back to the beginning: explore the idea of replacing the original
# $ f(x) $ with a simpler $ g(x) $
#
# - Data set $ \{(x_i,f(x_i))\}, i=0,\dots,n $
# - Functional form is a polynomial of degree $ n $ such that $ g(x_i)=f(x_i) $
# - If the $ x_i $ are distinct, the coefficients of the polynomial are uniquely identified
#
# Does the polynomial $ g(x) $ converge to $ f(x) $ when there are
# more points?

# In[11]:

from numpy.polynomial import polynomial

degree = len(x)-1  # passing through all dots
p = polynomial.polyfit(x,func(x),degree)
fi = lambda x: polynomial.polyval(x,p)
plot1(fi,label='Polynomial of degree %d'%degree,extrapolation=True)

# In[12]:

# now with regular grid
degree = len(x)-1  # passing through all dots
p = polynomial.polyfit(xr,func(xr),degree)
fi = lambda x: polynomial.polyval(x,p)
plot1(fi,fdata=(xr,func(xr)),label='Polynomial of degree %d'%degree,extrapolation=True)

# In[13]:

# how the number of points affects the approximation (with degree=n-1)
for n, clr in (5,'m'),(10,'b'),(15,'g'),(25,'r'):
    x2 = np.linspace(-5,10,n)
    p = polynomial.polyfit(x2,func(x2),n-1)
    fi = lambda x: polynomial.polyval(x,p)
    plot1(fi,fdata=(x2,func(x2)),label='%d points'%n,color=clr,extrapolation=True)
plt.show()

# In[14]:

# how the locations of points affect the approximation (with degree=n-1)
np.random.seed(2025)
n = 8
for clr in 'b','g','c':
    x2 = np.linspace(-4,9,n) + np.random.uniform(-1,1,n)  # perturb points a little
    p = polynomial.polyfit(x2,func(x2),n-1)
    fi = lambda x: polynomial.polyval(x,p)
    plot1(fi,fdata=(x2,func(x2)),label='%d points'%n,color=clr,extrapolation=True)
plt.show()

# In[15]:

# how the degree of the polynomial affects the approximation
for degree, clr in (7,'b'),(9,'g'),(11,'m'):
    p = polynomial.polyfit(xr,func(xr),degree)
    fi = lambda x: polynomial.polyval(x,p)
    plot1(fi,fdata=(xr,func(xr)),label='Polynomial of degree %d'%degree,color=clr,extrapolation=True)

# #### Least squares approximation
#
# We could also go back to **function approximation** and fit polynomials
# of lower degree (as illustrated in the cell below)
#
# - Data set $ \{(x_i,f(x_i))\}, i=0,\dots,n $
# - **Any** functional form $ g(x) $ from class $ G $ that best
#   approximates $ f(x) $
#
# $$
# g = \arg\min_{g \in G} \lVert f-g \rVert ^2
# $$
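# A quick least squares illustration (not part of the original lecture):
# `polyfit` with a degree lower than the number of data points minus one does
# exactly this within the class $ G $ of polynomials of that degree; the grid
# `xls` and the degrees below are illustrative choices.

# In[ ]:

xls = np.linspace(-5,10,50)  # many data points, few coefficients: over-identified
for degree, clr in (3,'m'),(6,'b'),(9,'g'):
    p = polynomial.polyfit(xls,func(xls),degree)  # least squares fit, not interpolation
    fi = lambda x: polynomial.polyval(x,p)
    plot1(fi,fdata=(xls,func(xls)),label='Least squares fit of degree %d'%degree,color=clr)
plt.show()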
# ### Orthogonal polynomial approximation/interpolation
#
# - Polynomials over domain $ D $
# - Weighting function $ w(x)>0 $
#
# Inner product
#
# $$
# \langle f,g \rangle = \int_D f(x)g(x)w(x)dx
# $$
#
# $ \{\phi_i\} $ is a family of orthogonal polynomials w.r.t.
# $ w(x) $ iff
#
# $$
# \langle \phi_i,\phi_j \rangle = 0, i\ne j
# $$

# #### Best polynomial approximation in L2-norm
#
# Let $ \mathcal{P}_n $ denote the space of all polynomials of degree $ n $ over $ D $
#
# $$
# \lVert f - p \rVert_2 = \inf_{q \in \mathcal{P}_n} \lVert f - q \rVert_2
# = \inf_{q \in \mathcal{P}_n} \left[ \int_D ( f(x)-q(x) )^2 dx \right]^{\tfrac{1}{2}}
# $$
#
# if and only if
#
# $$
# \langle f-p,q \rangle = 0, \text{ for all } q \in \mathcal{P}_n
# $$
#
# *The orthogonal projection is the best approximating polynomial in L2-norm*

# #### Uniform (infinity, sup-) norm
#
# $$
# \lVert f(x) - g(x) \rVert_{\infty} = \sup_{x \in D} | f(x) - g(x) |
# = \lim_{n \rightarrow \infty} \left[ \int_D | f(x)-g(x) |^n dx \right]^{\tfrac{1}{n}}
# $$
#
# Measures the absolute difference over the whole domain $ D $

# #### Chebyshev (minmax) approximation
#
# What is the best polynomial approximation in the uniform (infinity, sup) norm?
#
# $$
# \lVert f - p \rVert_{\infty} = \inf_{q \in \mathcal{P}_n} \lVert f - q \rVert_{\infty}
# = \inf_{q \in \mathcal{P}_n} \sup_{x \in D} | f(x) - q(x) |
# $$
#
# Chebyshev proved the existence and uniqueness of the best approximating polynomial in the uniform norm.

# #### Chebyshev polynomials
#
# - $ [a,b] = [-1,1] $ and $ w(x)=(1-x^2)^{-1/2} $
# - $ T_n(x)=\cos\big(n\cos^{-1}(x)\big) $
# - Recursive formulas:
#
# $$
# \begin{eqnarray}
# T_0(x)=1,\\
# T_1(x)=x,\\
# T_{n+1}(x)=2x T_n(x) - T_{n-1}(x)
# \end{eqnarray}
# $$

# #### Accuracy of Chebyshev approximation
#
# Suppose $ f: [-1,1]\rightarrow R $ is a $ C^k $ function for some
# $ k\ge 1 $, and let $ I_n $ be the degree $ n $ polynomial
# interpolation of $ f $ with nodes at the zeros of $ T_{n+1}(x) $.
# Then
#
# $$
# \lVert f - I_n \rVert_{\infty} \le \left( \frac{2}{\pi} \log(n+1) +1 \right) \frac{(n-k)!}{n!}\left(\frac{\pi}{2}\right)^k \lVert f^{(k)}\rVert_{\infty}
# $$
#
# 📖 Judd (1998) Numerical Methods in Economics
#
# - achieves the *best polynomial approximation in the uniform norm*
# - works for smooth functions
# - easy to compute
# - but *does not* approximate $ f'(x) $ well

# #### General interval
#
# - Not hard to adapt the polynomials to a general interval
#   $ [a,b] $ through the linear change of variable
#
# $$
# y = 2\frac{x-a}{b-a}-1
# $$
#
# - Orthogonality holds with the weighting function under the same change of
#   variable

# #### Chebyshev approximation algorithm
#
# 1. Given $ f(x) $ and $ [a,b] $
# 1. Compute Chebyshev interpolation nodes on $ [-1,1] $
# 1. Adjust nodes to $ [a,b] $ by the change of variable, $ x_i $
# 1. Evaluate $ f $ at the nodes, $ f(x_i) $
# 1. Compute Chebyshev coefficients $ a_i = g\big(f(x_i)\big) $
# 1. Arrive at the approximation
#
# $$
# f(x) = \sum_{i=0}^n a_i T_i(x)
# $$
#
# The next cell relies on NumPy's built-in implementation; a step-by-step
# version of the algorithm follows it.

# In[16]:

import numpy.polynomial.chebyshev as cheb

for degree, clr in (7,'b'),(9,'g'),(11,'m'):
    fi = cheb.Chebyshev.interpolate(func,degree,[-5,10])
    plot1(fi,fdata=(None,None),color=clr,label='Chebyshev with n=%d'%degree,extrapolation=True)
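# The step-by-step version of the algorithm above, written with the low-level
# routines in `numpy.polynomial.chebyshev` (not part of the original notebook;
# the interval, degree and variable names are illustrative).

# In[ ]:

a, b, n = -5, 10, 11                             # interval and polynomial degree
k = np.arange(n+1)
z = np.cos(np.pi*(k+0.5)/(n+1))                  # step 2: nodes on [-1,1] = zeros of T_{n+1}
xi = (z+1)*(b-a)/2 + a                           # step 3: change of variable to [a,b]
ai = cheb.chebfit(z,func(xi),n)                  # steps 4-5: coefficients a_i from f(x_i)
fi = lambda x: cheb.chebval(2*(x-a)/(b-a)-1,ai)  # step 6: evaluate sum of a_i T_i(y(x))
plot1(fi,fdata=(xi,func(xi)),label='Chebyshev step by step, n=%d'%n,color='r',extrapolation=True)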
# ### Multidimensional interpolation
#
# - there are multidimensional generalizations of all of these methods
# - curse of dimensionality in the number of interpolation points as the number of dimensions increases
# - sparse Smolyak grids and adaptive sparse grids
# - irregular grids require computationally expensive triangulation in the general case
# - good application for machine learning!
#
# **Generally much harder!**

# ### Further learning resources
#
# - [https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html](https://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html)
# - [https://docs.scipy.org/doc/numpy/reference/generated/numpy.interp.html](https://docs.scipy.org/doc/numpy/reference/generated/numpy.interp.html)
# - M.H. Mudde’s thesis on Chebyshev approximation [http://fse.studenttheses.ub.rug.nl/15406/1/Marieke_Mudde_2017_EC.pdf](http://fse.studenttheses.ub.rug.nl/15406/1/Marieke_Mudde_2017_EC.pdf)