#!/usr/bin/env python
# coding: utf-8

# # Data Driven Modeling
#
# ### PhD seminar series at the Chair for Computer Aided Architectural Design (CAAD), ETH Zurich
#
# [Vahid Moosavi](https://vahidmoosavi.com/)
#
# # Second Session: Introduction to Probability
#
# 27th September 2016
#
# ![](Images/DataDrivenModelingKW.png)

# ### Topics to be discussed
#
# **Probability Theory**
# * Certainty and Determinism
# * Laplace's demon
# * Poincaré and the end of determinism
# * Deterministic Unpredictability (Chaos Theory and Bifurcation)
# * Uncertainty and Randomness
# * Fuzziness, vagueness and ambiguity
# * Variable and Parameter
# * Random Variable
# * Probability (Kolmogorov) axioms
# * Independent Random Variables
# * Joint Probability
# * Bayes' Rule and Conditional Probability
# * Probability distributions
# * Expected Value
# * Variance
# * Covariance
# * Law of Large Numbers
# * Central Limit Theorem

# In[1]:

import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
pd.__version__
import sys
import sompylib.sompy as SOM
import sompylib1.sompy as SOM1
get_ipython().run_line_magic('matplotlib', 'inline')

# ### Determinism
# * **Starting gradually from the 16th century**
# * **Newtonian mechanics and the rapid growth of science**
# * **Belief in an objective truth**
# * **Determinism became the belief that equations truly describe the underlying mechanisms of natural phenomena.**
# * **Laplace's Demon**
#
# And it really does work in many systems!

# In[2]:

from IPython.display import YouTubeVideo
YouTubeVideo('EZNpnCd4ZBo', width=700, height=600)

# ## Now simply a Double Pendulum!
#
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Double-Pendulum.svg/294px-Double-Pendulum.svg.png)
#
# ### Still, one can write the underlying equations
#
# ![](Images/Double_P.svg)

# In[1]:

# Double pendulum formula translated from the C code at
# http://www.physics.usyd.edu.au/~wheat/dpend_html/solve_dpend.c

from numpy import sin, cos
import numpy as np
import matplotlib.pyplot as plt
import scipy.integrate as integrate
import matplotlib.animation as animation

G = 9.8   # acceleration due to gravity, in m/s^2
L1 = 1.0  # length of pendulum 1 in m
L2 = 0.7  # length of pendulum 2 in m
M1 = 1.0  # mass of pendulum 1 in kg
M2 = 1.0  # mass of pendulum 2 in kg

def derivs(state, t):
    dydx = np.zeros_like(state)
    dydx[0] = state[1]

    del_ = state[2] - state[0]
    den1 = (M1 + M2)*L1 - M2*L1*cos(del_)*cos(del_)
    dydx[1] = (M2*L1*state[1]*state[1]*sin(del_)*cos(del_) +
               M2*G*sin(state[2])*cos(del_) +
               M2*L2*state[3]*state[3]*sin(del_) -
               (M1 + M2)*G*sin(state[0]))/den1

    dydx[2] = state[3]

    den2 = (L2/L1)*den1
    dydx[3] = (-M2*L2*state[3]*state[3]*sin(del_)*cos(del_) +
               (M1 + M2)*G*sin(state[0])*cos(del_) -
               (M1 + M2)*L1*state[1]*state[1]*sin(del_) -
               (M1 + M2)*G*sin(state[2]))/den2

    return dydx

# create a time array from 0 to 30 s, sampled at 0.05 second steps
dt = 0.05
t = np.arange(0.0, 30, dt)

# th1 and th2 are the initial angles (degrees)
# w1 and w2 are the initial angular velocities (degrees per second)
th1 = 120.0
w1 = 0.0
th2 = -10.0
w2 = 0.0

# initial state
state = np.radians([th1, w1, th2, w2])

# integrate the ODE using scipy.integrate
y = integrate.odeint(derivs, state, t)

x1 = L1*sin(y[:, 0])
y1 = -L1*cos(y[:, 0])

x2 = L2*sin(y[:, 2]) + x1
y2 = -L2*cos(y[:, 2]) + y1

fig = plt.figure()
ax = fig.add_subplot(111, autoscale_on=False, xlim=(-2, 2), ylim=(-2, 2))
ax.grid()

line, = ax.plot([], [], 'o-r', markersize=3, lw=2)
trace1, = ax.plot([], [], '-', c='r', lw=.4)
trace2, = ax.plot([], [], '-', c='g', lw=.4)
time_template = 'time = %.1fs'
time_text = ax.text(0.05, 0.9, '', transform=ax.transAxes)

def init():
    line.set_data([], [])
    trace1.set_data([], [])
    trace2.set_data([], [])
    time_text.set_text('')
    return line, time_text

def animate(i):
    thisx = [0, x1[i], x2[i]]
    thisy = [0, y1[i], y2[i]]
    line.set_data(thisx, thisy)
    trace1.set_data(x1[:i], y1[:i])
    trace2.set_data(x2[:i], y2[:i])
    time_text.set_text(time_template % (i*dt))
    return line, time_text

ani = animation.FuncAnimation(fig, animate, np.arange(1, len(y)),
                              interval=25, blit=True, init_func=init)

# ani.save('./Images/double_pendulum.mp4', fps=15)
ani.save('./Images/double_pendulum.mp4', fps=15,
         extra_args=['-vcodec', 'libx264'], dpi=200)
plt.close()

# In[2]:

from IPython.display import HTML
HTML(""" """)

# In[4]:

from IPython.display import YouTubeVideo
YouTubeVideo('04mbMblhXog', width=600, height=400)

# ### In many real-world problems, we just observe these kinds of behaviour
# * Stock market
# * Building energy
# * Weather
# * People's movement patterns
#
# ## What can we say if we don't know the underlying mechanisms or equations?

# In[5]:

plt.plot(range(len(x2)), x2);

# # Limits to Determinism
# ### Dependency on initial conditions and parameters
# #### Bifurcation process

# In[6]:

xa = .15
xb = 3.
C = np.linspace(xa, xb, num=100)
# print(C)
iterations = range(1000)
Y = C*0 + 1
YS = []
for x in iterations:
    Y = Y**2 - C
#     plt.plot(C, Y, '.k', markersize=2)
for x in iterations:
    Y = Y**2 - C
    YS.append(Y)
    plt.plot(C, Y, '.k', markersize=2);
plt.xlabel('C')
plt.ylabel('Y')
plt.show();

# In[7]:

YS = np.asarray(YS)

# Change this parameter and see how the range of possible values changes
which_c = 50
plt.plot(YS[:, which_c][:200], '.-');
print('C : {}'.format(C[which_c]))
plt.xlabel('Iterations')
plt.ylabel('Y')
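# To make the sensitivity to parameters concrete, here is a small added sketch (not part of the original seminar code): the same map $Y \to Y^2 - C$ is iterated for two values of C that differ by only $10^{-9}$, chosen inside the chaotic band. After a few dozen iterations the two trajectories are completely decorrelated, although every step is deterministic. The helper `iterate_map` and the chosen values are illustrative assumptions.

# In[ ]:

# Added sketch: sensitivity of the quadratic map Y -> Y**2 - C to a
# tiny change of the parameter C (values chosen for illustration).
import numpy as np
from matplotlib import pyplot as plt

def iterate_map(C0, y0=1.0, n=200):
    ys = [y0]
    for _ in range(n - 1):
        ys.append(ys[-1]**2 - C0)
    return np.asarray(ys)

C0 = 1.9     # a parameter value inside the chaotic regime
eps = 1e-9   # a tiny perturbation of the parameter
plt.plot(iterate_map(C0), '.-', label='C = 1.9')
plt.plot(iterate_map(C0 + eps), '.-', label='C = 1.9 + 1e-9')
plt.xlabel('Iterations')
plt.ylabel('Y')
plt.legend(loc='best');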
# ### Simulation of Lorenz Attractors

# In[8]:

# Code from: https://jakevdp.github.io/blog/2013/02/16/animating-the-lorentz-system-in-3d/
#
# import numpy as np
# from scipy import integrate
#
# # Note: t0 is required for the odeint function, though it's not used here.
# def lorentz_deriv(xyz, t0, sigma=10., beta=8./3, rho=28.0):
#     """Compute the time-derivative of a Lorenz system."""
#     x, y, z = xyz
#     return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]
#
# x0 = [1, 1, 1]  # starting vector
# t = np.linspace(0, 3, 1000)  # one thousand time steps
# x_t = integrate.odeint(lorentz_deriv, x0, t)

import numpy as np
from scipy import integrate
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import cnames
from matplotlib import animation
get_ipython().run_line_magic('matplotlib', 'inline')

N_trajectories = 30

# dx/dt = sigma(y-x)
# dy/dt = x(rho-z)-y
# dz/dt = xy-beta*z
def lorentz_deriv(xyz, t0, sigma=10., beta=8./3, rho=28.0):
    """Compute the time-derivative of a Lorenz system."""
    x, y, z = xyz
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

# Choose random starting points, uniformly distributed from -15 to 15
np.random.seed(1)
x0 = -15 + 30 * np.random.random((N_trajectories, 3))

# Solve for the trajectories
t = np.linspace(0, 10, 1000)
x_t = np.asarray([integrate.odeint(lorentz_deriv, x0i, t) for x0i in x0])

# Set up figure & 3D axis for animation
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection='3d')
ax.axis('off')
plt.set_cmap(plt.cm.YlOrRd_r)

# choose a different color for each trajectory
colors = plt.cm.jet(np.linspace(0, 1, N_trajectories))

# set up lines and points
lines = sum([ax.plot([], [], [], '-', c=c) for c in colors], [])
pts = sum([ax.plot([], [], [], 'o', c=c) for c in colors], [])

# prepare the axes limits
ax.set_xlim((-25, 25))
ax.set_ylim((-35, 35))
ax.set_zlim((5, 55))

# set point-of-view: specified by (altitude degrees, azimuth degrees)
ax.view_init(30, 0)

# initialization function: plot the background of each frame
def init():
    for line, pt in zip(lines, pts):
        line.set_data([], [])
        line.set_3d_properties([])
        pt.set_data([], [])
        pt.set_3d_properties([])
    return lines + pts

# animation function. This will be called sequentially with the frame number
def animate(i):
    # we'll step two time-steps per frame. This leads to nice results.
    i = (2 * i) % x_t.shape[1]
    for line, pt, xi in zip(lines, pts, x_t):
        x, y, z = xi[:i].T
        line.set_data(x, y)
        line.set_3d_properties(z)
        pt.set_data(x[-1:], y[-1:])
        pt.set_3d_properties(z[-1:])
    ax.view_init(30, 0.3 * i)
    fig.canvas.draw()
    return lines + pts

# instantiate the animator
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=500, interval=10, blit=True)

# Save as mp4. This requires mplayer or ffmpeg to be installed
anim.save('./Images/lorentz_attractor.mp4', fps=15,
          extra_args=['-vcodec', 'libx264'], dpi=200)
plt.close()

# In[9]:

from IPython.display import HTML
HTML(""" """)

# In[5]:

from IPython.display import YouTubeVideo
YouTubeVideo('JZoGO0MrZPA', width=400, height=400)

# In[10]:

# No regularity in the behavior
# The effect of the initial value
figure = plt.figure(figsize=(10, 10))
for i in range(20):
    plt.subplot(5, 4, i+1);
    plt.plot(x_t[i, :, 1]);
    plt.xlabel('time')
    plt.ylabel('x')
plt.tight_layout();
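# A small added sketch of the "butterfly effect" discussed below (not part of the original seminar code): two Lorenz trajectories whose initial conditions differ by $10^{-8}$ in x. The distance between them grows roughly exponentially until it saturates at the size of the attractor, so long-term prediction fails even though the equations are exact.

# In[ ]:

# Added sketch: exponential divergence of two nearby Lorenz trajectories.
import numpy as np
from scipy import integrate
from matplotlib import pyplot as plt

def lorenz(xyz, t0, sigma=10., beta=8./3, rho=28.0):
    x, y, z = xyz
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

t = np.linspace(0, 25, 2500)
a = integrate.odeint(lorenz, [1., 1., 1.], t)
b = integrate.odeint(lorenz, [1. + 1e-8, 1., 1.], t)

dist = np.sqrt(np.sum((a - b)**2, axis=1))
plt.semilogy(t, dist)
plt.xlabel('time')
plt.ylabel('distance between the two trajectories');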
# ## Non-Predictable Determinism ---> The End of Determinism?!
# * **Dependency on initial conditions and parameters**
# * **Butterfly Effect**
# * **Henri Poincaré's work on the N-Body Problem, 1880s**
# * **Lorenz Attractors, 1960s**
# * Further reading:
#     * Deterministic Nonperiodic Flow: http://eaps4.mit.edu/research/Lorenz/Deterministic_63.pdf
# * Heisenberg's uncertainty principle
# * "God doesn't play dice"
# * Is randomness the nature of things, or due to our lack of understanding?
# * Is it appropriate to look at probability and statistics as some sort of pragmatism?
# * Important people from the history of probability and statistics: http://www.economics.soton.ac.uk/staff/aldrich/Figures.htm

# # Now Uncertainty and Randomness
#
# ![](https://upload.wikimedia.org/wikipedia/commons/7/77/Nuvola_apps_atlantik.png)

# # Important terms
# * ## Variable
#     * a symbol for a specific number, vector, matrix or even a function, which takes a range of values
#     * Then, we have **discrete** or **continuous** variables, depending on the range of values
#     * **Dependent** and **independent** variables
# $$y = 2x + \sin(x)$$
# * ## Parameter or Constant
#     * A variable which we assume is not varying (i.e. is constant) in our experiment
# * ## Random Variable (the contribution of probability theory)
#     * Adds a likelihood or chance (formally, a probability) to each value of a variable
# * ## Fuzziness, vagueness and ambiguity (possibility theory)

# # Some important principles of Probability Theory
# * ## Probability (Kolmogorov) axioms
# https://en.wikipedia.org/wiki/Probability_axioms
#
# * **First axiom**
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/ac12b631af7065f7f811d265b249a030f37484c8)
#
# * **Second axiom**
#     * the sum of all probabilities is one
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/1f0f26c0fa97e701b5fd9459d1b7fe3b6f4ea326)
#
# * **Third axiom**
#     * the probability of a union of disjoint events is the sum of their individual probabilities
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/47f22fe03df467b1d20785e5026bac39fabd9edc)
#
# * **Consequences**
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/76817e6fd9cc41a3e844f540590132c36cf9bade)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/efd8ec693a89b5ea1c222660fefe5239565e6551)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/024906557ab6af34620cb2ac901fd61911372944)
#
# * **Some set-theoretical intuitions**
#     * Venn diagrams from set theory
#
# ![](Images/venn.gif)
#
# * **Conditional Probability**
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/c7f0ff7bcd50dd11514f9f02b1273dab360a4cef)
#
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/9/9c/Probability_tree_diagram.svg/424px-Probability_tree_diagram.svg.png)
#
# * **Independent variables**
#     * e.g. the results of a coin toss and the roll of a die
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/fd349a98748a1e64afd94e53e11e5cc1e3996d4e)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/7676b3c8f234867216f16c94eaa893354b1bca6a)
#
# * **Law of total probability**
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/1f629ea8dda22bcc5fa6afe2d066ad753e215f2b)
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/a3fd649bac7848b022c2d1453bcd77070ab9a788)
#
# * **Bayes' Rule** (a quick numerical check follows below)
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/b1078eae6dea894bd826f0b598ff41130ee09c19)
#
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/b/bf/Bayes_theorem_visualisation.svg/600px-Bayes_theorem_visualisation.svg.png)
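# As a numerical sanity check, here is an added sketch with made-up numbers: a rare condition affects 1% of a population, a test detects it 95% of the time and gives a false positive 5% of the time. Bayes' rule gives the probability of the condition given a positive test, and a simple simulation agrees with the exact value.

# In[ ]:

# Added sketch: Bayes' rule on a standard (hypothetical) testing example,
# P(A|B) = P(B|A) P(A) / P(B), with P(B) from the law of total probability.
import numpy as np

p_A = 0.01             # prior: P(condition)
p_B_given_A = 0.95     # sensitivity: P(positive | condition)
p_B_given_notA = 0.05  # false positive rate: P(positive | no condition)

# law of total probability: P(positive)
p_B = p_B_given_A * p_A + p_B_given_notA * (1 - p_A)
# Bayes' rule: P(condition | positive)
p_A_given_B = p_B_given_A * p_A / p_B
print('exact     :', p_A_given_B)

# the same quantity estimated by simulation
np.random.seed(0)
n = 1000000
has_A = np.random.rand(n) < p_A
positive = np.where(has_A,
                    np.random.rand(n) < p_B_given_A,
                    np.random.rand(n) < p_B_given_notA)
print('simulated :', has_A[positive].mean())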
# # Further readings
# * A First Course in Probability by Sheldon Ross
# * Richard Feynman's introduction to probability theory: http://www.feynmanlectures.caltech.edu/I_06.html

# ## Random Variable
# * ### A random variable always comes with a likelihood function (a probability distribution)
# * ### Discrete random variable
#     * examples: Coin: {'head', 'tail'}, Dice: {1, ..., 6}
#     * The **probability mass function** indicates the likelihood of each discrete event
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/289f3e31c4faf8cbf2c78ebdec04e3994092c6e5)
# * ### Continuous random variable
#     * temperature in a building
#     * height of a random person
#     * The **probability density function** indicates the relative likelihood of each value
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/45fd7691b5fbd323f64834d8e5b8d4f54c73a6f8)
#
# * **Cumulative distribution function**
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/4080d882376474e2d20b1f5d942f890539308c6f)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/237edf4296a8ef4a946134c613b04b250d2de5be)
#
# * **Joint functions**
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/2925d7334ee04f933179892c7f407efaacd33123)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/f8d052b8b354ed30ad25da7f4bc8c3e87cdc71ea)
#
# * **Expected value**
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/ef6f4efe003752f5353cfb1ed00235f374455805)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/caa946e993c976ed0f95e60748fcd7afce6bb2ff)
#
# * **Variance**
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/55622d2a1cf5e46f2926ab389a8e3438edb53731)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/c2ace8c9ac8568598540df05d0db70c4e957192b)
#
# * **Covariance**
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/7120384a1c843727d9589e2b33dbc33901d14f42)
#
# ![](https://wikimedia.org/api/rest_v1/media/math/render/svg/7331bb9b6e36128d1d9cb735b11b65427929105d)
# ### Therefore, two independent variables have a covariance of zero (they are uncorrelated), but the converse does not necessarily hold (see the numerical check below).
# ### Covariance is at the heart of many ML and statistical learning methods, such as PCA.
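# Here is an added sketch of the empirical versions of these definitions, using numpy. With enough samples the sample mean, variance and covariance approach the theoretical values (the law of large numbers at work), and the last line shows two variables that are uncorrelated yet clearly dependent.

# In[ ]:

# Added sketch: empirical mean, variance and covariance.
import numpy as np

np.random.seed(1)
n = 100000

# E[X] and Var[X] for a fair die: theory gives 3.5 and 35/12 ~ 2.917
die = np.random.randint(1, 7, size=n)
print('die mean    :', die.mean(), ' variance:', die.var())

# two independent variables -> covariance close to 0
x = np.random.randn(n)
y = np.random.randn(n)
print('cov(indep)  :', np.cov(x, y)[0, 1])

# dependent but uncorrelated: x**2 with symmetric x
print('cov(x, x**2):', np.cov(x, x**2)[0, 1], ' (close to 0, yet dependent)')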
# ## Known probability functions
# ![](Images/PDFSandPMFS.png)
#
# * There are many more: https://en.wikipedia.org/wiki/List_of_probability_distributions

# # Gaussian (Normal) distribution
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Boxplot_vs_PDF.svg/598px-Boxplot_vs_PDF.svg.png)
#
# ### PDF
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Normal_Distribution_PDF.svg/720px-Normal_Distribution_PDF.svg.png)
#
# ### CDF
# ![](https://upload.wikimedia.org/wikipedia/commons/thumb/c/ca/Normal_Distribution_CDF.svg/720px-Normal_Distribution_CDF.svg.png)

# # Nevertheless, we use computers!

# In[6]:

from scipy.integrate import quad

# Now the CDF: P(a <= x <= b) for N(m, s)

# Gaussian distribution
def Gaussianf(m, s, x):
    return 1/(s*np.sqrt(2*np.pi)) * np.exp(-np.power((x-m), 2)/(2*s*s))

def integrand(x, m, s, Gaussianf):
    return Gaussianf(m, s, x)

a = -6
b = 6
m = 0
s = 1
P, err = quad(integrand, a, b, args=(m, s, Gaussianf))
print(P)

# In[12]:

# Expected value and variance
from scipy.integrate import quad

def integrand(x, m, s, Gaussianf):
    return x*Gaussianf(m, s, x)

a = -1*np.inf
b = np.inf
m = 2
s = 10
mu, err = quad(integrand, a, b, args=(m, s, Gaussianf))

def integrand(x, m, s, Gaussianf):
    return x*x*Gaussianf(m, s, x)

mu2, err = quad(integrand, a, b, args=(m, s, Gaussianf))
sigma2 = mu2 - mu*mu
print(mu, sigma2)

# In[13]:

from scipy.integrate import quad

# Uniform distribution
def f(a, b):
    return 1/float(b-a)

def integrand(x, a, b, f):
    return x*f(a, b)

a = 0
b = 10
mu, err = quad(integrand, a, b, args=(a, b, f))

def integrand(x, a, b, f):
    return x*x*f(a, b)

mu2, err = quad(integrand, a, b, args=(a, b, f))
sigma2 = mu2 - mu*mu
print(mu, sigma2)

# ## Exercise
# **Calculate the mean and standard deviation of the Exponential probability density function.**

# # But before going further
# * We talked about the end of determinism and the transition from knowing to learning, but
# * it seems that there is again some sort of repetition of learning and knowing within probability and statistics.

# In[14]:

import datetime
import pandas as pd
# import pandas.io.data  # deprecated; superseded by the pandas-datareader package
import numpy as np
from matplotlib import pyplot as plt
import sys
import getngrams as ng
# from pandas import Series, DataFrame
# import xkcd as fun
get_ipython().run_line_magic('matplotlib', 'inline')

# kw = ['Mainframe Computer','Personal Computer','Sensor','Computer Network','Internet','Data Mining','Pervasive Computing',
#       'Smart Phone','Communication Technology','Simulation','Micro Simulation','Web 2.0']

kw = ['Probability', 'Statistics', 'Machine Learning', 'stochastics']
A = pd.DataFrame()
for i in range(len(kw)):
    try:
        tmp = ng.runQuery('-nosave -noprint -startYear=1800 -smoothing=3 -endYear=2008 -caseInsensitive ' + kw[i])
        # A['year'] = tmp.year.values[:]
        weights = tmp.values[:, 1:]
        mx = np.max(weights, axis=0)
        mn = np.min(weights, axis=0)
        R = mx - mn
        weights = (weights - mn)/R
        tmp.iloc[:, 1:] = weights
        A[kw[i]] = tmp.values[:, 1]
    except:
        print(kw[i], 'not enough data')

# In[15]:

fig = plt.figure()
for i in range(len(kw)):
    try:
        plt.plot(tmp.year, A[kw[i]], linewidth=1, label=kw[i])
        xticks = np.arange(tmp.year[0], 2011, 10).astype(int)
        plt.xticks(xticks)
#         plt.yticks([])
    except:
        print(kw[i], 'not enough data')
# A.plot(A.year, A.columns[1:], label='Date', colormap='jet')
# plt.legend(loc='best', bbox_to_anchor=(1.0, 1.0), fontsize='medium')
fig.set_size_inches(14, 7)

# ## Now it seems to make sense to use the term "Learning" in Machine Learning!
#
# ### Further readings
# * Statistical Modeling: The Two Cultures by Leo Breiman http://www.stat.uchicago.edu/~lekheng/courses/191f09/breiman.pdf
# * All the fights between statisticians and ML people!
#     * http://statweb.stanford.edu/~jhf/ftp/dm-stat.pdf
#     * http://statweb.stanford.edu/~tibs/stat315a/glossary.pdf

# # In the next sessions we will talk about these estimation methods in detail...
# ### The answer to the exercise

# In[2]:

from scipy.integrate import quad

# Exponential distribution
def f(lam, x):
    return lam*np.exp(-1*lam*x)

def integrand(x, lam, f):
    return x*f(lam, x)

a = 0
b = np.inf
lam = 2
mu, err = quad(integrand, a, b, args=(lam, f))

def integrand(x, lam, f):
    return x*x*f(lam, x)

mu2, err = quad(integrand, a, b, args=(lam, f))
sigma2 = mu2 - mu*mu
print(mu, sigma2)
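# As a cross-check (an added sketch): for an Exponential($\lambda$) distribution the mean is $1/\lambda$ and the variance $1/\lambda^2$, so with $\lambda = 2$ we expect a mean of 0.5 and a variance of 0.25. Sampling with numpy should agree with the integrals above.

# In[ ]:

# Added sketch: verify the integration result by sampling.
# numpy parameterizes the exponential by the scale 1/lambda.
import numpy as np

lam = 2
samples = np.random.exponential(scale=1.0/lam, size=1000000)
print('sample mean    :', samples.mean(), ' (theory: 1/lam =', 1.0/lam, ')')
print('sample variance:', samples.var(), ' (theory: 1/lam**2 =', 1.0/lam**2, ')')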