#!/usr/bin/env python # coding: utf-8 # # Data Driven Modeling #

# ### PhD seminar series at Chair for Computer Aided Architectural Design (CAAD), ETH Zurich # # # [Vahid Moosavi](https://vahidmoosavi.com/) #

# # # # First Session: Introduction #

# # 20th September 2016 # ### #

# ## The general set up: # ### What is the purpose of these seminars? # ### What are the expectations from designers and architects? # ### How to domesticate these new computational capacities into the realm of design or how to re-define design? # ### #

# ### What do we mean by modeling? And why do we need to model things? # # * **generalization** # * **formalization** # * **control** # * **ultimately to communicate** # * **what else?** # ### # # ## A formalism for the modeling process # # ![](Images/RosenModel.png) # ### # ### Main Tasks in Modeling # #### Prediction/Classification # #### Identification of relationships # #### finding important aspects in relation to the target # #### pattern recognition # #### etc. # ### # ### There have been different approaches for the notion of modeling # ### But different approaches have limits in different levels of complexity # ![](Images/Model_Coplexity.jpg) # ## But it has always been like that # ### Life and death of computational (Urban) modeling concepts # ![](Images/ngrams3.png) # ### And if we follow the trends ---> Nowadays Big Data! # # #### Google Trends # ![](Images/GoogleTrends.png) # # # ## And if we look at the underlying mechanisms # #### A historical view to computational modeling and advent of Big Data # ![](Images/Computational Capacities.png) # ## Some examples # * Analytical # * Many of classical physics equations # * Newton's Law of Motion # * F=MA # # ![](Images/NewtonSLaw.svg) # ### # # * Centralized simulation models # * Chaotic systems (Lorenz systems example) # * N-body problems in systems biology # * Decentralized simulation models # * Agent Based models in transportations # * Cellular automata # # An Example of unpredictable determinism: Lorenz Systems # In[9]: #Code from: https://jakevdp.github.io/blog/2013/02/16/animating-the-lorentz-system-in-3d/ # import numpy as np # from scipy import integrate # # Note: t0 is required for the odeint function, though it's not used here. 
# def lorentz_deriv((x, y, z), t0, sigma=10., beta=8./3, rho=28.0):
#     """Compute the time-derivative of a Lorenz system."""
#     return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

# x0 = [1, 1, 1]  # starting vector
# t = np.linspace(0, 3, 1000)  # one thousand time steps
# x_t = integrate.odeint(lorentz_deriv, x0, t)

import numpy as np
from scipy import integrate

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import cnames
from matplotlib import animation
get_ipython().run_line_magic('matplotlib', 'inline')

N_trajectories = 10


# dx/dt = sigma(y-x)
# dy/dt = x(rho-z)-y
# dz/dt = xy-beta*z
def lorentz_deriv(state, t0, sigma=10., beta=8./3, rho=28.0):
    """Compute the time-derivative of a Lorenz system.

    Parameters
    ----------
    state : sequence of three numbers
        Current point ``(x, y, z)`` of the trajectory.
    t0 : float
        Time argument; required by the ``odeint`` callback signature but
        not used, since the right-hand side does not depend on time.
    sigma, beta, rho : float
        Coefficients of the three equations listed above this function.

    Returns
    -------
    list
        ``[dx/dt, dy/dt, dz/dt]`` evaluated at ``state``.
    """
    # Unpack in the body: tuple parameters in the signature are not valid
    # Python 3 syntax.
    x, y, z = state
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]


# Choose random starting points, uniformly distributed from -15 to 15
np.random.seed(1)
x0 = -15 + 30 * np.random.random((N_trajectories, 3))

# Solve for the trajectories
t = np.linspace(0, 7, 1000)
x_t = np.asarray([integrate.odeint(lorentz_deriv, x0i, t)
                  for x0i in x0])

# Set up figure & 3D axis for animation
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection='3d')
ax.axis('off')
plt.set_cmap(plt.cm.hot)

# choose a different color for each trajectory
colors = plt.cm.jet(np.linspace(0, 1, N_trajectories))

# set up lines and points: ax.plot returns a list of artists, so flatten
# the per-color results into one list of lines and one list of points
lines = [line for c in colors for line in ax.plot([], [], [], '-', c=c)]
pts = [pt for c in colors for pt in ax.plot([], [], [], 'o', c=c)]

# prepare the axes limits
ax.set_xlim((-25, 25))
ax.set_ylim((-35, 35))
ax.set_zlim((5, 55))

# set point-of-view: specified by (altitude degrees, azimuth degrees)
ax.view_init(30, 0)


# initialization function: plot the background of each frame
def init():
    """Clear every trajectory line and head marker; return all artists."""
    for line, pt in zip(lines, pts):
        line.set_data([], [])
        line.set_3d_properties([])

        pt.set_data([], [])
        pt.set_3d_properties([])
    return lines + pts


# animation function. This will be called sequentially with the frame number
def animate(i):
    """Draw frame ``i``: extend each trajectory and rotate the camera.

    Returns the list of line and point artists that were updated.
    """
    # we'll step two time-steps per frame. This leads to nice results.
    i = (2 * i) % x_t.shape[1]

    for line, pt, xi in zip(lines, pts, x_t):
        x, y, z = xi[:i].T
        line.set_data(x, y)
        line.set_3d_properties(z)

        # one-element slices keep the marker on the newest sample and stay
        # valid (empty) when i == 0
        pt.set_data(x[-1:], y[-1:])
        pt.set_3d_properties(z[-1:])

    ax.view_init(30, 0.3 * i)
    fig.canvas.draw()
    return lines + pts


# instantiate the animator.
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=500, interval=10, blit=True)

# Save as mp4. This requires mplayer or ffmpeg to be installed
anim.save('./Images/lorentz_attractor.mp4', fps=15,
          extra_args=['-vcodec', 'libx264'], dpi=200)
plt.close()


# In[10]:


from IPython.display import HTML
HTML("""

""")


# In[3]:


from IPython.display import YouTubeVideo
YouTubeVideo('JZoGO0MrZPA',width=700, height=600)


#
# # A major shift:
# # Knowing Vs. Learning (Theory Driven Vs. Data Driven) Beyond domain expertism
#
# ###
# ## The primary role of Data in Data Driven Modeling
# ###
# ![](Images/TheoryDrivenDataDriven.png)
# ###
#
#
#
#
# ###
# ### Toward a new level of abstraction
# ![](Images/Model_Coplexity.jpg)
#
# ###
# # Theory Driven models get complicated even with Data
# ### An example in urban transport modeling
# ![](Images/MATSIM.png)
#
#
#
# ###
# # But now we have new capacities
# ## An inversion in the role of data in the process of modeling
# ###
# ![](Images/InversionLondon.png)
# ###

# In[4]:


from IPython.display import YouTubeVideo
YouTubeVideo('0aQxJgHknGs',width=700, height=600)


# In[5]:


from IPython.display import YouTubeVideo
YouTubeVideo('VQ1f312SVqg',width=700, height=600)


# In[6]:


from IPython.display import YouTubeVideo
YouTubeVideo('D6XTyLbO13w',width=700, height=600)


# ## But really how to use this data?
# #### 1. Info-graphics or just real time visualizations!!
# #### 2. Data analytics in a real time or a large scale fashion! (80% of Big Data community)
# #### 3. Looking seriously for universal laws in complex Systems? (Natural models in community of physicists)
# #### 4. Optimizing the parametric set up of rational models? (rational Models) ---> Very common in engineering these days
# #### 5. Or a new level of data-driven models in coexistence with urban data streams **(Pre-specifics)**
#
# ## Now assuming data is available WHAT should we learn for Data Driven Modeling?
#
# ![](Images/DataDrivenModelingElements.png)
# # But HOW to learn them?
# ## Resource Based --> Forward but slow and might take a BSc.
# ## Market Oriented --> Backward, might work, but mostly we get simply puzzled # ## Connecting the dots in a guided path --- Wiki-based approach # # # ![](Images/DataDrivenModelingKW.png) # ### Topics to be discussed (Not in the order of sessions) # * This list will be updated over time. # * All the topics will be discussed with codes in Python # # **Probability Theory** # * Certainty and Determinism # * Laplace’s demon # * Poincaré and the end of determinism # * Deterministic Unpredictability (Chaos Theory and Bifurcation) # * Uncertainty and Randomness # * Fuzziness, vagueness and ambiguity # * Variable and Parameter # * Random Variable # * Probability (Kolmogorov) axioms # * Probability distributions # * Expected Value # * Variance # * Covariance # * Independent Random Variables # * Joint Probability # * Bayes' Rule and Conditional Probability # # **Statistics** # * Central Limit Theorem (CLT) # * Law of large numbers # * Statistical Measures # * Mean, Median, Standard Deviation, Skewness, Lp norms # * Outliers # * histograms # * Kernel Density Estimation # * Bias Vs. Variance # * Accuracy Vs. Precision # * Hypothesis Testing # * Causality and Correlations # * Random Walk # * Brownian motion # * Resampling # * Non-parametric Statistics # # **Statistical Learning** # * Least Squares method # * Maximum Likelihood Estimation # * Regression models and curve fitting using polynomials # * Structure Learning in comparison to Polynomials # * Regularized Models # * Learning densities vs. designed densities # * Learning kernels vs. designed Kernels # * Learning Dictionaries vs. 
# designed Dictionaries # * Over Fitting and generalization # * Probabilistic Graphical Models # * Quality Measures: Validation, Precision/Recall and other terms # * Markov chains and stochastic processes # * Bayesian Networks # # **Linear Algebra** # * Points, Vectors and Matrices # * Matrix operations # * Algebraic Operations # * Systems of Linear Equations # * Similarity and distances between vectors # * Euclidean, Hamming, Mahalanobis, Hausdorff # * Linear Transformations: PCA, ICA # * Markov Chains # * Fourier Transformation # * Dictionary Learning, Sparse coding # # # **Optimization** # * Objective function # * Exact Methods # * Linear Programming # * Complexity # * Approximate Methods # * Gradient Descent # * Hill climbing methods # * Meta Heuristics # # # **Machine Learning** # * Supervised Learning # * Unsupervised Learning # * Reinforcement Learning # * Classification # * Clustering and pattern recognition # * Prediction # * Function approximation # * Feature selection/extraction # * Transfer Function # * Dimensionality reduction # * Manifold learning # * Decision trees # * Naïve Bayes classifier # * Ensemble methods # * Support Vector Machines (SVM) # * **Self Organizing Maps (SOM)** # * Structured Prediction and Random fields # * Kernels and Kernel learning: in Image processing examples # * Energy Based Models # * Dictionary Learning # * Autoencoders # * Deep Learning # #