#!/usr/bin/env python
# coding: utf-8

# # An introduction to sensitivity analysis with the Monte Carlo method
# **Vinzenz Gregor Eck**, Expert Analytics
#
# Date: **Mar 9, 2017**

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import numpy as np
import monte_carlo
from sensitivity_examples_nonlinear import generate_distributions
from sensitivity_examples_nonlinear import calculate_sensitivity_indices_non_additive_model


# # Introduction
#
# Previously we conducted a sensitivity analysis for the following model:

# In[2]:


# the linear model
def linear_model(w, z):
    assert w.shape == z.shape
    return np.sum(w*z, axis=1)


# The analysis was done with the following lines of code:

# In[3]:


# sensitivity reference values
S_book = np.array([0.0006, 0.009, 0.046, 0.145, 0, 0, 0, 0])
St_book = np.array([0.003, 0.045, 0.229, 0.723, 0.002, 0.036, 0.183, 0.578])

# get joint distributions
joint_distribution = generate_distributions()

number_of_samples = 1000000
A, B, C, Y_A, Y_B, Y_C, S, ST = calculate_sensitivity_indices_non_additive_model(number_of_samples,
                                                                                 joint_distribution)

print("First Order Indices           Total Sensitivity Indices\n")
print('   S_est  | S_ref               ST_est  | ST_ref\n')
for k, (s, sb, s_t, sb_t) in enumerate(zip(S, S_book, ST, St_book)):
    print('S_{} : {:>6.3f} | {:2.3f}        ST_{} : {:>6.3f} | {:2.3f}'.format(k + 1, s, sb, k + 1, s_t, sb_t))


# The actual algorithm for the sensitivity analysis was hidden in this function call, which did the magic for us:
#
#         A, B, C, Y_A, Y_B, Y_C, S, ST = calculate_sensitivity_indices_non_additive_model(number_of_samples,
#                                                                                          joint_distribution)
#
# Now we want to look inside this function and, at the same time, at the algorithm it uses.

# # Saltelli's Algorithm
#
# Saltelli's algorithm is a Monte Carlo method which approximates the first order sensitivity indices S_i and
# the total sensitivity indices ST_i of a model from a set of sample matrices.
#
# The algorithm can be split into three major parts:
#
# 1. Generate sample matrices
# 2. Evaluate the model
# 3. Approximate the sensitivity indices
#
# We implemented these steps as Python functions to keep the code close to the algorithm steps.

# In[4]:


# calculate sensitivity indices of the non-additive model
def calculate_sensitivity_indices_non_additive_model(number_of_samples, joint_distribution, sample_method='R'):
    number_of_parameters = len(joint_distribution)

    # 1. Generate sample matrices
    A, B, C = generate_sample_matrices_mc(number_of_samples, number_of_parameters,
                                          joint_distribution, sample_method)

    # 2. Evaluate the model
    Y_A, Y_B, Y_C = evaluate_non_additive_linear_model(A, B, C)

    # 3. Approximate the sensitivity indices
    S, ST = calculate_sensitivity_indices_mc(Y_A, Y_B, Y_C)

    return A, B, C, Y_A, Y_B, Y_C, S, ST


# ## Generate sample matrices

# In[5]:


# sample matrices
def generate_sample_matrices_mc(number_of_samples, number_of_parameters, joint_distribution, sample_method='R'):
    # draw 2 * N samples from the joint distribution and split them into the
    # two independent sample matrices A and B
    Xtot = joint_distribution.sample(2 * number_of_samples, sample_method).transpose()
    A = Xtot[0:number_of_samples, :]
    B = Xtot[number_of_samples:, :]

    # create C sample matrices: C_i is a copy of B with column i taken from A
    C = np.empty((number_of_parameters, number_of_samples, number_of_parameters))
    for i in range(number_of_parameters):
        C[i, :, :] = B.copy()
        C[i, :, i] = A[:, i].copy()

    return A, B, C
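# As a quick sanity check of the structure of `A`, `B` and `C`, the sketch below builds the matrices from plain
# `numpy` random numbers instead of a `chaospy` joint distribution; the matrix sizes and the uniform samples are
# only illustrative assumptions and not the distributions used above. Each `C_i` should equal `B` everywhere
# except in column `i`, which is taken from `A`.

# In[ ]:


# minimal sketch of the A/B/C sample-matrix structure (illustrative sizes)
demo_samples = 5
demo_parameters = 3

Xtot_demo = np.random.rand(2 * demo_samples, demo_parameters)
A_demo = Xtot_demo[:demo_samples, :]
B_demo = Xtot_demo[demo_samples:, :]

C_demo = np.empty((demo_parameters, demo_samples, demo_parameters))
for i in range(demo_parameters):
    C_demo[i, :, :] = B_demo.copy()
    C_demo[i, :, i] = A_demo[:, i].copy()

# each C_i matches B except in column i, which comes from A
for i in range(demo_parameters):
    assert np.array_equal(C_demo[i, :, i], A_demo[:, i])
    other_columns = np.arange(demo_parameters) != i
    assert np.array_equal(C_demo[i][:, other_columns], B_demo[:, other_columns])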
# ## Evaluate the model

# In[6]:


# model evaluation
def evaluate_non_additive_linear_model(A, B, C):
    number_of_parameters = A.shape[1]
    number_of_samples = A.shape[0]
    # the first half of the columns holds the z_i, the second half the w_i
    number_of_factors = int(number_of_parameters / 2)

    # 1. evaluate sample matrix A
    Z_A = A[:, :number_of_factors]
    W_A = A[:, number_of_factors:]
    Y_A = linear_model(W_A, Z_A)

    # 2. evaluate sample matrix B
    Z_B = B[:, :number_of_factors]
    W_B = B[:, number_of_factors:]
    Y_B = linear_model(W_B, Z_B)

    # 3. evaluate sample matrices C
    Y_C = np.empty((number_of_samples, number_of_parameters))
    for i in range(number_of_parameters):
        x = C[i, :, :]
        z = x[:, :number_of_factors]
        w = x[:, number_of_factors:]
        Y_C[:, i] = linear_model(w, z)

    return Y_A, Y_B, Y_C


# ## Approximate the sensitivity indices

# In[7]:


# mc algorithm for variance based sensitivity coefficients
def calculate_sensitivity_indices_mc(y_a, y_b, y_c):
    number_of_samples, n_parameters = y_c.shape

    # Shift the data around zero
    y_mean = np.mean(np.hstack([y_a, y_b]))
    y_a -= y_mean
    y_b -= y_mean
    y_c -= y_mean

    # estimate of the squared mean of the model output
    f0sq = np.mean(y_a * y_b)
    # f0sq = (sum(Y_A) / number_of_samples) ** 2

    # estimates of the output variance from the A and B samples
    y_a_var = np.sum(y_a ** 2.) / number_of_samples - f0sq
    y_b_var = np.sum(y_b ** 2.) / number_of_samples - f0sq

    s = np.zeros(n_parameters)
    st = np.zeros(n_parameters)

    for i in range(n_parameters):
        # first order index: variance explained by parameter i alone
        cond_var_X = np.sum(y_a * y_c[:, i]) / number_of_samples - f0sq
        s[i] = cond_var_X / y_b_var

        # total index: one minus the variance explained by all parameters except i
        cond_var_not_X = np.sum(y_b * y_c[:, i]) / number_of_samples - f0sq
        st[i] = 1 - cond_var_not_X / y_a_var

    return s, st
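# With the three building blocks defined above, the analysis from the introduction can be redone by calling
# them directly. The cell below is a usage sketch: it assumes the `joint_distribution` created earlier is still
# in memory and uses a smaller, arbitrarily chosen number of samples to keep the run short.

# In[ ]:


# usage sketch: run the three algorithm steps explicitly
n_samples_demo = 10000  # fewer samples than above, only to keep the demo fast
n_params_demo = len(joint_distribution)

A_d, B_d, C_d = generate_sample_matrices_mc(n_samples_demo, n_params_demo, joint_distribution)
Y_A_d, Y_B_d, Y_C_d = evaluate_non_additive_linear_model(A_d, B_d, C_d)
S_demo, ST_demo = calculate_sensitivity_indices_mc(Y_A_d, Y_B_d, Y_C_d)

for k, (s_k, st_k) in enumerate(zip(S_demo, ST_demo)):
    print('S_{} : {:>6.3f}    ST_{} : {:>6.3f}'.format(k + 1, s_k, k + 1, st_k))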