Vinzenz Gregor Eck, Expert Analytics
**Leif Rune Hellevik**, NTNU

Date: Jul 13, 2018
# ipython magic
%matplotlib notebook
%load_ext autoreload
%autoreload 2
# plot configuration
import matplotlib
import matplotlib.pyplot as plt
plt.style.use("ggplot")
# import seaborn as sns # sets another style
matplotlib.rcParams['lines.linewidth'] = 3
fig_width, fig_height = (7.0,5.0)
matplotlib.rcParams['figure.figsize'] = (fig_width, fig_height)
# font = {'family' : 'sans-serif',
# 'weight' : 'normal',
# 'size' : 18.0}
# matplotlib.rc('font', **font) # pass in the font dict as kwar
import numpy as np
import chaospy as cp
import monte_carlo
from sensitivity_examples_nonlinear import generate_distributions
from sensitivity_examples_nonlinear import monte_carlo_sens_nonlin
from sensitivity_examples_nonlinear import analytic_sensitivity_coefficients
from sensitivity_examples_nonlinear import polynomial_chaos_sens
The Monte Carlo method (MCM) is probably the most widely applied method for variance based uncertainty quantification and sensitivity analysis. Monte Carlo methods are generally straightforward to use and may be applied to a wide variety of problems, as they require few assumptions about the model or quantity of interest and require no modifications of the model itself, i.e. the model may be used as a black box. The basic idea is to calculate statistics (mean, standard deviation, variance, Sobol indices) of $Y$ directly from a large number of sample evaluations of the black box model $y$.
Sample a set of input samples $\mathbf{z}^{(s)}$ from the input space $\Omega_\mathbf{Z}$ that is defined by the joint probability density function ${F_Z}$.
Evaluate the deterministic model $y(\mathbf{z})$ for each sample in $\mathbf{z}^{(s)}$ to produce a set of model outputs $y^{(s)}$.
Estimate all uncertainty measures and sensitivity indices from $y^{(s)}$.
For demonstration purposes we will use the same model as before:
# start the linear model
def linear_model(w, z):
    """Row-wise linear model: for each sample row, return sum_i w_i * z_i."""
    weighted = w * z          # element-wise products per sample row
    return np.sum(weighted, axis=1)
Once the model outputs have been computed the expectation and variance of the output are computed with the normal estimators.
Below we demonstrate how chaospy
may be used for sampling and numpy
for the statistics.
# start uq
# generate the distributions for the problem
Nrv = 4  # number of z-variables (and of w-variables); jpdf spans 2*Nrv dimensions
c = 0.5  # scaling constant entering the w-distribution parameters
# Per-variable parameter pairs handed to generate_distributions;
# presumably [location, scale]-style pairs — confirm in sensitivity_examples_nonlinear.
zm = np.array([[0., i] for i in range(1, Nrv + 1)])
wm = np.array([[i * c, i] for i in range(1, Nrv + 1)])
jpdf = generate_distributions(zm, wm)  # joint distribution over (z_1..z_Nrv, w_1..w_Nrv)
# 1. Generate a set of Xs
Ns = 20000  # number of Monte Carlo samples
Xs = jpdf.sample(Ns, rule='R').T # <- transform the sample matrix: rows = samples, cols = variables
# 2. Evaluate the model
Zs = Xs[:, :Nrv]  # first Nrv columns hold the z-samples
Ws = Xs[:, Nrv:]  # remaining Nrv columns hold the w-samples
Ys = linear_model(Ws, Zs)
# 3. Calculate expectation and variance
EY = np.mean(Ys)
VY = np.var(Ys, ddof=1) # NB: use ddof=1 for unbiased variance estimator, i.e /(Ns - 1)
print('E(Y): {:2.5f} and Var(Y): {:2.5f}'.format(EY, VY))
In our sensitivity_introduction notebook we calculated the sensitivity coefficients with the MCM in the following manner:
# sensitivity analytical values
Sa, Szw, Sta = analytic_sensitivity_coefficients(zm, wm)

# Monte Carlo
# Ns_mc = 1000000  # Number of samples mc
Ns_mc = 10000  # Number of samples mc
# calculate sensitivity indices with mc
A_s, B_s, C_s, f_A, f_B, f_C, Smc, Stmc = monte_carlo_sens_nonlin(Ns_mc, jpdf)

# compute with Polynomial Chaos
Ns_pc = 200  # number of regression samples for the polynomial chaos expansion
polynomial_order = 3
# calculate sensitivity indices with gpc
Spc, Stpc, gpce_reg = polynomial_chaos_sens(Ns_pc, jpdf, polynomial_order,
                                            return_reg=True)

# compare the computations
import pandas as pd

# Number of z-variables (= number of w-variables); jpdf spans both groups.
# NOTE(review): N_terms was previously undefined in this cell (NameError).
N_terms = len(jpdf) // 2
row_labels = ['X_' + str(x) for x in range(1, N_terms * 2 + 1)]
S = np.column_stack((Sa, Spc, Smc, Sta, Stpc, Stmc))
S_table = pd.DataFrame(S, columns=['Sa', 'Spc', 'Smc', 'Sta', 'Stpc', 'Stmc'],
                       index=row_labels)
print(S_table.round(3))

# Second order indices with gpc
S2 = cp.Sens_m2(gpce_reg, jpdf)  # second order indices with gpc
# print all second order indices
print(pd.DataFrame(S2, columns=row_labels, index=row_labels).round(3))

# sum all second order indices (upper triangle, so each pair is counted once)
SumS2 = np.sum(np.triu(S2))
print('\nSum Sij = {:2.2f}'.format(SumS2))
# sum all first and second order indices
print('Sum Si + Sij = {:2.2f}\n'.format(np.sum(Spc) + SumS2))

# compare nonzero second order indices with analytical indices;
# the interacting pairs are (z_i, w_i), i.e. column offset N_terms in S2
Szw_pc = np.array([S2[i, i + N_terms] for i in range(N_terms)])
# Fix: the column labels were swapped relative to the stacked data
# (Szw_pc comes first), and the error is now expressed in percent to match 'Error%'.
Szw_table = np.column_stack((Szw_pc, Szw, 100 * (Szw_pc - Szw) / Szw))
print(pd.DataFrame(Szw_table, columns=['Szw pc', 'Szw', 'Error%']).round(3))
# end second order
convergence_analysis = False  # flip to True to run the (slow) convergence study
if convergence_analysis:
    # Convergence analysis
    # NOTE(review): N_prms was previously undefined in this cell (NameError).
    N_prms = len(jpdf)  # total number of uncertain inputs (all z's and w's)

    # Convergence Monte Carlo with random sampling
    list_of_samples = np.array([10000, 50000, 100000, 500000, 1000000])
    s_mc_err = np.zeros((len(list_of_samples), N_prms))
    st_mc_err = np.zeros((len(list_of_samples), N_prms))
    # average the error over N_iter independent repetitions to smooth MC noise
    N_iter = 5
    print('MC convergence analysis:')
    for i, N_smpl in enumerate(list_of_samples):
        print(' N_smpl {}'.format(N_smpl))
        for j in range(N_iter):
            A_s, XB, XC, Y_A, Y_B, Y_C, S, ST = monte_carlo_sens_nonlin(N_smpl,
                                                                        jpdf,
                                                                        sample_method='R')
            # accumulate absolute error w.r.t. the analytic indices
            s_mc_err[i] += np.abs(S - Sa)
            st_mc_err[i] += np.abs(ST - Sta)
            print(' finished with iteration {} of {}'.format(1 + j, N_iter))
        s_mc_err[i] /= float(N_iter)
        st_mc_err[i] /= float(N_iter)

    # Plot results for monte carlo (summed over all indices)
    fig_random = plt.figure('Random sampling - average of indices')
    fig_random.suptitle('Random sampling - average of indices')
    ax = plt.subplot(1, 2, 1)
    plt.title('First order sensitivity indices')
    _ = plt.plot(list_of_samples / 1000, np.sum(s_mc_err, axis=1), '-')
    ax.set_yscale('log')
    _ = plt.ylabel('abs error')
    _ = plt.xlabel('number of samples [1e3]')
    ax1 = plt.subplot(1, 2, 2)
    plt.title('Total sensitivity indices')
    _ = plt.plot(list_of_samples / 1000, np.sum(st_mc_err, axis=1), '-')
    ax1.set_yscale('log')
    _ = plt.ylabel('abs error')
    _ = plt.xlabel('number of samples [1e3]')

    # Plot results for monte carlo, one curve per individual index
    fig_random = plt.figure('Random sampling')
    fig_random.suptitle('Random sampling')
    for l, (s_e, st_e) in enumerate(zip(s_mc_err.T, st_mc_err.T)):
        ax = plt.subplot(1, 2, 1)
        plt.title('First order sensitivity indices')
        plt.plot(list_of_samples / 1000, s_e, '-', label='S_{}'.format(l))
        ax.set_yscale('log')
        _ = plt.ylabel('abs error')
        _ = plt.xlabel('number of samples [1e3]')
        _ = plt.legend()
        ax1 = plt.subplot(1, 2, 2)
        plt.title('Total sensitivity indices')
        _ = plt.plot(list_of_samples / 1000, st_e, '-', label='ST_{}'.format(l))
        ax1.set_yscale('log')
        _ = plt.ylabel('abs error')
        _ = plt.xlabel('number of samples [1e3]')
        plt.legend()

    # Convergence Polynomial Chaos
    list_of_samples = np.array([140, 160, 200, 220])
    s_pc_err = np.zeros((len(list_of_samples), N_prms))
    st_pc_err = np.zeros((len(list_of_samples), N_prms))
    polynomial_order = 3
    # average the error over N_iter repetitions
    N_iter = 4
    print('PC convergence analysis:')
    poly = cp.orth_ttr(polynomial_order, jpdf)  # reuse one orthogonal basis
    for i, N_smpl in enumerate(list_of_samples):
        print(' N_smpl {}'.format(N_smpl))
        for j in range(N_iter):
            # calculate sensitivity indices
            # NOTE(review): assumes polynomial_chaos_sens accepts a precomputed
            # basis as its 4th positional argument — confirm its signature.
            Spc, Stpc = polynomial_chaos_sens(N_smpl, jpdf, polynomial_order, poly)
            s_pc_err[i] += np.abs(Spc - Sa)
            st_pc_err[i] += np.abs(Stpc - Sta)
            print(' finished with iteration {} of {}'.format(1 + j, N_iter))
        s_pc_err[i] /= float(N_iter)
        st_pc_err[i] /= float(N_iter)

    # Plot results for polynomial chaos (summed over all indices).
    # Fix: the x-axis holds raw sample counts here, so the label no longer says [1e3].
    fig_random = plt.figure('Polynomial Chaos - average of indices')
    fig_random.suptitle('Polynomial Chaos - average of indices')
    ax = plt.subplot(1, 2, 1)
    plt.title('First order sensitivity indices')
    _ = plt.plot(list_of_samples, np.sum(s_pc_err, axis=1), '-')
    ax.set_yscale('log')
    _ = plt.ylabel('abs error')
    _ = plt.xlabel('number of samples')
    ax1 = plt.subplot(1, 2, 2)
    plt.title('Total sensitivity indices')
    _ = plt.plot(list_of_samples, np.sum(st_pc_err, axis=1), '-')
    ax1.set_yscale('log')
    _ = plt.ylabel('abs error')
    _ = plt.xlabel('number of samples')

    # Plot results for polynomial chaos, one curve per individual index
    fig_random = plt.figure('Polynomial Chaos')
    fig_random.suptitle('Polynomial Chaos')
    for l, (s_e, st_e) in enumerate(zip(s_pc_err.T, st_pc_err.T)):
        ax = plt.subplot(1, 2, 1)
        plt.title('First order sensitivity indices')
        _ = plt.plot(list_of_samples, s_e, '-', label='S_{}'.format(l))
        ax.set_yscale('log')
        plt.ylabel('abs error')
        plt.xlabel('number of samples')
        plt.legend()
        ax1 = plt.subplot(1, 2, 2)
        plt.title('Total sensitivity indices')
        _ = plt.plot(list_of_samples, st_e, '-', label='ST_{}'.format(l))
        ax1.set_yscale('log')
        plt.ylabel('abs error')
        plt.xlabel('number of samples')
        plt.legend()

# # Convergence Monte Carlo with sobol sampling
# list_of_samples = np.array([10000, 50000, 100000, 500000, 1000000])
# s_mc_err = np.zeros((len(list_of_samples), N_prms))
# st_mc_err = np.zeros((len(list_of_samples), N_prms))
# # average over
# N_iter = 10
# for i, N_smpl in enumerate(list_of_samples):
#     for j in range(N_iter):
#         A_s, XB, XC, Y_A, Y_B, Y_C, S, ST = monte_carlo_sens(N_smpl,
#                                                              jpdf,
#                                                              sample_method='S')
#         s_mc_err[i] += np.abs(S - Sa)
#         st_mc_err[i] += np.abs(ST - Sta)
#
#         print('MC convergence analysis: N_smpl {} - finished with iteration {} of {}'.format(N_smpl, 1 + j, N_iter))
#     s_mc_err[i] /= float(N_iter)
#     st_mc_err[i] /= float(N_iter)
#
# fig_sobol = plt.figure('Sobol sampling')
# fig_sobol.suptitle('Sobol sampling')
# for l, (s_e, st_e) in enumerate(zip(s_mc_err.T, st_mc_err.T)):
#     ax = plt.subplot(1, 2, 1)
#     plt.title('First order sensitivity indices')
#     plt.plot(list_of_samples/1000, s_e, '-', label='S_{}'.format(l))
#     ax.set_yscale('log')
#     plt.ylabel('abs error')
#     plt.xlabel('number of samples [1e3]')
#     plt.legend()
#
#     ax1 = plt.subplot(1, 2, 2)
#     plt.title('Total sensitivity indices')
#     plt.plot(list_of_samples/1000, st_e, '-', label='ST_{}'.format(l))
#     ax1.set_yscale('log')
#     plt.ylabel('abs error')
#     plt.xlabel('number of samples [1e3]')
#     plt.legend()
#
# fig_random = plt.figure('Sobol sampling - average of indices')
# fig_random.suptitle('Sobol sampling - average of indices')
#
# ax = plt.subplot(1, 2, 1)
# plt.title('First order sensitivity indices')
# plt.plot(list_of_samples / 1000, np.sum(s_mc_err, axis=1), '-')
# ax.set_yscale('log')
# plt.ylabel('abs error')
# plt.xlabel('number of samples [1e3]')
#
# ax1 = plt.subplot(1, 2, 2)
# plt.title('Total sensitivity indices')
# plt.plot(list_of_samples / 1000, np.sum(st_mc_err, axis=1), '-')
# ax1.set_yscale('log')
# plt.ylabel('abs error')
# plt.xlabel('number of samples [1e3]')
plt.show()
plt.close()
The actual algorithm calculating the sensitivity analysis was hidden in this function call which did the magic for us: A_s, B_s, C_s, f_A, f_B, f_C, Smc, Stmc = monte_carlo_sens_nonlin(Ns_mc, jpdf)
Below we explain in greater detail Saltelli's algorithm which is used to compute the Sobol indices.
Calculating the sensitivity coefficients with MCM directly is computationally very expensive. To see this, consider how one would estimate $\operatorname{Var}(\mathbb{E}(Y|Z_i))$, which is the numerator in the Sobol indices, in a direct, brute force manner. Let $M$ be the number of evaluations needed to estimate the inner, conditional expected value $\mathbb{E}(Y|Z_i)$ for a fixed $Z_i$. To get an approximation of the outer variance, one would have to repeat this process over the whole range of $Z_i$, which could also amount to $\propto M$ evaluations. Finally, this would have to be done for all $r$ input random variables of $Y$. Consequently, the number of evaluations amounts to $\mathcal{O}(M^2 \, r)$. To get an impression of what this could mean, note that in many cases a reasonable $M$ could be $5000$, which would result in $M^2 = 25\,000\,000$ necessary evaluations!
Luckily, Saltelli came up with an algorithm to approximate the first order sensitivity coefficients using $M(p+2)$ evaluations in total. There are many adaptations and improvements of the algorithm available; here we will present the basic idea of the algorithm.
Use a sampling method to draw a set of input samples $\mathbf{z}^{(s)}$
Evaluate the deterministic model $y(\mathbf{z})$ for each sample
Estimate all sensitivity indices from $y^{(s)}$.
Thus, the blackbox function mentioned above, follows these steps:
# calculate sens indices of non additive model
def monte_carlo_sens_nonlin(Ns, jpdf, sample_method='R'):
    """Saltelli-style Monte Carlo sensitivity analysis for the linear model.

    Draws the A, B and C_i sample matrices from jpdf, evaluates the model on
    all of them, and estimates first order and total Sobol indices.
    Returns (XA, XB, XC, Y_A, Y_B, Y_C, S, ST).
    """
    n_parameters = len(jpdf)

    # Step 1: draw the Saltelli sample matrices
    sample_A, sample_B, sample_C = generate_sample_matrices_mc(
        Ns, n_parameters, jpdf, sample_method)

    # Step 2: run the model on every sample matrix
    out_A, out_B, out_C = evaluate_non_additive_linear_model(
        sample_A, sample_B, sample_C)

    # Step 3: estimate first order (S) and total (ST) indices
    first_order, total = calculate_sensitivity_indices_mc(out_A, out_B, out_C)

    return sample_A, sample_B, sample_C, out_A, out_B, out_C, first_order, total
In addition we create $P$ additional matrices $C_i$ of size $M \times P$, composed from matrix $A$ and matrix $B$. In a matrix $C_i$ all columns will have the same values as the $B$ matrix, except the $i$-th column, which will have the values of $A$:
This was implemented in the method:
A, B, C = generate_sample_matrices_mc(number_of_samples, number_of_parameters, joint_distribution, sample_method)
.
# sample matrices
def generate_sample_matrices_mc(Ns, number_of_parameters, jpdf, sample_method='R'):
    """Draw the Saltelli sample matrices A, B and C from the joint distribution.

    A and B are independent (Ns, number_of_parameters) sample blocks; C[i]
    equals B except that column i is taken from A.
    Returns (A, B, C) with C of shape (number_of_parameters, Ns, number_of_parameters).
    """
    # Draw both blocks in one call and split the result in half.
    all_samples = jpdf.sample(2 * Ns, sample_method).transpose()
    A = all_samples[:Ns, :]
    B = all_samples[Ns:, :]

    # Build the hybrid matrices: B with one column swapped in from A.
    C = np.empty((number_of_parameters, Ns, number_of_parameters))
    for column in range(number_of_parameters):
        C[column] = B
        C[column, :, column] = A[:, column]
    return A, B, C
In the second step we evaluate the model for samples in the matrices and save the results in vectors $Y_{\mathbf{A}}$, $Y_{\mathbf{B}}$ and $Y_{\mathbf{C_i}}$:
The corresponding python code for our example:
# model evaluation
def evaluate_non_additive_linear_model(X_A, X_B, X_C):
    """Evaluate linear_model for the Saltelli matrices A, B and all C_i.

    Each sample row holds the z-values in its first half and the w-values in
    its second half. Returns (Y_A, Y_B, Y_C) with Y_C of shape (Ns, N_prms).
    """
    Ns, N_prms = X_A.shape
    half = N_prms // 2  # first half of the columns: z, second half: w

    def _evaluate(samples):
        # Split a sample matrix into its z and w halves and run the model.
        z_part = samples[:, :half]
        w_part = samples[:, half:]
        return linear_model(w_part, z_part)

    # 1.-2. evaluate the A and B sample matrices
    Y_A = _evaluate(X_A)
    Y_B = _evaluate(X_B)

    # 3. evaluate each hybrid matrix C_i
    Y_C = np.empty((Ns, N_prms))
    for i in range(N_prms):
        Y_C[:, i] = _evaluate(X_C[i])
    return Y_A, Y_B, Y_C
In the final step the first order and total Sobol indices are estimated. Since the numerical approximation of all indices is quite demanding, approximations are used to speed up the process. Several estimators exist for both the first order and the total sensitivity index; the most common ones can be found in ([saltelli2010]).
The first order indices are defined as:
Both the numerator and the denominator are now approximated numerically, where the variance (numerator) is defined with:
with $f_0^2$ which is $\left(\mathbb{E}(Y)\right)^2$. For $f_0^2$ exist several approximations, two common are:
The conditional variance is approximated as:
Here the variance is estimated accordingly, but taking the matrix A:
here $f_0^2$ is approximated with:
And the conditional variance given all variables except $Z_i$ is approximated with:
Those equations are implemented in the following way:
# mc algorithm for variance based sensitivity coefficients
def calculate_sensitivity_indices_mc(y_a, y_b, y_c):
    """Estimate first order (s) and total (st) Sobol indices from MC outputs.

    Saltelli-type estimators: y_a and y_b are the model outputs for the
    independent sample matrices A and B, and y_c[..., i] holds the outputs for
    the hybrid matrix C_i (B with column i taken from A).

    Parameters
    ----------
    y_a, y_b : ndarray
        Model outputs, shape (Ns,) for a scalar quantity of interest or
        (n_y, Ns) for a vector-valued one.
    y_c : ndarray
        Outputs for the C_i matrices, shape (Ns, n_parameters) or
        (n_y, Ns, n_parameters).

    Returns
    -------
    s, st : ndarray
        First order and total indices, shape (n_parameters,) in the scalar
        case or (n_parameters, n_y) in the vector case.

    Raises
    ------
    ValueError
        If y_c is neither 2- nor 3-dimensional (previously this fell through
        and raised an UnboundLocalError at the return statement).
    """
    if len(y_c.shape) == 2:
        # single (scalar) output value per sample
        Ns, n_parameters = y_c.shape
        # for the first order index: f0^2 approximated by mean(y_a * y_b)
        f0sq_first = np.sum(y_a*y_b) / Ns
        y_var_first = np.sum(y_b**2.) / (Ns-1) - f0sq_first
        # for the total index: f0^2 approximated by mean(y_a)^2
        f0sq_total = (sum(y_a)/Ns)**2
        y_var_total = np.sum(y_a**2.) / (Ns-1) - f0sq_total

        s = np.zeros(n_parameters)
        st = np.zeros(n_parameters)
        for i in range(n_parameters):
            # first order index
            cond_var_X = np.sum(y_a*y_c[:, i]) / (Ns - 1) - f0sq_first
            s[i] = cond_var_X / y_var_first
            # total index
            cond_exp_not_X = np.sum(y_b*y_c[:, i]) / (Ns - 1) - f0sq_total
            st[i] = 1 - cond_exp_not_X / y_var_total
    elif len(y_c.shape) == 3:
        # vector-valued output: leading axis enumerates the n_y outputs
        n_y, Ns, n_parameters = y_c.shape
        # for the first order index
        f0sq_first = np.sum(y_a*y_b, axis=1) / Ns
        y_var_first = np.sum(y_b ** 2., axis=1) / (Ns - 1) - f0sq_first
        # for the total index
        f0sq_total = (np.sum(y_a, axis=1) / Ns) ** 2
        y_var_total = np.sum(y_a ** 2., axis=1) / (Ns - 1) - f0sq_total

        s = np.zeros((n_parameters, n_y))
        st = np.zeros((n_parameters, n_y))
        for i in range(n_parameters):
            # first order index
            cond_var_X = np.sum(y_a * y_c[:, :, i], axis=1) / (Ns - 1) - f0sq_first
            s[i, :] = cond_var_X / y_var_first
            # total index
            cond_exp_not_X = np.sum(y_b * y_c[:, :, i], axis=1) / (Ns - 1) - f0sq_total
            st[i, :] = 1 - cond_exp_not_X / y_var_total
    else:
        raise ValueError(
            'y_c must be 2- or 3-dimensional, got shape {}'.format(y_c.shape))
    return s, st