# Bias

In [1]:
# IPython magic: draw matplotlib figures inline, in the output area of the cell that creates them.
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss
import seaborn as sns

# Notebook-wide plot styling: ggplot theme and a 14 x 10 inch default figure.
plt.style.use('ggplot')
plt.rc('figure', figsize=(14, 10))


## Location and scale

In [3]:
# Compare normal densities that differ in location (mu) and scale (sigma).
x = np.linspace(-10.0, 10.0, 1000)

# One entry per curve: (loc, scale, fill colour, legend label).
# Labels are raw strings so that LaTeX commands such as \mu and \sigma are
# not parsed as (invalid) Python escape sequences.
curves = [
    ( 0.0, 1.0, 'b', r"$\mu = 0.0, \sigma = 1.0$"),
    ( 2.0, 1.0, 'r', r"$\mu = 2.0, \sigma = 1.0$"),
    ( 0.0, 2.0, 'g', r"$\mu = 0.0, \sigma = 2.0$"),
    (-1.0, 0.6, 'y', r"$\mu =-1.0, \sigma = 0.6$"),
]

for loc, scale, colour, label in curves:
    density = ss.norm.pdf(x, loc=loc, scale=scale)
    plt.fill(x, density, label=label, color=colour, alpha=0.6, linewidth=3.0)

plt.title(r"Normal distribution for different $\mu$ and $\sigma$")
plt.legend();


## Probability

In [4]:
# Standard normal density evaluated on a grid over [-3, 3].
x = np.linspace(-3.0, 3.0, 1000)
y = ss.norm.pdf(x, loc=0.0, scale=1.0)

# Grid points lying strictly inside the highlighted interval.
lower, upper = -1.0, 1.4
inside = (lower < x) & (x < upper)
xseg, yseg = x[inside], y[inside]

# Whole curve, the shaded area under it on the interval, and a dotted
# vertical marker at each end of the interval.
plt.plot(x, y, color='k', alpha=0.5)
plt.fill_between(xseg, yseg, color='b', alpha=0.5)
for bound in (lower, upper):
    plt.axvline(x=bound, color='grey', linestyle=':')

# Annotate the shaded area with the integral it represents.
formula = r'$P (a \leq X \leq b) = \int_a^b \frac{1}{\sqrt{2\pi\sigma^2} } e^{ -\frac{(x-\mu)^2}{2\sigma^2} } \, dx$'
plt.text(0.2, 0.15, formula, horizontalalignment='center', size=17)

# Baseline along y = 0.
plt.axhline(y=0.0, color='black', linestyle='-')

plt.title("Probability of $x$ between $a$ and $b$");

In [5]:
# Standard normal density with the 1-, 2- and 3-sigma bands shaded.
x = np.linspace(-5.0, 5.0, 10000)
plt.plot(x, ss.norm.pdf(x, loc=0.0, scale=1.0), 'k-', lw=1.0)

# One entry per shaded band: (lower, upper, colour, legend label or None).
# Matching bands on either side of the mean share a colour; only one band of
# each colour carries a legend label. Labels are raw strings so that \sigma
# and \% reach matplotlib unchanged instead of being parsed as (invalid)
# Python escape sequences.
bands = [
    (-5.0, -3.0, 'y', None),
    (-3.0, -2.0, 'r', None),
    (-2.0, -1.0, 'g', None),
    (-1.0,  1.0, 'b', r"$1 \sigma = 68.27\%$"),
    ( 1.0,  2.0, 'g', r"$2 \sigma = 95.45\%$"),
    ( 2.0,  3.0, 'r', r"$3 \sigma = 99.73\%$"),
    ( 3.0,  5.0, 'y', None),
]

for lower, upper, colour, label in bands:
    xseg = x[np.logical_and(lower <= x, x <= upper)]
    plt.fill_between(xseg, ss.norm.pdf(xseg), color=colour, alpha=0.5,
                     linewidth=0, label=label)

plt.title("Normal Distribution - Probability Density Function")
plt.legend();


## Sampling distribution

In [6]:
# Print floats with three decimals in a six-character field.
np.set_printoptions(formatter={'float': "{0:6.3f}".format})

sampsize = 10    # number of observations in each sample
nosamps = 1000   # number of samples drawn

# Fix the seed so that Restart & Run All reproduces the same draws.
# NOTE: outputs recorded before the seed was introduced will differ from a re-run.
SEED = 0
np.random.seed(SEED)

# One row per sample, one column per observation, drawn from N(0, 1).
samp = np.random.standard_normal((nosamps, sampsize))

print(samp)

[[ 0.473  0.342 -1.088 ... -0.103  0.332  0.327]
[-0.159 -0.917  0.213 ...  0.645  1.101 -1.220]
[-1.581  0.369  0.565 ... -0.689  0.547 -1.267]
...
[-0.826 -0.147 -0.641 ...  0.508 -0.303  0.708]
[ 0.292  0.795 -0.420 ... -0.036  0.512 -1.995]
[ 0.835 -0.075 -0.684 ...  1.716  0.502  1.194]]

In [7]:
# Mean of each sample: add up every row, then divide by the sample size.
row_totals = np.sum(samp, axis=1)
mean = row_totals / sampsize
print(mean)

[ 0.030 -0.096 -0.529  0.188 -0.198 -0.141 -0.106  0.214 -0.385  0.119
0.059  0.399  0.260  0.456  0.142  0.258 -0.091  0.259  0.397  0.265
0.296  0.086  0.115 -0.071 -0.304  0.097 -0.158 -0.265  0.246 -0.500
-0.383 -0.143  0.123  0.062  0.692 -0.176 -0.062  0.439 -0.055  0.429
-0.529 -0.266 -0.052  0.282 -0.188 -0.446  0.176  0.289  0.292  0.381
0.614  0.587  0.386 -0.043 -0.093  0.317 -0.105 -0.108 -0.161 -0.099
-0.022 -0.597  0.114  0.109  0.026 -0.469  0.377 -0.127  0.141  0.197
0.407  0.777 -0.099  0.343 -0.275  0.058 -0.218  0.058 -0.312  0.279
0.006  0.422  0.053 -0.125 -0.647 -0.206 -0.070 -0.325  0.827 -0.048
0.037 -0.374 -0.238  0.632 -0.254  0.098  0.387 -0.393  0.150  0.254
-0.330 -0.076  0.214  0.014  0.326  0.121 -0.154  0.125  0.214  0.347
0.339 -0.086 -0.326 -0.008 -0.398 -0.237  0.530 -0.158  0.390 -0.529
-0.248 -0.632 -0.136  0.221  0.029  0.267  1.002  0.359  0.208 -0.460
-0.077 -0.365 -0.256 -0.044 -0.240  0.020  0.626  0.406  0.277 -0.142
-0.393 -0.278 -0.071 -0.164  0.137 -0.071  0.177  0.143 -0.028  0.100
-0.154 -0.522 -0.103  0.190  0.477  0.104  0.334 -0.103 -0.521 -0.333
-0.510  0.637  0.124 -0.199 -0.073  0.236 -0.228 -0.341 -0.010  0.275
0.032  0.443  0.337  0.731  0.095  0.315 -0.087  0.768  0.219  0.263
-0.158  0.225 -0.100  0.085  0.501 -0.371 -0.176 -0.388 -0.164 -0.370
-0.042  0.241  0.792  0.343 -0.311  0.123 -0.206 -0.140 -0.411  0.188
-0.200 -0.072 -0.121  0.231 -0.696  0.137  0.735  0.646 -0.705 -0.512
-0.365 -0.139 -0.094  0.071  0.223  0.423  0.090  0.016  0.064  0.236
-0.530  0.709 -0.396  0.153  0.276  0.133 -0.299  0.378 -0.052 -0.055
0.176  0.307  0.198 -0.003  0.149 -0.045 -0.099  0.089 -0.692 -0.137
-0.486  0.242 -0.268  0.070  0.091  0.321  0.391  0.063  0.380  0.007
0.128  0.255 -0.784  0.218 -0.047 -0.767  0.434  0.677 -0.451 -0.465
0.164  0.299  0.204  0.161  0.489 -0.372  0.147 -0.176 -0.091 -0.096
-0.060  0.095  0.188  0.140 -0.158  0.191  0.215  0.133  0.069 -0.220
0.276  0.163 -0.173  0.291  0.328 -0.644 -0.127 -0.445  0.371  0.136
0.343  0.457  0.667 -0.454  0.430  0.048  0.136 -0.082 -0.216  0.188
0.599 -0.649 -0.024  0.324  0.682  0.886 -0.410  0.174  0.166 -0.118
0.164 -0.191  0.532 -0.039 -0.195 -0.019 -0.416 -0.033  0.309  0.425
0.303  0.110 -0.197  0.322  0.093  0.160 -0.271 -0.659 -0.404  0.396
0.268 -0.328 -0.314  0.149 -0.107 -0.444  0.135 -0.031 -0.560 -0.171
-0.143  0.825  0.090  0.028 -0.329  0.196 -0.131  0.049 -0.126  0.345
-0.152  0.805 -0.072 -0.163 -0.018  0.011  0.497 -0.612 -0.264 -0.396
-0.161  0.554  0.452  0.225 -0.699  0.122 -0.192 -0.140 -0.047  0.488
0.662  0.351 -0.020 -0.032 -0.165  0.537  0.094  0.036 -0.616  0.111
0.225  0.121 -0.478  0.324  0.319  0.168  0.322 -0.335  0.672 -0.369
-0.616  0.609  0.227  0.121  0.125  0.150  0.125  0.058 -0.072  0.137
-0.138  0.320  0.285  0.050  0.113 -0.206  0.231  0.158  0.072  0.689
0.170  0.151 -0.072 -0.305  0.130  0.026 -0.022  0.078  0.300  0.254
0.376 -0.007  0.284  0.696  0.305  0.105 -0.326 -0.048  0.245 -0.719
-0.039 -0.066  0.156  0.338 -0.351  0.250  0.441 -0.291  0.057  0.074
-0.529  0.324 -0.337  0.325 -0.488 -0.302 -0.134  0.317 -0.281 -0.205
-0.430 -0.092 -0.380  0.360  0.078 -0.103  0.730  0.533 -0.025  0.454
0.007  0.434 -0.444  0.374  0.713  0.354  0.236  0.358 -0.412 -0.225
-0.577  0.004 -0.061  0.289  0.283 -0.430 -0.447 -0.354 -0.371  0.828
0.622 -0.421  0.349  0.258 -0.097 -0.267  0.102  0.158  0.297 -0.289
-0.228 -0.478 -0.222 -0.206  0.267 -0.325  0.108 -0.590 -0.036  0.213
-0.193  0.569  0.011  0.210  0.224 -0.576  0.120  0.444 -0.285 -0.044
-0.309  0.062 -0.168 -0.168  0.201  0.239 -0.133 -0.297  0.317  0.046
0.101 -0.430  0.288  0.019 -0.040  0.130  0.217  0.082  0.361 -0.052
-0.233  0.167  0.056 -0.174 -0.127  0.062  0.289 -0.267  0.526 -0.220
-0.049  0.070  0.005 -0.209 -0.429 -0.044 -0.281  0.301 -0.139 -0.738
0.065 -0.469  0.726  0.353  0.505 -0.031 -0.308  0.506 -0.170 -0.510
0.665  0.046 -0.539  0.256 -0.117 -0.287  0.356  0.026  0.173  0.460
0.296 -0.010  0.007 -0.195  0.144 -0.267  0.062 -0.377 -0.086  0.394
-0.400  0.293 -0.171  0.457  0.236 -0.232  0.168 -0.587 -0.204 -0.176
0.187 -0.895  0.655 -0.887  0.061  0.309 -0.421 -0.004 -0.337 -0.238
0.013  0.403 -0.090 -0.317 -0.114 -0.438  0.251  0.110  0.246  0.175
0.325 -0.308  0.313 -0.234 -0.062  0.502 -0.091 -0.565 -0.030  0.171
0.054  0.066 -0.260 -0.065  0.872  0.306 -0.133 -0.192 -0.160  0.244
-0.168  0.243 -0.267  0.235  0.322  0.020  0.253  0.381 -0.126  0.447
-0.128 -0.444  0.269 -0.222  0.110  0.207  0.058  0.096  0.343  0.402
-0.294 -0.050  0.393  0.336 -0.082  0.373  0.135 -0.266  0.146  0.255
-0.398  0.078 -0.714 -0.433  0.084  0.630 -0.169  0.312 -0.174  0.104
-0.111  0.193  0.016  0.234  0.366 -0.223  0.122  0.055  0.221 -0.050
0.273  0.385 -0.216 -0.028 -0.213  0.054  0.409  0.349  0.003  0.775
-0.160  0.237 -0.314  0.166 -0.194  0.132 -0.019  0.126 -0.049  0.411
-0.261 -0.112  0.429 -0.670 -0.381 -0.013 -0.164 -0.114 -0.088  0.191
0.268  0.271  0.536 -0.500 -0.127  0.338  0.220 -0.157 -0.053 -0.123
-0.541 -0.145  0.356 -0.030 -0.248  0.169 -0.462 -0.100  0.577  0.063
-0.392 -0.390 -0.141  0.449  0.165  0.195  0.011  0.596 -0.131  0.378
0.543  0.032 -0.066  0.045 -0.370 -0.009 -0.289  0.143 -0.258  0.485
-0.069 -0.480 -0.239  0.101 -0.136 -0.191  0.130 -0.094  0.183  0.028
0.339 -0.352 -0.177 -0.116 -0.061  0.095  0.208 -0.436 -0.286 -0.308
-0.150  0.074  0.329  0.207 -0.380  0.064  0.206  0.223  0.101 -0.369
-0.354 -0.230  0.021  0.387  0.346  0.473  0.193 -0.187  0.062  0.589
-0.274 -0.126  0.093  0.085 -0.382  0.414 -0.307  0.006  0.379  0.006
0.184 -0.023 -0.296 -0.260 -0.465 -0.301  0.064 -0.365  0.364  0.347
0.105 -0.175  0.118  0.067 -0.734 -0.512 -0.380 -0.009  0.065  0.306
-0.124  0.168 -0.045 -0.559 -0.470 -0.209 -0.053  0.028  0.056 -0.331
0.236 -0.074  0.494 -0.247  0.285 -0.475 -0.022  0.098 -0.460  0.144
0.291 -0.610 -0.110 -0.299 -0.100 -0.375 -0.384 -0.074 -0.001 -0.274
0.098  0.283 -0.187 -0.266 -0.091 -0.254  0.279  0.146 -0.264 -0.182
0.229  0.675 -0.166 -0.069 -0.108  0.459 -0.251 -0.758  0.478 -0.106
0.235  0.037 -0.329 -0.220 -0.284  0.436  0.574 -0.028  0.627 -0.571
0.624 -0.301 -0.671 -0.032 -0.491  0.200  0.376 -0.318 -0.610 -0.332
-0.502  0.057 -0.071  1.048  0.775 -0.028  0.607  0.283  0.018 -0.527
0.563  0.069 -0.389 -0.370 -0.598 -0.086  0.407 -0.103  0.336 -0.079
0.209 -0.009  0.036  0.425 -0.354  0.268  0.705  0.339  0.259 -0.355
-0.393 -0.628  0.679 -0.006  0.181  0.434 -0.160 -0.041  0.660 -0.471
-0.081  0.113  0.344 -0.607  0.121  0.108 -0.438  0.781  0.244  0.157
0.110 -0.281 -0.119 -0.191  0.324  0.210 -0.637  0.509 -0.080 -0.239
-0.140  0.437 -0.648  0.083 -0.463  0.728  0.131 -0.477  0.697 -0.122
0.047  0.387  0.440  0.261 -0.077  0.468 -0.629 -0.549  0.244 -0.209
0.338  0.105  0.304 -0.332  0.259  0.408  0.146  0.272 -0.042  0.436
-0.017  0.314  0.188 -0.444  0.233 -0.012 -0.212 -0.274  0.301  0.414
-0.366 -0.001  0.358 -0.339 -0.120 -0.060  0.072 -0.122 -0.359  0.424]

In [8]:
# Variance of each sample around its own mean, dividing by n
# (the uncorrected estimator).
deviations = samp - mean.reshape(-1, 1)
vari = (deviations ** 2).sum(axis=1) / sampsize

# Average of the uncorrected variances over all samples.
mean_vari = np.sum(vari) / nosamps
print(mean_vari)

# Same average rescaled by n / (n - 1), i.e. Bessel's correction:
# https://en.wikipedia.org/wiki/Bessel%27s_correction
bessel_factor = sampsize / (sampsize - 1.0)
print(mean_vari * bessel_factor)

0.9026900725759749
1.002988969528861

In [9]:
# Distribution of the per-sample variances (histogram with a KDE overlay).
# sns.distplot is deprecated in newer seaborn releases; use histplot when the
# installed seaborn provides it and fall back to distplot otherwise.
if hasattr(sns, "histplot"):
    ax = sns.histplot(vari, kde=True, stat="density")
else:
    ax = sns.distplot(vari)

ax.set(title="Distribution of per-sample variances",
       xlabel="Sample variance", ylabel="Density");

C:\Users\mclou\Anaconda3\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use arr[tuple(seq)] instead of arr[seq]. In the future this will be interpreted as an array index, arr[np.array(seq)], which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval