Exercises for Session 3

In [1]:
from __future__ import print_function
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats, optimize, integrate

Exercises 1, 2 and 3

In [2]:
from scipy.special import yv, y0_zeros  # Bessel fn of 2nd kind
    
def f(z):
    return yv(0, z)

x = np.arange(0, 20, 0.01)
plt.plot(x, f(x))
plt.hlines(0, 0, 20)
plt.axis(xmin=0, xmax=20)
plt.xlabel('$x$')
Out[2]:
<matplotlib.text.Text at 0x11676cf60>
In [3]:
def yv0_roots(n):
    i = 1
    prevr = 0.0
    t = 1.0
    dt = 1.0
    integral = 0.0
    rlist = []  # store individual roots for checking
    while i <= n:
        # This is not totally general, but for simplicity we can use the
        # observation that the roots are approx regularly separated
        # (actually root separation tends to pi as n tends to infinity)
        # and select starting points for fsolve which are spaced a
        # bit closer together (here a factor of ~3).
        r, info, flag, mesg = optimize.fsolve(f, t, full_output=True)
        r = r[0]
        deltar = abs(r - prevr)
        # need to be careful to reject solutions that have been
        # found previously or are too distant from starting point
        if flag == 1 and deltar > 1e-5 and abs(r - t) < 1.5*dt:
            # just add integral over new range, which is faster and more
            # accurate when there are many oscillations within full range
            a, aerr = integrate.quad(f, prevr, r)
            integral += a
            if i == n:
                print('Root {:d} is at {:.3f}'.format(i, r))
                print('Integral from zero to root {:d} is {:.3g}'.format(i, integral))
            i += 1
            prevr = r
            rlist.append(r)
        t += dt
    return np.array(rlist)
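
As an aside, a bracketing approach avoids having to filter fsolve's output: consecutive zeros of Y0 are separated by a little under pi, so sign changes of f on a grid finer than that separation each bracket exactly one root. A minimal sketch (the 0.5 starting point and step of 1.0 are ad hoc choices, not part of the exercise):

def yv0_roots_brentq(n):
    # collect the first n zeros of Y0 by scanning for sign changes
    roots = []
    t = 0.5  # start past the origin, where Y0 diverges to -infinity
    while len(roots) < n:
        if f(t) * f(t + 1.0) < 0:  # a sign change brackets exactly one root
            roots.append(optimize.brentq(f, t, t + 1.0))
        t += 1.0
    return np.array(roots)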
In [4]:
n = 20
roots = yv0_roots(n)
roots
Root 20 is at 60.478
Integral from zero to root 20 is 0.103
Out[4]:
array([  0.89357697,   3.95767842,   7.08605106,  10.22234504,
        13.36109747,  16.50092244,  19.6413097 ,  22.78202805,
        25.92295765,  29.06403025,  32.20520412,  35.34645231,
        38.48775665,  41.62910447,  44.77048661,  47.91189633,
        51.05332855,  54.19477936,  57.3362457 ,  60.47772516])
In [5]:
# check roots
if n < 140:
    # this uses a hardcoded method which only applies up to n=139
    rtrue = y0_zeros(n)[0]
    ok = (np.abs(rtrue - roots) < 1e-5).all()
    print('Checks OK:', ok)
else:
    # check the interval between each root is sensible (tested to n = 10000)
    d = roots[1:] - roots[:-1]
    ok = (d > 3) & (d < np.pi + 1e-5)
    print('Checks OK:', ok.all())
    if not ok.all():
        bad = np.logical_not(ok)
        print(bad.nonzero()[0], d[bad])
Checks OK: True
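
The piecewise accumulation in yv0_roots can also be cross-checked against a single quad call over the whole range. For such an oscillatory integrand the default subdivision limit needs raising (200 below is an arbitrary choice), and quad may still warn about the integrable singularity at zero:

total, err = integrate.quad(f, 0, roots[-1], limit=200)
print('single-call integral: {:.3g}'.format(total))  # compare with 0.103 above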

Exercises 4, 5 and 6

In [6]:
# create 100 samples from a Normal distribution
# with mean 5 and standard deviation 1
np.random.seed(54321)
n = 100
m = 5
s = 1
d = stats.norm.rvs(m, s, size=n)
In [7]:
# histogram the data - this can be done with numpy,
# or simultaneously with plotting with pyplot
#hy, hx = np.histogram(d, bins=10)  # note: returns counts, then bin edges
hy, hx, p = plt.hist(d, bins=10, histtype='stepfilled')
In [8]:
# get the width of the bins and hence normalisation
w = hx[1]-hx[0]
a = n*w
# get the centres of the bins
hxc = 0.5*(hx[:-1] + hx[1:])
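
A quick consistency check on the normalisation (assuming, as here, that no samples fall outside the binned range): the total histogram area w*hy.sum() should equal a = n*w.

print(w*hy.sum(), a)  # both n*w when every sample lands in a bin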
In [9]:
# returns the value of a Gaussian with specified amplitude,
# mean and sigma, evaluated at x (just for convenience)
def g(p, a, x):
    m, s = p
    return a * stats.norm.pdf(x, m, s)
In [10]:
# returns the (unweighted) sum of squared residuals of the specified
# Gaussian for data (x, y), used here as the chisq statistic
def f(p, a, x, y):
    return ((y - g(p, a, x))**2).sum()
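
Note that this is an unweighted least-squares statistic. A conventional chi-square for histogram counts would weight each bin by its variance, which is roughly the count itself under Poisson statistics; a sketch of that variant (the floor of 1 guards against empty bins and is an ad hoc choice):

def chisq_weighted(p, a, x, y):
    # Poisson-weighted: the variance of a bin count is ~ the count
    return ((y - g(p, a, x))**2 / np.maximum(y, 1)).sum()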
In [11]:
# find parameter set which minimises chisq
results = optimize.minimize(f, (3, 3), args=(a, hxc, hy))
results
Out[11]:
      fun: 101.20417934635331
 hess_inv: array([[  4.10260182e-04,   1.14938238e-05],
       [  1.14938238e-05,   3.01092772e-04]])
      jac: array([  0.00000000e+00,  -1.90734863e-06])
  message: 'Optimization terminated successfully.'
     nfev: 481
      nit: 9
     njev: 120
   status: 0
  success: True
        x: array([ 5.26031668,  0.8557135 ])
In [12]:
# Specify parameter bounds (sigma must stay positive); with bounds given,
# minimize switches to a bound-constrained method (L-BFGS-B here)
results = optimize.minimize(f, (3, 3), args=(a, hxc, hy),
                            bounds=((None, None), (0.01, None)))
results
Out[12]:
      fun: 101.20417934635324
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ -7.10542736e-06,   8.52651283e-06])
  message: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'
     nfev: 81
      nit: 15
   status: 0
  success: True
        x: array([ 5.26031668,  0.8557135 ])
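
scipy also provides optimize.curve_fit for exactly this kind of least-squares model fit; it wraps the parameter handling and returns a covariance estimate. A sketch using the same binned data (the model closes over the global a, as elsewhere in this notebook):

def model(x, m, s):
    # Gaussian with fixed amplitude a, free mean and sigma
    return a * stats.norm.pdf(x, m, s)

popt, pcov = optimize.curve_fit(model, hxc, hy, p0=(3, 3))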
In [13]:
# get fit parameters
r1 = results.x
In [14]:
# use the MLE method provided in stats (this fits the samples directly,
# rather than the binned counts)
r2 = stats.norm.fit(d)
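
For a Normal distribution the MLE has a closed form, which is what norm.fit returns: the sample mean and the biased (divide-by-n) standard deviation. A quick check:

m_mle = d.mean()
s_mle = np.sqrt(((d - m_mle)**2).mean())  # ddof=0, i.e. divide by n
print(m_mle, s_mle)  # should reproduce r2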
In [15]:
# plot true function and fit results over histogram
x = np.arange(m-3*s, m+3*s, s/10.0)
plt.hist(d, bins=hx, histtype='stepfilled',
         alpha=0.25, label='sample')
plt.plot(x, g((m, s), a, x), ':', label='true', lw=3)
plt.plot(x, g(r1, a, x), label='optimize.minimize', lw=2)
plt.plot(x, g(r2, a, x), label='stats.norm.fit', lw=2)
plt.legend(prop={'size':'small'})

# neatly print results to screen
print('true function:     mean = {:5.3f}, sigma = {:5.3f}'.format(m, s))
print('optimize.minimize: mean = {:5.3f}, sigma = {:5.3f}'.format(*r1))
print('stats.norm.fit:    mean = {:5.3f}, sigma = {:5.3f}'.format(*r2))
true function:     mean = 5.000, sigma = 1.000
optimize.minimize: mean = 5.260, sigma = 0.856
stats.norm.fit:    mean = 5.223, sigma = 0.939
In [16]:
# Additional note:
# trick for drawing a binned function without original sample
# add zero bins at each end of distribution
hhxc = np.concatenate(([hxc[0]-w], hxc, [hxc[-1]+w]))
hhy = np.concatenate(([0], hy, [0]))
plt.plot(hhxc, hhy, drawstyle='steps-mid', label='sample')
plt.fill_between(hhxc, 0, hhy, step='mid', alpha=0.25)
_ = plt.axis(xmin=2, xmax=9, ymin=0, ymax=30)