%run ../../common_functions/import_all.py
from scipy import optimize
from scipy.integrate import quad, odeint
from scipy.interpolate import interp1d
from scipy.signal import detrend
from scipy.spatial import distance
from matplotlib.legend_handler import HandlerLine2D
from functools import partial
from ipywidgets import interact
from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython
config_ipython()
setup_matplotlib()
set_css_style()
dataset = '../../datasets/oldfaithful.txt'
Some high-level notes on these two libraries, not intended to be comprehensive, plus some function divertissements.
optimize module: fit and solve equations
stats module: statistical computations
interpolate module: interpolation between points
Note: some of the material presented here comes from the Scipy lectures, http://www.scipy-lectures.org/index.html
Divertissements with functions
Using the `interact` widget to visualise how a function changes with its parameters.
"Num" stands for "numerical", this is a library devoted to numerical computation and it's a building block of many other projects.
These objects constitute the basic blocks for manipulation
# NOTE: np.array is a function that creates an ndarray (the n-dimensional array type); use np.array rather than instantiating ndarray directly
# Vector: an array
print('* A vector')
vec = np.array([1, 2, 3, 4, 5])
print(' Dimension %d and shape %s: %s' %(vec.ndim, vec.shape, vec))
print(' Slice of vector: ', vec[2:4])
print(' Slice of vector with step: ', vec[1:4:2])
print(' Slice with newaxis on column: ', vec[:, np.newaxis])
print(' Slice with newaxis on row: ', vec[np.newaxis, :])
* A vector Dimension 1 and shape (5,): [1 2 3 4 5] Slice of vector: [3 4] Slice of vector with step: [2 4] Slice with newaxis on column: [[1] [2] [3] [4] [5]] Slice with newaxis on row: [[1 2 3 4 5]]
# Matrix, a 2D array
print('* A matrix')
mat = np.array([[1,2,3, 0], [2,3,4, 1], [4, 0, 1, 0]])
print(' This is an ndarray of dimension %d and shape %s:\n %s' %(mat.ndim, mat.shape, mat))
print(' It contains %d elements (size)' %mat.size)
print(' Its element on row 2, col 3 is %d' %mat[1,2])
print(' A slice of it is (second column)', mat[:,1])
* A matrix This is an ndarray of dimension 2 and shape (3, 4): [[1 2 3 0] [2 3 4 1] [4 0 1 0]] It contains 12 elements (size) Its element on row 2, col 3 is 4 A slice of it is (second column) [2 3 0]
# Reshaping, resizing and flattening
print('* Reshaping and resizing', ['t_red'])
a = np.arange(6)
print(' Original vector: ', a)
print(' Reshaping vector to shape (2, 3) (change shape, return reshaped ndarray): ')
a.reshape((2, 3))
print(' Resizing vector to shape (2, 2) (change shape and size, do in place and return None): ')
a
print(' Original matrix: ', mat)
print(' Flattened matrix: ', mat.ravel())
* Reshaping and resizing ['t_red'] Original vector: [0 1 2 3 4 5] Reshaping vector to shape (2, 3) (change shape, return reshaped ndarray):
array([[0, 1, 2], [3, 4, 5]])
Resizing vector to shape (2, 2) (change shape and size, do in place and return None):
array([0, 1, 2, 3, 4, 5])
Original matrix: [[1 2 3 0] [2 3 4 1] [4 0 1 0]] Flattened matrix: [1 2 3 0 2 3 4 1 4 0 1 0]
# Matrices of zeros and ones
print('* Matrices of zeros and ones', ['t_red'])
np.zeros((3, 4))
np.ones((1, 5))
* Matrices of zeros and ones ['t_red']
array([[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]])
array([[1., 1., 1., 1., 1.]])
# Setting the vars to use
print('* Vectors and matrices to use')
v = np.array([1, 2, 3])
A = np.array([[1,2, 0], [2,3,4]])
B = np.array([[0, 1], [2, 0], [1, 1]])
C = np.array([[3, 5, 1], [0, 0, 2]])
print(' vector v is', v)
print(' Matrix A is')
print(A)
print(' Matrix B is')
print(B)
print(' Matrix C is')
print(C)
* Vectors and matrices to use vector v is [1 2 3] Matrix A is [[1 2 0] [2 3 4]] Matrix B is [[0 1] [2 0] [1 1]] Matrix C is [[3 5 1] [0 0 2]]
# matrix transpose
print('* Matrix and transpose')
print(' Transpose of A is\n', A.T)
* Matrix and transpose Transpose of A is [[1 2] [2 3] [0 4]]
# Some Matrix properties of elements
print('* Properties of matrix elements')
print(' Sum, mean, std of elements: ', A.sum(), A.mean(), A.std())
print(' Sum of A on axis 0: ', A.sum(axis=0))
print(' Sum of A on axis 1: ', A.sum(axis=1))
* Properties of matrix elements Sum, mean, std of elements: 12 2.0 1.2909944487358056 Sum of A on axis 0: [3 5 4] Sum of A on axis 1: [3 9]
# Product of matrices
print('* Matrix product')
print(' Product A * B is')
print(np.dot(A, B))
* Matrix product Product A * B is [[ 4 1] [10 6]]
# scalar * vector; scalar * matrix
print('* Multiplication and sum with scalar (v and A)')
print(' 2 * v: ', 2 * v)
print(' 2 * A: ', 2 * A)
print(' v + 1: ', v + 1)
print(' A + 1: ', A + 1)
* Multiplication and sum with scalar (v and A) 2 * v: [2 4 6] 2 * A: [[2 4 0] [4 6 8]] v + 1: [2 3 4] A + 1: [[2 3 1] [3 4 5]]
# Matrix arithmetics
print('* Arithmetics with matrices')
print(' Summing A and C')
print(A + C)
print(' Subtracting C from A')
print(A - C)
* Arithmetics with matrices Summing A and C [[4 7 1] [2 3 6]] Subtracting C from A [[-2 -3 -1] [ 2 3 2]]
# sorting a matrix
print('* Sorting matrix')
A_copy = A.copy()
A.sort()
print(' Sorting A: ', A)
A = A_copy.copy()
A.sort(axis=0)
print(' Sorting A on axis 0', A)
A = A_copy.copy()
A.sort(axis=1)
print(' Sorting A on axis 1', A)
* Sorting matrix Sorting A: [[0 1 2] [2 3 4]] Sorting A on axis 0 [[1 2 0] [2 3 4]] Sorting A on axis 1 [[0 1 2] [2 3 4]]
# array shuffling at random
print('* Randomly shuffling array:')
np.random.shuffle(v) # shuffles and returns None
print(v)
* Randomly shuffling array: [3 2 1]
# set the vars to use
v = np.array([1, 2, 1, 1])
A = np.array([[1, 2, 3], [0, 1, 0], [0, 2, 1]])
# determinant & co
print('* Determinant, trace, inverse, norm')
print(' Determinant of matrix A: ', np.linalg.det(A))
print(' Trace of matrix A:', np.trace(A))
print(' Inverse of matrix A: ', np.linalg.inv(A))
print(' Norm of vector v and of matrix A:', np.linalg.norm(v), np.linalg.norm(A))
* Determinant, trace, inverse, norm Determinant of matrix A: 1.0 Trace of matrix A: 3 Inverse of matrix A: [[ 1. 4. -3.] [ 0. 1. 0.] [-0. -2. 1.]] Norm of vector v and of matrix A: 2.6457513110645907 4.47213595499958
# eigenthings
print('* Eigenvalues/Eigenvectors')
print(' Eigenvalues and eigenvectors of matrix A:', np.linalg.eig(A))
* Eigenvalues/Eigenvectors Eigenvalues and eigenvectors of matrix A: (array([1., 1., 1.]), array([[ 1.00000000e+00, -1.00000000e+00, 1.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 8.21730110e-33], [ 0.00000000e+00, 7.40148683e-17, -7.40148683e-17]]))
# SVD
print('* Single Value Decomposition')
print(' SVD of matrix A:', np.linalg.svd(A))
* Single Value Decomposition SVD of matrix A: (array([[ 0.86349489, 0.47752005, -0.16233045], [ 0.15573818, -0.55857386, -0.81470293], [ 0.47971053, -0.67821077, 0.55669378]]), array([4.27194722, 1.3109472 , 0.17856195]), array([[ 0.20213145, 0.66530528, 0.71868753], [ 0.36425575, -0.73226084, 0.57542317], [-0.9090988 , -0.14547494, 0.39035422]]))
# System of linear equations
A = np.array([[1, 2], [3, 1]])
b = np.array([0, 1])
print('* Linear systems')
print(' Solving linear system Ax = b:', np.linalg.solve(A, b))
print(' Least Square solution for Ax = b: ', np.linalg.lstsq(A, b))
* Linear systems Solving linear system Ax = b: [ 0.4 -0.2] Least Square solution for Ax = b: (array([ 0.4, -0.2]), array([], dtype=float64), 2, array([3.61803399, 1.38196601]))
/Users/martina/Desktop/Mallzee/repos/plantation/venv/lib/python3.7/site-packages/ipykernel_launcher.py:7: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions. To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`. import sys
print('y = x^2 -2x + 1')
y = np.poly1d([1, -2, 1])
print('* Order of polynomial is ', y.order)
print('* y(2) = ', y(2))
print('* Roots of polynomial are ', y.roots)
y = x^2 -2x + 1 * Order of polynomial is 2 * y(2) = 1 * Roots of polynomial are [1. 1.]
# Setting x's to be 50 linearly spaced points in [0, 1]
x = np.linspace(0, 1, num=20)
# Setting y to be a noised cosine
y = np.cos(x) + 0.3*np.random.rand(20)
# Fitting points (x, y) to a polynomial of deg 3
fit_coeff = np.polyfit(x, y, 3)
# Build the polynomial with the fitting coefficients
p = np.poly1d(fit_coeff)
# Considering another interval of x's and plotting original points and fitting curve
x2 = np.linspace(0, 1, 200)
plt.plot(x, y, 'o', x2, p(x2), '-')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('A noisy cosine')
plt.show();
# Load data from text file (including csv) into Numpy matrix
data = np.loadtxt(dataset,
delimiter=' ',
skiprows=1, # skip the first 1 lines (header)
usecols=(1,2) # select cols 1 (x) and 2 (y)
)
# If there are missing values, run this instead
# data = np.genfromtxt(data_folder + filename,
# delimiter=delimiter,
# skip_header=1, # skip the first 1 lines (header)
# usecols=features, # select which features to use
# missing='?' # what missing values are mapped to
# )
print(np.log(0), np.log(-1))
print(np.isnan([1.,np.log(-1), 2]))
-inf nan [False True False]
/usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log if __name__ == '__main__': /usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in log if __name__ == '__main__': /usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:2: RuntimeWarning: invalid value encountered in log from ipykernel import kernelapp as app
data = np.ones((3, 3))
# Saving and loading numpy array
np.save('mat.npy', data)
data_loaded = np.load('mat.npy')
"Sci" stands, quite obviously, for "scientific": this is the library devolted to calculus, statistics and all the blocks of scientific manipulation.
optimize
¶# function definition and plot of it
def f(x):
return x**2 + 10*np.sin(x)
x = np.arange(-10, 10, 0.1)
plt.plot(x, f(x))
plt.show();
# Gradient Descent through BFGS (finds local min)
print('BFGS')
optimize.fmin_bfgs(f, 0)
# Global optimizer though bashinopping (local optimizer + random sampling of starting points)
print('BASINHOPPING')
optimize.basinhopping(f, 0)
# NOTE: there are several other optimizers available
[<matplotlib.lines.Line2D at 0x10af69048>]
BFGS Optimization terminated successfully. Current function value: -7.945823 Iterations: 5 Function evaluations: 18 Gradient evaluations: 6
array([-1.30644012])
BASINHOPPING
fun: -7.9458233756152845 lowest_optimization_result: fun: -7.9458233756152845 hess_inv: array([[ 0.08581276]]) jac: array([ 1.19209290e-07]) message: 'Optimization terminated successfully.' nfev: 15 nit: 3 njev: 5 status: 0 success: True x: array([-1.30644001]) message: ['requested number of basinhopping iterations completed successfully'] minimization_failures: 0 nfev: 1524 nit: 100 njev: 508 x: array([-1.30644001])
roots = []
#f = x**2 + 10*np.sin(x) is the function defined above
root = optimize.fsolve(f, 1) # 1 is the initial guess
print(root)
roots.append(root)
# from plot, function has another root, so let us start from -2.5
root = optimize.fsolve(f, -2.5) # 1 is the initial guess
print(root)
roots.append(root)
[ 0.] [-2.47948183]
x = np.linspace(-10, 10, num=20)
y = f(x) + np.random.randn(x.size)
def f_to_fit(x, a, b):
return a*x**2 + b*np.sin(x)
guess = [2, 2]
params, params_covariance = optimize.curve_fit(f_to_fit, x, y, guess)
# Plotting all stuff together
f_line, = plt.plot(x, f(x), label='original function')
fitted_points, = plt.plot(x, f_to_fit(x, params[0], params[1]), 'o', label='points of fitted curve')
original, = plt.plot(roots, f(np.array(roots)), 'x', label='roots')
legend = plt.legend(handler_map={f_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();
stats module
# Build the bin separators (unit width, edges from -9 to 9)
bins_sep = np.arange(-9, 10)
# Extract 1000 normally distributed points
normal_points = np.random.normal(size=1000)
# Histogram the points and compute the normal PDF over bins
histogram = np.histogram(normal_points, bins=bins_sep, normed=True)[0]
bins = 0.5*(bins_sep[1:] + bins_sep[:-1]) # to get the bins centers
# Normal PDF over specified bins
normal_pdf = stats.norm.pdf(bins)
# Plot both things
hist_line, = plt.plot(bins, histogram, 'o', label='hist norm points')
pdf_line, = plt.plot(bins, normal_pdf, label='Normal PDF')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();
# 50 percentile of previously defined normal points
print(stats.scoreatpercentile(normal_points, 50))
-0.0508135291078
interpolate module
# Defining a noisy sine wave
t = np.linspace(0, 1, 10)
middle_t = np.linspace(0, 1, 20)
noise = (np.random.random(10)*2 - 1) * 1e-1
noisy_sin = np.sin(2 * np.pi * t) + noise
# Defining linear interpolation between points
linear_int = interp1d(t, noisy_sin)(middle_t)
# Defining cubic interpolation between points
cubic_int = interp1d(t, noisy_sin, kind='cubic')(middle_t)
# Plot all
noisy_sin_line, = plt.plot(t, noisy_sin, label='noisy sine wave')
noisy_sin_points, = plt.plot(t, noisy_sin, 'o', color='blue')
linear_int_points, = plt.plot(middle_t, linear_int, 'x', label='lin int')
cubic_int_points, = plt.plot(middle_t, cubic_int, 'x', label='cub int')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=3)
plt.show();
integrate
module¶res, err = quad(np.sin, 0, np.pi/2)
print(res, err)
# NOTE: there are other integration methods
0.9999999999999999 1.1102230246251564e-14
# Defining dy/dt, iteration_count is the counter for the iteration till convergence
def y_dot(y, t, iteration_count):
iteration_count += 1
return -2 * y
# Defining t, the vector of iteration count, then solve dy/dt = -2y
t = np.linspace(0, 10, 100)
iteration_count = np.zeros((1,), dtype=np.uint16)
y_sol, infodict = odeint(y_dot, 1, t, args=(iteration_count,), full_output=True)
# Plot solution y(t)
plt.plot(t, y_sol)
plt.plot()
plt.show();
# NOTE: odeint solves a system of ODEs as well
# NOTE: there is no partial diff eq solver in Scipy
signal
module¶x = t + np.random.normal(size=100)
plt.plot(t, x, label='original')
plt.plot(t, detrend(x), label='detrended')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();
Quick runthrough of many possible ways to compute the distance between points, as supported by SciPy.
# Define two 1D arrays and two matrices
u = np.array([1, 1, 2])
v = np.array([1, 3, 1])
A = np.array([[1, 2, 0], [2,3,4]])
B = np.array([[0, 1, 1], [2, 0, 1]])
print('* Distances between the two vectors')
print(' Euclidean:', distance.euclidean(u, v))
print(' Cosine:', distance.cosine(u, v))
print(' Manhattan:', distance.cityblock(u, v))
print(' Minkowski with p=3:', distance.minkowski(u, v, 3))
print(' Chebyshev:', distance.chebyshev(u, v))
print(' Hamming:', distance.hamming(u, v))
* Distances between the two vectors Euclidean: 2.2360679775 Cosine: 0.261451054124 Manhattan: 3 Minkowski with p=3: 2.08008382305 Chebyshev: 2 Hamming: 0.666666666667
print('* Distances between each pair of rows in matrices')
print(distance.cdist(A, B, metric='euclidean'))
# cdist is built as (numbers are row indices)
# A0B0 A0B1 A0B2
# A1B0 A1B1 A1B2
* Distances between each pair of rows in matrices [[ 1.73205081 2.44948974] [ 4.12310563 4.24264069]]
print('* Pairwise distances between rows in matrix')
print(distance.pdist(np.array([[0, 0], [1, 1], [1, 2]]), metric='euclidean'))
# pdist is built as (numbers are row indices)
# A0A1 A0A2 A1A2 # len will be N*(N-1) where N is the number of rows
* Pairwise distances between rows in matrix [ 1.41421356 2.23606798 1. ]
# Define a line and a parabola parametrically
def f_linear(x, a, b):
"""A parametric linear function."""
return a*x + b
def quad(x, a, b, c):
"""A parametric quadratic function (parabola)."""
return a*x**2 + b*x + c
# define a range for x
x = np.linspace(-10, 10, num=50)
# plot the function with some chosen params and some noisy points (value given by the function with added random noise)
f = partial(f_linear, a=1,b=10) # this will behave like the called function but with chosen parameters
y = [f_linear(x_, 1, 10) + np.random.randint(-10, 10) for x_ in x]
plt.scatter(x, y, label='noisy points')
plt.plot(x, f(x), c='b', label='f');
plt.title('$x +10$');
plt.legend();
# now do the same but use the widget to have choseable params
# the noisy points will be the same as earlier
@interact(a_=1, b_=10)
def plot_with_widget(a_,b_):
f = partial(f_linear, a=a_,b=b_)
plt.scatter(x, y, label='noisy points')
plt.plot(x, f(x), c='b', label='f');
plt.title('${a}x + {b}$'.format(a=a_, b=b_));
plt.legend();
interactive(children=(IntSlider(value=1, description='a_', max=3, min=-1), IntSlider(value=10, description='b_…