#!/usr/bin/env python # coding: utf-8 # **Disclaimer:** Most of the content in this notebook is coming from [www.scipy-lectures.org](http://www.scipy-lectures.org/intro/index.html) # # Scipy - High-level Scientific Computing # # The [scipy](https://www.scipy.org/) package contains various toolboxes dedicated to common issues in scientific computing. Its different submodules correspond to different applications, such as interpolation, integration, optimization, image processing, statistics, special functions, etc. # # `scipy` is composed of task-specific sub-modules: # # scipy.cluster Clustering package # scipy.constants Constants # scipy.fftpack Discrete Fourier transforms # scipy.integrate Integration and ODEs # scipy.interpolate Interpolation # scipy.io Input and output # scipy.linalg Linear algebra # scipy.misc Miscellaneous routines # scipy.ndimage Multi-dimensional image processing # scipy.odr Orthogonal distance regression # scipy.optimize Optimization and root finding # scipy.signal Signal processing # scipy.sparse Sparse matrices # scipy.sparse.linalg Sparse linear algebra # scipy.sparse.csgraph Compressed Sparse Graph Routines # scipy.spatial Spatial algorithms and data structures # scipy.special Special functions # scipy.stats Statistical functions # scipy.stats.mstats Statistical functions for masked arrays # # They all depend on `numpy`, but are mostly independent of each # other. The standard way of importing Numpy and these Scipy modules # is: # In[ ]: import numpy as np from scipy import stats # same for other sub-modules # **Warning:** This tutorial is far from a complete introduction to numerical computing. As enumerating the different submodules and functions in scipy would be very boring, we concentrate instead on a few examples to give a general idea of how to use ``scipy`` for scientific computing. # ## File input/output: `scipy.io` # # ### Load and Save `MATLAB` files # In[ ]: from scipy import io as spio a = np.ones((3, 3)) spio.savemat('file.mat', {'a': a}) # savemat expects a dictionary # In[ ]: ls *mat # In[ ]: data = spio.loadmat('file.mat') data['a'] # ### See also # # * Load text files: `numpy.loadtxt`/`numpy.savetxt` # * Clever loading of text/csv files: `numpy.genfromtxt`/`numpy.recfromcsv` # * Fast and efficient, but numpy-specific, binary format: `numpy.save`/`numpy.load` # * More advanced input/output of images in scikit-image: `skimage.io` # ## Linear algebra operations: `scipy.linalg` # # The `scipy.linalg` module provides standard linear algebra operations. # The `scipy.linalg.det` function computes the determinant of a square matrix: # In[ ]: from scipy import linalg arr = np.array([[1, 2], [3, 4]]) linalg.det(arr) # The `scipy.linalg.inv` function computes the inverse of a square matrix: # In[ ]: arr = np.array([[1, 2], [3, 4]]) iarr = linalg.inv(arr) iarr # More advanced operations are available, for example **singular-value decomposition (SVD)**: # In[ ]: arr = np.arange(9).reshape((3, 3)) + np.diag([1, 0, 1]) arr # In[ ]: uarr, spec, vharr = linalg.svd(arr) # The resulting array spectrum is spec # The original matrix can be re-composed by matrix multiplication of the outputs of ``svd`` with ``np.dot``: # In[ ]: sarr = np.diag(spec) svd_mat = uarr.dot(sarr).dot(vharr) svd_mat # SVD is commonly used in statistics and signal processing. Many other standard decompositions (QR, LU, Cholesky, Schur), as well as solvers for linear systems, are available in `scipy.linalg`. # ## Interpolation: `scipy.interpolate` # # `scipy.interpolate` is useful for fitting a function from experimental data and thus evaluating points where no measure exists. # In[ ]: # Let's create some data measured_time = np.linspace(0, 1, 10) noise = (np.random.random(10)*2 - 1) * 1e-1 measures = np.sin(2 * np.pi * measured_time) + noise # `scipy.interpolate.interp1d` can build a linear interpolation # function: # In[ ]: from scipy.interpolate import interp1d linear_interp = interp1d(measured_time, measures) interpolation_time = np.linspace(0, 1, 50) linear_results = linear_interp(interpolation_time) # In[ ]: get_ipython().run_line_magic('matplotlib', 'inline') from matplotlib import pyplot as plt plt.plot(measured_time, measures, 'o', ms=6, label='measures') plt.plot(interpolation_time, linear_results, label='linear interp') plt.legend(); # A cubic interpolation can also be selected by providing the ``kind`` optional keyword argument: # In[ ]: cubic_interp = interp1d(measured_time, measures, kind='cubic') cubic_results = cubic_interp(interpolation_time) # In[ ]: plt.plot(measured_time, measures, 'o', ms=6, label='measures') plt.plot(interpolation_time, cubic_results, label='cubic interp') plt.legend() # ## Optimization and fit: `scipy.optimize` # # Optimization is the problem of finding a numerical solution to a minimization or equality. # # The `scipy.optimize` module provides algorithms for function minimization (scalar or multi-dimensional), curve fitting and root finding. # In[ ]: from scipy import optimize # ### Curve fitting # # Suppose we have data on a sine wave, with some noise: # In[ ]: x_data = np.linspace(-5, 5, num=50) y_data = 2.9 * np.sin(1.5 * x_data) + np.random.normal(size=50) # In[ ]: # And plot it import matplotlib.pyplot as plt plt.figure(figsize=(6, 4)) plt.scatter(x_data, y_data) plt.show() # If we know that the data lies on a sine wave, but not the amplitudes or the period, we can find those by least squares curve fitting. First, we have to define the test function to fit, here a sine with unknown amplitude and period: # In[ ]: def test_func(x, a, b): return a * np.sin(b * x) # We then use `scipy.optimize.curve_fit` to find $a$ and $b$: # In[ ]: params, params_covariance = optimize.curve_fit( test_func, x_data, y_data, p0=[2, 2]) print(params) # In[ ]: # And plot the resulting curve on the data plt.figure(figsize=(6, 4)) plt.scatter(x_data, y_data, label='Data') plt.plot(x_data, test_func(x_data, params[0], params[1]), label='Fitted function') plt.legend(loc='best') plt.show() # ### Finding the minimum of a scalar function # # Let's define the following function: # In[ ]: def f(x): return x**2 + 10*np.sin(x) # and plot it: # In[ ]: x = np.arange(-10, 10, 0.1) plt.plot(x, f(x)) plt.show() # This function has a global minimum around -1.3 and a local minimum around 3.8. # # Searching for minimum can be done with `scipy.optimize.minimize`, given a starting point `x0`, it returns the location of the minimum that it has found: # In[ ]: result = optimize.minimize(f, x0=0) result # In[ ]: result.x # The coordinate of the minimum # ## Fast Fourier transforms: `scipy.fftpack` # # The `scipy.fftpack` module computes fast Fourier transforms (FFTs) # and offers utilities to handle them. The main functions are: # # * `scipy.fftpack.fft` to compute the FFT # * `scipy.fftpack.fftfreq` to generate the sampling frequencies # * `scipy.fftpack.ifft` computes the inverse FFT, from frequency space to signal space # # As an illustration, let's create a (noisy) input signal (``sig``), and its FFT: # In[ ]: from scipy import fftpack time_step = 0.02 period = 1. / 0.5 # 0.5 Hz time_vec = np.arange(0, 20, time_step) sig = (np.sin(2 * np.pi / period * time_vec) + 0.5 * np.random.randn(time_vec.size)) # In[ ]: plt.figure(figsize=(10, 4)) plt.plot(time_vec, sig, label='Original signal') plt.title('Signal') plt.show() # In[ ]: # The FFT of the signal sig_fft = fftpack.fft(sig) # And the power (sig_fft is of complex dtype) power = np.abs(sig_fft) # The corresponding frequencies sample_freq = fftpack.fftfreq(sig.size, d=time_step) # Plot the FFT power plt.figure(figsize=(10, 4)) plt.plot(sample_freq[:500], power[:500]) plt.xlabel('Frequency [Hz]') plt.ylabel('plower') plt.title('FFT') plt.show() # The peak signal frequency can be found with ``sample_freq[power.argmax()]`` # In[ ]: peak_freq = sample_freq[power.argmax()] peak_freq # Setting the Fourier component above this frequency to zero and inverting the FFT with `scipy.fftpack.ifft`, gives a filtered signal: # In[ ]: high_freq_fft = sig_fft.copy() high_freq_fft[np.abs(sample_freq) > peak_freq] = 0 filtered_sig = fftpack.ifft(high_freq_fft) plt.figure(figsize=(6, 5)) plt.plot(time_vec, sig, label='Original signal') plt.plot(time_vec, filtered_sig, linewidth=3, label='Filtered signal') plt.xlabel('Time [s]') plt.ylabel('Amplitude') plt.legend(loc='best') plt.show(); # ## Signal processing: `scipy.signal` # # `scipy.signal` is for typical signal processing: 1D, regularly-sampled signals. # ### Detrending `scipy.signal.detrend` - remove linear trend from signal: # In[ ]: # Generate a random signal with a trend import numpy as np t = np.linspace(0, 5, 100) x = t + np.random.normal(size=100) # In[ ]: # Detrend from scipy import signal x_detrended = signal.detrend(x) # In[ ]: # Plot from matplotlib import pyplot as plt plt.figure(figsize=(8, 4)) plt.plot(t, x, label="x") plt.plot(t, x_detrended, label="x_detrended") plt.legend(loc='best') plt.show()