#!/usr/bin/env python
# coding: utf-8

# Introduction
# ============
#
# Setting up `Python`
# ------------------------------
#
# The data we are going to use as illustrations were recorded by Andreas
# Pippow [Kloppenburg Lab, University of Cologne](http://cecad.uni-koeln.de/Prof-Peter-Kloppenburg.82.0.html) and
# are freely available in [HDF5](http://www.hdfgroup.org/HDF5/) format at
# the following URLs:
# - <http://xtof.disque.math.cnrs.fr/data/CCD_calibration.hdf5> for the
#   calibration data.
# - <http://xtof.disque.math.cnrs.fr/data/Data_POMC.hdf5> for the POMC
#   data.
#
# `Python 3` is used here, so if you want to do the same with `Python 2`
# you should start with:
#
# ``` {.python}
# from __future__ import print_function, division, unicode_literals, absolute_import
# ```
#
# Loading the data into `Python` requires the installation of
# [h5py](http://docs.h5py.org/en/latest/index.html), a module available in
# the [anaconda](http://continuum.io/downloads) distribution.
#
# ### Loading the POMC data set in `Python`
#
# We start by downloading the data set to our local disk. To do that we
# need the `urllib` module:

# In[1]:

from urllib.request import urlretrieve
urlretrieve("http://xtof.disque.math.cnrs.fr/data/Data_POMC.hdf5","Data_POMC.hdf5")

# `Python 2` users should type instead:
#
# ``` {.python}
# import urllib
# urllib.urlretrieve("http://xtof.disque.math.cnrs.fr/data/Data_POMC.hdf5","Data_POMC.hdf5")
# ```
#
# Once the data are on the local disk, they are loaded into `Python` with:

# In[1]:

import h5py

# In[2]:

pomc = h5py.File("Data_POMC.hdf5","r")

# Information on the content of this `HDF5` file can be found in its
# `README` attribute:

# In[4]:

pomc.attrs['README']

# We next create variables pointing to the `time` vector and to the image
# `stack`:

# In[3]:

time_pomc = pomc['time'][...]
stack_pomc = pomc['stack'][...]

# We then close the file:

# In[4]:

pomc.close()

# ### Loading the Calibration data set in `Python`
#
# We start by downloading the data set to our local disk. To do that we
# need the `urllib` module:

# In[ ]:

urlretrieve("http://xtof.disque.math.cnrs.fr/data/CCD_calibration.hdf5","CCD_calibration.hdf5")

# `Python 2` users should type instead:
#
# ``` {.python}
# urllib.urlretrieve("http://xtof.disque.math.cnrs.fr/data/CCD_calibration.hdf5","CCD_calibration.hdf5")
# ```
#
# Once the data are on the local disk, they are loaded into `Python` with:

# In[5]:

calibration = h5py.File("CCD_calibration.hdf5","r")
list(calibration)

# The file is made of 10 `groups` with the above names. Each group
# contains 2 `datasets`:

# In[8]:

list(calibration['10ms'])

# The dataset `stack` contains, as its name says, the image stack and its
# shape is:

# In[9]:

calibration['10ms/stack'].shape

# So the CCD sensor is made of 60 x 80 pixels and the 100 exposures are
# stored along the dimension with index 2 (that is the third dimension).
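# As a quick way to see everything an `HDF5` file contains, `h5py` can
# walk the whole hierarchy for us. A minimal sketch (assuming the
# `calibration` file is still open, as above) printing every group and
# dataset together with the dataset shapes and types could be:
#
# ``` {.python}
# def describe(name, obj):
#     # datasets have a `shape` attribute, groups do not
#     if hasattr(obj, 'shape'):
#         print(name, obj.shape, obj.dtype)
#     else:
#         print(name)
#
# calibration.visititems(describe)
# ```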
# The data set `time` contains a vector of the times at which the different
# exposures were done:

# In[11]:

calibration['10ms/time'].shape

# All this information can be found in the `README` attribute of the file:

# In[12]:

calibration.attrs['README']

# ### Loading key modules
#
# We are going to use the usual scientific Python modules plus `SymPy`:

# In[6]:

import numpy as np
import matplotlib.pyplot as plt
import sympy as sy
import scipy
plt.ion()
get_ipython().run_line_magic('matplotlib', 'inline')

# ### Some function definitions

# In[7]:

def plotSignal(stack,lw=1):
    import numpy as np
    import matplotlib.pyplot as plt
    n_x, n_y, n_t = stack.shape
    amp_min = np.min(stack)
    amp_max = np.max(stack)
    amp_diff = np.ptp(stack)
    x_domain = np.arange(n_t)/n_t
    y_domain = (0,n_y)
    for r_idx in range(n_x):
        for c_idx in range(n_y):
            y_min = n_x - r_idx - 1
            sig = stack[r_idx,c_idx,:]
            Y = (sig-amp_min)/amp_diff + y_min
            X = x_domain + c_idx
            plt.plot(X,Y,lw=lw,color='black')
    plt.ylim([0,n_y-1])
    plt.axis('off')

# The variability inherent to fluorescence imaging data
# ------------------------------------------------------------------

# In[8]:

plt.figure(dpi=600,figsize=(10,8))
plotSignal(stack_pomc[20:33,33:44,:],lw=1)

# ADU counts (raw data) from Fura-2 excited at 340 nm. Each square
# corresponds to a pixel. 25.05 s of data are shown. Same scale on each
# sub-plot. Data recorded by Andreas Pippow (Kloppenburg Lab, Cologne
# University).

# In[9]:

plt.figure(dpi=600,figsize=(10,8))
plt.plot(time_pomc,stack_pomc[27,39,:],lw=2)
plt.xlabel("Time (s)",fontsize=25)
plt.ylabel("ADU count",fontsize=25)
plt.grid()
plt.xlim([525,550])

# One of the central pixels of the previous figure.

# What do we want? (1)
# -----------------------------
#
# Given the data set illustrated on the last two slides we might want to
# estimate parameters like:
# - the peak amplitude
# - the decay time constant(s)
# - the baseline level
# - the whole time course (strictly speaking, a function).
#
# If we have a model linking the calcium dynamics---the time course of the
# free calcium concentration in the cell---to the fluorescence intensity
# like:
# $$\frac{\mathrm{d}Ca_t}{\mathrm{d}t} \left(1 + \kappa_{F}(Ca_t) + \kappa_{E}(Ca_t) \right) + \frac{j(Ca_t)}{v} = 0 \, , $$
# where $Ca_t$ stands for $[Ca^{2+}]_{free}$ at time $t$, $v$ is the volume
# of the neurite---within which diffusion effects can be neglected---and
# $$j(Ca_t) \equiv \gamma (Ca_t - Ca_{steady}) \, ,$$ is the model of
# calcium extrusion---$Ca_{steady}$ is the steady state
# $[Ca^{2+}]_{free}$---
# $$\kappa_{F}(Ca_t) \equiv \frac{F_{total} \, K_{F}}{(K_{F} + Ca_t)^2} \quad \mathrm{and} \quad \kappa_{E}(Ca_t) \equiv \frac{E_{total} \, K_{E}}{(K_{E} + Ca_t)^2} \, ,$$
# where $F$ stands for the fluorophore and $E$ for the *endogenous* buffer.
#
# In the previous slide, assuming that the fluorophore (Fura) parameters:
# $F_{total}$ and $K_F$ have been calibrated, we might want to estimate:
# - the extrusion parameter: $\gamma$
# - the endogenous buffer parameters: $E_{total}$ and $K_E$
#
# using an equation relating measured fluorescence to calcium:
# $$Ca_t = K_{F} \, \frac{S_t - S_{min}}{S_{max} - S_t} \, ,$$
# where $S_t$ is the fluorescence (signal) measured at time $t$, $S_{min}$ and
# $S_{max}$ are *calibrated* parameters corresponding respectively to the
# fluorescence in the absence of calcium and with saturating $[Ca^{2+}]$
# (for the fluorophore).
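# To make the last relation concrete, here is a minimal sketch turning a
# measured fluorescence trace into a calcium trace. The function name and
# the calibration constants `K_F`, `S_min` and `S_max` below are made-up,
# purely illustrative values, not calibrated ones:
#
# ``` {.python}
# import numpy as np
#
# def fluo_to_ca(S, K_F=0.2, S_min=100.0, S_max=2000.0):
#     """Invert the relation Ca_t = K_F * (S_t - S_min)/(S_max - S_t).
#     K_F, S_min and S_max are hypothetical calibration values."""
#     S = np.asarray(S, dtype=float)
#     return K_F * (S - S_min) / (S_max - S)
#
# # example on a made-up trace (in ADU)
# fluo_to_ca([300., 800., 1500.])
# ```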
# - The variability of our signal---meaning that under replication of
#   our measurements *under the exact same conditions* we won't get the
#   exact same signal---implies that our estimated parameters will also
#   fluctuate upon replication.
# - Formally our parameters are modeled as *random variables* and **it
#   is not enough to summarize a random variable by a single number**.
# - If we cannot get the full distribution function for our parameters,
#   we want to give at least ranges within which the true value of the
#   parameter should be found with a given probability.
# - In other words: **an analysis without confidence intervals is not an
#   analysis**, it is strictly speaking useless since it can't be
#   reproduced---if I say that my time constant is 25.76 ms, the
#   probability that upon replication I get 25.76 again is essentially
#   0; if I say that the actual time constant has a 0.95 probability of
#   being in the interval [24,26.5], I can make a comparison with
#   replications.
#
# A proper handling of the "variability" matters
# -----------------------------------------------------------
#
# Let us consider a simple data generation model:
# $$Y_i \sim \mathcal{P}(f_i)\, , \quad i=0,1,\ldots,K \; ,$$ where
# $\mathcal{P}(f_i)$ stands for the *Poisson distribution* with parameter
# $f_i$:
# $$\mathrm{Pr}\{Y_i = n\} = \frac{(f_i)^n}{n!} \exp (-f_i)\, , \quad \mathrm{for} \quad n=0,1,2,\ldots $$
# and
# $$f_i = f(\delta i| f_{\infty}, \Delta, \beta) = f_{\infty} + \Delta \, \exp (- \beta \, \delta i)\; ,$$
# where $\delta$ is a time step and $f_{\infty}$, $\Delta$ and $\beta$ are
# model parameters.

# In[10]:

beta_true = 1.0
f_infinite = 100
Delta = 900
X = np.linspace(0,5*beta_true,51)
Theo = Delta*np.exp(-X*beta_true)+f_infinite
np.random.seed(20061001)
Observations = np.random.poisson(Theo)
plt.figure(dpi=600,figsize=(10,8))
plt.plot(X,Observations,'o')
plt.xlabel("Time (s)",fontsize=25)
plt.ylabel("Observations",fontsize=25)
plt.plot(X,Theo,'r')
plt.plot(X[[3,30]],Theo[[3,30]],'sk')
plt.plot([X[3],X[3]],[0,Theo[3]],'--k')
plt.plot([0,X[3]],[Theo[3],Theo[3]],'--k')
plt.plot([X[30],X[30]],[0,Theo[30]],'--k')
plt.plot([0,X[30]],[Theo[30],Theo[30]],'--k')
plt.text(0.1,730,r'$y_1$',fontsize=25)
plt.text(1.5,110,r'$y_2$',fontsize=25)

# Data simulated according to the previous model. We are going to assume
# that $f_{\infty}$ and $\Delta$ are known and that $(t_1,y_1)$ and
# $(t_2,y_2)$ are given. We want to estimate $\beta$.

# Two estimators
# ---------------------------
#
# We are going to consider two
# [estimators](https://en.wikipedia.org/wiki/Estimator) for $\beta$:
# - The "classical" least squares estimator:
#   $$ \tilde{\beta} = \arg \min \tilde{L}(\beta) \; ,$$ where
#   $$ \tilde{L}(\beta) = \sum_j \big( y_j - f(t_j \mid \beta) \big)^2 \; .$$
# - The least squares estimator applied to the *square root* of the data:
#   $$\hat{\beta} = \arg \min \hat{L}(\beta) \; ,$$ where
#   $$ \hat{L}(\beta) = \sum_j \big( \sqrt{y_j} - \sqrt{f(t_j \mid \beta)} \big)^2 \; .$$
#
# We perform an empirical study as follows (a quick cross-check on a
# single simulated pair is sketched right after this list):
# - We simulate 100,000 experiments such that:
#   $$ (Y_1,Y_2) \sim \big(\mathcal{P}(f(0.3|\beta_0)), \mathcal{P}(f(3|\beta_0))\big) \; ,$$
#   with $\beta_0=1$.
# - For each simulated pair, $(y_1,y_2)^{[k]}$ ($k=1,\ldots,10^5$), we
#   minimize $\tilde{L}(\beta)$ and $\hat{L}(\beta)$ to obtain:
#   $(\tilde{\beta}^{[k]},\hat{\beta}^{[k]})$.
# - We build histograms of $\tilde{\beta}^{[k]}$ and
#   $\hat{\beta}^{[k]}$ as estimates of the densities of these estimators.
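# Before implementing Newton's method below, we can cross-check both
# estimators on the single simulated pair above with a generic
# one-dimensional minimizer. This is only a sketch (not the method used in
# the sequel); it relies on `scipy.optimize.minimize_scalar` and reuses the
# `X` and `Observations` objects defined above, while `f_model` and the
# `*_check` names are ours:
#
# ``` {.python}
# from scipy.optimize import minimize_scalar
# import numpy as np
#
# def f_model(t, beta, Delta=900., f_inf=100.):
#     return f_inf + Delta*np.exp(-beta*t)
#
# t_obs = X[[3, 30]]
# y_obs = Observations[[3, 30]]
#
# # "classical" least squares criterion L_tilde
# crit_tilde = lambda b: np.sum((y_obs - f_model(t_obs, b))**2)
# # least squares on the square root of the data, L_hat
# crit_hat = lambda b: np.sum((np.sqrt(y_obs) - np.sqrt(f_model(t_obs, b)))**2)
#
# beta_tilde_check = minimize_scalar(crit_tilde, bounds=(0.1, 10), method='bounded').x
# beta_hat_check = minimize_scalar(crit_hat, bounds=(0.1, 10), method='bounded').x
# (beta_tilde_check, beta_hat_check)
# ```
#
# Both values should be close to the Newton-based estimates obtained below.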
# # ## Simulations with the two estimators # # # ### Definitions of utility functions required for applying Newton's method to the first estimator # # We want to minimize: $$\tilde{\beta} = \arg \min \tilde{L}(\beta) \; ,$$ # where # $$\tilde{L}(\beta) = \big( y_1 - f(t_1 \mid \beta) \big)^2 + \big( y_2 - f(t_2 \mid \beta) \big)^2 \; .$$ # In other words, we want the root of the derivative. We can get this # derivative by hand or with the `SymPy` module. We will next use the # latter. # # In[18]: sy.init_printing() x_1,x_2,y_1,y_2,Delta,beta,f_infini = sy.symbols('x_1,x_2,y_1,y_2,Delta,beta,f_infini',real=True) L_tilde = (y_1 - Delta*sy.exp(-beta*x_1)-f_infini)**2 + (y_2 - Delta*sy.exp(-beta*x_2)-f_infini)**2 L_tilde # In[19]: G_tilde = sy.diff(L_tilde,beta) G_tilde # The following `print` command is usefull to generate the code: # In[20]: print(G_tilde) # In[21]: G_prime_tilde = sy.diff(G_tilde,beta) G_prime_tilde # In[22]: print(G_prime_tilde) # We define next a "constructor" function returning the functions required # to implement Newton's method: # In[11]: def mk_g_dg_tilde(x_1,y_1,x_2,y_2,Delta=900,f_infini=100): def g(beta): return 2*Delta*x_1*(-Delta*np.exp(-beta*x_1) - \ f_infini + y_1)*np.exp(-beta*x_1) + \ 2*Delta*x_2*(-Delta*np.exp(-beta*x_2) - \ f_infini + y_2)*np.exp(-beta*x_2) def dg(beta): return 2*Delta**2*x_1**2*np.exp(-2*beta*x_1) + \ 2*Delta**2*x_2**2*np.exp(-2*beta*x_2) - \ 2*Delta*x_1**2*(-Delta*np.exp(-beta*x_1) - \ f_infini + y_1)*np.exp(-beta*x_1) - \ 2*Delta*x_2**2*(-Delta*np.exp(-beta*x_2) - \ f_infini + y_2)*np.exp(-beta*x_2) return (g,dg) # We then create the required functions: # In[12]: g_tilde, dg_tilde = mk_g_dg_tilde(X[3],Observations[3],X[30],Observations[30]) # We define next a function implementing Newton's method given an initial # guess, a derivative of the target function and a second derivative of # the target: # In[13]: def newton(initial_guess, target_d, target_dd, tolerance=1e-6, iter_max=100): pos = initial_guess value = target_d(pos) idx = 0 while idx <= iter_max and abs(value) > tolerance : pos -= value/target_dd(pos) idx += 1 value = target_d(pos) return (pos,value,idx) # A short test: # In[14]: newton(1.0,g_tilde,dg_tilde) # ### Definitions of utility functions required for applying Newton's method to the second estimator # # We now want to minimize: $$\hat{\beta} = \arg \min \hat{L}(\beta) \; ,$$ # where # $$\hat{L}(\beta) = \big( \sqrt{y_1} - \sqrt{f(t_1 \mid \beta)} \big)^2 + \big( \sqrt{y_2} - \sqrt{f(t_2 \mid \beta)} \big)^2 \; .$$ # We use `Sympy` since doing these calculations by hand is rather "heavy": # In[27]: L_hat = (sy.sqrt(y_1) - sy.sqrt(Delta*sy.exp(-beta*x_1) + f_infini))**2 + (sy.sqrt(y_2) - sy.sqrt(Delta*sy.exp(-beta*x_2) + f_infini))**2 G_hat = sy.diff(L_hat,beta) G_hat # In[30]: print(G_hat) # In[28]: G_prime_hat = sy.diff(G_hat,beta) G_prime_hat # In[29]: print(G_prime_hat) # We define next the corresponding constructor function: # In[15]: def mk_g_dg_hat(x_1,y_1,x_2,y_2,Delta=900,f_infini=100): def g(beta): return Delta*x_1*(np.sqrt(y_1) - np.sqrt(Delta*np.exp(-beta*x_1) + f_infini))*np.exp(-beta*x_1)/np.sqrt(Delta*np.exp(-beta*x_1) + f_infini) + Delta*x_2*(np.sqrt(y_2) - np.sqrt(Delta*np.exp(-beta*x_2)+ f_infini))*np.exp(-beta*x_2)/np.sqrt(Delta*np.exp(-beta*x_2) +f_infini) def dg(beta): return Delta**2*x_1**2*(np.sqrt(y_1) - np.sqrt(Delta*np.exp(-beta*x_1) + f_infini))*np.exp(-2*beta*x_1)/(2*(Delta*np.exp(-beta*x_1) + f_infini)**(3/2)) + Delta**2*x_1**2*np.exp(-2*beta*x_1)/(2*(Delta*np.exp(-beta*x_1) + f_infini)) + 
Delta**2*x_2**2*(np.sqrt(y_2) - np.sqrt(Delta*np.exp(-beta*x_2) + f_infini))*np.exp(-2*beta*x_2)/(2*(Delta*np.exp(-beta*x_2) + f_infini)**(3/2)) + Delta**2*x_2**2*np.exp(-2*beta*x_2)/(2*(Delta*np.exp(-beta*x_2) + f_infini)) - Delta*x_1**2*(np.sqrt(y_1) - np.sqrt(Delta*np.exp(-beta*x_1) + f_infini))*np.exp(-beta*x_1)/np.sqrt(Delta*np.exp(-beta*x_1) + f_infini) - Delta*x_2**2*(np.sqrt(y_2) - np.sqrt(Delta*np.exp(-beta*x_2) + f_infini))*np.exp(-beta*x_2)/np.sqrt(Delta*np.exp(-beta*x_2) + f_infini) return (g,dg) # A little test: # In[16]: g_hat, dg_hat = mk_g_dg_hat(X[3],Observations[3],X[30],Observations[30]) newton(1.0,g_hat,dg_hat) # ### The simulation # In[17]: n_rep = int(1e5) beta_tilde = np.zeros((n_rep,3)) beta_hat = np.zeros((n_rep,3)) np.random.seed(20110928) for rep_idx in range(n_rep): Y = np.random.poisson(Theo[[3,30]]) g_tilde, dg_tilde = mk_g_dg_tilde(X[3],Y[0],X[30],Y[1]) beta_tilde[rep_idx,:] = newton(1.0,g_tilde,dg_tilde) g_hat, dg_hat = mk_g_dg_hat(X[3],Y[0],X[30],Y[1]) beta_hat[rep_idx,:] = newton(1.0,g_hat,dg_hat) # We check that every simulation ended before the maximal allowed number # of iteration: # In[18]: (any(beta_tilde[:,2] == 100),any(beta_hat[:,2] == 100)) # In[19]: def Ffct(beta): return 900 * np.exp(-X[[3,30]]*beta) + 100 def dFfct(beta): return -X[[3,30]]*900 * np.exp(-X[[3,30]]*beta) sd0 = np.sqrt((np.sum(dFfct(1.0)**2*Ffct(1.0))/np.sum(dFfct(1.0)**2)**2)) sd1 = np.sqrt(1.0/np.sum(dFfct(1.0)**2/Ffct(1.0))) beta_vector = np.linspace(0.6,1.6,501) plt.figure(dpi=600,figsize=(10,8)) useless_stuff = plt.hist([beta_tilde[:,0],beta_hat[:,0]],bins=50,normed=True,histtype='step',lw=2) plt.xlabel(r'$\beta$',fontsize=25) plt.ylabel('Density',fontsize=25) plt.title(r'Densities of $\widetilde{\beta}$ and $\widehat{\beta}$',fontsize=25) plt.plot(beta_vector,np.exp(-0.5*(beta_vector-1)**2/sd0**2)/sd0/np.sqrt(2*np.pi),color='black',lw=2) plt.plot(beta_vector,np.exp(-0.5*(beta_vector-1)**2/sd1**2)/sd1/np.sqrt(2*np.pi),color='black',lw=2) # Both histograms are built with 50 bins. $\hat{\beta}$ (green) is __clearly__ # better than $\tilde{\beta}$ (blue) since its variance is smaller. The # derivation of the theoretical (large sample) densities is given in # [Joucla et al (2010)](http://intl-jn.physiology.org/cgi/content/short/103/2/1130). # CCD camera noise # ========== # # CCD basics # -------------- # # ![](zerosept_vanVliet1998F3.png) # # Source: L. van Vliet et col. (1998) [Digital Fluorescence Imaging Using Cooled CCD Array Cameras](http://homepage.tudelft.nl/e3q6n/publications/1998/AP98LVDSTY/AP98LVDSTY.html) (figure 3). # # # "Noise" sources in CCD # ----------------------------------- # # - The "Photon noise" or "shot noise" arises from the fact the # measuring a fluorescence intensity, λ, implies **counting # photons**---unless one changes the laws of Physics there is nothing # one can do to eliminate this source of variability (improperly # called "noise")---: # $$\mathrm{Pr}\{N=n\} = \frac{\lambda^n}{n!} \exp -\lambda\, , \quad n \, = \, 0,1,\ldots\, , \quad \lambda > 0\; .$$ # - The "thermal noise" arises from thermal agitation which "dumps" # electrons in potential wells; this "noise" also follows a Poisson # distribution but it can be made negligible by *cooling down* the # camera. # - The "read out noise" arises from the conversion of the number of # photo-electrons into an equivalent tension; it follows a normal # distribution whose variance is independent of the mean (as long as # reading is not done at too high a frequency). 
# - The "digitization noise" arises from the mapping of a continuous
#   value, the tension (voltage), onto a grid; it is negligible as soon as
#   more than 8 bits are used.
#
# A simple CCD model
# -------------------------------
#
# - We can easily obtain a simple CCD model taking into account the two
#   main "noise" sources (photon and read-out).
# - To get this model we are going to use the fact (a theorem) that when a
#   **large number of photons are detected**, the Poisson distribution is
#   well approximated by ([converges in
#   distribution](http://en.wikipedia.org/wiki/Convergence_in_distribution#Convergence_in_distribution)
#   to) a normal distribution with identical mean and variance:
#   $$\mathrm{Pr}\{N=n\} = \frac{\lambda^n}{n!} \exp(-\lambda) \approx \mathcal{N}(\lambda,\lambda) \; .$$
# - In other words:
#   $$ N \approx \lambda + \sqrt{\lambda} \, \epsilon \; ,$$ where
#   $\epsilon \sim \mathcal{N}(0,1)$ (follows a standard normal
#   distribution).
# - A read-out noise is added next, following a normal distribution with
#   0 mean and variance $\sigma_{R}^2$.
# - We are therefore adding to the random variable $N$ a new
#   **independent** random variable $R \sim \mathcal{N}(0,\sigma_{R}^2)$
#   giving:
#   $$M \equiv N+R \approx \lambda + \sqrt{\lambda+\sigma_{R}^2} \, \epsilon \; ,$$
#   where we have used the fact that the sum of two independent normal random
#   variables is a normal random variable whose mean is the sum of the
#   means and whose variance is the sum of the variances.
# - Since the capacity of the photo-electron wells is finite (35000 for
#   the camera used in the first slides) and since the number of
#   photo-electrons will be digitized on 12 bits (4096 levels), a "gain"
#   $G$ **smaller than one** must be applied if we want to represent
#   faithfully (without saturation) an almost full well.
# - We therefore get:
#   $$Y \equiv G \cdot M \approx G \, \lambda + \sqrt{G^2 \, (\lambda+\sigma_{R}^2)} \, \epsilon \; .$$
#
# For completeness: Convergence in distribution of a Poisson toward a normal rv
# -------------------------------------------------------------------------------------------------------
#
# We use the [moment-generating function](http://en.wikipedia.org/wiki/Moment-generating_function) and
# the following theorem (_e.g._ John Rice, 2007, _Mathematical Statistics
# and Data Analysis_, Chap. 5, Theorem A):
# - If the moment-generating function of each element of the rv sequence
#   $X_n$ is $m_n(t)$,
# - if the moment-generating function of the rv $X$ is $m(t)$,
# - if $m_n(t) \rightarrow m(t)$ when $n \rightarrow \infty$ for all
#   $|t| \le b$ where $b > 0$,
# - then $X_n \xrightarrow{D} X$.
#
# Let's show that: $$Y_n = \frac{X_n - n}{\sqrt{n}} \; , $$ where $X_n$
# follows a Poisson distribution with parameter $n$, converges in
# distribution towards $Z$, a standard normal rv.
# We have: $$m_n(t) \equiv \mathrm{E}\left[\exp(Y_n t)\right] \; ,$$
# therefore:
# $$m_n(t) = \sum_{k=0}^{\infty} \exp\left(\frac{k-n}{\sqrt{n}}t\right) \frac{n^k}{k!} \exp(-n) \; ,$$
#
# $$m_n(t) = \exp(-n) \exp(-\sqrt{n}t) \sum_{k=0}^{\infty} \frac{\left(n \exp\left(t/\sqrt{n}\right)\right)^k}{k!}$$
# $$m_n(t) = \exp\left(-n - \sqrt{n} t+ n \exp(t/\sqrt{n})\right)$$
# $$m_n(t) = \exp\left(-n - \sqrt{n} t+ n \sum_{k=0}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!}\right)$$
# $$m_n(t) = \exp\left(-n - \sqrt{n} t+ n + \sqrt{n} t + \frac{t^2}{2} + n \sum_{k=3}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!}\right)$$
# $$m_n(t) = \exp\left( \frac{t^2}{2} + n \sum_{k=3}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!}\right)$$
#
# Since $\exp(t^2/2)$ is the moment-generating function of a standard
# normal rv, we must show that:
# $$n \sum_{k=3}^{\infty}\left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!} \rightarrow_{n \rightarrow \infty} 0 \quad \forall\ |t| \le b, \quad \text{where} \quad b > 0\, .$$
# Because
# $$- \left|n \sum_{k=3}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!} \right| \le n \sum_{k=3}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!} \le \left| n \sum_{k=3}^{\infty} \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!} \right| \, ,$$
# it is enough to show that the absolute value of this sum goes to 0.
#
# But for all $|t| \le b$ where $b > 0$
#
# \begin{displaymath}
# \begin{array}{lcl}
# 0 \le \left| n \sum_{k=3}^{\infty}
# \left(\frac{t}{\sqrt{n}}\right)^k \frac{1}{k!} \right| & \le & n
# \sum_{k=3}^{\infty}
# \left(\frac{|t|}{\sqrt{n}}\right)^k \frac{1}{k!} \\
# & \le & \frac{|t|^3}{\sqrt{n}} \sum_{k=0}^{\infty}
# \left(\frac{|t|}{\sqrt{n}}\right)^k \frac{1}{(k+3)!} \\
# & \le & \frac{|t|^3}{\sqrt{n}} \sum_{k=0}^{\infty}
# \left(\frac{|t|}{\sqrt{n}}\right)^k \frac{1}{k!} \\
# & \le & \frac{|t|^3}{\sqrt{n}}
# \exp\left(\frac{|t|}{\sqrt{n}}\right) \rightarrow_{n \rightarrow
# \infty} 0 \, ,
# \end{array}
# \end{displaymath}
#
# which completes the proof.
#
# ### Illustrations
#
# We first define a utility function:

# In[20]:

from scipy.stats import norm, poisson
ZZ = np.linspace(-3,3,501)
FZ = norm.cdf(ZZ)
def poisson_steps(par,start,end):
    xpairs = [[(i-par)/np.sqrt(par),(i+1-par)/np.sqrt(par)] for i in range(start,end+1)]
    ypairs = [[poisson.cdf(i,par),poisson.cdf(i,par)] for i in range(start,end+1)]
    xlist = []
    ylist = []
    for xends,yends in zip(xpairs,ypairs):
        xlist.extend(xends)
        xlist.append(None)
        ylist.extend(yends)
        ylist.append(None)
    return [xlist,ylist]

# In[21]:

FY = poisson_steps(5,0,25)
plt.figure(dpi=600,figsize=(10,8))
plt.plot(ZZ,FZ,color='red',lw=2)
plt.xlabel('Z',fontsize=25)
plt.ylabel('CDF',fontsize=25)
plt.xlim([-3,3])
plt.plot(FY[0],FY[1],color='black',lw=3)

# Cumulative distribution functions (CDF) of $Y_5$ (black) and $Z$ a
# standard normal (red).

# In[22]:

FY = poisson_steps(50,0,500)
plt.figure(dpi=600,figsize=(10,8))
plt.plot(ZZ,FZ,color='red',lw=2)
plt.xlabel('Z',fontsize=25)
plt.ylabel('CDF',fontsize=25)
plt.xlim([-3,3])
plt.plot(FY[0],FY[1],color='black',lw=2)

# Cumulative distribution functions (CDF) of $Y_{50}$ (black) and $Z$ a
# standard normal (red).
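# Before looking at larger values of $\lambda$, we can also quantify the
# convergence by computing an (approximate) maximal distance between the
# CDF of $Y_\lambda$ and the standard normal CDF over a fine grid. This is
# only an illustrative sketch; `kolmogorov_distance` is our own helper name:
#
# ``` {.python}
# from scipy.stats import norm, poisson
# import numpy as np
#
# def kolmogorov_distance(par, n_grid=10001):
#     """Approximate max |F_{Y_par}(z) - Phi(z)| over a grid of z values,
#     where Y_par = (X_par - par)/sqrt(par) and X_par ~ Poisson(par)."""
#     z = np.linspace(-6, 6, n_grid)
#     # CDF of the standardized Poisson evaluated at z
#     f_pois = poisson.cdf(np.floor(par + z*np.sqrt(par)), par)
#     return np.max(np.abs(f_pois - norm.cdf(z)))
#
# [(par, round(kolmogorov_distance(par), 4)) for par in (5, 50, 500, 5000)]
# ```
#
# The distance should shrink roughly like $1/\sqrt{\lambda}$ as $\lambda$ grows.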
# In[23]:

FY = poisson_steps(500,500-67,500+67)
plt.figure(dpi=600,figsize=(10,8))
plt.plot(ZZ,FZ,color='red',lw=2)
plt.xlabel('Z',fontsize=25)
plt.ylabel('CDF',fontsize=25)
plt.xlim([-3,3])
plt.plot(FY[0],FY[1],color='black',lw=2)

# Cumulative distribution functions (CDF) of $Y_{500}$ (black) and $Z$ a standard normal (red).

# In[24]:

FY = poisson_steps(5000,5000-213,5000+213)
plt.figure(dpi=600,figsize=(10,8))
plt.plot(ZZ,FZ,color='red',lw=2)
plt.xlabel('Z',fontsize=25)
plt.ylabel('CDF',fontsize=25)
plt.xlim([-3,3])
plt.plot(FY[0],FY[1],color='black',lw=2)

# Cumulative distribution functions (CDF) of $Y_{5000}$ (black) and $Z$ a standard normal (red).

# CCD calibration
# =========
#
# If what I just presented is correct, with the two (main) "noise" sources,
# the observations $Y$ (from a CCD pixel) follow:
# $$Y \sim G \, \lambda + \sqrt{G^2 \, (\lambda+\sigma_{R}^2)} \, \epsilon \; ,$$
# where $G$ is the camera gain, $\sigma_{R}^2$ is the read-out variance
# and $\epsilon$ is a standard normal rv. The values of $G$ and
# $\sigma_{R}^2$ are specified by the manufacturer for each camera, but
# experience shows that manufacturers tend to be overoptimistic when it
# comes to the performance of their products---they can for instance give an
# underestimated $\sigma_{R}^2$. **It's therefore a good idea to measure
# these parameters with calibration experiments**. **Such calibration
# experiments are also the occasion to check that our simple model is
# relevant**.
#
# - Our problem becomes: how to test
#   $Y \sim G \, \lambda + \sqrt{G^2 \, (\lambda+\sigma_{R}^2)} \, \epsilon$?
#   Or, how to set different values for $\lambda$?
# - Let's consider a pixel of our CCD "looking" at a fixed volume of a
#   [fluorescein](http://en.wikipedia.org/wiki/Fluorescein) solution
#   with a given (and stable) concentration. We have two ways of
#   modifying $\lambda$:
#   - Change the intensity $i_{e}$ of the light source exciting the
#     fluorophore.
#   - Change the exposure time $\tau$.
#
# We can indeed write our $\lambda$ as:
# $$\lambda = \phi v c i_{e} \tau \, ,$$ where
# - $v$ is the solution's volume "seen" by a given pixel,
# - $c$ is the fluorophore's concentration,
# - $\phi$ is the [quantum yield](http://en.wikipedia.org/wiki/Quantum_yield).
#
# In practice it is easier to vary the exposure time $\tau$, and that's what
# was done in the experiments described next... **Question: Can you guess
# what these experiments are?**
#
# Sebastien Joucla and I asked our collaborators from the
# [Kloppenburg lab](http://cecad.uni-koeln.de/Prof-Peter-Kloppenburg.82.0.html)
# (Cologne University) to:
# - choose 10 exposure times,
# - for each of the 10 times, perform 100 exposures,
# - for each of the 10 x 100 exposures, record the value $y_{ij}$ of the
#   rv $Y_{ij}$ of the CCD's pixel $i,j$.
#
# We introduce a rv $Y_{ij}$ for each pixel because it is very difficult
# (impossible) to have a uniform intensity ($i_e$), a uniform volume
# ($v$) and a uniform quantum yield ($\phi$). We therefore have, for each
# pixel:
# $$Y_{i,j} \sim G \, p_{i,j} \tau + \sqrt{G^2 \, (p_{i,j} \tau+\sigma_{R}^2)} \, \epsilon_{i,j}\; ,$$
# where $p_{i,j} = c \phi_{i,j} v_{i,j} i_{e,i,j}$.
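# The model above is easy to simulate, which gives a feeling for what the
# calibration data should look like if the model is right. A minimal
# sketch, with made-up values for $G$, $\sigma_R^2$ and $p_{i,j}\tau$ (they
# are not the calibrated ones obtained later):
#
# ``` {.python}
# import numpy as np
#
# np.random.seed(1)
#
# def simulate_pixel(n_exposures=100, G=0.15, sigma2_R=300.0, p_tau=2000.0):
#     """Simulate ADU counts from one pixel under the simple CCD model:
#     Y = G*(N + R), with N ~ Poisson(p_tau) and R ~ N(0, sigma2_R)."""
#     N = np.random.poisson(p_tau, n_exposures)                  # photo-electrons
#     R = np.random.normal(0.0, np.sqrt(sigma2_R), n_exposures)  # read-out noise
#     return G*(N + R)
#
# y = simulate_pixel()
# # the empirical mean and variance should be close to
# # G*p_tau and G**2*(p_tau + sigma2_R) respectively
# (y.mean(), y.var(ddof=1), 0.15*2000.0, 0.15**2*(2000.0 + 300.0))
# ```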
# - If our model is correct we should have for each pixel $i,j$, for a
#   given exposure time, a mean value:
#   $$\bar{y}_{i,j} = \frac{1}{100} \sum_{k=1}^{100} y_{i,j,k} \approx G \, p_{i,j} \tau $$
# - and a variance:
#   $$S_{i,j}^2 = \frac{1}{99} \sum_{k=1}^{100} (y_{i,j,k}-\bar{y}_{i,j})^2 \approx G^2 \, (p_{i,j} \tau+\sigma_{R}^2) \; .$$
# - The graph of $S_{i,j}^2$ *vs* $\bar{y}_{i,j}$ should be a straight
#   line with slope $G$ and ordinate at 0 (intercept) $G^2 \sigma_{R}^2$.

# In[25]:

plt.figure(dpi=600,figsize=(10,8))
plt.imshow(np.transpose(calibration['10ms/stack'][:,:,0]),origin='lower')
plt.set_cmap('gray')
plt.colorbar()

# The first exposure of 10 ms (experiment performed by Andreas Pippow, Kloppenburg lab, Cologne University).

# CCD calibration: Checking the assumptions
# ------------------------------------------------------
#
# - The data are going to be analyzed as if the $Y_{i,j,k}$ were IID,
#   **but they were sequentially recorded**. It is therefore **strongly
#   recommended** to check that the IID hypothesis is reasonable.
# - The small example of the next figure shows that there are no
#   (obvious) trends.
# - We must also check the correlation function.

# In[27]:

plt.figure(dpi=600,figsize=(10,8))
plt.subplot(311)
plt.plot(calibration['10ms/stack'][31,41,:],lw=2)
plt.ylabel("ADU",fontsize=25)
plt.grid()
plt.subplot(312)
plt.plot(calibration['10ms/stack'][31,40,:],lw=2)
plt.ylabel("ADU",fontsize=25)
plt.grid()
plt.subplot(313)
plt.plot(calibration['10ms/stack'][31,39,:],lw=2)
plt.xlabel("Time (1 unit = 100 ms)",fontsize=25)
plt.ylabel("ADU",fontsize=25)
plt.grid()

# Counts time evolution for three neighboring pixels (10 ms exposure time).

# - If the $Y_{i,j,k}$ are not IID we expect a more or less linear
#   trend---due to bleaching of the dye.
# - Rather than looking at each individual pixel sequence like on the
#   previous slide, we can fit the following linear model to each
#   pixel: $$Y_{i,j,k} = \beta_0 + \beta_1 k + \sigma \epsilon_{i,j,k}$$
#   where the $\epsilon_{i,j,k} \stackrel{IID}{\sim} \mathcal{N}(0,1)$,
#   and check if $\beta_1$ can reasonably be considered as null; a
#   trend due to bleaching would give a negative $\beta_1$.
# - Without a trend, the theoretical distribution of
#   $\hat{\beta}_1/\hat{\sigma}_{\beta_1}$---$\hat{\beta}_1$ is the
#   estimate of $\beta_1$ and $\hat{\sigma}_{\beta_1}$ its estimated
#   standard error---is a Student's t distribution with 98 degrees of
#   freedom.
# - Applying this idea to the central pixel of the previous slide we
#   get...

# In[28]:

D_matrix = np.transpose(np.array([np.ones(100),np.arange(100)]))
P_matrix = np.linalg.solve(np.dot(np.transpose(D_matrix),D_matrix),np.transpose(D_matrix))
Y = calibration['10ms/stack'][31,40,:]
beta = np.dot(P_matrix,Y)
beta[1]

# In[29]:

Y_hat = np.dot(D_matrix,beta)
s2_hat = np.sum((Y-Y_hat)**2)/98
beta_se = np.sqrt(s2_hat*np.linalg.inv(np.dot(np.transpose(D_matrix),D_matrix)))
beta_se[1,1]

# In[30]:

from scipy.stats import t
(beta[1]-beta_se[1,1]*t.ppf(0.975,98),beta[1]+beta_se[1,1]*t.ppf(0.975,98))

# In[31]:

plt.figure(dpi=600,figsize=(10,8))
plt.plot(D_matrix[:,1],Y,lw=2,color='black')
plt.grid()
plt.xlabel("Time (1 unit = 100 ms)",fontsize=25)
plt.ylabel("ADU",fontsize=25)
plt.plot(D_matrix[:,1],Y_hat,lw=2,color='red')

# We get $\hat{\beta}_1 = 0.032$ and a 95 % conf. int. for it is: $[-0.018,0.082]$.
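# The same slope and standard error can be obtained with a ready-made
# routine, which is a convenient cross-check of the hand-rolled projection
# above. A sketch using `scipy.stats.linregress` on the `Y` vector just
# extracted (the `fit` and `half_width` names are ours):
#
# ``` {.python}
# from scipy.stats import linregress, t
# import numpy as np
#
# fit = linregress(np.arange(100), Y)   # Y = calibration['10ms/stack'][31,40,:]
# half_width = t.ppf(0.975, 98)*fit.stderr
# (fit.slope, (fit.slope - half_width, fit.slope + half_width))
# ```
#
# The slope and the 95 % confidence interval should match the values given above.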
# We can use the fact that, under the null hypothesis (no trend):
# $$\hat{\beta}_1/\hat{\sigma}_{\beta_1} \sim t_{98} \; ,$$
# by constructing, at each exposure time, the empirical cumulative
# distribution function (ECDF) of the 60 x 80 statistics, computing the
# maximal difference (in absolute value) with the theoretical CDF, and
# applying a Kolmogorov test. The critical value of the latter for a 99%
# level and a sample size of 100 is **0.163**. We get the following values:

# In[32]:

def linear_fit_stack(stack):
    I,J,K = stack.shape
    D_matrix = np.transpose(np.array([np.ones(K),np.arange(K)]))
    P_matrix = np.linalg.solve(np.dot(np.transpose(D_matrix),D_matrix),np.transpose(D_matrix))
    the_inv = np.linalg.inv(np.dot(np.transpose(D_matrix),D_matrix))[1,1]
    res = np.zeros((I,J))
    for i in range(I):
        for j in range(J):
            beta = np.dot(P_matrix,stack[i,j,:])
            Y_hat = np.dot(D_matrix,beta)
            s2_hat = np.sum((stack[i,j,:]-Y_hat)**2)/(K-2)
            res[i,j] = beta[1]/np.sqrt(s2_hat*the_inv)
    return res

b1stats = [linear_fit_stack(calibration[n+'/stack']) for n in list(calibration)]

# In[33]:

[[n for n in list(calibration)],
 [int(1000*np.max(np.abs(np.arange(1,len(b1s)+1)/len(b1s)-t.cdf(b1s,98))))/1000
  for b1s in [np.sort(b1.flatten()) for b1 in b1stats]]]

# The values at 50 and 70 ms are too large.

# In[34]:

tt = np.linspace(-4,4,501)
plt.figure(dpi=600,figsize=(10,8))
junk = plt.hist(np.concatenate([b1.flatten() for b1 in b1stats]),bins=100,normed=True,color='black')
plt.xlabel(r'$\hat{\beta}_1/\hat{\sigma}_{\beta_1}$',fontsize=25)
plt.ylabel('Density',fontsize=25)
plt.title(r'Density of all $\hat{\beta}_1/\hat{\sigma}_{\beta_1}$',fontsize=25)
plt.plot(tt,t.pdf(tt,98),color='orange',lw=5)

# Empirical density in black, theoretical one (t with 98 df) in orange.

# - We now look for potential correlations between recordings from
#   different pixels.
# - We do that by computing the empirical correlation between pixels
#   $(i,j)$ and $(u,v)$.
# - We get the empirical mean at each pixel (for a given exposure time),
#   that is: $\overline{Y}_{ij} = (1/K) \sum_{k=1}^K Y_{ijk}$.
# - We get the empirical variance:
#   $S^2_{ij} = 1/(K-1) \sum_{k=1}^K (Y_{ijk}-\overline{Y}_{ij})^2$.
# - We then obtain the normalized signal or *standard score*:
#   $N_{ijk} = (Y_{ijk}-\overline{Y}_{ij})/\sqrt{S^2_{ij}}$.
# - The correlation coefficient is then:
#   $\rho(ij,uv) = 1/(K-1) \sum_{k=1}^K N_{ijk} N_{uvk}$.
# - Under the null hypothesis, no correlation,
#   $\rho(ij,uv) \sim \mathcal{N}(0,1/K)$.

# In[35]:

def correlations(stack):
    n_row, n_col, n_time = stack.shape
    n_pixel = n_row*n_col
    # integer division is required here: the shape of an array must be an integer
    result = np.zeros((n_pixel*(n_pixel-1)//2,))
    stack_score = np.copy(stack)
    stack_score = (stack_score - stack_score.mean(2).reshape((n_row,n_col,1)))/stack_score.std(2).reshape((n_row,n_col,1))
    idx = 0
    for i in range(n_pixel-1):
        for j in range(i+1,n_pixel):
            pos1 = (i//n_col,i-(i//n_col)*n_col)
            pos2 = (j//n_col,j-(j//n_col)*n_col)
            coef = np.sum(stack_score[pos1[0],pos1[1],:]*stack_score[pos2[0],pos2[1],:])/n_time
            result[idx] = coef
            idx += 1
    return result

# In[36]:

corr10 = correlations(calibration['10ms/stack'])

# In[37]:

from scipy.stats import norm
plt.figure(dpi=600,figsize=(10,8))
hist_corr10 = plt.hist(corr10,bins=100,normed=True,color='black')
plt.xlabel(r'$\rho(ij,uv)$',fontsize=25)
plt.ylabel('Density',fontsize=25)
plt.title('Density of correlation coefficients at 10 ms',fontsize=25)
plt.plot(tt/10,norm.pdf(tt/10,0,0.1),color='orange',lw=5)

# Empirical density in black, theoretical one, $\mathcal{N}(0,0.01)$, in
# orange.
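# The visual comparison can be complemented by a formal test. A sketch
# using `scipy.stats.kstest` to compare the 10 ms correlation coefficients
# with their theoretical $\mathcal{N}(0,1/K)$ distribution ($K=100$
# exposures, hence a standard deviation of 0.1); keep in mind that the
# coefficients share pixels and are therefore not mutually independent, so
# the p-value is only indicative:
#
# ``` {.python}
# from scipy.stats import kstest
#
# # two-sided Kolmogorov-Smirnov test of corr10 against N(0, 0.1**2)
# statistic, p_value = kstest(corr10, 'norm', args=(0, 0.1))
# (statistic, p_value)
# ```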
# The empirical variances (x 100 and rounded to the third decimal) of the
# samples of correlation coefficients (1 sample per exposure duration)
# are:

# In[38]:

var_of_corr_list = [np.var(correlations(calibration[n+'/stack'])) for n in list(calibration)]
[[n for n in list(calibration)], [int(100000*v)/1000 for v in var_of_corr_list]]

# We wrote previously:
# - If our model is correct we should have for each pixel $i,j$, for a
#   given exposure time, a mean value:
#   $$\bar{y}_{i,j} = \frac{1}{100} \sum_{k=1}^{100} y_{i,j,k} \approx G \, p_{i,j} \tau $$
# - and a variance:
#   $$S_{i,j}^2 = \frac{1}{99} \sum_{k=1}^{100} (y_{i,j,k}-\bar{y}_{i,j})^2 \approx G^2 \, (p_{i,j} \tau+\sigma_{R}^2) \; .$$
# - **The graph of $S_{i,j}^2$ *vs* $\bar{y}_{i,j}$ should be a straight
#   line with slope $G$ and ordinate at 0 (intercept) $G^2 \sigma_{R}^2$.**

# In[39]:

plt.figure(dpi=600,figsize=(10,8))
for x,y in zip([np.mean(calibration[n+'/stack'],2) for n in list(calibration)],
               [np.var(calibration[n+'/stack'],2) for n in list(calibration)]):
    plt.scatter(x.flatten(),y.flatten(),0.05)
plt.xlabel(r'$\overline{ADU}$',fontsize=25)
plt.ylabel("Var(ADU)",fontsize=25)
plt.xlim([0,4500])
plt.ylim([0,1000])

# We do see the expected linear relation:
# $\mathrm{Var}[ADU] = G^2 \sigma_{R}^2 + G \mathrm{E}[ADU]$.

# ### Linear fit
#
# The [heteroscedasticity](http://en.wikipedia.org/wiki/Heteroscedasticity)
# (inhomogeneous variance) visible on the graph is also expected since the
# variance of a variance for an IID sample of size $K$ from a normal
# distribution with mean $\mu$ and variance $\sigma^2$ is:
# $$\mathrm{Var}[S^2] = \frac{2\sigma^4}{(K-1)} \; .$$
#
# - This means that when we do our linear fit,
#   $$y_k = a + b x_k + \sigma_k \epsilon_k \, ,$$ we should use
#   weights.
# - Here $$x_k = \overline{ADU}_k \quad y_k = \mathrm{Var}[ADU]_k \, ,$$
#   $$b = G \quad a = G^2 \sigma_R^2 \, .$$
# - It's easy to show that the least squares estimates are:
#   $$\hat{a} = \frac{1}{Z} \sum_k \frac{y_k-\hat{b} x_k}{\sigma_k^2} \quad \text{where} \quad Z = \sum_k \frac{1}{\sigma_k^2}$$
#   and
#   $$\hat{b} = \left(\sum_k \frac{x_k}{\sigma_k^2} \left(y_k - \frac{1}{Z}\sum_j \frac{y_j}{\sigma_j^2}\right)\right) / \left(\sum_k \frac{x_k}{\sigma_k^2}\left(x_k - \frac{1}{Z}\sum_j \frac{x_j}{\sigma_j^2}\right)\right) \, .$$
# - We don't know $\sigma_k$ but we have an estimation:
#   $\hat{\sigma}_k^2 = \mathrm{Var}[S_k^2]$; we can "plug in" this value
#   to get our weights.

# In[40]:

X_k = np.concatenate([x.flatten() for x in [np.mean(calibration[n+'/stack'],2) for n in list(calibration)]])
y_k = np.concatenate([x.flatten() for x in [np.var(calibration[n+'/stack'],2) for n in list(calibration)]])
sigma2_k = 2*y_k**2/(calibration['10ms/stack'].shape[2]-1)
Z = sum(1/sigma2_k)
num1 = sum(X_k/sigma2_k*(y_k-sum(y_k/sigma2_k)/Z))
denom1 = sum(X_k/sigma2_k*(X_k-sum(X_k/sigma2_k)/Z))
b_hat0 = num1/denom1
a_hat0 = sum((y_k-b_hat0*X_k)/sigma2_k)/Z

# In[41]:

plt.figure(dpi=600,figsize=(10,8))
for x,y in zip([np.mean(calibration[n+'/stack'],2) for n in list(calibration)],
               [np.var(calibration[n+'/stack'],2) for n in list(calibration)]):
    plt.scatter(x.flatten(),y.flatten(),0.05)
plt.xlabel(r'$\overline{ADU}$',fontsize=25)
plt.ylabel("Var(ADU)",fontsize=25)
aa = np.linspace(0,4500)
plt.plot(np.linspace(0,4500,101),a_hat0+b_hat0*np.linspace(0,4500,101),color='red',lw=2)
plt.xlim([0,4500])
plt.ylim([0,1000])

# In[42]:

G_hat0 = b_hat0
S2_hat0 = a_hat0/b_hat0**2
(G_hat0, S2_hat0)

# We have here $\hat{G} = 0.14$ and $\hat{\sigma}_R^2 = 290$.
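# As a cross-check of the closed-form weighted least squares estimates
# above, the same straight line can be fitted with `numpy.polyfit`, whose
# `w` argument expects weights proportional to $1/\sigma_k$ (the
# `*_check` names are ours):
#
# ``` {.python}
# import numpy as np
#
# # weighted linear fit: polyfit minimizes sum((w_k*(y_k - a - b*x_k))**2)
# b_check, a_check = np.polyfit(X_k, y_k, 1, w=1/np.sqrt(sigma2_k))
# (b_check, a_check)   # should be close to (b_hat0, a_hat0)
# ```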
# ### Checking the fit

# In[43]:

plt.figure(dpi=600,figsize=(10,8))
for x,y in zip([np.mean(calibration[n+'/stack'],2) for n in list(calibration)],
               [np.var(calibration[n+'/stack'],2) for n in list(calibration)]):
    plt.scatter(a_hat0+b_hat0*x.flatten(),(y.flatten()-a_hat0-b_hat0*x.flatten())*np.sqrt(99/2)/(a_hat0+b_hat0*x.flatten()),0.05)
plt.xlabel('Fitted value',fontsize=25)
plt.ylabel("Normalized residuals (Normal scores)",fontsize=25)
plt.axhline(0,color='red',lw=2)

# Some remarks
# ----------------
#
# - When we use a linear regression, we are (implicitly) assuming that
#   the "independent" variable, here $\overline{ADU}_k$, is *exactly*
#   known.
# - This was clearly not the case here since $\overline{ADU}_k$ was
#   measured (with an error).
# - We could and will therefore refine our fit.
#
# Error propagation and variance stabilization
# =========================
#
# Error propagation
# --------------------------
#
# - Let us consider two random variables: $Y$ and $Z$ such that:
#   - $Y \approx \mathcal{N}(\mu_Y,\sigma^2_Y)$ or
#     $Y \approx \mu_Y + \sigma_Y \, \epsilon$
#   - $Z = f(Y)$, with $f$ continuous and differentiable.
# - Using a first order Taylor expansion we then have:
#   $$ \begin{array}{lcl} Z & \approx & f(\mu_Y + \sigma_Y \, \epsilon) \\ & \approx & f(\mu_Y) + \sigma_Y \, \epsilon \, \frac{d f}{d Y}(\mu_Y) \end{array}$$
# - $\mathrm{E}Z \approx f(\mu_Y) = f(\mathrm{E}Y)$
# - $\mathrm{Var}Z \equiv \mathrm{E}[(Z-\mathrm{E}Z)^2] \approx \sigma^2_Y \, \frac{d f}{d Y}^2(\mu_Y)$
# - $Z \approx f(\mu_Y) + \sigma_Y\left| \frac{d f}{d Y}(\mu_Y)\right| \, \epsilon$
#
# Variance stabilization
# ------------------------------
#
# - For our CCD model we have (for a given pixel):
#   $$Y \sim G \, \lambda + \sqrt{G^2 \, (\lambda+\sigma_{R}^2)} \, \epsilon = \mu_Y + \sqrt{G \, \mu_Y + G^2 \, \sigma_{R}^2} \, \epsilon \, .$$
# - Then if $Z = f(Y)$ we get:
#   $$Z \approx f(\mu_Y) + \mid f'(\mu_Y) \mid G \sqrt{\mu_Y / G+\sigma_{R}^2} \, \epsilon$$
# - What happens then if we take: $f(x) = 2 \, \sqrt{x/G + \sigma_{R}^2}$?
# - We have: $$f'(x) = \frac{1}{G \sqrt{ x / G + \sigma_{R}^2}}$$
# - Leading to:
#   $$Z \approx 2 \, \sqrt{\mu_Y / G + \sigma_{R}^2} + \epsilon$$

# In[44]:

plt.figure(dpi=600,figsize=(10,8))
for x,y in zip([np.mean(2*np.sqrt(calibration[n+'/stack']/G_hat0+S2_hat0),2) for n in list(calibration)],
               [np.var(2*np.sqrt(calibration[n+'/stack']/G_hat0+S2_hat0),2) for n in list(calibration)]):
    plt.scatter(x.flatten(),y.flatten(),0.05)
plt.xlabel(r'$\mathrm{E}(2 \sqrt{ADU/\hat{G}+\hat{\sigma}_R^2})$',fontsize=20)
plt.ylabel(r'$\mathrm{Var}(2 \sqrt{ADU/\hat{G}+\hat{\sigma}_R^2})$',fontsize=20)
plt.xlim([100,350])
plt.axhline(1,color='red',lw=2)

# Application
# =======

# Back to where we started
# ---------------------------------
#
# ![](figs/POMC-raw-data.png)
#
# ADU counts (raw data) from Fura-2 excited at 340 nm. Each square
# corresponds to a pixel. 25.05 s of data are shown. Same scale on each
# sub-plot. Data recorded by Andreas Pippow (Kloppenburg Lab, Cologne University).

# Quick ROI detection: Motivation
# ------------------------------------------
#
# - After variance stabilization:
#   $Z_{i,j,k} = 2 \, \sqrt{ADU_{i,j,k} / G + \sigma_{R}^2}$, the
#   variance at each pixel $(i,j)$ at each time, $k$, should be 1.
# - If a pixel contains no dynamical signal---that is nothing more than
#   a constant background signal---the following statistic:
#   $$RSS_{i,j} \equiv \sum_{k=1}^{K} (Z_{i,j,k} - \overline{Z}_{i,j})^2 \quad \mathrm{with} \quad \overline{Z}_{i,j} \equiv \frac{1}{K} \sum_{k=1}^{K} Z_{i,j,k}$$
#   should follow a $\chi^2$ distribution with $K-1$ degrees of freedom.
# - We could therefore compute the values of the complementary
#   cumulative distribution function of the theoretical $\chi_{K-1}^2$
#   distribution: $$1 - F_{\chi_{K-1}^2}(RSS_{i,j})$$ and look for very
#   small values---that is, very small probabilities (using a log scale
#   helps here).

# In[46]:

stack_pomc_stab = 2*np.sqrt(np.copy(stack_pomc)/G_hat0+S2_hat0)
stack_pomc_stab_rss = np.sum((stack_pomc_stab-stack_pomc_stab.mean(2).reshape((60,80,1)))**2,2)

# In[47]:

from scipy.stats import chi2
plt.figure(dpi=600,figsize=(10,8))
plt.subplot(121)
plt.contour(range(60),range(80),
            np.transpose(np.log(chi2.sf(stack_pomc_stab_rss,df=stack_pomc_stab.shape[2]-1))),
            linewidths=2,linestyles='solid',colors='black')
plt.grid()
plt.title('Full field', fontsize=20)
plt.subplot(122)
plt.contour(range(60),range(80),
            np.transpose(np.log(chi2.sf(stack_pomc_stab_rss,df=stack_pomc_stab.shape[2]-1))),
            linewidths=2,linestyles='solid',colors='black')
plt.xlim([23,33])
plt.ylim([33,43])
plt.title('Cell body', fontsize=20)
plt.grid()

# Contour plots of $\log\left(1 - F_{\chi_{K-1}^2}(RSS_{i,j})\right)$

# Pointwise time course estimation
# ---------------------------------------------
#
# - We are going to be (very) conservative and keep as our ROI the
#   pixels having a
#   $\log\left(1 - F_{\chi_{K-1}^2}(RSS)\right) \le -300$.
# - We are then left with 12 pixels.
# - We are going to model the fluorescence intensity of each of these
#   pixels by: $$S_{i,j}(t) = \phi_{i,j} \, f(t) + b \; ,$$ where $f(t)$
#   is a signal time course common to all pixels of the ROI, $\phi_{i,j}$ is a
#   pixel specific parameter and $b$ is a background fluorescence
#   assumed identical for each pixel.
# - The time $t$ is in fact a discrete variable, $t = \delta \, k$
#   ($\delta$ = 150 ms) and we are seeking a pointwise estimation:
#   $\{f_1,f_2,\ldots,f_K\}$ ($K$ = 168) where $f_k = f(\delta \, k)$.
# - We end up with 12 ($\phi_{i,j}$) + 168 ($f_k$) + 1 ($b$) = 181
#   parameters for 12 x 168 = 2016 measurements.
# - We need to add a constraint since with our model specification:
#   $$S_{i,j,k} = \phi_{i,j} \, f_k + b \; ,$$ we can multiply all the
#   $\phi_{i,j}$ by 2 and divide all the $f_k$ by 2 and get the same
#   prediction.
# - We are going to set $f_k=1$ for the first 5 time points (the
#   stimulation comes at the 11th) and our pointwise estimation relates
#   to what is usually done with this type of data, $\Delta S(t) / S_0$
#   (where $S_0$ is a baseline average) through:
#   $$\Delta S(t) / S_0 = \frac{S(t) - S_0}{S_0} = f(t) - 1 + \mathrm{noise}\, .$$
# - **Notice that no independent background measurement is used**.
# - With variance stabilization we end up minimizing:
#   $$RSS\left(b,(\phi_{i,j}),(f_k)_{k=5,\ldots,168}\right) = \sum_{(i,j) \in \mathrm{ROI}} \sum_{k=1}^{168} \left(Z_{ijk}-F_{ijk}\right)^2 \, ,$$
#   where $$Z_{ijk} = 2\,\sqrt{ADU_{ijk}/\hat{G}+\hat{\sigma}_R^2}$$ and
#   $$F_{ijk} = 2\,\sqrt{\phi_{i,j} \, f_k + b + \hat{\sigma}_R^2}\, .$$
# - If our model is correct we should have:
#   $$RSS\left(\hat{b},(\hat{\phi}_{i,j}),(\hat{f}_k)_{k=5,\ldots,168}\right) \sim \chi^2_{12 \times 168 - 176}\, .$$
# - The method also generates confidence intervals for the estimated
#   parameters.
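# To see why the $RSS_{i,j}$ criterion used above singles out pixels with a
# dynamical signal, we can apply it to a simulated, background-only pixel.
# A sketch reusing the calibrated $\hat{G}$ and $\hat{\sigma}_R^2$
# (`G_hat0` and `S2_hat0`) with a made-up, constant photon flux:
#
# ``` {.python}
# import numpy as np
# from scipy.stats import chi2
#
# np.random.seed(2)
# K = 168
# lambda_bg = 500.0                      # made-up constant background intensity
# adu = G_hat0*(np.random.poisson(lambda_bg, K) +
#               np.random.normal(0, np.sqrt(S2_hat0), K))
# z = 2*np.sqrt(adu/G_hat0 + S2_hat0)    # variance stabilization
# rss = np.sum((z - z.mean())**2)
# # for a signal-free pixel this should be an "ordinary" chi-square value,
# # so the complementary CDF should not be extremely small
# (rss, chi2.sf(rss, df=K-1))
# ```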
# Pointwise time course estimation: Technical details
# ----------------------------------------------------------------
#
# - To solve this 176 dimensional optimization problem in a reasonable
#   time (< 10 s) in `Python` we use Newton's method with conjugate
#   gradients for the inversion of the Hessian matrix.
# - That means we have to define a function returning the
#   gradient---vector of first derivatives---and the Hessian---matrix of
#   second derivatives---of the `RSS` we just defined (**that's
#   painful work**).
# - To improve numerical behavior, since all parameters are positive, we
#   work with the log of the parameters.
#
# Doing the fit
# ------------------------
#
# ### Symbolic derivation setting
#
# We now want to define three functions:
#
# 1. One that returns the residual sum of squares given the model's
#    parameters (and the data):
#    $$RSS\left(b,(\phi_{i,j}),(f_k)_{k=5,\ldots,168}\right) = \sum_{(i,j) \in \mathrm{ROI}} \sum_{k=1}^{168} \left(Z_{ijk}-F_{ijk}\right)^2 \; ,$$
#    where $Z_{ijk} = 2\,\sqrt{ADU_{ijk}/\hat{G}+\hat{\sigma}_R^2}$
#    and $F_{ijk} = 2\,\sqrt{\phi_{i,j} \, f_k + b + \hat{\sigma}_R^2}$.
# 2. One that returns the gradient vector of this RSS.
# 3. One that returns the Hessian matrix.
#
# As explained above, since all the model parameters are positive we are
# going to define these three functions as functions of the log of the
# parameters. We are going to use the `SymPy` module to get the analytical
# expressions we need to define our functions.
#
# We start by defining a symbolic "unitary" rss corresponding to one term
# $(i,j,k)$ in the sum:
# $$\left(2\,\sqrt{ADU_{ijk}/\hat{G}+\hat{\sigma}_R^2} - 2\,\sqrt{\exp \log \phi_{i,j} \, \exp \log f_k + \exp \log b + \hat{\sigma}_R^2}\right)^2 \, .$$
#
# In the code that follows:
#
# - `y_k` stands for $2\,\sqrt{ADU_{ijk}/\hat{G}+\hat{\sigma}_R^2}$.
# - `logphi_i` stands for $\log \phi_{i,j}$.
# - `logf_k` stands for $\log f_k$.
# - `logb_sy` stands for $\log b$.
# - `S2_L` stands for $\hat{\sigma}_R^2$.

# In[68]:

y_k,logphi_i,logf_k,logb_sy,S2_L = sy.symbols('y_k,logphi_i,logf_k,logb_sy,S2_L',real=True)
rss_u = (y_k - 2*sy.sqrt(sy.exp(logphi_i)*sy.exp(logf_k)+sy.exp(logb_sy)+S2_L))**2
rss_u

# ### Symbolic gradient
#
# We get the first derivative with respect to `logb_sy` with:

# In[69]:

sy.diff(rss_u,logb_sy)

# In[70]:

print(sy.diff(rss_u,logb_sy))

# When this "unitary" expression is re-injected into the "full" $RSS$
# expression, the summation is made over all the pixels $(i,j)$ at all
# times $k$. We get the first derivative with respect to `logphi_i` with:

# In[71]:

sy.diff(rss_u,logphi_i)

# In[72]:

print(sy.diff(rss_u,logphi_i))

# When this "unitary" expression is re-injected into the "full" $RSS$
# expression, the summation is made over **a single pixel** $(i,j)$ at all
# times $k$. We get the first derivative with respect to `logf_k` with:

# In[73]:

sy.diff(rss_u,logf_k)

# In[74]:

print(sy.diff(rss_u,logf_k))

# When this "unitary" expression is re-injected into the "full" $RSS$
# expression, the summation is made over **all the pixels** $(i,j)$ at **a
# single time** $k$.
#
# ### Symbolic Hessian
#
# We get the second derivative with respect to `logb_sy` with:

# In[75]:

sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logb_sy)),sy.exp(logb_sy))

# In[76]:

print(sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logb_sy)),sy.exp(logb_sy)))

# When this "unitary" expression is re-injected into the "full" $RSS$
# expression, the summation is made over **all the pixels** $(i,j)$ at
# **all times** $k$.
We get the second derivative with respect to # `logb_sy` and `logphi_i` with: # In[77]: sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logphi_i)),sy.exp(logf_k)*sy.exp(logphi_i)) # In[78]: print(sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logphi_i)),sy.exp(logf_k)*sy.exp(logphi_i))) # When this "unitary" expression is re-injected into the "full" $RSS$ # expression, the summation is made over **a single pixel** $(i,j)$ at # **all times** $k$. We get the second derivative with respect to # `logb_sy` and `logf_k` with: # In[79]: sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i)) # In[80]: print(sy.collect(sy.simplify(sy.diff(rss_u,logb_sy,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i))) # When this "unitary" expression is re-injected into the "full" $RSS$ # expression, the summation is made over **all the pixels** $(i,j)$ at # **at a single time** $k$. We get the second derivative with respect to # `logphi_i` with: # In[81]: sy.collect(sy.simplify(sy.diff(rss_u,logphi_i,logphi_i)),sy.exp(logf_k)*sy.exp(logphi_i)) # In[82]: print(sy.collect(sy.simplify(sy.diff(rss_u,logphi_i,logphi_i)),sy.exp(logf_k)*sy.exp(logphi_i))) # When this "unitary" expression is re-injected into the "full" $RSS$ # expression, the summation is made over **a single pixel** $(i,j)$ at # **all times** $k$. The second derivative with respect to the $\phi$ of # two different pixels is null. We get the second derivative with respect # to `logphi_i` and `logf_k` with: # In[83]: sy.collect(sy.simplify(sy.diff(rss_u,logphi_i,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i)) # In[84]: print(sy.collect(sy.simplify(sy.diff(rss_u,logphi_i,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i))) # When this "unitary" expression is re-injected into the "full" $RSS$ # expression, the summation is made over **a single pixel** $(i,j)$ at **a # single time** $k$. We get the second derivative with respect to `logf_k` # with: # In[85]: sy.collect(sy.simplify(sy.diff(rss_u,logf_k,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i)) # In[86]: print(sy.collect(sy.simplify(sy.diff(rss_u,logf_k,logf_k)),sy.exp(logf_k)*sy.exp(logphi_i))) # When this "unitary" expression is re-injected into the "full" $RSS$ # expression, the summation is made over **all the pixels** $(i,j)$ at **a # single time** $k$. The block of the Hessian matrix generated by the # second derivatives with respect to the `logf_k` parameters is # **diagonal**. # # ### Extracting the data for the fit # In[48]: from scipy.stats import chi2 threshold = -300 good_pix = np.where(np.log(chi2.sf(stack_pomc_stab_rss,df=stack_pomc_stab.shape[2]-1)) < -300) data4fit = stack_pomc_stab[good_pix[0],good_pix[1],:] # ### Functions definitions # # We come now to the definition of the three functions ($RSS$, gradient # and Hessian). We start in fact with a "constructor" function, that is a # function taking the data as argument and returning a triplet of # functions: # In[49]: def mk_rss(data=data4fit, S2=S2_hat0, base_length=5): npix,K = data.shape f = np.ones((K,)) nabla = np.zeros((K-base_length+npix+1,)) hessian = np.zeros((K-base_length+npix+1,K-base_length+npix+1)) d2phi = np.zeros((npix,)) dphidf = np.zeros((npix,K)) d2f = np.zeros((K,)) pred = np.zeros(data.shape) def rss(par): par = np.exp(np.copy(par)) b = par[0] phi = par[1:(npix+1)] f[:]=1. f[base_length:] = par[(npix+1):] pred[:,:] = 2*np.sqrt(np.outer(phi,f)+b+S2) return np.sum((data-pred)**2) def grad(par): par = np.exp(np.copy(par)) b = par[0] phi = par[1:(npix+1)] f[:]=1. 
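# A quick check of what was just extracted never hurts. The sketch below
# prints the number of selected pixels, their coordinates and the shape of
# `data4fit` (we expect 12 pixels and 168 time points):
#
# ``` {.python}
# print(len(good_pix[0]), "pixels selected")
# print(list(zip(good_pix[0], good_pix[1])))
# print(data4fit.shape)   # expected: (12, 168)
# ```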
f[base_length:] = par[(npix+1):] pred[:,:] = np.sqrt(np.outer(phi,f)+b+S2) nabla[0] = np.sum(-2*b*(data - 2*pred)/pred) nabla[1:(npix+1)] = np.sum(-2*f.reshape((1,K))*phi.reshape((npix,1))*(data - 2*pred)/pred,1) nabla[(npix+1):] = np.sum(-2*f.reshape((1,K))*phi.reshape((npix,1))*(data - 2*pred)/pred,0)[base_length:] return nabla def hess(par): par = np.exp(np.copy(par)) b = par[0] phi = par[1:(npix+1)] f[:]=1. f[base_length:] = par[(npix+1):] pred[:,:] = np.sqrt(np.outer(phi,f)+b+S2) hessian[:,:] = 0. hessian[0,0] = np.sum(data*b**2/pred**3+(4-2*data/pred)*b) hessian[0,1:(npix+1)] = np.sum(data*b*f.reshape((1,K))*phi.reshape((npix,1))/pred**3,1) hessian[1:(npix+1),0] = np.sum(data*b*f.reshape((1,K))*phi.reshape((npix,1))/pred**3,1) hessian[0,(npix+1):] = np.sum(data*b*f.reshape((1,K))*phi.reshape((npix,1))/pred**3,0)[base_length:] hessian[(npix+1):,0] = np.sum(data*b*f.reshape((1,K))*phi.reshape((npix,1))/pred**3,0)[base_length:] dphidf[:,:] = data*f.reshape((1,K))**2*phi.reshape((npix,1))**2/pred**3+(4-2*data/pred)*f.reshape((1,K))*phi.reshape((npix,1)) d2phi[:] = np.sum(dphidf,1) for i in range(npix): hessian[1+i,1+i] = d2phi[i] hessian[1+i,(npix+1):] = dphidf[i,base_length:] hessian[(npix+1):,i+1] = dphidf[i,base_length:] d2f[:] = np.sum(dphidf,0) for i in range(K-base_length): hessian[(npix+1+i),(npix+1+i)] = d2f[i+base_length] return hessian return (rss,grad,hess) # We then "instantiate" the three functions: # In[50]: rss0, grad0, hess0 = mk_rss(data4fit) # ### Getting the initial guesses # # We use a classical $\Delta F / F_0$ to get the initial guesses. We set # the (initial guess for the) background fluorescence to an arbitrary # small but postive value: # In[51]: baseline = stack_pomc[good_pix[0],good_pix[1],:5].mean(1) f_0 = np.mean(stack_pomc[good_pix[0],good_pix[1],:]/baseline.reshape((12,1)),0) b_0 = 100 phi_0 = np.mean((stack_pomc[good_pix[0],good_pix[1],:]-b_0)/f_0.reshape((1,168)),1) par_0 = np.zeros((1+12+163,)) par_0[0] = b_0 par_0[1:13] = phi_0 par_0[13:] = f_0[5:] # ### Checking the analytical expressions # # The codes returning the gradient and Hessian matrix are somewhat # tedious, meaning that we could have made mistakes typing them. It's # therefore a good policy to check their results against slow but less # error prone codes doing the gradient and Hessian matrix evaluation # numerically. We define first a function returning the gradient using the # central difference: # In[52]: def grad_num(fct=rss0, par=np.log(np.copy(par_0)), delta=1e-6): n_par = len(par) res = np.zeros((n_par,)) for i in range(n_par): par[i] += delta res[i] += fct(par) par[i] -= 2*delta res[i] -= fct(par) par[i] += delta return res/2/delta # We define next a function returning a numerical evaluation of the # Hessian matrix: # In[53]: def hess_num(fct=rss0, par=np.log(np.copy(par_0)), delta=1e-4): n_par = len(par) res = np.zeros((n_par,n_par)) f0 = fct(par) for i in range(n_par): for j in range(i,n_par): par[i] += delta par[j] += delta f3 = fct(par) par[i] -= delta f2 = fct(par) par[i] += delta par[j] -= delta f1 = fct(par) par[i] -= delta res[i,j] = (f3-f2-f1+f0)/delta**2 if j > i: res[j,i] = (f3-f2-f1+f0)/delta**2 return res # We can now do the tests: # In[54]: g0 = grad0(np.log(par_0)) g0n = grad_num(rss0,np.log(np.copy(par_0))) h0 = hess0(np.log(par_0)) h0n = hess_num(rss0,np.log(np.copy(par_0)),delta=1e-3) not_zero = np.abs(h0) > 0. 
print('Initial RSS value: \n', rss0(np.log(par_0)), '\n') print('Maximal relative difference (in absolute value) between analytical and numerical gradient: \n', np.max(np.abs(g0-g0n)/np.abs(g0)),'\n') print('Maximal relative difference (in absolute value) between analytical and numerical Hessian: \n', np.max(np.abs(h0n[not_zero]-h0[not_zero])/np.abs(h0[not_zero])), '\n') # ### Doing the fit # In[55]: from scipy.optimize import minimize res = minimize(rss0,np.log(par_0),method='Newton-CG',jac=grad0,hess=hess0,options={'disp': True, 'xtol': 1e-8}) # We see here the clear reduction in the $RSS$, starting at 17547562 and # ending at 1975. # # ### Definition of a function returning the prediction # In[56]: def mk_pred(p_vector, log_scale=False, stabilise_var=True, S2=S2_hat0, G=G_hat0, base_length=5): if log_scale: p_vector = np.exp(p_vector) phi = p_vector[1:13] b = p_vector[0] f = np.ones((168,)) f[base_length:] = p_vector[13:] pred = np.outer(phi,f)+b if stabilise_var: return 2*np.sqrt(pred+S2) else: return G*pred # Pointwise time course estimation: Time course estimate # ------------------------------------------------------------------- # In[57]: f_hat = np.zeros((168,)) f_hat[5:] = res.x[13:] par_se = np.sqrt(np.diag(scipy.linalg.inv(hess0(res.x)))) f_up = np.zeros((168,)) f_up[5:] = res.x[13:] + 1.96*par_se[13:] f_low = np.zeros((168,)) f_low[5:] = res.x[13:] - 1.96*par_se[13:] fig, ax = plt.subplots(dpi=600,figsize=(10,8)) ax.fill_between(np.arange(168)*0.15,np.exp(f_low),np.exp(f_up),facecolor='grey',edgecolor='grey') ax.plot(np.arange(168)*0.15,np.exp(f_hat),lw=1,color='red') plt.grid() plt.xlabel('Time (s)',fontsize=25) plt.ylabel(r'$\hat{f}_k$',fontsize=25) plt.xlim([0,25]) # In[58]: pred = mk_pred(res.x,True,True) the_range = [np.min(data4fit),np.max(data4fit)] plt.figure(dpi=600,figsize=(10,8)) plt.subplot(341) plt.plot(np.arange(168)*0.15,pred[0,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[0,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(342) plt.plot(np.arange(168)*0.15,pred[1,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[1,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(343) plt.plot(np.arange(168)*0.15,pred[2,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[2,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(344) plt.plot(np.arange(168)*0.15,pred[3,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[3,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(345) plt.plot(np.arange(168)*0.15,pred[4,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[4,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(346) plt.plot(np.arange(168)*0.15,pred[5,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[5,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(347) plt.plot(np.arange(168)*0.15,pred[6,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[6,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(348) plt.plot(np.arange(168)*0.15,pred[7,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[7,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(349) plt.plot(np.arange(168)*0.15,pred[8,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[8,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(3,4,10) plt.plot(np.arange(168)*0.15,pred[9,:],color='red') plt.plot(np.arange(168)*0.15,data4fit[9,:],color='black') plt.xlim([0,25]) plt.ylim(the_range) plt.subplot(3,4,11) plt.plot(np.arange(168)*0.15,pred[10,:],color='red') 
plt.plot(np.arange(168)*0.15,data4fit[10,:],color='black')
plt.xlim([0,25])
plt.ylim(the_range)
plt.subplot(3,4,12)
plt.plot(np.arange(168)*0.15,pred[11,:],color='red')
plt.plot(np.arange(168)*0.15,data4fit[11,:],color='black')
plt.xlim([0,25])
plt.ylim(the_range)

# Data and fit after variance stabilization. The `RSS` is 1976, giving a
# probability of 0.986 (a bit large).
#
# Thanks
# ---------------
#
# This work was done in collaboration with:
# - Sebastien Joucla
# - Romain Franconville
# - Andreas Pippow
# - Peter Kloppenburg
#
# Thank you for your attention!

# In[59]:

calibration.close()