#!/usr/bin/env python # coding: utf-8 # # Testing ICA-AROMA regression # The [current fmriprep pipeline](https://github.com/poldracklab/fmriprep/tree/1.1.1) extracts confounds (csf, white matter, global signal, etc) from the non-denoised bold series, whereas in [Pruim et al. (2015)](https://www.sciencedirect.com/science/article/pii/S1053811915001822?via%3Dihub), ICA-AROMA is applied after spatial smoothing, but before any additional nuisance regression. Directly from the paper, the authors say: # > Within the typical fMRI participant-level preprocessing stream ICA-AROMA is applied after spatial smoothing but prior to high-pass filtering and further nuisance regression # # However, it is unknown whether choosing the nuisance regressors before or after denoising with ICA-AROMA has a detriment on outcome measures. # # The first test will mirror [Hallquist's The Nuisance of Nuisance Regression](https://www.ncbi.nlm.nih.gov/pubmed/23747457) where I will replicate the simulation signal they used and test the impact of an additional nuisance (I'll call it csf) when it is cleaned (e.g. derived from the denoised bold series) or unclean (derived from the raw bold series) # # * Please see the [pull request](https://github.com/poldracklab/fmriprep/pull/959), [issue 817](https://github.com/poldracklab/fmriprep/issues/817), and [issue 1049](https://github.com/poldracklab/fmriprep/issues/1049) associated with this topic for more information. # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') import numpy as np import matplotlib.pyplot as plt plt.style.use('ggplot') plt.rcParams['figure.figsize'] = [15, 5] import seaborn as sns from scipy.signal import periodogram # In[53]: # number of timepoints tp = 200 # create array that contains 200 elements ix = np.arange(tp) # 0.2 hz signal1 = np.sin(2*np.pi*ix*(4/tp)) # 0.035 hz signal2 = np.sin(2*np.pi*ix*(7/tp)) # .11 hz (to be pulled out by ica-aroma) motion1 = np.cos(2*np.pi*ix*(22/tp)) # .25 hz (to be pulled out by ica-aroma) motion2 = np.cos(2*np.pi*ix*(50/tp)) # create totals for the signals and the noise signal_total = signal1 + signal2 motion_total = motion1 + motion2 # make the csf signal share variance with one of the noise components csf = (np.cos(2*np.pi*ix*(60/tp)) + motion1) / 2 # the simulated bold timecourse bold = signal_total + motion_total + csf # In[54]: plt.plot(signal_total) plt.title('Ground Truth') plt.ylabel('Signal Intensity (AU)') plt.xlabel('Time (Seconds)') # In[55]: plt.plot(bold) plt.title('Simulated BOLD') plt.ylabel('Signal Intensity (AU)') plt.xlabel('Time (Seconds)') # In[56]: # non-aggresive denoising # Create a Melodic design matrix, where we model the intercept, the two signal components # and the two noise components design = np.ones((tp, 5)) design[:, 1] = signal1 design[:, 2] = signal2 design[:, 3] = motion1 design[:, 4] = motion2 unmixMatrix = np.linalg.pinv(design) maps = np.dot(unmixMatrix, bold) # only select the noise maps noisemap = maps[3:] # calculate the residual nonagg_residual = np.dot(np.stack((motion1, motion2), axis=1), noisemap) # denoise the bold bold_denoised = bold - nonagg_residual # In[57]: plt.plot(bold_denoised) plt.title('denoised BOLD') plt.ylabel('Signal Intensity (AU)') plt.xlabel('Time (Seconds)') # In[58]: # apply the same regression to the csf component # same as extracting csf after nonaggressive denoising csf_clean = csf - nonagg_residual # make design matrices for "cleaned" csf and non-cleaned csf csf_clean_design = np.stack((np.ones(tp), csf_clean), axis=1) csf_clean_residual = np.dot(csf_clean_design, np.dot(np.linalg.pinv(csf_clean_design), bold_denoised)) # remove the csf clean residual bold_denoised_nuisance = bold_denoised - csf_clean_residual # In[59]: plt.plot(bold_denoised_nuisance) plt.title('denoised->nuisance BOLD') plt.ylabel('Signal Intensity (AU)') plt.xlabel('Time (Seconds)') # In[60]: # raw csf design csf_raw_design = np.stack((np.ones(tp), csf), axis=1) csf_residual = np.dot(csf_raw_design, np.dot(np.linalg.pinv(csf_raw_design), bold_denoised)) # remove the csf_tot uncleaned residual bold_nuisance_denoised = bold_denoised - csf_residual # In[61]: plt.plot(bold_nuisance_denoised) plt.title('nuisance->denoised BOLD') plt.ylabel('Signal Intensity (AU)') plt.xlabel('Time (Seconds)') # In[62]: # compare correlations denoised_nuisance_ground_truth_correlation = np.corrcoef(signal_total, bold_denoised_nuisance) nuisance_denoised_ground_truth_correlation = np.corrcoef(signal_total, bold_nuisance_denoised) plt.figure(1) plt.subplot(211) sns.heatmap(denoised_nuisance_ground_truth_correlation, annot=True) plt.title('denoised->nuisance correlation with ground truth') plt.subplot(212) sns.heatmap(nuisance_denoised_ground_truth_correlation, annot=True) plt.title('nuisance->denoised correlation with ground truth') plt.tight_layout() # In[63]: # compare periodograms frequencies, ground_truth_density = periodogram(signal_total) plt.semilogy(frequencies, ground_truth_density) plt.ylim([1e-31, 1e3]) plt.xlabel('Frequency Range (Hz)') plt.ylabel('Frequency Power') # In[65]: frequencies, denoised_nuisance_density = periodogram(bold_denoised_nuisance) frequencies, nuisance_denoised_density = periodogram(bold_nuisance_denoised) frequencies, csf_density = periodogram(csf) plt.figure(1) plt.subplot(131) plt.semilogy(frequencies, denoised_nuisance_density) plt.ylim([1e-31, 1e3]) plt.title('denoised->nuisance') plt.xlabel('Frequency Range (Hz)') plt.ylabel('Frequency Power') plt.subplot(132) plt.semilogy(frequencies, nuisance_denoised_density) plt.ylim([1e-31, 1e3]) plt.title('nuisance->denoised') plt.xlabel('Frequency Range (Hz)') plt.ylabel('Frequency Power') plt.subplot(133) plt.semilogy(frequencies, csf_density) plt.ylim([1e-31, 1e3]) plt.title('csf noise') plt.xlabel('Frequency Range (Hz)') plt.ylabel('Frequency Power') plt.tight_layout() # ## Hallquist Conclusion # The correlations show a slightly stronger relationship between the nuisance->denoised bold as opposed to the denoised->nuisance bold. # It also appears extracting the nuisance regressor and then denoising (the middle graph) suppresses one of the csf peaks, whereas denoising and then extracting the csf nuisance (left graph) does not suppress the peak. # # # ## Repeated Simulation # # I will now perform many iterations of the above example with random numbers for each component as opposed to defined frequency components # In[8]: np.random.seed(123) denoised_nuisance_collector = [] nuisance_denoised_collector = [] for _ in range(1000): random_signal1 = np.random.rand(200) random_signal2 = np.random.rand(200) random_motion1 = np.random.rand(200) random_motion2 = np.random.rand(200) random_csf = (np.random.rand(200) + random_motion1) / 2 random_bold = random_signal1 + random_signal2 + random_motion1 + random_motion2 + random_csf # non-aggresive denoising Rdesign = np.ones((200, 5)) Rdesign[:, 1] = random_signal1 Rdesign[:, 2] = random_signal2 Rdesign[:, 3] = random_motion1 Rdesign[:, 4] = random_motion2 RunmixMatrix = np.linalg.pinv(Rdesign) Rmaps = np.dot(RunmixMatrix, random_bold) Rnoisemap = Rmaps[3:] Rnonagg_residual = np.dot(np.stack((random_motion1, random_motion2), axis=1), Rnoisemap) random_bold_denoised = random_bold - Rnonagg_residual # apply the same regression to the csf component # same as extracting csf after nonaggressive denoising random_csf_clean = random_csf - Rnonagg_residual # make design matrices for denoised csf and non-denoised csf Rcsf_clean_design = np.stack((np.ones(200), random_csf_clean), axis=1) Rcsf_design = np.stack((np.ones(200), random_csf), axis=1) Rcsf_clean_residual = np.dot(Rcsf_clean_design, np.dot(np.linalg.pinv(Rcsf_clean_design), random_bold_denoised)) # remove the denoised csf residual random_bold_denoised_nuisance = random_bold_denoised - Rcsf_clean_residual Rcsf_residual = np.dot(Rcsf_design, np.dot(np.linalg.pinv(Rcsf_design), random_bold_denoised)) # remove the raw csf residual random_bold_nuisance_denoised = random_bold_denoised - Rcsf_residual signal_total = random_signal1 + random_signal2 denoised_nuisance_collector.append(np.corrcoef(signal_total, random_bold_denoised_nuisance)) nuisance_denoised_collector.append(np.corrcoef(signal_total, random_bold_nuisance_denoised)) mean_denoised_nuisance_correlation = np.mean(np.array(denoised_nuisance_collector), axis=0) mean_nuisance_denoised_correlation = np.mean(np.array(nuisance_denoised_collector), axis=0) plt.figure(1) plt.subplot(211) sns.heatmap(mean_denoised_nuisance_correlation, annot=True) plt.title('denoised->nuisance correlation with ground truth') plt.subplot(212) sns.heatmap(mean_nuisance_denoised_correlation, annot=True) plt.title('nuisance->denoised correlation with ground truth') plt.tight_layout() # ## Conclusion # # The simulation suggests a slight benefit towards extracting the nuisance regressor before denoising as opposed to extracting the nuisance regressor after denoising. # ### Assumptions # 1. The nuisance csf is correlated with the noise components (e.g. csf signal is related to motion). When csf noise is generated independently, the nuisance->denoised order perfectly reconstructs the signal # # 2. I didn't model autocorrelation (would that be relevant in this context?) # # 3. ??? # In[ ]: