#!/usr/bin/env python
# coding: utf-8

# In[2]:


# path operations
from glob import glob
import os
from pathlib import Path

# data format and storage
from collections import namedtuple
import pickle

# numerical tools
import numpy as np
import scipy.stats
import pandas as pd

# plotting tools
from matplotlib import pyplot as plt
# %matplotlib notebook

# interactive notebook features
from tqdm import tqdm_notebook as tqdm
from ipywidgets import interact

# meg analysis
import mne


# ## Find all available subjects
#
# Define where you store your `camcan` data in the variable `camcanroot`.

# In[9]:


# camcanroot = Path('/Volumes') / 'Seagate Expansion Drive' / 'camcan'
# camcanroot = Path('D:') / 'camcan'
# camcanroot = Path('/data') / 'group' / 'FANS' / 'camcan-meg' / 'camcan165' / 'camcan165'
camcanroot = Path('/Users') / 'jan' / 'Documents' / 'eeg-data' / 'camcan'
megdataroot = camcanroot / 'cc700' / 'mri' / 'pipeline' / 'release004' / 'BIDSsep' / 'megraw'

subjects = list(megdataroot.glob('sub-*'))
ids = [os.path.split(subject)[-1][4:] for subject in subjects]
print(f'{len(subjects)} subjects found in {megdataroot}')

# only count subjects that actually have the corresponding recording on disk
restfiles = list(megdataroot.glob(pattern='*/meg/rest_raw.fif'))
taskfiles = list(megdataroot.glob(pattern='*/meg/task_raw.fif'))
print(f'{len(restfiles)} subjects have resting-state recordings.')
print(f'{len(taskfiles)} subjects have task recordings.')

# find the demographic info, indexed by participant ID (first CSV column)
subject_details = pd.read_csv(camcanroot / 'cc700-scored' / 'participant_data.csv', index_col=0)
print(f'Found subject information on {len([pid for pid in ids if pid in subject_details.index])} subjects.')


# ## Set up MEG analysis variables

# In[10]:


veog = 'EOG062'
heog = 'EOG061'
ecg = 'ECG063'
recording = 'task'
fmin, fmax = 2, 24


# ## Loop over MEG data
#
# First, make a data structure we can put the data in.

# In[11]:


sub_params = namedtuple('sub_params',
                        ['pid', 'slopes', 'age', 'gender', 'intercepts', 'rsquared'])
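
# As a quick illustration of what goes into these records: the sketch below fits a line to the
# log-power of a *synthetic* 1/f-like spectrum with `scipy.stats.linregress` and wraps the result
# in a `sub_params` record. The spectrum, the `pid='demo'` entry and the underscore-prefixed names
# are purely hypothetical; the real loop further down fits one line per gradiometer of the Welch PSD.

# In[ ]:


# minimal sketch on synthetic data: fit log10(power) against frequency
_freqs = np.linspace(fmin, fmax, 100)
_psd = 1.0 / _freqs + 0.01 * np.random.rand(_freqs.size)   # hypothetical 1/f-like spectrum
_fit = scipy.stats.linregress(_freqs, np.log10(_psd))

# wrap the fit in a single-channel record (age/gender left empty in this toy example)
_example = sub_params(pid='demo',
                      slopes=np.array([_fit.slope]),
                      intercepts=np.array([_fit.intercept]),
                      rsquared=np.array([_fit.rvalue ** 2]),
                      age=None, gender=None)
print(_example.slopes, _example.rsquared)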
# For the pre-processing, we'll also use Maxwell filtering to suppress interference from sources
# outside the helmet. These are the necessary files:

# In[12]:


ctfile = megdataroot / 'ct_sparse.fif'
ssscal = megdataroot / 'sss_cal.dat'


# Before running the full loop, explore the EOG (blink) artefacts in a single subject and check
# how the power spectrum changes when blink-contaminated segments are dropped.

# In[40]:


# load one task recording and cut away the very start and end of the session
raw = mne.io.read_raw_fif(taskfiles[1], verbose='WARNING')
raw = raw.crop(tmin=20, tmax=500)
# raw = raw.load_data()
# raw = raw.resample(256)


# In[21]:


# detect blinks on the vertical EOG channel and plot the single-blink time courses
eog_events = mne.preprocessing.find_eog_events(raw, 998, ch_name=veog)
tmin, tmax = -1, 1
epochs = mne.Epochs(raw, eog_events, 998, tmin, tmax,
                    picks=mne.pick_types(raw.info, meg=False, eog=True))
data = epochs.get_data()

plt.plot(1e3 * epochs.times, np.squeeze(data[:, 1, :]).T)
plt.xlabel('Time (ms)')
plt.ylabel('EOG (muV)')
plt.show()


# In[23]:


# sample indices at which blinks occur
eog_timepoints = eog_events[:, 0]


# In[38]:


# keep only the 2-second segments that contain no blink, and concatenate them
no_eog_raws = [raw.copy().crop(tmin=tmin, tmax=tmin + 2)
               for tmin in np.arange(0, 480, step=2)
               if not np.any((eog_timepoints > tmin * raw.info['sfreq']) &
                             (eog_timepoints < (tmin + 2) * raw.info['sfreq']))]
no_eog_raw = mne.concatenate_raws(no_eog_raws)
# raw = mne.concatenate_raws(no_eog_raws)  # would overwrite `raw` and spoil the comparison below


# In[42]:


# compare the power spectrum with and without the blink-contaminated segments
raw.plot_psd(fmin=0, fmax=45);
no_eog_raw.plot_psd(fmin=0, fmax=45);


# In[32]:


# how many 2-second segments survive, out of how many in total?
np.sum([1 for tmin in np.arange(0, 480, step=2)
        if not np.any((eog_timepoints > tmin * raw.info['sfreq']) &
                      (eog_timepoints < (tmin + 2) * raw.info['sfreq']))])
np.sum([1 for tmin in np.arange(0, 480, step=2)])


# Then, populate this data structure for each subject.

# In[ ]:


all_parameters = []
psds = []

for subject in tqdm(range(2)):  # only the first two subjects for now
    # path to this subject's recording (rest or task, depending on `recording`)
    restfile = subjects[subject] / 'meg' / (recording + '_raw.fif')
    # raw data
    raw = mne.io.read_raw_fif(restfile, verbose='WARNING')
    # crop
    raw = raw.crop(tmin=20, tmax=500)
    # resample
    raw = raw.load_data()
    raw = raw.resample(256)
    # filter the MEG data (excludes line noise)
    raw = raw.filter(0.5, 30, picks=mne.pick_types(raw.info, meg=True))
    # filter the EOG data
    raw = raw.filter(0.5, 15, picks=mne.pick_types(raw.info, meg=False, eog=True))
    # filter the ECG data
    raw = raw.filter(0.5, 15, picks=mne.pick_types(raw.info, meg=False, ecg=True))
    # maxwell-correction
    raw = mne.preprocessing.maxwell_filter(raw, cross_talk=str(ctfile), calibration=str(ssscal),
                                           st_duration=10, st_correlation=0.98)
    # pick gradiometers
    picks = mne.pick_types(raw.info, meg='grad', eeg=False, stim=False, eog=False,
                           exclude='bads')
    # run an ICA to remove EOG and ECG artefacts
    try:
        ica = mne.preprocessing.run_ica(raw, n_components=0.95, picks=picks,
                                        eog_ch=veog, ecg_ch=ecg)
        raw = ica.apply(raw, exclude=ica.exclude)
    except RuntimeError:
        continue

    # do the PSD analysis
    psd, freqs = mne.time_frequency.psd_welch(raw, picks=picks, fmin=fmin, fmax=fmax,
                                              n_fft=2000, n_overlap=1000,
                                              verbose='WARNING', n_jobs=4)

    # do the linear regression on log-power, excluding the alpha band (7-14 Hz) from the fit
    findices = (freqs < 7) | (freqs > 14)
    linfits = [scipy.stats.linregress(freqs[findices], np.log10(psd.T[findices, grad]))
               for grad in range(psd.shape[0])]

    psds.append(psd)
    params = sub_params(pid=ids[subject],
                        slopes=np.array([l.slope for l in linfits]),
                        intercepts=np.array([l.intercept for l in linfits]),
                        rsquared=np.array([l.rvalue ** 2 for l in linfits]),
                        age=subject_details.loc[ids[subject]].age,
                        gender=subject_details.loc[ids[subject]].gender_code)
    all_parameters.append(params)

    with open(Path('.') / 'pickles' / (ids[subject] + '-' + recording + '.pickle'), 'wb+') as f:
        pickle.dump((psd, params), f)


# ### Save data to pickles

# In[ ]:


# save data to file
with open(Path('.') / 'pickles' / ('psds-' + recording + '.pickle'), 'wb+') as f:
    pickle.dump(psds, f)
with open(Path('.') / 'pickles' / ('all_parameters-' + recording + '.pickle'), 'wb+') as f:
    pickle.dump(all_parameters, f)
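
# ### Reload the pickles (sketch)
#
# A minimal sketch, assuming the two pickle files written above exist, of how the saved results
# could be read back for downstream analysis (for example, relating mean spectral slope to age).
# The `loaded_*` names and the `slope_df` summary table are illustrative, not part of the pipeline.

# In[ ]:


with open(Path('.') / 'pickles' / ('all_parameters-' + recording + '.pickle'), 'rb') as f:
    loaded_parameters = pickle.load(f)
with open(Path('.') / 'pickles' / ('psds-' + recording + '.pickle'), 'rb') as f:
    loaded_psds = pickle.load(f)

# one row per subject: participant ID, age, and the mean slope across gradiometers
slope_df = pd.DataFrame({'pid': [p.pid for p in loaded_parameters],
                         'age': [p.age for p in loaded_parameters],
                         'mean_slope': [p.slopes.mean() for p in loaded_parameters]})
print(slope_df.head())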