#!/usr/bin/env python
# coding: utf-8

# In[2]:


# path operations
from glob import glob
import os
from pathlib import Path

# data format and storage
from collections import namedtuple
import pickle

# numerical tools
import numpy as np
import scipy.stats
import pandas as pd

# plotting tools
from matplotlib import pyplot as plt
# %matplotlib notebook

# interactive notebook features
from tqdm import tqdm_notebook as tqdm
from ipywidgets import interact

# meg analysis
import mne


# ## Find all available subjects
#
# Define where you store your `camcan` data in the variable `camcanroot`.

# In[9]:


# camcanroot = Path('/Volumes') / 'Seagate Expansion Drive' / 'camcan'
# camcanroot = Path('D:') / 'camcan'
# camcanroot = Path('/data') / 'group' / 'FANS' / 'camcan-meg' / 'camcan165' / 'camcan165'
camcanroot = Path('/Users') / 'jan' / 'Documents' / 'eeg-data' / 'camcan'
megdataroot = camcanroot / 'cc700' / 'mri' / 'pipeline' / 'release004' / 'BIDSsep' / 'megraw'

subjects = list(megdataroot.glob('sub-*'))
ids = [os.path.split(subject)[-1][4:] for subject in subjects]
print(f'{len(subjects)} subjects found in {megdataroot}')

# only count subjects that actually have the corresponding recording on disk
restfiles = list(megdataroot.glob(pattern='*/meg/rest_raw.fif'))
taskfiles = list(megdataroot.glob(pattern='*/meg/task_raw.fif'))
print(f'{len(restfiles)} subjects have resting-state recordings.')
print(f'{len(taskfiles)} subjects have task recordings.')

# find the demographic info, indexed by participant ID (first CSV column)
subject_details = pd.read_csv(camcanroot / 'cc700-scored' / 'participant_data.csv', index_col=0)
print(f'Found subject information on {len([pid for pid in ids if pid in subject_details.index])} subjects.')


# ## Set up MEG analysis variables

# In[10]:


veog = 'EOG062'
heog = 'EOG061'
ecg = 'ECG063'
recording = 'task'
fmin, fmax = 2, 24


# ## Loop over MEG data
#
# First, make a data structure we can put the data in.

# In[11]:


sub_params = namedtuple('sub_params',
                        ['pid', 'slopes', 'age', 'gender', 'intercepts', 'rsquared'])
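
# As a quick illustration of what goes into these records: the sketch below fits a line to the
# log-power of a *synthetic* 1/f-like spectrum with `scipy.stats.linregress` and wraps the result
# in a `sub_params` record. The spectrum, the `pid='demo'` entry and the underscore-prefixed names
# are purely hypothetical; the real loop further down fits one line per gradiometer of the Welch PSD.

# In[ ]:


# minimal sketch on synthetic data: fit log10(power) against frequency
_freqs = np.linspace(fmin, fmax, 100)
_psd = 1.0 / _freqs + 0.01 * np.random.rand(_freqs.size)   # hypothetical 1/f-like spectrum
_fit = scipy.stats.linregress(_freqs, np.log10(_psd))

# wrap the fit in a single-channel record (age/gender left empty in this toy example)
_example = sub_params(pid='demo',
                      slopes=np.array([_fit.slope]),
                      intercepts=np.array([_fit.intercept]),
                      rsquared=np.array([_fit.rvalue ** 2]),
                      age=None, gender=None)
print(_example.slopes, _example.rsquared)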
# For the pre-processing, we'll also use Maxwell filtering to suppress interference from sources
# outside the helmet. These are the necessary files:

# In[12]:


ctfile = megdataroot / 'ct_sparse.fif'
ssscal = megdataroot / 'sss_cal.dat'


# Before running the full loop, explore the EOG (blink) artefacts in a single subject and check
# how the power spectrum changes when blink-contaminated segments are dropped.

# In[40]:


# load one task recording and cut away the very start and end of the session
raw = mne.io.read_raw_fif(taskfiles[1], verbose='WARNING')
raw = raw.crop(tmin=20, tmax=500)
# raw = raw.load_data()
# raw = raw.resample(256)


# In[21]:


# detect blinks on the vertical EOG channel and plot the single-blink time courses
eog_events = mne.preprocessing.find_eog_events(raw, 998, ch_name=veog)
tmin, tmax = -1, 1
epochs = mne.Epochs(raw, eog_events, 998, tmin, tmax,
                    picks=mne.pick_types(raw.info, meg=False, eog=True))
data = epochs.get_data()

plt.plot(1e3 * epochs.times, np.squeeze(data[:, 1, :]).T)
plt.xlabel('Time (ms)')
plt.ylabel('EOG (muV)')
plt.show()


# In[23]:


# sample indices at which blinks occur
eog_timepoints = eog_events[:, 0]


# In[38]:


# keep only the 2-second segments that contain no blink, and concatenate them
no_eog_raws = [raw.copy().crop(tmin=tmin, tmax=tmin + 2)
               for tmin in np.arange(0, 480, step=2)
               if not np.any((eog_timepoints > tmin * raw.info['sfreq']) &
                             (eog_timepoints < (tmin + 2) * raw.info['sfreq']))]
no_eog_raw = mne.concatenate_raws(no_eog_raws)
# raw = mne.concatenate_raws(no_eog_raws)  # would overwrite `raw` and spoil the comparison below


# In[42]:


# compare the power spectrum with and without the blink-contaminated segments
raw.plot_psd(fmin=0, fmax=45);
no_eog_raw.plot_psd(fmin=0, fmax=45);


# In[32]:


# how many 2-second segments survive, out of how many in total?
np.sum([1 for tmin in np.arange(0, 480, step=2)
        if not np.any((eog_timepoints > tmin * raw.info['sfreq']) &
                      (eog_timepoints < (tmin + 2) * raw.info['sfreq']))])
np.sum([1 for tmin in np.arange(0, 480, step=2)])


# Then, populate this data structure for each subject.

# In[ ]:


all_parameters = []
psds = []

for subject in tqdm(range(2)):  # only the first two subjects for now
    # path to this subject's recording (rest or task, depending on `recording`)
    restfile = subjects[subject] / 'meg' / (recording + '_raw.fif')
    # raw data
    raw = mne.io.read_raw_fif(restfile, verbose='WARNING')
    # crop
    raw = raw.crop(tmin=20, tmax=500)
    # resample
    raw = raw.load_data()
    raw = raw.resample(256)
    # filter the MEG data (excludes line noise)
    raw = raw.filter(0.5, 30, picks=mne.pick_types(raw.info, meg=True))
    # filter the EOG data
    raw = raw.filter(0.5, 15, picks=mne.pick_types(raw.info, meg=False, eog=True))
    # filter the ECG data
    raw = raw.filter(0.5, 15, picks=mne.pick_types(raw.info, meg=False, ecg=True))
    # maxwell-correction
    raw = mne.preprocessing.maxwell_filter(raw, cross_talk=str(ctfile), calibration=str(ssscal),
                                           st_duration=10, st_correlation=0.98)
    # pick gradiometers
    picks = mne.pick_types(raw.info, meg='grad', eeg=False, stim=False, eog=False,
                           exclude='bads')
    # run an ICA to remove EOG and ECG artefacts
    try:
        ica = mne.preprocessing.run_ica(raw, n_components=0.95, picks=picks,
                                        eog_ch=veog, ecg_ch=ecg)
        raw = ica.apply(raw, exclude=ica.exclude)
    except RuntimeError:
        continue

    # do the PSD analysis
    psd, freqs = mne.time_frequency.psd_welch(raw, picks=picks, fmin=fmin, fmax=fmax,
                                              n_fft=2000, n_overlap=1000,
                                              verbose='WARNING', n_jobs=4)

    # do the linear regression on log-power, excluding the alpha band (7-14 Hz) from the fit
    findices = (freqs < 7) | (freqs > 14)
    linfits = [scipy.stats.linregress(freqs[findices], np.log10(psd.T[findices, grad]))
               for grad in range(psd.shape[0])]

    psds.append(psd)
    params = sub_params(pid=ids[subject],
                        slopes=np.array([l.slope for l in linfits]),
                        intercepts=np.array([l.intercept for l in linfits]),
                        rsquared=np.array([l.rvalue ** 2 for l in linfits]),
                        age=subject_details.loc[ids[subject]].age,
                        gender=subject_details.loc[ids[subject]].gender_code)
    all_parameters.append(params)

    with open(Path('.') / 'pickles' / (ids[subject] + '-' + recording + '.pickle'), 'wb+') as f:
        pickle.dump((psd, params), f)


# ### Save data to pickles

# In[ ]:


# save data to file
with open(Path('.') / 'pickles' / ('psds-' + recording + '.pickle'), 'wb+') as f:
    pickle.dump(psds, f)
with open(Path('.') / 'pickles' / ('all_parameters-' + recording + '.pickle'), 'wb+') as f:
    pickle.dump(all_parameters, f)
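
# ### Reload the pickles (sketch)
#
# A minimal sketch, assuming the two pickle files written above exist, of how the saved results
# could be read back for downstream analysis (for example, relating mean spectral slope to age).
# The `loaded_*` names and the `slope_df` summary table are illustrative, not part of the pipeline.

# In[ ]:


with open(Path('.') / 'pickles' / ('all_parameters-' + recording + '.pickle'), 'rb') as f:
    loaded_parameters = pickle.load(f)
with open(Path('.') / 'pickles' / ('psds-' + recording + '.pickle'), 'rb') as f:
    loaded_psds = pickle.load(f)

# one row per subject: participant ID, age, and the mean slope across gradiometers
slope_df = pd.DataFrame({'pid': [p.pid for p in loaded_parameters],
                         'age': [p.age for p in loaded_parameters],
                         'mean_slope': [p.slopes.mean() for p in loaded_parameters]})
print(slope_df.head())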