#!/usr/bin/env python
# coding: utf-8

# # Processing the power spectra
#
# This notebook loads the pickle files that contain the power spectra for each participant. It also writes the relevant data (regression slope, intercept) to file, and plots it so you can check that everything looks good.
#
# Data is written to separate CSV files depending on where it comes from. The following are produced:
#
# ```
# ../csv/
# |--megbehavior.csv
# |--rest.csv
# |--task.csv
# |--agesplit-spectra.csv
# |--vstm-behavior.csv
# ```

# In[1]:

# path operations
from glob import glob
import os
from pathlib import Path

# data format and storage
from collections import namedtuple
import pickle

# numerical tools
import numpy as np
import scipy.stats
import pandas as pd
import scikits.bootstrap as boot

# for cluster stats, we need to make partial functions
from functools import partial

# plotting tools
from matplotlib import pyplot as plt
# %matplotlib notebook
import matplotlib.cm
import matplotlib.colors

# custom function
from plotting_aids import plot_linfit

# interactive notebook features
try:
    from tqdm import tqdm_notebook as tqdm
except ImportError:
    from tqdm import tqdm
from ipywidgets import interact

# meg analysis
import mne


# ### Load the task data from the pickles:
#
# Define the data structure for the summary data (using a namedtuple):

# In[2]:

sub_params = namedtuple('sub_params', ['pid', 'slopes', 'age', 'gender', 'intercepts', 'rsquared'])


# Define the filepaths:

# In[3]:

# camcanroot = Path('/Volumes') / 'Seagate Expansion Drive' / 'camcan'
# camcanroot = Path('D:') / 'camcan'
# camcanroot = Path('/data') / 'group' / 'FANS' / 'camcan-meg' / 'camcan165' / 'camcan165'
camcanroot = Path('/Users') / 'jan' / 'Documents' / 'eeg-data' / 'camcan'

# specific paths
megdataroot = camcanroot / 'cc700' / 'mri' / 'pipeline' / 'release004' / 'BIDSsep' / 'megraw'
megfiles = list(megdataroot.glob('*/meg/task_raw.fif'))
pickleroot = Path('.') / 'pickles'
csvroot = Path('.') / 'csv'


# In[4]:

raw = mne.io.read_raw_fif(megfiles[0])
channel_names = [raw.ch_names[i] for i in mne.pick_types(raw.info, meg='grad')]


# In[5]:

# sensor positions for topographic plots (one position per gradiometer pair)
pos = mne.channels.layout._auto_topomap_coords(
    raw.info, mne.pick_types(raw.info, meg='grad')[0::2]
)


# Load the task data

# In[6]:

psds = {}
picklelist = {}
ids = {}

# load the task data from pickles
for recording in ['rest', 'task']:
    picklelist[recording] = list(pickleroot.glob(pattern='*' + recording + '.pickle'))
    print(f'{len(picklelist[recording])} {recording} files found.')
    tuplist = [pickle.load(open(f, 'rb')) for f in picklelist[recording]]
    tuplist = list(zip(*tuplist))
    psds[recording] = list(tuplist[0])
    ids[recording] = [file.name[0:8] for file in picklelist[recording]]


# In[7]:

subject_details = pd.read_csv(camcanroot / 'cc700-scored' / 'participant_data.csv', index_col=0)


# Re-do the linear regression for each subject to populate the data structure

# In[8]:

# re-construct the frequencies that were used to evaluate the PSD:
freqs = np.linspace(2, 24, num=psds['task'][0].shape[-1])
# fit the line outside the alpha band only (below 7 Hz and above 14 Hz):
findices = (freqs > 14) | (freqs < 7)

summary_data = {'rest': [], 'task': []}
for name, data in tqdm(psds.items()):
    for subject, psd in tqdm(enumerate(data)):
        # regress log-power on frequency, separately for each gradiometer
        linfits = [scipy.stats.linregress(freqs[findices], np.log10(psd.T[findices, grad]))
                   for grad in range(psd.shape[0])]
        summary_data[name].append(
            sub_params(pid=ids[name][subject],
                       slopes=np.array([l.slope for l in linfits]),
                       intercepts=np.array([l.intercept for l in linfits]),
                       rsquared=np.array([l.rvalue**2 for l in linfits]),
                       age=subject_details.loc[ids[name][subject], 'age'],
                       gender=1)  # placeholder; gender is not read from the participant data here
        )
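# The fit above excludes the 7-14 Hz range so that the alpha peak does not bias
# the 1/f slope estimate. As a quick illustration (synthetic data, not Cam-CAN;
# the slope, noise level, and bump shape are made-up assumptions), fitting with
# and without the exclusion shows the bias an alpha bump would introduce:

# In[ ]:

rng = np.random.default_rng(0)
toy_freqs = np.linspace(2, 24, 64)
true_slope = -0.1  # log10-power units per Hz, chosen arbitrarily
toy_logpower = true_slope * toy_freqs + rng.normal(0, 0.02, toy_freqs.size)
toy_logpower += 0.5 * np.exp(-((toy_freqs - 10)**2) / 2)  # synthetic alpha bump at 10 Hz

toy_findices = (toy_freqs > 14) | (toy_freqs < 7)
fit_all = scipy.stats.linregress(toy_freqs, toy_logpower)
fit_excl = scipy.stats.linregress(toy_freqs[toy_findices], toy_logpower[toy_findices])
print(fit_all.slope, fit_excl.slope)  # the masked fit recovers true_slope more closely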
# In[9]:

# flag "bad" gradiometers per participant: positive slopes, or intercepts
# more than two standard deviations from that participant's mean intercept
bad_elecs = {'rest': [], 'task': []}
for name, data in summary_data.items():
    for p in data:
        bads = np.zeros_like(p.rsquared, dtype='bool')
        bads = bads | (p.slopes > 0)
        bads = bads | (p.intercepts > p.intercepts.mean() + 2 * p.intercepts.std())
        bads = bads | (p.intercepts < p.intercepts.mean() - 2 * p.intercepts.std())
        bad_elecs[name].append(bads)


# ### Save the data to CSV files

# In[10]:

for recording in list(summary_data.keys()):
    outdicts = [{
        'pid': p.pid,
        'removed_elecs': e.sum(),
        'average.slope': p.slopes[~e].mean(),
        'average.intercept': p.intercepts[~e].mean(),
        'average.rsquared': p.rsquared[~e].mean(),
        'age': p.age,
        'sex': p.gender
    } for p, e in zip(summary_data[recording], bad_elecs[recording])]
    with open(csvroot / (recording + '.csv'), 'w+') as f:
        pd.DataFrame.from_dict(outdicts).to_csv(f, index=False)


# # Visualising data
#
# Now we plot the data. First, we define a colour scheme:

# In[11]:

scalarmap = matplotlib.cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=0, vmax=1), cmap='viridis')
colorscheme = {'rest': 'lightblue',
               'task': 'pink',
               'old': scalarmap.to_rgba(0.75),
               'young': scalarmap.to_rgba(0.25)}


# ### Compare resting-state to task

# In[12]:

x = freqs
plt.figure(figsize=(16, 8))

# plot the spectra
plt.subplot2grid((1, 4), (0, 0), colspan=3)
for name, data in psds.items():
    pltdata = np.log10(
        np.stack([p[~e, :].mean(axis=0) for p, e in zip(data, bad_elecs[name])], axis=-1)
    )
    plt.plot(x, np.mean(pltdata, axis=-1), color=colorscheme[name], label=name.capitalize())
    # regression line
    linfit = scipy.stats.linregress(freqs[findices], np.mean(pltdata, axis=-1)[findices])
    x2 = np.array([freqs.min(), freqs.max()])
    y2 = linfit.slope * x2 + linfit.intercept
    plt.plot(x2, y2, '-.', color=colorscheme[name])
    plt.fill_between(
        x,
        np.mean(pltdata, axis=-1) - scipy.stats.sem(pltdata, axis=-1),
        np.mean(pltdata, axis=-1) + scipy.stats.sem(pltdata, axis=-1),
        color=colorscheme[name], alpha=0.3
    )
plt.legend()

# plot the slopes across the two conditions
ax = plt.subplot2grid((1, 4), (0, 3))
plt.title('Slopes across the two conditions')
plt.ylabel('Regression slopes (arb. units)')

# collect all the average slopes
slopevecs = [[p.slopes[~e].mean() for p, e in zip(data, bad_elecs[name])]
             for name, data in summary_data.items()]

# plot the violins
violins = plt.violinplot(slopevecs, showextrema=False, showmeans=True)

# colour the violins appropriately
violins['cmeans'].set_color('black')
for b, n in zip(violins['bodies'], psds.keys()):
    b.set_facecolor(colorscheme[n])
    b.set_edgecolor('black')
    b.set_linewidth(2)

# annotate with significance stars from an independent-samples t-test
pvalue = scipy.stats.ttest_ind(*slopevecs).pvalue
star_y = ax.get_ylim()[1] - 0.2 * np.ptp(ax.get_ylim())
if pvalue < 0.001:
    plt.text(x=1.4, y=star_y, s='***')
elif pvalue < 0.01:
    plt.text(x=1.4, y=star_y, s='**')
elif pvalue < 0.05:
    plt.text(x=1.4, y=star_y, s='*')
else:
    plt.text(x=1.4, y=star_y, s='n.s.')

plt.subplots_adjust(wspace=0.5)
plt.show()
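# The star-annotation logic above is duplicated verbatim in the age-group
# comparison below; a small helper along these lines (hypothetical, not part of
# the original analysis) would remove the repetition:

# In[ ]:

def p_to_stars(pvalue):
    """Map a p-value onto a conventional significance label (hypothetical helper)."""
    for threshold, stars in [(0.001, '***'), (0.01, '**'), (0.05, '*')]:
        if pvalue < threshold:
            return stars
    return 'n.s.'

# usage: plt.text(x=1.4, y=star_y, s=p_to_stars(scipy.stats.ttest_ind(*slopevecs).pvalue))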
# ### Check if the two correlate

# In[13]:

ids = {name: [p.pid for p in data] for name, data in summary_data.items()}
coords = [[np.mean(p.slopes[~e]) for p, e in zip(data, bad_elecs[name])
           if p.pid in ids['rest'] and p.pid in ids['task']]
          for name, data in summary_data.items()]

plt.figure(figsize=(12, 8))
plt.scatter(coords[0], coords[1])
plt.xlabel(f'Slope during {list(summary_data.keys())[0]}', fontsize=20)
plt.ylabel(f'Slope during {list(summary_data.keys())[1]}', fontsize=20)
plt.show()

print(scipy.stats.pearsonr(*coords))
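# `pearsonr` returns only the coefficient and its p-value; if a confidence
# interval for the rest-task correlation is wanted, the Fisher z-transform
# provides one. A minimal sketch (the 95% level is an assumption):

# In[ ]:

def pearson_ci(r, n, alpha=0.05):
    """Confidence interval for a Pearson r via the Fisher z-transform (sketch)."""
    z = np.arctanh(r)                         # Fisher transform
    se = 1 / np.sqrt(n - 3)                   # standard error of z
    zcrit = scipy.stats.norm.ppf(1 - alpha / 2)
    return tuple(np.tanh([z - zcrit * se, z + zcrit * se]))

r, _ = scipy.stats.pearsonr(*coords)
print(pearson_ci(r, len(coords[0])))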
# ### Make a split power spectrum for age groups in `task`

# In[14]:

recording = 'task'
ages = np.array([p.age for p in summary_data[recording]])
# split at the outer age quartiles
agesplit = {'young': ages < np.percentile(ages, 25),
            'old': ages > np.percentile(ages, 75)}

plt.figure(figsize=(16, 8))
plt.subplot2grid((1, 4), (0, 0), colspan=3)

legendentries = []
for groupname, group in agesplit.items():
    pltdata = np.log10(np.stack([p[~e, :].mean(axis=0)
                                 for p, e, g in zip(psds[recording], bad_elecs[recording], group) if g],
                                axis=-1))
    plt.fill_between(
        freqs,
        np.mean(pltdata, axis=-1) - scipy.stats.sem(pltdata, axis=-1),
        np.mean(pltdata, axis=-1) + scipy.stats.sem(pltdata, axis=-1),
        color=colorscheme[groupname], alpha=0.3
    )
    legendentries.append(plt.plot(freqs, np.mean(pltdata, axis=-1),
                                  color=colorscheme[groupname], label=groupname.capitalize()))

plt.ylabel('Power ($log_{10}(mV^2)$)', fontsize=20)
plt.xlabel('Frequency (Hz)', fontsize=20)
plt.title('Average power spectra for two age groups', fontsize=20)
plt.legend(fontsize=16)

# plot the slopes for the two groups
ax = plt.subplot2grid((1, 4), (0, 3))
plt.title('Average slopes', fontsize=20)
plt.ylabel('Regression slopes (arb. units)')

# collect all the average slopes
slopevecs = [[p.slopes[~e].mean()
              for i, (p, e) in enumerate(zip(summary_data[recording], bad_elecs[recording])) if g[i]]
             for groupname, g in agesplit.items()]

# plot the violins
violins = plt.violinplot(slopevecs, showextrema=False, showmeans=True)

# colour the violins appropriately
for patch, groupname in zip(violins['bodies'], agesplit.keys()):
    patch.set_facecolor(colorscheme[groupname])
    patch.set_edgecolor('black')
    patch.set_alpha(0.3)

# make the mean line black
violins['cmeans'].set_color('black')

# annotate with significance stars from an independent-samples t-test
pvalue = scipy.stats.ttest_ind(*slopevecs).pvalue
star_y = ax.get_ylim()[1] - 0.2 * np.ptp(ax.get_ylim())
if pvalue < 0.001:
    plt.text(x=1.4, y=star_y, s='***')
elif pvalue < 0.01:
    plt.text(x=1.4, y=star_y, s='**')
elif pvalue < 0.05:
    plt.text(x=1.4, y=star_y, s='*')
else:
    plt.text(x=1.4, y=star_y, s='n.s.')

plt.subplots_adjust(wspace=0.5)
plt.show()


# In[15]:

# write the split data to a csv file
outdicts = [
    {
        **{'pid': p.pid, 'group': 'young' if p.age < np.percentile(ages, 25) else 'old'},
        **{str(freq): power for freq, power in zip(freqs, np.log10(psd[~e, :]).mean(axis=0))}
    }
    for p, e, psd in zip(summary_data[recording], bad_elecs[recording], psds[recording])
    if (p.age < np.percentile(ages, 25)) or (p.age > np.percentile(ages, 75))
]
with open(csvroot / 'agesplit-spectra.csv', 'w+') as f:
    pd.DataFrame.from_dict(outdicts).to_csv(f, index=False)


# In[16]:

for recording, data in summary_data.items():
    x = np.array([p.age for p in data])
    y = np.array([np.mean(p.slopes) for p in data])
    plt.figure(figsize=(12, 8))
    plt.scatter(x, y, c=x)
    plot_linfit(x, y)
    plt.xlabel(r'Age (Years)', fontsize=20)
    plt.ylabel(r'Regression slope ($log_{10}(mV^2) \times Hz^{-1}$)', fontsize=20)
    plt.title(recording.capitalize())
    plt.show()
    print(scipy.stats.pearsonr(x, y))


# In[17]:

# Load the short-term memory data
stm = pd.read_csv(
    camcanroot / 'cc700-scored' / 'VSTMcolour' / 'release001' / 'summary' / 'VSTMcolour_summary.txt',
    sep='\t', skiprows=8, index_col=0
)
with open(csvroot / 'vstm-behavior.csv', 'w+') as f:
    stm.to_csv(f)


# In[18]:

recording = 'task'
ages = np.array([p.age for p in summary_data[recording] if p.pid in stm.index])
slopes = [np.mean(p.slopes[~e]) for p, e in zip(summary_data[recording], bad_elecs[recording])
          if p.pid in stm.index]
x = np.array(slopes)
y = np.array(
    [float(stm.loc[p.pid, 'Prcsn_ss3']) for p in summary_data[recording] if p.pid in stm.index]
)

# exclusion criterion: keep only precision scores below 1
include = y < 1
x = x[include]
y = y[include]
c = ages[include]

print(scipy.stats.pearsonr(x, y))
plt.figure(figsize=(12, 8))
plt.scatter(x, y, c=c)
plot_linfit(x, y)
plt.ylabel(r'Short term memory performance', fontsize=20)
plt.xlabel(r'Regression slope ($log_{10}(mV^2) \times Hz^{-1}$)', fontsize=20)
plt.show()
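# The pattern of building an `include` mask and correlating the masked vectors
# recurs in most of the remaining cells; a hypothetical helper (a sketch, not
# part of the original notebook) could centralise it:

# In[ ]:

def nan_pearsonr(x, y):
    """Pearson correlation after dropping pairs where either value is NaN (hypothetical helper)."""
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    include = ~np.isnan(x) & ~np.isnan(y)
    return scipy.stats.pearsonr(x[include], y[include])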
# In[19]:

recording = 'task'
megbehaviorroot = camcanroot / 'cc700-scored' / 'MEG' / 'release001' / 'data'
megmeanrt = []
megmedianrt = []
ages = []
rtdicts = []
for p in summary_data[recording]:
    try:
        meg_behaviour = pd.read_csv(
            megbehaviorroot / p.pid / ('MEG_' + p.pid + '_scored.txt'),
            sep='\t'
        )
        meanrt = meg_behaviour.loc[0, 'mRT_inv_trim']
        medianrt = meg_behaviour.loc[0, 'mdnRT_inv_trim']
        megmeanrt.append(meanrt)
        megmedianrt.append(medianrt)
        ages.append(p.age)
        rtdicts.append({'pid': p.pid, 'age': p.age, 'mean.rt': meanrt, 'median.rt': medianrt})
    except FileNotFoundError:
        # no scored behavioural file for this participant
        ages.append(np.nan)
        megmeanrt.append(np.nan)
        megmedianrt.append(np.nan)

slopes = np.array([np.mean(p.slopes[~e])
                   for p, e in zip(summary_data[recording], bad_elecs[recording])])
x = slopes
y = np.array(megmedianrt)

include = (~np.isnan(x) & ~np.isnan(y))
x = x[include]
y = y[include]
c = np.array(ages)[include]

print(scipy.stats.pearsonr(x, y))
plt.figure(figsize=(12, 8))
plt.scatter(x, y, c=c)
plot_linfit(x, y)
plt.ylabel(r'Task performance (reaction time)', fontsize=20)
plt.xlabel(r'Regression slope ($log_{10}(mV^2) \times Hz^{-1}$)', fontsize=20)
plt.show()


# In[20]:

with open(csvroot / 'megbehavior.csv', 'w+') as f:
    pd.DataFrame.from_dict(rtdicts).to_csv(f, index=False)


# ---
#
# # Check whether correlations are locally stronger
#
# ### Custom correlation function for clustering

# In[108]:

# make a function that correlates sensor data with a fixed vector and returns
# t-transformed correlation coefficients (for use as an MNE stat_fun)
def corrtest(y):
    def correlate_t_transform(y, x):
        ts = np.zeros(x.shape[-1])
        for sensor in range(x.shape[-1]):
            # run the pearson correlation, ignoring NaNs
            include = ~np.isnan(y) & ~np.isnan(x[..., sensor])
            r, p = scipy.stats.pearsonr(y[include], x[include, sensor])
            # t-transform: t = r / sqrt((1 - r^2) / (n - 2))
            ts[sensor] = r / np.sqrt((1 - r**2) / (len(x) - 2))
        return ts
    return partial(correlate_t_transform, y)


# use this function for the bootstrap (no p values)
def pearsonr_nop(x, y):
    r, _ = scipy.stats.pearsonr(x, y)
    return r
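# As a quick check that `correlate_t_transform` really computes the usual
# t statistic for a correlation, the two-sided p-value derived from it can be
# compared against what `scipy.stats.pearsonr` reports (synthetic data):

# In[ ]:

rng = np.random.default_rng(1)
yy = rng.normal(size=50)
xx = yy + rng.normal(size=50)  # correlated by construction

r, p = scipy.stats.pearsonr(yy, xx)
t = r / np.sqrt((1 - r**2) / (len(yy) - 2))
p_from_t = 2 * scipy.stats.t.sf(abs(t), df=len(yy) - 2)
print(np.isclose(p, p_from_t))  # True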
# ### Cluster analysis: 1/f noise and age

# In[183]:

rvals = {}
pvals = {}
obs = {}
clusters = {}
cluster_pvs = {}
rcis = {}

for recording in ['task', 'rest']:
    rvals[recording], pvals[recording], rcis[recording] = [], [], []
    y = np.array([p.age for p in summary_data[recording]])

    # set things up for the cluster analysis; the magnetometer layout has one
    # position per sensor triplet, matching the 102 averaged gradiometer pairs
    connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306mag')
    obs[recording] = np.zeros((y.size, len(ch_names)))

    for channel in tqdm(range(0, len(summary_data[recording][0].slopes), 2)):
        # average each gradiometer pair; exclude individuals for which this channel was bad
        slopes = np.array(
            [np.mean([p.slopes[channel], p.slopes[channel+1]]) if not e[channel] else np.nan
             for p, e in zip(summary_data[recording], bad_elecs[recording])]
        )
        obs[recording][:, channel//2] = slopes
        include = ~np.isnan(slopes) & ~np.isnan(y)
        rval, pval = scipy.stats.pearsonr(slopes[include], y[include])
        rvals[recording].append(rval)
        pvals[recording].append(pval)

    rvals[recording] = np.array(rvals[recording])
    pvals[recording] = np.array(pvals[recording])

    # run the spatial cluster permutation test on the t-transformed correlations
    corrt = corrtest(y)
    _, clusters[recording], cluster_pvs[recording], _ = mne.stats.permutation_cluster_1samp_test(
        obs[recording], stat_fun=corrt, connectivity=connectivity,
        threshold=scipy.stats.t.isf(0.05, len(y)))

for recording in ['task', 'rest']:
    # plot, masking sensors that belong to a significant cluster
    mask = np.zeros_like(clusters[recording][0], dtype=bool)
    for clust, p in zip(clusters[recording], cluster_pvs[recording]):
        if p < 0.05:
            mask = mask | clust
    im, _ = mne.viz.plot_topomap(
        data=rvals[recording], pos=pos, cmap='viridis', mask=mask,
        vmin=np.min([rvals['task'].min(), rvals['rest'].min()]),
        vmax=np.max([rvals['task'].max(), rvals['rest'].max()]),
        show=False)
    ax = plt.gca()
    # plt.title(recording.capitalize())
    plt.savefig(str(Path('.') / 'ccn-submission' / 'figures' / ('r-' + recording + '.pdf')),
                transparent=True, bbox_inches='tight')
    plt.show()

fig = plt.figure(figsize=(4.5, 0.5))
ax = plt.gca()
cbar = plt.colorbar(im, cax=ax, orientation='horizontal')
cbar.set_label('Correlation coefficient (slope, age)', size=14)
plt.savefig(str(Path('.') / 'ccn-submission' / 'figures' / 'r-cbar.pdf'),
            transparent=True, bbox_inches='tight')
plt.show()


# ### Compare the correlations across the two tasks

# In[148]:

# list of resampled indices for the bootstrap
# (note: `recording` here is whatever the previous cell left it set to)
straplist = list(boot.bootstrap.bootstrap_indexes(np.array([p.age for p in summary_data[recording]])))

bootrs = {}
for recording in ['task', 'rest']:
    rvals[recording], pvals[recording], rcis[recording] = [], [], []
    y = np.array([p.age for p in summary_data[recording]])

    # set things up for the cluster analysis (read the layout before sizing bootrs)
    connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306mag')
    bootrs[recording] = np.zeros((len(straplist), len(ch_names)))

    for channel in tqdm(range(0, len(summary_data[recording][0].slopes), 2)):
        # bootstrap the correlation coefficient for this channel
        slopes = np.array(
            [np.mean([p.slopes[channel], p.slopes[channel+1]]) if not e[channel] else np.nan
             for p, e in zip(summary_data[recording], bad_elecs[recording])]
        )
        for i, indices in enumerate(straplist):
            islopes = slopes[indices]
            iy = y[indices]
            # exclude NaNs
            include = ~np.isnan(islopes) & ~np.isnan(iy)
            # run the correlation
            rval, _ = scipy.stats.pearsonr(islopes[include], iy[include])
            bootrs[recording][i, channel//2] = rval


# In[181]:

# take the difference of each bootstrapped iteration
bootdiff = bootrs['task'] - bootrs['rest']

# one-sided bootstrap p-value per channel: the smaller tail proportion
pvs = np.amin(
    np.stack([(bootdiff > 0).mean(axis=0), (bootdiff < 0).mean(axis=0)], axis=-1),
    axis=-1
)

# make a "mask" for all the channels that are significant
mask = pvs < 0.05

im, _ = mne.viz.plot_topomap(
    data=bootdiff.mean(axis=0), pos=pos, cmap='viridis', mask=mask, show=False)
plt.savefig(str(Path('.') / 'ccn-submission' / 'figures' / 'r-difference.pdf'),
            transparent=True, bbox_inches='tight')
plt.show()

fig = plt.figure(figsize=(4.5, 0.5))
ax = plt.gca()
cbar = plt.colorbar(im, cax=ax, orientation='horizontal', ticks=[-0.1, 0, 0.1])
cbar.set_label('Difference in correlation coefficients\n(Task $-$ Rest)', size=14)
plt.savefig(str(Path('.') / 'ccn-submission' / 'figures' / 'r-difference-cbar.pdf'),
            transparent=True, bbox_inches='tight')
plt.show()
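# Note that taking the smaller tail proportion without doubling amounts to a
# one-sided test per channel; a two-sided test would double the minimum tail
# probability. A sketch on made-up numbers:

# In[ ]:

toy_diff = np.array([0.02, 0.05, -0.01, 0.04, 0.03, 0.06, 0.01, -0.02, 0.05, 0.04])
p_one_sided = min((toy_diff > 0).mean(), (toy_diff < 0).mean())
p_two_sided = min(1.0, 2 * p_one_sided)  # conventional two-sided correction
print(p_one_sided, p_two_sided)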
# ### Topographic analysis: sensorimotor rt & sensorimotor 1/_f_ noise

# In[37]:

rvals = []
pvals = []
recording = 'task'

megbehaviorroot = camcanroot / 'cc700-scored' / 'MEG' / 'release001' / 'data'
megfiles = list(megbehaviorroot.glob(pattern='*/*_scored.txt'))
megdfs = []
for file in tqdm(megfiles):
    megdf = pd.read_csv(file, sep='\t')
    if len(megdf.index) > 0:
        megdf['CCID'] = pd.Series([file.name[4:12]])
        megdfs.append(megdf)

megdata = pd.concat(megdfs)
megdata.index = megdata['CCID']
megdata = megdata.drop_duplicates(subset='CCID', keep='first')

y = np.array(
    [megdata.mdnRT_inv_trim[p.pid] for p in summary_data[recording] if p.pid in megdata.index]
)

# set things up for the cluster analysis
connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306mag')
obs = np.zeros((y.size, len(ch_names)))

for channel in tqdm(range(0, len(summary_data[recording][0].slopes), 2)):
    slopes = np.array(
        [np.mean([p.slopes[channel], p.slopes[channel+1]]) if not e[channel] else np.nan
         for p, e in zip(summary_data[recording], bad_elecs[recording]) if p.pid in megdata.index]
    )
    obs[:, channel//2] = slopes
    # exclude individuals for which this channel was bad
    include = ~np.isnan(slopes) & ~np.isnan(y)
    rval, pval = scipy.stats.pearsonr(slopes[include], y[include])
    rvals.append(rval)
    pvals.append(pval)

rvals = np.array(rvals)
pvals = np.array(pvals)


# In[40]:

corrt = corrtest(y)
T_obs, clusters, cluster_pvs, h = mne.stats.permutation_cluster_1samp_test(
    obs, stat_fun=corrt, connectivity=connectivity, threshold=scipy.stats.t.isf(0.05, len(y))
)


# In[25]:

# mask = np.zeros_like(clusters[0], dtype=bool)
# for clust, p in zip(clusters, cluster_pvs):
#     if p < 0.05:
#         mask = mask | clust

mne.viz.plot_topomap(data=rvals, pos=pos, vmin=-0.2, vmax=0.2, cmap='bwr_r')
mne.viz.plot_topomap(data=pvals, pos=pos, vmin=0, vmax=1, cmap='viridis_r');


# ### Topographic analysis: VSTM & resting 1/_f_ noise

# In[44]:

rvals = []
pvals = []
recording = 'rest'

y = np.array(
    [float(stm.loc[p.pid, 'Prcsn_ss2']) for p in summary_data[recording] if p.pid in stm.index]
)

# set things up for the cluster analysis
connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306mag')
obs = np.zeros((y.size, len(ch_names)))

for channel in range(0, len(summary_data[recording][0].slopes), 2):
    slopes = np.array(
        [np.mean((p.slopes[channel], p.slopes[channel+1])) if not e[channel] else np.nan
         for p, e in zip(summary_data[recording], bad_elecs[recording]) if p.pid in stm.index]
    )
    include = ~np.isnan(y) & ~np.isnan(slopes)
    obs[:, channel//2] = slopes
    rval, pval = scipy.stats.pearsonr(slopes[include], y[include])
    rvals.append(rval)
    pvals.append(pval)

pvals, rvals = np.array(pvals), np.array(rvals)

# run the cluster analysis
corrt = corrtest(y)
T_obs, clusters, cluster_pvs, h = mne.stats.permutation_cluster_1samp_test(
    obs, stat_fun=corrt, connectivity=connectivity, threshold=scipy.stats.t.isf(0.05, len(y))
)


# In[46]:

pvals


# In[48]:

mask = np.zeros_like(clusters[0], dtype=bool)
for clust, p in zip(clusters, cluster_pvs):
    if p < 0.05:
        mask = mask | clust

mne.viz.plot_topomap(data=rvals, pos=pos, vmin=-0.2, vmax=0.2, cmap='bwr_r', mask=mask)
mne.viz.plot_topomap(data=pvals, pos=pos, vmin=0, vmax=1, cmap='viridis_r', mask=mask);


# ### Topographic analysis: motor learning

# In[ ]:

motorlearning = pd.read_csv(
    camcanroot / 'cc700-scored' / 'MotorLearning' / 'release001' / 'summary' / 'MotorLearning_summary.txt',
    sep='\t', skiprows=8, index_col=0
)


# In[ ]:

recording = 'rest'

# the various behavioural metrics
preexposure = np.array([motorlearning.LatePreExposureTrajectoryErrorSD[p.pid]
                        for p in summary_data[recording] if p.pid in motorlearning.index])
exposurestart = np.array([motorlearning.EarlyExposureTrajectoryErrorMean[p.pid]
                          for p in summary_data[recording] if p.pid in motorlearning.index])
exposureend = np.array([motorlearning.LateExposureTrajectoryErrorMean[p.pid]
                        for p in summary_data[recording] if p.pid in motorlearning.index])
postexposure = np.array([motorlearning.EarlyPostExposureTrajectoryErrorSD[p.pid]
                         for p in summary_data[recording] if p.pid in motorlearning.index])

# choose which metric you want to correlate:
y = np.double(postexposure) - np.double(preexposure)

# set things up for the cluster analysis
connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306mag')
obs = np.zeros((y.size, len(ch_names)))

pvals = []
rvals = []
for channel in range(0, len(summary_data[recording][0].slopes), 2):
    # note: bad electrodes are not masked out here
    slopes = np.array(
        [np.mean((p.slopes[channel], p.slopes[channel+1]))
         for p in summary_data[recording] if p.pid in motorlearning.index]
    )
    obs[:, channel//2] = slopes
    rval, pval = scipy.stats.pearsonr(slopes, y)
    rvals.append(rval)
    pvals.append(pval)

pvals, rvals = np.array(pvals), np.array(rvals)

# run the cluster analysis
corrt = corrtest(y)
T_obs, clusters, cluster_pvs, h = mne.stats.permutation_cluster_1samp_test(
    obs, stat_fun=corrt, connectivity=connectivity, threshold=scipy.stats.t.isf(0.05, len(y))
)


# In[ ]:

mask = np.zeros_like(clusters[0], dtype=bool)
for cluster, p in zip(clusters, cluster_pvs):
    if p < 0.05:
        mask = mask | cluster

mne.viz.plot_topomap(data=rvals, pos=pos, vmin=-0.2, vmax=0.2, cmap='bwr_r', mask=mask)
mne.viz.plot_topomap(data=pvals, pos=pos, vmin=0, vmax=1, cmap='viridis_r');
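# Each scored behavioural summary (VSTM and motor learning above, force
# matching and the hotel task below) is read with the same
# `sep='\t', skiprows=8, index_col=0` incantation; a hypothetical loader would
# keep that in one place (a sketch, not part of the original notebook):

# In[ ]:

def load_scored_summary(task_name):
    """Read a Cam-CAN scored summary table (hypothetical helper; release001 layout assumed)."""
    return pd.read_csv(
        camcanroot / 'cc700-scored' / task_name / 'release001' / 'summary' / (task_name + '_summary.txt'),
        sep='\t', skiprows=8, index_col=0
    )

# usage: forcematching = load_scored_summary('ForceMatching')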
# ### Topographic analysis: force matching

# In[ ]:

forcematching = pd.read_csv(
    camcanroot / 'cc700-scored' / 'ForceMatching' / 'release001' / 'summary' / 'ForceMatching_summary.txt',
    sep='\t', skiprows=8, index_col=0
)


# In[ ]:

recording = 'rest'

# the various behavioural metrics
fingercomp = np.array([forcematching.FingerOverCompensationSD[p.pid]
                       for p in summary_data[recording] if p.pid in forcematching.index])
slidercomp = np.array([forcematching.SliderOverCompensationSD[p.pid]
                       for p in summary_data[recording] if p.pid in forcematching.index])

# choose which metric you want to correlate:
y = np.double(fingercomp)  # - np.double(slidercomp)
include = ~np.isnan(y)
y = y[include]

pvals = []
rvals = []
for channel in range(0, len(summary_data[recording][0].slopes), 2):
    slopes = np.array(
        [np.mean((p.slopes[channel], p.slopes[channel+1]))
         for p in summary_data[recording] if p.pid in forcematching.index]
    )
    slopes = slopes[include]
    rval, pval = scipy.stats.pearsonr(slopes, y)
    rvals.append(rval)
    pvals.append(1 - pval)  # store 1 - p so that brighter in the plot means smaller p

mne.viz.plot_topomap(data=rvals, pos=pos, vmin=-0.2, vmax=0.2, cmap='bwr_r')
mne.viz.plot_topomap(data=pvals, pos=pos, vmin=0.2, vmax=1, cmap='viridis');


# ### Topographic analysis: hotel task

# In[ ]:

hoteltask = pd.read_csv(
    camcanroot / 'cc700-scored' / 'Hotel' / 'release001' / 'summary' / 'Hotel_summary.txt',
    sep='\t', skiprows=8, index_col=0
)


# In[ ]:

recording = 'rest'

# the various behavioural metrics
performance = np.array([hoteltask.Time[p.pid]
                        for p in summary_data[recording] if p.pid in hoteltask.index])
qc = np.array([hoteltask.QCflag[p.pid]
               for p in summary_data[recording] if p.pid in hoteltask.index])

# choose which metric you want to correlate (keep only QC-passed participants):
y = np.double(performance)
include = (~np.isnan(y)) & (np.double(qc) == 0)
y = y[include]

# set things up for the cluster analysis (read the layout before sizing obs)
connectivity, ch_names = mne.channels.read_ch_connectivity('neuromag306planar', picks=range(0, 204, 2))
obs = np.zeros((y.size, len(ch_names)))

pvals = []
rvals = []
for channel in range(0, len(summary_data[recording][0].slopes), 2):
    slopes = np.array(
        [np.mean((p.slopes[channel], p.slopes[channel+1]))
         for p in summary_data[recording] if p.pid in hoteltask.index]
    )
    # take out NaNs
    slopes = slopes[include]
    # populate data for cluster analysis
    obs[:, channel//2] = slopes
    rval, pval = scipy.stats.pearsonr(slopes, y)
    rvals.append(rval)
    pvals.append(pval)

rvals = np.array(rvals)
pvals = np.array(pvals)

# Do cluster analysis
corrt = corrtest(y)
T_obs, clusters, cluster_pvs, h = mne.stats.permutation_cluster_1samp_test(
    obs, stat_fun=corrt, connectivity=connectivity
)


# In[ ]:

mask = np.zeros_like(clusters[0], dtype=bool)
for cluster, p in zip(clusters, cluster_pvs):
    if p < 0.05:
        mask = mask | cluster

mne.viz.plot_topomap(data=rvals, pos=pos, vmin=-0.2, vmax=0.2, cmap='bwr_r', mask=mask)
mne.viz.plot_topomap(data=pvals, pos=pos, vmin=0, vmax=1, cmap='viridis_r');