#!/usr/bin/env python # coding: utf-8 # # Regularized NMF for automatic segmentation of odor maps in the mouse OB # This Notebook reproduces results of the manuscript http://dx.doi.org/10.1016/j.neuroimage.2014.04.041. # It also allows to inspect the data in more detail. # In[16]: import sys import os from collections import defaultdict from scipy.spatial.distance import pdist,squareform import scipy.stats as stats import numpy as np import matplotlib.pyplot as plt from matplotlib import gridspec import matplotlib get_ipython().run_line_magic('pylab', 'inline') #the datamaker and ImageAnalysisComponents utilities reside two levels above this notebook. utility_path = os.path.realpath(os.path.join(os.path.pardir, os.path.pardir)) sys.path.append(utility_path) from regnmf import datamaker from regnmf import ImageAnalysisComponents as ia from regnmf.progress_bar import ProgressBar sys.modules['datamaker'] = datamaker import pickle # Define the location of the data. Data can be downloaded from zenodo.org at http://dx.doi.org/10.5281/zenodo.12352 . # In[17]: datapath = 'Data' #on my machine it's '../../../../Data' # ## Surrogate Data # For biological data it is generally difficult to asses factorization performance. # To obtain nevertheless a detailed picture on the terms of performance for regularized NMF and sICA we constructed a parameterized surrogate dataset. # With this dataset we could address two important questions: # First, what is the influence of method inherent parameters and their optimal choice? # And second, what is the application domain of both methods with respect to strength of pixel noise and size of measured stimuli set? # #### Create surrogate data # In[18]: load_paperdata = True # load the surrogate dataset that was used in the manuscript # In[19]: param = {'act_time': [0.01, 0.1, 0.3, 0.8, 1.0, 1.0], 'cov': 0.3, 'latents': 40, 'mean': 0.2, 'no_samples': 50, 'noisevar': 0.2, 'shape': (50, 50), 'width':0.1, 'var': 0.08} # In[20]: if load_paperdata : with open(os.path.join(datapath,'surrogate','data.pik'),'rb') as f: # python 2 -> 3 pickle fix # https://stackoverflow.com/questions/11305790/pickle-incompatibility-of-numpy-arrays-between-python-2-and-3 data = pickle.load(f, encoding='latin1') else: data = datamaker.Dataset(param) os.mkdir(datapath) # #### Define general layout for figures # In[21]: # fontsizes global_fs = 10 fs_num = 10 matplotlib.rcParams['axes.linewidth']=0.4 matplotlib.rcParams['axes.edgecolor']='k' matplotlib.rcParams['xtick.major.size']=1 matplotlib.rcParams['xtick.minor.size']=0.5 matplotlib.rcParams['xtick.major.width']=0.3 matplotlib.rcParams['xtick.minor.width']=0.3 matplotlib.rcParams['ytick.major.size']=1 matplotlib.rcParams['ytick.minor.size']=0.5 matplotlib.rcParams['ytick.major.width']=0.3 matplotlib.rcParams['ytick.minor.width']=0.3 matplotlib.rcParams['xtick.labelsize']=global_fs matplotlib.rcParams['ytick.labelsize']=global_fs matplotlib.rcParams['text.usetex']=False matplotlib.rcParams['mathtext.default']='regular' # In[22]: # parameter for visualizing sources cmap_tmp = matplotlib.colors.LinearSegmentedColormap.from_list('tmp', ['w',np.array([147,112,219])/256.,'m','b']) rgb_list = [cmap_tmp(i) for i in np.linspace(0,1,100)]+[plt.cm.jet(i) for i in np.linspace(0,1,101)] cmap_bases = matplotlib.colors.LinearSegmentedColormap.from_list('bases', rgb_list) param_imshow = {'cmap':cmap_bases, 'interpolation':'none', 'vmax':1, 'vmin':-1} # axes layout for time courses def mydecor(ax): ax.set_ylabel('activation', fontsize=global_fs, labelpad=-0.2) ax.yaxis.set_tick_params(labelsize=global_fs, labelright=True, labelleft=False) ax.yaxis.set_label_position('right') ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) ax.set_xticks(range(60, 6*50, 60)) # timecourses plotter splitnum = param['no_samples'] xparts = np.hsplit(np.arange(splitnum*len(param['act_time'])), splitnum) def timeplot(ax, data): ax.plot(data, 'o', ms=1.5, mfc='k', mec='none') for x in xparts: ax.plot(x, data[x], '--', dashes=(2,2), linewidth=0.4, color='k') #ax.set_ylabel('activation', fontsize=global_fs, labelpad=-0.2) ax.yaxis.set_tick_params(labelsize=global_fs, labelright=True, labelleft=False) ax.yaxis.set_label_position('right') ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) ax.set_xticks(range(60, 6*50, 60)) ax.set_yticks([0,1]) def no_ticks(ax): ax.set_xticks([]) ax.set_yticks([]) def little_spines(ax): ax.spines['top'].set_color('none') ax.spines['right'].set_color('none') ax.xaxis.set_ticks_position('bottom') ax.yaxis.set_ticks_position('left') def no_spines(ax): ax.spines['top'].set_color('none') ax.spines['right'].set_color('none') ax.spines['bottom'].set_color('none') ax.spines['left'].set_color('none') no_ticks(ax) # ### Surrogate Data Construction # To obtain useful conclusions, we've constructed sources resembling the main characteristics of the biological case: # Glomeruli are arranged side by side with slightly overlapping spatial signal distribution (Fig. 1a) # The response spectra of surrogate glomeruli are narrowly tuned and correlated (Fig. 1b) and each glomeruli rises to peak activation with a model time-course (Fig. 1c). The data to enter factorization is the concurrent observation of forty glomeruli in response to $n_{stim}$ stimuli (e.g. odours) corrupted by additional pixel noise $\sigma_{\text{noise}}$. # In[23]: fig_dim = np.array((3.54, 1.1))*4 fig1 = plt.figure(figsize=fig_dim) fig_ratio = fig_dim[0]/fig_dim[1] panel_width = 0.18 panel_bottom = 0.23 # ================================================================================ # Panel (a) # ================================================================================ ax = fig1.add_axes([0.05, panel_bottom, panel_width, panel_width*fig_ratio]) ax.text(0.08, 1.03, '(a)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') # contours for c in data.spt_sources: ax.contour(c.reshape((50,50))[::-1], [0.5] ,colors=['k'], label='0.3', linewidths=0.4) # centers p = ax.scatter(np.array(data.points)[:param['latents'],1], 49-np.array(data.points)[:param['latents'],0], marker='x', s=4, label='max', linewidths=0.4) ax.set_xlim([0.5,49.5]) ax.set_ylim([-0.5,48.5]) ax.set_xticks([]) ax.set_yticks([]) # ================================================================================= # Panel (b) # ================================================================================= leftpos = 0.365 ax = fig1.add_axes([leftpos, panel_bottom,0.2, panel_width*fig_ratio]) ax.text(-0.3, 1.03, '(b)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') # plot marginal ax.plot(np.arange(1E-2,4,0.001), datamaker.adjusted_gamma(param['mean'], param['var']).pdf(np.arange(1E-2,4,0.001))) ax.set_yscale('log') ax.set_xlabel('$a_{peak}$', fontsize=global_fs, labelpad=0.5) ax.set_xticks([0,1,2,3,4]) ax.set_ylabel('pdf', fontsize=global_fs, labelpad=0.5) # plot covariance ax = fig1.add_axes([leftpos+0.1, 0.5, 0.12, 0.12*fig_ratio]) im = ax.imshow(data.cov, interpolation='none', vmin=0, vmax=1, cmap=plt.cm.binary) ax.set_xticks([]) ax.set_yticks([]) axbar = fig1.add_axes([leftpos+0.11+0.12,0.5,0.012,0.12*fig_ratio]) # plot covariance colorscale cbar = fig1.colorbar(im, axbar) cbar.set_ticks([0,1]) cbar.set_label('$\\rho$', size=global_fs, labelpad=-0.5) # =================================================================================== # panel (c) # =================================================================================== ax = fig1.add_axes([0.77,panel_bottom,0.2,panel_width*fig_ratio]) ax.text(-0.3, 1.03, '(c)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') ax.plot(param['act_time'], 'o--', ms=2, mfc='k', linewidth=0.6, color='k', dashes=(2,2)) ax.set_xlabel('sample points', fontsize=global_fs, labelpad=1) ax.set_ylim((-0.05, 1.05)) ax.set_ylabel('% of $a_{peak}$', fontsize=global_fs, labelpad=-3) ax.set_yticks([0,0.5,1]) ax.set_yticklabels([0,50,100]) plt.show() # __Fig. 1__ Surrogate data (a) 40 Gaussian shaped sources were randomly placed on a regular $9\times 9$ grid in a $50\times 50\text{px}$ image. Crosses mark centres of pixel participation and lines indicate half maximum (HM). # (b) The peak activations for each stimulus were drawn from a gamma distribution ($\mu=0.2$, $\sigma=0.28$). Inset shows covariance $\rho$ introduced via a Gaussian copula into the sources' activation. # (c) Each stimulus activation was extended to a 6 point model time-course. # In[24]: fig_dim = np.array((3.54, 1.8))*4 fig = plt.figure(figsize=fig_dim, dpi=200) for i in range(50): ax = fig.add_subplot(5,10,i+1) ax.imshow(data.observed[(i+1)*len(param['act_time'])-1].reshape(param['shape']), interpolation='none', vmin=-2.5, vmax=2.5) ax.set_xlabel('t=%s'%((i+1)*6), size=global_fs, labelpad=0.5) ax.set_xticks([]) ax.set_yticks([]) plt.show() # __Fig. 2__ Observation of the combined signal at peak activation for different stimuli. # Signal is corrupted with pixel noise ($\sigma_{\text{noise}}=0.2$). # ### Examplatory Factorizations # We started our analysis with a dataset roughly mimicking the properties of our IOS data with $n_{stim}=50$ stimulus observations and a noise level of $\sigma_{noise}=0.2$. # Figure 3 shows exemplary recovered sources from both regularized NMF ($\alpha_{sm}=2$, $\alpha_{sp}=0.5$) and sICA, illustrating the general characteristics of the methods. # Regularized NMF indeed showed the desired properties of a localized, sparse and smooth pixel participation, accurately reproducing the spatial and temporal characteristics of the source. # In contrast plain sICA (with no additional processing applied) generates more holistic pixel participations, containing global noise contributions besides the local source contribution. # This implies a more noisy reconstruction of the activation courses by sICA, especially for weaker signals. # In[25]: # general NMF param anal_param = {'sparse_param': 0.5, 'factors': 80, 'smooth_param': 2, 'init':'convex', 'sparse_fct':'global_sparse', 'verbose':0 } # In[26]: input_data = ia.TimeSeries(data.observed, shape=param['shape']) nmf = ia.NNMF(maxcount=50, num_components=anal_param['factors'], **anal_param) nmf_out = nmf(input_data) sica = ia.sICA(num_components=anal_param['factors']) sica_out = sica(input_data) # In[27]: nmf_match, nmf_cor, _ = data.cor2source(nmf_out) sica_match, sica_cor, _ = data.cor2source(sica_out) # In[28]: figsize = np.array((20,40)) fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(40,3) gs.update(left = 0.01, top = 0.96, bottom = 0.01) for source_ind in range(40): gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,0], width_ratios=[1,4], wspace=0.01) ax = fig.add_subplot(gs_temp[0]) im = ax.imshow(data.spt_sources[source_ind].reshape((50,50)), **param_imshow) ax.set_axis_off() ax = fig.add_subplot(gs_temp[1]) timeplot(ax, data.activation[:,source_ind]) gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,1], width_ratios=[1,4], wspace=0.01) ax = fig.add_subplot(gs_temp[0]) im = ax.imshow(nmf_out.base.shaped2D()[nmf_match[source_ind]], **param_imshow) ax.set_axis_off() ax = fig.add_subplot(gs_temp[1]) timeplot(ax, nmf_out._series[:,nmf_match[source_ind]]) ax.text(10, ax.get_ylim()[1], r'$r^{tmp} = %.2f$'%nmf_cor[source_ind], size=7, va='top') gs_temp = matplotlib.gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs[source_ind,2], width_ratios=[1,4], wspace=0.01) ax = fig.add_subplot(gs_temp[0]) im = ax.imshow(sica_out.base.shaped2D()[sica_match[source_ind]], **param_imshow) ax.set_axis_off() ax = fig.add_subplot(gs_temp[1]) timeplot(ax, sica_out._series[:,sica_match[source_ind]]) ax.text(10, ax.get_ylim()[1], r'$r^{tmp} = %.2f$'%sica_cor[source_ind], size=7, va='top') # colorbar axbar = fig.add_axes([0.01, 0.992, 0.2, 0.003]) cbar = fig.colorbar(im, axbar, orientation='horizontal') cbar.set_ticks([-1,0,1]) cbar.set_label('pixel participation', size=global_fs, labelpad=2) cbar.ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) fig.text(0.15,0.97,'source') fig.text(0.45,0.97,'NMF') fig.text(0.75,0.97,'sICA') plt.show() # __Fig. 3__ Recovered sources in NMF and sICA. First column shows original sources, second column NMF and third column sICA reconstructions. NMF yields a localized source approximation, whereas the sICA component contains small pixel participations in the whole image. Furthermore some sICA components recover the source's activation course less well than the components in the NMF approach. # ### NMF parameter choice # NMF factorisation clearly depends on the choice of parameters. # To this end we evaluated their influence on factorization results and provide a heuristic for suitable parameter choice. # #### Influence of smoothness parameter # We first started with the parameter for smoothness regularisation, $\alpha_{\text{sm}}$. # This cell will take a bit longer to compute, so we're using a progress bar to shorten the wait ;) # In[29]: # do analysis for different smoothness settings nmf_sm = {} anal_param['sparse_param']=0. parampoints = [{'smooth_param':i} for i in [0., 0.25, 0.5, 1, 2, 4, 8]] pb = ProgressBar(len(parampoints)) pb.animate(0) for npa,pa in enumerate(parampoints): anal_param.update(pa) # calculate nmf = ia.NNMF(maxcount=50, num_components=anal_param['factors']) nmf.param.update(anal_param) nmf_out = nmf(ia.TimeSeries(data.observed, shape=param['shape'])) nmf_sm['nmf_sm'+str(pa['smooth_param'])] = nmf_out pb.animate(npa+1) # In[41]: n_bases = 20 #number of bases to show fig = plt.figure(figsize=(1*len(nmf_sm), 1*n_bases)) gs = gridspec.GridSpec(n_bases, len(nmf_sm)) gs.update(left=0.01, right=0.99, top=0.98, bottom=0.01) for k_ix, k in enumerate(np.sort(list(nmf_sm.keys()))): for base_ix, base in enumerate(nmf_sm[k].base.shaped2D()[:n_bases]): ax = fig.add_subplot(gs[base_ix, k_ix]) im = ax.imshow(base, **param_imshow) ax.set_axis_off() if base_ix ==0: ax.set_title(k, size=10) plt.show() # __Fig.4__ NMF components ($k=80$, $\alpha_{\text{sp}}=0$) influenced by increasing smoothness regularisation. Small scale structures were progressively disappearing. # #### Influence of sparseness parameter # Even with strong smoothness regularisation components contain contributions from different sources. Therefore we additionally introduce sparseness regularisation $\alpha_{\text{sp}}>0$, which regulates the number of components a pixel is participating. With regularisation too low a source is covered by multiple components. Under moderate regularisation, a component exactly describes a single source (Fig. 5) and a source is covered exactly by a single component. # But setting $\alpha_{\text{sp}}$ too high results in disrupted components as any source overlap regions were prevented by the regularisation. # In[42]: # do example factorization for different sparseness settings nmf_sp = {} anal_param['smooth_param']=2. sparseness = [{'sparse_param':0.125}, {'sparse_param':0.5}, {'sparse_param':2}, {'sparse_param':4}] pb = ProgressBar(len(sparseness)) pb.animate(0) for spi,sp in enumerate(sparseness): anal_param.update(sp) # calculate nmf = ia.NNMF(maxcount=50, num_components=anal_param['factors']) nmf.param.update(anal_param) out = nmf(ia.TimeSeries(data.observed, shape=param['shape'])) nmf_sp['nmf_sp'+str(sp['sparse_param'])] = out pb.animate(spi+1) # In[45]: n_bases = 10 #number of bases to show figsize = np.array((1*len(nmf_sp), 1*n_bases))*2 fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(n_bases+1, len(nmf_sp)) gs.update(left=0.03, right=0.99, top=1-0.4/(n_bases+1), bottom=0.02) #show contours of all bases for k_ix, k in enumerate(np.sort(list(nmf_sp.keys()))): ax = fig.add_subplot(gs[0, k_ix], aspect='equal') for base_ix, base in enumerate(nmf_sp[k].base.shaped2D()): im = ax.contour(base[::-1], [0.5], colors=['k'], alpha=0.5) ax.set_xticks([]) ax.set_yticks([]) if base_ix ==0: ax.set_title(k, size=10) # show single bases for k_ix, k in enumerate(np.sort(list(nmf_sp.keys()))): for base_ix, base in enumerate(nmf_sp[k].base.shaped2D()[:n_bases]): ax = fig.add_subplot(gs[base_ix+1, k_ix]) im = ax.imshow(base, **param_imshow) ax.set_axis_off() plt.show() # __Fig. 5__ NMF components with increasing sparseness regularisation. First row shows half maximum contours of all components. Further rows show pixel participation of single components. With regularisation too low a source is covered by multiple components. Strongly regularized components cover only part of the source. # To further quantify the effect of sparseness regularization we employ two measures. First, we exploit our knowledge on the actual sources and calculate their recovery $SR_{k}$ by the components. # Second, we measure shared explanation of pixel groups in multiple components via spatial correlation between components $r_{k}^{\text{comp}}$. # This is an uninformed measure not depending on any knowledge of actual sources, and hence is also applicable when the ground truth is not known, i.e. in real experimental data. # It shows, that the range of maximal source recovery is in a regime where all strong component correlations became eliminated but some residual correlation is left. # In[46]: num_testsets = 5 datasets = [datamaker.Dataset(param) for dummy in range(num_testsets)] # The next cell takes rather long to compute. Depending on your hardware, it will easily take 20 minutes or more. # In[49]: #This takes quite a long time (>20 min, depending on your hardware). #Consider loading the cached results below. sp_param = [0]+ [2**i for i in np.arange(-7,3, dtype=float)] sp_cors, st_cors, max_basecor, mse, mse_local = [defaultdict(list) for i in range(5)] pb = ProgressBar(len(datasets)*len(sp_param)) pb.animate(0) for dsi,dataset in enumerate(datasets): #do NMF with different sparseness for spi,sp in enumerate(sp_param): key = 'nmf_'+str(sp) anal_param.update({'sparse_param':sp}) factors = anal_param['factors'] # calculate nmf = ia.NNMF(maxcount=50, num_components=factors) nmf.param.update(anal_param) nmf_out = nmf(ia.TimeSeries(dataset.observed, shape=param['shape'])) #performance criteria _, st_cor, sp_cor = dataset.cor2source(nmf_out) sp_cors[key].append(sp_cor) st_cors[key].append(st_cor) max_basecor[key].append(np.nanmax(np.corrcoef(nmf_out.base._series) - np.diag(np.ones(factors)),0)) mse[key].append(dataset.mse2source(nmf_out)) mse_local[key].append(dataset.mse2source(nmf_out, local=0.05)) pb.animate(dsi*len(sp_param) + spi +1) # Use the two cells below if you want to save/load this data, so you won't have to recalculate it next time. # In[51]: # This is used to save the generated results. import pickle cached_data = (datasets, sp_param, sp_cors, st_cors, max_basecor, mse, mse_local) pickle.dump(cached_data, open('cached_data_for_spatial_component_corr.pik','wb')) # In[ ]: # This is to load cached data instead #import pickle #datasets, sp_param, sp_cors, st_cors, max_basecor, mse, mse_local = cPickle.load(open('cached_data_for_spatial_component_corr.pik')) # In[52]: figsize = np.array((2,2))*3 fig = plt.figure(figsize=figsize) ax = fig.add_axes([0.15,0.25,0.8,0.7]) mykeyorder = np.sort(['nmf_'+str(sp) for sp in sp_param]) x = np.arange(len(mykeyorder)) mean_SR = [[1-np.mean(i) for i in mse[k]] for k in mykeyorder] ax.plot(x, [np.mean(i) for i in mean_SR], ':og', mec='none', ms=3, linewidth=0.5, dashes=(2,2), label='mean $SR_k$') ax.errorbar(x, [np.mean(i) for i in mean_SR], yerr = [np.std(i) for i in mean_SR], fmt="none", linewidht=0.5, ecolor='g', elinewidth=0.5, capsize=1) basecor = [[np.max(i) for i in max_basecor[k]] for k in mykeyorder] ax.plot(x, [np.mean(i) for i in basecor], ':sb', mec='none', ms=3, linewidth=0.5, dashes=(2,2), label='max $r^{comp}_k$') ax.errorbar(x, [np.mean(i) for i in basecor], yerr = [np.std(i) for i in basecor], fmt="none", linewidht=0.5, ecolor='b', elinewidth=0.5, capsize=1) # Axes Decoration ax.set_xticks(x) ax.set_xticklabels(['0']+['$2^{%d}$'%i for i in np.log2(sp_param[1:])], size=global_fs, rotation='45') ax.set_xlim([-0.5, len(mykeyorder)-0.5]) ax.set_yticks([0, 0.5, 1]) ax.set_ylim([0,1.05]) ax.set_yticklabels(['0', '.5', '1'], size=global_fs) ax.yaxis.set_tick_params(labelsize=global_fs, pad=2) little_spines(ax) ax.set_xlabel('$\\alpha_{sp}$', labelpad=2, size=global_fs) ax.legend(prop={'size':global_fs}, numpoints=1, loc = 'upper left', labelspacing=0.1, handletextpad=0.2, bbox_to_anchor = (0.65, 0.95), borderaxespad=0, frameon=False) plt.show() # __Fig. 6__ Maximal spatial component correlation $r^{comp}$ and it's standard deviation over dataset instances (blue) respectively mean recovery $SR_k$ (green) in dependence of $\alpha_{\text{sp}}$ ($k=80$, $\alpha_{\text{sm}}=2$). # Source recovery becomes maximal in the regime where maximal component correlation is strongly decreasing. # ### High temporal accuracy of NMF components # To become a better understanding of the measure source recorvery SR, we also take a look at temporal and spatial correlation of source to components. # It shows that exspecially temporal accuracy is very high, even for components with moderate source recovery values and hence some spatial inaccuracy. # In[53]: figsize = np.array((2,2))*2 fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(2,2, width_ratios=[2,1], height_ratios=[1,2], hspace=0.02, wspace=0.02) gs.update(left=0.2, right=0.95, top=0.95, bottom=0.2) toplot = 'nmf_0.5' x = st_cors[toplot] y = np.nanmax(sp_cors[toplot],1) axlim = np.floor(10*np.min(np.array([x,y])))/10. #set lower limit of axes #marginal distribution of x ax = fig.add_subplot(gs[0,0]) ax.hist(np.hstack(x), bins = np.arange(axlim,1.001,0.1), edgecolor='none', histtype='stepfilled') ax.set_xlim((axlim,1)) ax.set_ylim((0,len(np.hstack(x)))) no_spines(ax) #marginal distribution of y ax = fig.add_subplot(gs[1,1]) ax.hist(np.hstack(y), bins = np.arange(axlim,1.001,0.1), edgecolor='none', histtype='stepfilled', orientation='horizontal') ax.set_xlim((0,len(np.hstack(y)))) ax.set_ylim((axlim,1)) no_spines(ax) #scatter plot x vs y ax = fig.add_subplot(gs[1,0]) mse_color = [plt.cm.RdYlGn(i/0.8) for i in (1-np.hstack(mse[toplot]))] ax.scatter(x, y, marker='o', linewidths=0.4, s=10, facecolors= mse_color, edgecolors=mse_color) # decorate axes ax.plot([axlim, 1], [axlim,1], 'k:', lw=0.5) ax.set_ylim((axlim,1)) ax.set_xlim((axlim,1)) ax.set_xticks((axlim,1)) ax.set_xticks(np.arange(axlim,1,0.05), minor=True) ax.set_yticks((axlim,1)) ax.set_yticks(np.arange(axlim,1,0.05), minor=True) ax.set_ylabel(r'$r^{spt}$', fontsize=global_fs+2, labelpad=-4) ax.set_xlabel(r'$r^{tmp}$', fontsize=global_fs+2, labelpad=-4) #labeling of histograms ax = fig.add_subplot(gs[0,1]) ax.set_xlim((0,100)) ax.set_ylim((0,100)) ax.set_yticks([100]) ax.set_xticks([100]) little_spines(ax) ax.xaxis.set_tick_params(direction='inout') ax.yaxis.set_tick_params(direction='inout') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.text(1, 100, '100%', fontsize=global_fs, va='center') ax.text(100, 5, '100%', fontsize=global_fs, rotation='270', va='bottom', ha='center') plt.show() # __Fig. 7__ Spatial and temporal correlation of matched components ($k=80$, $\alpha_{\text{sm}}=2$, $\alpha_{\text{sp}}=0.5$) to 200 sources of five dataset instances. The color of each dot indicates the corresponding SR value. Histograms depict marginal distribution for both axes. # Especially temporal reconstruction was very precise for almost all sources whereas lower SR values stemmed from inaccurate spatial reconstruction. # ### Comparison of NMF and sICA # To benchmark regularized NMF we performed sICA on the same dataset instances. # In sICA both spatial and temporal reconstruction of sources were more noisy (see Fig. 3). # The many but small non-local values for pixel participation lower the previously used measure of source recovery for sICA to less accurate values than actually could be achieved by simple post-processing like thresholding. # To account for such post-processing and enable a fair comparison we only computed source recovery for localized sICA components ($\text{sICA}^{\text{loc}}$). # Nonetheless $\text{sICA}^{\text{loc}}$ components showed significantly lower source recovery than NMF components (Fig. 7a). # The advantage of NMF especially stood out in the temporal reconstruction of sources (Fig. 7b). # Whereas lower source recovery values in NMF still implied a precise temporal source reconstruction, this was not the case for sICA. # In[54]: for dataset in datasets: factors = anal_param['factors'] key = 'sica_%d'%factors # calculate sica = ia.sICA(num_components=factors) sica_out = sica(ia.TimeSeries(dataset.observed, shape=param['shape'])) #performance criteria _, st_cor, sp_cor = dataset.cor2source(sica_out) sp_cors[key].append(sp_cor) st_cors[key].append(st_cor) max_basecor[key].append(np.nanmax(np.corrcoef(sica_out.base._series) - np.diag(np.ones(factors)),0)) mse[key].append(dataset.mse2source(sica_out)) mse_local[key].append(dataset.mse2source(sica_out, local=0.05)) # In[55]: figsize= np.array((3,1.5))*3 fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(1,2) gs.update(left=0.1, right=0.95, top=0.85, bottom=0.2, wspace=0.5) #plot SR distributions for 'toplot' and sica_80 ax = fig.add_subplot(gs[0]) ax.text(-0.06, 1.05, '(a)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') ax.hist(1-np.hstack(mse[toplot]), np.linspace(0,1,21), edgecolor='none', facecolor='0.7', label='NMF') ax.hist(1-np.hstack(mse_local['sica_80']), np.linspace(0,1,21), histtype='step', color='k', label='$sICA^{loc}}$') # Decorate ax.set_ylabel('#', fontsize=global_fs, labelpad=0) ax.set_xlabel('$SR_k$', fontsize=global_fs, labelpad=2) little_spines(ax) ax.yaxis.set_tick_params(pad=2, labelsize=global_fs) ax.xaxis.set_tick_params(pad=2, labelsize=global_fs) ax.set_xticks([0,0.4,0.8]) ax.legend(prop={'size':global_fs}, numpoints=1, loc = 'upper left', labelspacing=-0.2, handletextpad=0.2, bbox_to_anchor = (0.025, 1.04), borderaxespad=0, frameon=False) #plot temporal reconstruction for 'toplot' and sica_80 ax = fig.add_subplot(gs[1]) ax.text(-0.1, 1.05, '(b)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') ax.plot(np.hstack(st_cors['sica_80']), np.hstack(st_cors[toplot]), '.k', ms=5, mfc='none') ax.plot([0,1],[0,1], ':', color='0.5', lw=0.4) # Decorate ax.set_ylabel('$r^{tmp}_{NMF}$', fontsize=global_fs, labelpad=-5) ax.set_xlabel('$r^{tmp}_{sICA}$', fontsize=global_fs, labelpad=-5) little_spines(ax) ax.yaxis.set_tick_params(pad=2) ax.yaxis.set_tick_params(pad=2, labelsize=global_fs) ax.set_yticks([0,1]) ax.set_yticks([0.25,0.5,0.75], minor=True) ax.xaxis.set_tick_params(pad=2, labelsize=global_fs) ax.set_xticks([0,1]) ax.set_yticks([0.25,0.5,0.75], minor=True) plt.show() # __Fig. 7__ # (a) Distribution of source recovery $SR_k$ for NMF (solid grey, $k=80$, $\alpha_{\text{sm}}=2$, $\alpha_{\text{sp}}=0.5$) and localized sICA components $\text{sICA}^{\text{loc}}$ (black line, $k=80$) in five instances of the surrogate dataset. # (b) Temporal correlation $r^{\text{tmp}}$ of actual sources to NMF and sICA components. NMF components exhibited a more precise temporal reconstruction of sources.. # ## IOS Data # The surrogate data provided insight into application domain of the methods and yielded a heuristic for parameter choice. # But the *in-vivo* IOS data is a lot more complex. With respect to spatial characteristics, glomeruli have varying size and shapes and also the exact spatial signal distribution is unknown. # Furthermore the temporal characteristics of glomeruli do not obey an unique probability distribution but vary strongly in overall peak activation strength and odour selectivity. # In[56]: prepro_param = {'baseline_cut':(0,10), 'temporal_binning':12, 'low_pass':1, 'high_pass':10, 'spatial_subsamp':2} more_param = {'odor_response':(2,5), #range of frames which are averaged to yield odor response} 'snapshot': (3,4)} #frames to plot as odor images # ### Experiments # #### IOS imaging and preprocessing # OMP-SpH-mice were anaesthetized using urethane. All animal care and procedures were in accordance with the animal ethics guidelines of the Max Planck Society. # # Odour molecules reached the nose $2.5\pm0.3\text{s}$ after recording onset as measured by a Photoionization detector. # In each animal the response to a stimulus set of 48 to 49 odours was recorded. # Each odour stimulus was repeated at least twice and stimuli were represented in a pseudo-randomized sequence. # To increase signal-to-noise ratio and reduce computational load the raw data was preprocessed. # First we applied an $8\times8\text{px}$ spatial and a $12$ frame temporal binning ('raw' data available in this notebook already was spatially binned $4\times4\text{px}$ to save storage). # Then the odour induced activation was calculated as the relative decrease of reflectance $-\Delta R / R = -(R-R_0)/R$. # $R_0$ is the mean reflectance until $2s$ after recording onset. # Furthermore the data was spatially bandpass filtered with two Gaussian filters ($\sigma_{\text{low}}=10\text{px}$, $\sigma_{\text{high}}=1\text{px}$) and down-sampled by a factor of 2. # In[60]: measids = ['111210sph','111222sph','111221sph'] ios_data = defaultdict(dict) for measid in measids: filename = os.path.join(datapath,measid,'timeseries') ts = ia.TimeSeries() ts.load(filename) #preprocess: #relative change baseline = ia.TrialMean()(ia.CutOut(prepro_param['baseline_cut'])(ts)) ts = ia.RelativeChange()(ia.TrialMean(ts.num_timepoints/prepro_param['temporal_binning'])(ts), baseline) #bandpass lowpassed = ia.Filter('gauss', prepro_param['low_pass'], downscale=prepro_param['spatial_subsamp'])(ts) highpassed = ia.Filter('gauss', prepro_param['high_pass'], downscale=prepro_param['spatial_subsamp'])(ts) ts = ia.Combine(np.subtract)(lowpassed, highpassed) # scale to positive activation and to promille values ts._series *= -1000 ios_data[measid]['ts'] = ts ios_data[measid]['bg'] = plt.imread(os.path.join(datapath, measid, 'green.png')) ios_data[measid]['gloms'] = pickle.load(open(os.path.join(datapath, 'anatomical_outlines.pik'),'rb'))[measid] # In[62]: # plot image maps toplot = '111210sph' image_maps = ia.TrialMean()(ia.CutOut(more_param['snapshot'])(ios_data[toplot]['ts'])) odournames = pickle.load(open(os.path.join(datapath,'odournames.pik'),'rb')) dim2 = int(np.ceil(image_maps.num_stimuli/10.)) fig_dim = np.array((10, 1*dim2*0.75))*1.8 fig = plt.figure(figsize=fig_dim) old_name = 'name' for ix, i_map in enumerate(image_maps.shaped2D()): ax = fig.add_subplot(dim2, 10, ix+1) im_h = ax.imshow(i_map, interpolation='none', vmin=-np.max(i_map)) ax.set_ylabel('max: %.1f'%np.max(i_map), size=global_fs, labelpad=0) odour = odournames[image_maps.label_stimuli[ix]] if old_name != odour: ax.set_title(odour, size=9, y=0.95, ha='left') old_name = odour no_ticks(ax) axbar = fig.add_axes([0.05,0.85,0.01,0.1]) cbar = fig.colorbar(im_h, axbar) cbar.set_ticks([0,np.max(i_map)]) cbar.set_ticklabels([0,'max']) plt.show() # __Fig. 8__ Examples of odour response maps: Mean $-\Delta R/R$ 6s after odour delivery. Images are scaled to maximal pixel activation (indicated on the left of each image). # #### Anatomical glomeruli outline # As we don't know the ground truth in the biological case, we additionaly performed 2-photon synapto-pHluorin imaging of the resting fluorescence to obtain anatomical glomeruli outlines. We then manually outlined glomeruli in the Z-stacks # The outlines did not provide full information on the actual pixel participation of glomeruli but rather binary information on glomerular position. # Furthermore the exact position was possibly impaired by the manual outlining process and the alignment of functional and anatomical pictures. # This made it difficult to derive objective quality criteria but allowed for visual inspection of match between outlined glomeruli and extracted components. # In[64]: figsize = np.array((3,2))*3 fig = plt.figure(figsize=figsize) ax = fig.add_subplot(111) ax.imshow(ios_data[toplot]['bg'], interpolation='none', cmap=plt.cm.bone, vmin=-1, vmax=1, extent=[0, 83, 0, 63]) no_ticks(ax) for r in ios_data[toplot]['gloms']: ax.contour(r, [0.3], extent=[0, 83, 63, 0], colors=['y'], linewidths=2) plt.show() # __Fig 9__ Glomeruli outlines (yellow) aligned to green image of the dorsal OB. # ### Matrix Factorization # So, did our general observations on the surrogate dataset still held for the IOS data? # To answer this, we performed sICA and NMF for three exemplary measurements according to our deduced heuristic. # We chose $k=150$, well above the approximately activated sixty glomeruli. # Additionally in NMF we set $\alpha_{\text{sm}}=2$ to avoid one-pixel scale structures and $\alpha_{\text{sp}}=0.5$, necessary for removing any component correlation above 0.5. # In[65]: nmf_param = {'config_name': 'nmf150_sp05_sm2', 'num_components':150, 'sparse_param':0.5, 'smooth_param':2., 'init':'convex', 'maxcount':50, 'verbose':10} sica_param = {'config_name':'sica150', 'num_components':150} # The following cell is another long-runner (> 15min). # In[66]: # apply factorizations # this is another long running calculation. Consider loading cached data if you're in a hurry. for measid in measids: for factorizer, param in [(ia.NNMF, nmf_param), (ia.sICA, sica_param)]: print("measurement {}, {}".format(measid, factorizer)) f = factorizer(**param)( ios_data[measid]['ts']) f.base._series[np.isnan(f.base._series)] = 0 results={} results['factorization'] = f results['spec'] = ia.TrialMean()(ia.CutOut(more_param['odor_response'])(f)) results['odor_response'] = ia.SingleSampleResponse()(results['spec']) results['t2t_cor'] = ia.CalcStimulusDrive()(results['spec']) ios_data[measid][param['config_name']] = results # Use the two cells below to save/load the results for later. # In[ ]: #caching data #pickle.dump(ios_data, open('cached_ios_decomp.pik','wb')) # In[ ]: #loading cached results #ios_data = pickle.load(open('cached_ios_decomp.pik', 'rb')) # In[67]: # helper function to plot timecourses rep_param = defaultdict(lambda: {'linewidth':1, 'ls':'--', 'color':'0.8'}) rep_param.update({0:{'linewidth':0.6, 'ls':'--' , 'color':'b','marker':'.', 'ms':2, 'dashes':(2,2)}, 1:{'linewidth':0.6, 'ls':'--', 'color':'g', 'marker':'.','ms':2, 'dashes':(2,2), 'mec':'0.4'}}) def timeplot(ax, data, mode): xtime = np.arange(data.num_timepoints) stimpos_dict = defaultdict(list) for lab_ind, lab in enumerate(data.label_stimuli): stimpos_dict[lab].append(lab_ind) for stim_num, stim in enumerate(stimpos_dict.keys()): for stimrep, stimpos in enumerate(stimpos_dict[stim]): xpoints = xtime + stim_num*data.num_timepoints if stim_num == 0: # plot with legend ax.plot(xpoints, data.trial_shaped()[stimpos,:,mode], label='trial %d'%(stimrep+1), **rep_param[stimrep]) else: ax.plot(xpoints, data.trial_shaped()[stimpos,:,mode],**rep_param[stimrep]) myxticks = np.hstack([xtime+i*data.num_timepoints for i in range(len(stimpos_dict))]) yticks = [0,1,2] ax.set_yticks(yticks) ax.set_yticklabels(yticks) ax.yaxis.set_tick_params(labelsize=7) ax.set_xticks(range(0, len(stimpos_dict)*5, 5)) ax.set_xticklabels([]) ax.set_xlim((0,len(stimpos_dict)*5)) ax.yaxis.set_ticks_position('right') # #### Properties of MF components # In both NMF and sICA some of the extracted components resembled in their characteristics glomeruli, both in the spatial extent and the activation course. But other rather look like parts of blood vessels or noise. # To distinguish putative glomeruli components from the rest we exploited the trial-to-trial correlations $r_k^{\text{t2t}}$ of the components odour spectra. # In[68]: fig_dim = np.array((10, 20))*1.5 fig = plt.figure(figsize=fig_dim) gs_outer = gridspec.GridSpec(2,2, top=0.96) measid = '111210sph' nmf_toplot = nmf_param['config_name'] toplot2 = sica_param['config_name'] for toplot_ix, toplot in enumerate([nmf_toplot, toplot2]): t2t= ios_data[measid][toplot]['t2t_cor']._series.flatten() sort_id = np.argsort(t2t) for block_ix, block in enumerate([sort_id[:10], sort_id[80:90]]): gs = gridspec.GridSpecFromSubplotSpec(10, 1, subplot_spec=gs_outer[block_ix, toplot_ix]) for subpos, comp_idx in enumerate(block): comp = ios_data[measid][toplot]['factorization'] gs_inner = gridspec.GridSpecFromSubplotSpec(1,2, subplot_spec = gs[subpos, 0], width_ratios=[1,3]) ax = fig.add_subplot(gs_inner[0]) ax.imshow(ios_data[measid]['bg'], interpolation='none', cmap=plt.cm.bone, vmin=-1, vmax=1, extent=[0, 83, 0, 63]) pixel_parti = comp.base.shaped2D()[comp_idx] im_rgba = plt.cm.jet((pixel_parti+1)/2) alpha = im_rgba[:,:,3] alpha[np.abs(pixel_parti)<0.2]=0 im_rgba[:,:,3] = alpha ax.imshow(im_rgba, interpolation='none') if (subpos == 0): ax.set_title(toplot, size=global_fs+2) no_ticks(ax) ax = fig.add_subplot(gs_inner[1]) timeplot(ax, comp, comp_idx) if (subpos == 0): if (block_ix == 0): ax.set_title('Components with highest trial-to-trial correlation', size=global_fs+2) else: ax.set_title('Components with low trial-to-trial correlation', size=global_fs+2) ax.text(0.05,0.8, 't2t: %.2f'%(1-t2t[comp_idx]), size=global_fs, transform=ax.transAxes) plt.show() # __Fig. 10__ Exemplary extracted NMF (left column) and sICA components (right column). Some show spatio-temporal characteristics of glomeruli with a high activation similarity for stimuli repetition (top block). # Other components exhibit rather low stimulus reproducibility (bottom block). This might be glomeruli low and therefore noisy activation, but also to a great extend border artefacts or blood vessel induced activation. # In general the distribution of $r_k^{\text{t2t}}$ values were roughly bimodal for both methods with a population of highly stimuli dependent components # In[69]: figsize = np.array((5,2))*2 fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(1,2, wspace=0.5, left=0.15, top=0.8, bottom=0.2) animal_colors = ['0.8','0.6','0.4'][:len(measids)] ax = fig.add_subplot(gs[0]) ax.text(-0.12, 1.05, '(a)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') t2t_all = [1-ios_data[m][nmf_toplot]['t2t_cor']._series.squeeze() for m in measids] ax.hist(t2t_all, np.linspace(-1,1,41), histtype='barstacked',color=animal_colors, label=['animal a','animal b','animal c'], linewidth=0.5) lg = ax.legend(loc = 'upper left', numpoints=1, prop={'size':global_fs}, bbox_to_anchor = [0.01, 1.2, 0., 0.], labelspacing=0.1, handlelength=1, handletextpad=0.3, frameon=True) #decorate ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) ax.yaxis.set_tick_params(labelsize=global_fs, pad=2) ax.set_ylabel('# NMF comp.', fontsize=global_fs, labelpad=2) ax.set_xlim(-0.5,1.1) myylim = ax.get_ylim() ax.set_xticks([-0.5,0,0.5,1]) ax.set_ylim(myylim) ax.set_xlabel(r'$cor_{t2t}$', fontsize=global_fs, labelpad=0) ax.set_title('NMF', size=global_fs) ax = fig.add_subplot(gs[1]) ax.text(-0.12, 1.05, '(b)', transform=ax.transAxes, fontsize=fs_num, fontweight='bold', va='bottom', ha='right') t2t_all = [1-ios_data[m][toplot2]['t2t_cor']._series.squeeze() for m in measids] ax.hist(t2t_all, np.linspace(-1,1,41), histtype='barstacked' , color=animal_colors, label=['animal a','animal b','animal c'], linewidth=0.5) #decorate ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) ax.yaxis.set_tick_params(labelsize=global_fs, pad=2) ax.set_ylabel('# sICA comp.', fontsize=global_fs, labelpad=2) ax.set_xlabel(r'$cor_{t2t}$', fontsize=global_fs, labelpad=0) ax.set_xlim(-0.5,1.1) ax.set_ylim(myylim) ax.set_title('sICA',size=global_fs) ax.set_xticks([-0.5,0,0.5,1]) plt.show() # __Fig. 11__ Trial-to-trial correlations of (a) NMF and (b) sICA components. They were roughly bimodal distributed. # #### Comparison to anatomical outlines # We expected components with high $cor_{t2t}$ to be glomeruli and compared them to the anatomical outlines. # Considering the ambiguity of the anatomical outlines, the extracted components matched them very well (Fig. 12). # The following observations could be made: # # 1. Components with $r^{\text{t2t}}>0.7$ corresponded well to anatomical glomeruli outlines. The same holds true for components with $r^{\text{t2t}}>0.5$ except a few cases where components rather resembled blood vessel or other artefacts # 2. Many glomeruli are detected by both methods, but some were solely extracted by NMF or exhibited a higher $r^{\text{t2t}}$ in NMF than in sICA. # 3. In some cases two NMF components were located within one anatomical outline. It indicates a different temporal activation of both parts which probably is an artefact of underlying blood vessels or the bandpass filtering. Nevertheless, this cases of split glomeruli were quite rare. # # In general the observations were in accordance to the results on the surrogate dataset. # NMF yielded higher source recovery than sICA with a more precise temporal reconstruction, e.g. a higher trial-to-trial correlation. # In[71]: figsize = np.array((9,3*len(measids)))*2 fig = plt.figure(figsize=figsize) gs = gridspec.GridSpec(len(measids),2) # set colors for pixel participation strongglom = [(0.4,0.4,0.8), (0.1,0.1,0.2)] weakglom = [(0.6,0.8,0.6), (0.1,0.2,0.1)] # set contourlines contourborders = [0.3,0.7,1] for ix, measid in enumerate(measids): bg = ios_data[measid]['bg'] gloms = ios_data[measid]['gloms'] for column, toplot in enumerate([nmf_toplot, toplot2]): ax = fig.add_subplot(gs[ix,column]) ax.imshow(bg[:, :], cmap=plt.cm.bone, extent=[0, 83, 0, 63]) for j, b in enumerate(ios_data[measid][toplot]['spec'].base.shaped2D()): if ios_data[measid][toplot]['t2t_cor']._series[0,j] < 0.3: ax.contourf(np.abs(b), [0.3, 0.8, 1], extent=[0, 83, 63, 0], colors=strongglom, alpha=0.7) elif ios_data[measid][toplot]['t2t_cor']._series[0,j] < 0.5: ax.contourf(np.abs(b), [0.3, 0.8, 1], extent=[0, 83, 63, 0], colors=weakglom, alpha=0.7) for glom in gloms: ax.contour(glom, [0.3], extent=[0, 83, 63, 0], colors=['y'], linewidths=1) no_ticks(ax) ax.set_title(toplot) if column ==0: ax.set_ylabel('animal %d'%ix) # add colorbars axglom_strong = fig.add_axes([0.1, 0.93, 0.18, 0.01]) axglom_weak = fig.add_axes([0.1, 0.95, 0.18, 0.01]) bararray = np.array([range(11)]*2)/10. axglom_strong.contourf(bararray, contourborders, colors=strongglom) axglom_strong.set_yticks([]) axglom_strong.set_ylabel(r'$cor_{t2t}>0.7:$', rotation='horizontal', size=global_fs) axglom_strong.set_xticks([i*10 for i in contourborders[:-1]]) axglom_strong.set_xticklabels(contourborders[:-1], fontsize=global_fs) axglom_strong.set_xlabel('pixel participation', size=global_fs) axglom_weak.contourf(bararray,contourborders, colors=weakglom) axglom_weak.set_yticks([]) axglom_weak.set_ylabel(r'$cor_{t2t}>0.5:$', rotation='horizontal', size=global_fs) axglom_weak.set_xticks([i*10 for i in contourborders[:-1]]) axglom_weak.set_xticklabels([]) plt.show() # __Fig. 12__ Contour faces of pixel participation of stimulus driven components ($r^{t2t}_k>0.5$) and highly stimulus driven components ($r^{t2t}_k>0.7$) compared to anatomical outlined glomeruli (yellow) for NMF (left) and sICA (right). # #### Comparison of odor spectra # Finally, we asked for the coincidence of activations of glomeruli extracted by both methods, especially with regard to the confinement of NMF to positive activations. # To asses this we investigated the most reliable extracted components in sICA with $r^{t\text{2t}}>0.7$. # We matched each of those sICA components to the NMF component it exhibited the highest spatial correlation. # The comparison of odour responses for the matched components revealed a general high correlation ($r=0.95$) of both methods for reliable extracted sources. # It furthermore showed that sICA extracts only a few and small negative signals which are not present in NMF. Although this could indicate that our assumption of purely excitatory olfactory activation is been mildly impaired, negative values in sICA might just as well be artefacts of the initial highpass filtering. # In[72]: matched_spec = {} for measid in measids: # match spatial data1 = ios_data[measid][nmf_toplot]['spec'] high_cor_components = np.where(ios_data[measid][toplot2]['t2t_cor']._series.squeeze()<0.3)[0] data2 = ios_data[measid][toplot2]['spec'] num = len(high_cor_components) matchdist = (1-squareform(pdist( np.hstack([data2.base._series[high_cor_components].T, data1.base._series.T]).T, 'correlation'))[:num,num:]) match = np.argmax(matchdist, 1) matched_spec[measid] = [list(high_cor_components), list(match)] # In[73]: figsize = np.array((2,2))*3 fig = plt.figure(figsize=figsize) ax = fig.add_axes([0.2,0.2,0.7,0.7]) specs1, specs2 = [], [] for ix, measid in enumerate(measids): spec1 = ios_data[measid][nmf_toplot]['spec']._series[:,matched_spec[measid][1]] spec2 = ios_data[measid][toplot2]['spec']._series[:,matched_spec[measid][0]] specs1.append(spec1) specs2.append(spec2) ax.scatter(spec1, spec2, marker='.', s = 40, facecolor = 'none', edgecolor = ['r', 'g','b'][ix], label='animal %s'%ix, alpha=0.5) slope, intercept, r_value, p_value, std_err = stats.linregress(np.hstack([i.flatten() for i in specs1]), np.hstack([i.flatten() for i in specs2])) ax.plot([0,4],np.array([0,4])*slope+intercept, 'k') ax.set_xlim(-1,5) ax.set_ylim(-1,5) ax.xaxis.set_tick_params(labelsize=global_fs, pad=2) ax.yaxis.set_tick_params(labelsize=global_fs, pad=2) ax.set_ylabel('odor response sICA', fontsize=global_fs, labelpad=2) ax.set_xlabel('odor response NMF', fontsize=global_fs, labelpad=2) ax.text(3.5,3.5,'r = %.2f'%r_value, fontsize=global_fs,) ax.legend(loc = 'upper left', scatterpoints=1, prop={'size':global_fs}, bbox_to_anchor = [0.01, 1, 0., 0.], labelspacing=0.1, handlelength=1, handletextpad=0.1, frameon=False) plt.show() # __Fig. 13__ Comparison of NMF an sICA odour responses for components extracted in sICA with $r^{t2t}>0.7$. The odour responses were highly similar. # #### Excess of NMF components # To understand the increased number of high correlation NMF components compared to sICA components we examined for all NMF components with $r^{t2t}>0.7$ the spatial best matching sICA components (Figure 14). One can identify three reasons: # # 1. Some sICA components group more glomeruli together than NMF components. Usually only one of the NMF components is highly similar in its odor spectrum (for example components 16/35 or 26/45). # 2. Some sICA components exhibit significant non-local contributions and hence a lower trial-to-trial correlation (for example components 29 and 40). # 3. For some NMF component there has not been extracted any sICA component at the same spatial location (for example components 72,75 and 76). # In[74]: matched_spec_rev = {} for measid in measids: # match spatial data1 = ios_data[measid][toplot2]['spec'] data2 = ios_data[measid][nmf_toplot]['spec'] high_cor_components = np.where(ios_data[measid][nmf_toplot]['t2t_cor']._series.squeeze()<0.3)[0] num = len(high_cor_components) matchdist = (1-squareform(pdist( np.hstack([data2.base._series[high_cor_components].T, data1.base._series.T]).T, 'correlation'))[:num,num:]) match = np.argmax(np.abs(matchdist), 1) # reorder after macht quality reorder = np.argsort(np.max(np.abs(matchdist), 1))[::-1] matched_spec_rev[measid] = [list(high_cor_components[reorder]), list(match[reorder])] # In[75]: measid = '111222sph' num_rows = len(matched_spec_rev[measid][0]) figsize = np.array((3, num_rows))*3 fig = plt.figure(figsize = figsize) gs = gridspec.GridSpec(num_rows,1, top=0.97, hspace=0.5, left=0.05) #np.argsort(ios_data[measid][toplot2]['t2t_cor']._series[0,match_id2]) for ix, (match_id1, match_id2) in enumerate(zip(*matched_spec_rev[measid])): gs_inner = gridspec.GridSpecFromSubplotSpec(1,3, subplot_spec=gs[ix]) spec1 = ios_data[measid][toplot2]['spec']._series[:,match_id2] spec2 = ios_data[measid][nmf_toplot]['spec']._series[:,match_id1] mymin = np.floor(np.min(np.array([spec1, spec2]))*10)/10 mymax = np.ceil(np.max(np.array([spec1, spec2]))*10)/10 ax = fig.add_subplot(gs_inner[0], aspect='equal') ax.plot(spec1, spec2, 'o') ax.set_xlim([mymin, mymax]) ax.set_ylim([mymin, mymax]) ax.set_xticks([0, mymax]) ax.set_yticks([0, mymax]) slope, intercept, r_value, p_value, std_err = stats.linregress(spec1, spec2) ax.text(0.05,0.9,'cor: %.2f'%r_value, size=global_fs, transform=ax.transAxes) ax.text(-0.25,1.1,'(%d)'%ix, size=global_fs+4, transform=ax.transAxes) ax.set_xlabel('odour response sICA', size=global_fs) ax.set_ylabel('odour response NMF', size=global_fs) ax = fig.add_subplot(gs_inner[1]) ax.imshow(ios_data[measid][nmf_toplot]['spec'].base.shaped2D()[match_id1], vmin=-1, vmax=1) ax.set_xlabel('t2t: %.2f'%(1-ios_data[measid][nmf_toplot]['t2t_cor']._series[0,match_id1]), size=global_fs, y=0.9) ax.set_title('pixel participation NMF', size=global_fs) no_ticks(ax) ax = fig.add_subplot(gs_inner[2]) ax.imshow(ios_data[measid][toplot2]['spec'].base.shaped2D()[match_id2], vmin=-1, vmax=1) ax.contour(ios_data[measid][nmf_toplot]['spec'].base.shaped2D()[match_id1], [0.5], lw=1, colors=['w']) ax.set_xlabel('t2t: %.2f'%(1-ios_data[measid][toplot2]['t2t_cor']._series[0,match_id2]), size=global_fs, y=0.9) ax.set_title('pixel participation sICA', size=global_fs) no_ticks(ax) plt.show() # __Fig. 14__ Spatially best matching sICA components for all NMF components with high trial-to-trial correlation $cor_{t2t}>0.7$. First panel of each row shows odour response similarity of the matched components. Second and third row show pixel participation of NMF respectivly sICA components. White contours indicate position of strong pixel participation in NMF component. Components are sorted according to spatial matching quality. # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: # In[ ]: