#!/usr/bin/env python # coding: utf-8 # # Ensemble average # # > Marcos Duarte # > [Laboratory of Biomechanics and Motor Control](https://bmclab.pesquisa.ufabc.edu.br/) # > Federal University of ABC, Brazil # A usual procedure employed to present the average pattern of a variable as a function of time or movement cycle across trials or across subjects is to show the ensemble average curve, which is a fancy name for (typically) the mean $\pm$ 1 standard-deviation curve. # # Let's simulate some data and explore different aesthetic variations to present the ensemble average. # In[1]: import numpy as np import matplotlib.pyplot as plt import sys from tnorma import tnorma # In[2]: # slighly different data with slightly different duration (number of points) from scipy.signal import butter, filtfilt # Butterworth filter b, a = butter(1, (5/(100/2)), btype = 'low') N = 10 y = np.empty([120, N]) * np.NaN for i in range(N): t = np.arange(0, 100 + np.random.randint(-20, high=20)) / 100 y[0: t.size, i] = 2*np.sin(2 * np.pi * t) + np.random.randn(t.size) / 2 + t.size / 20 y[0: t.size, i] = filtfilt(b, a, y[0: t.size, i]) plt.figure(figsize=(9, 4)) plt.plot(y) plt.xlabel('data points') plt.title('Plot of trials') plt.show() # To calculate the mean and standard deviation across these trials the different trials must have the same number of points. # We can do this with the [time normalization of data](http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/TimeNormalization.ipynb), where we will normalize each trial to the same percent cycle (from 0 to 100%): # In[3]: # yn, tn, indie = tnorma(y, axis=0, step=1, k=3, smooth=0, mask=None, # nan_at_ext='delete', show=False, ax=None) yn, tn, indie = tnorma(y) # plot of the normalized data plt.figure(figsize=(10, 5)) plt.plot(yn) plt.xlabel('Cycle [%]') plt.title('Plot of normalized trials') plt.show() # So, the trials have the same number of points, now we can calculate the mean and standard deviation curves and plot the ensemble average # In[4]: ym, ysd = np.mean(yn, axis=1), np.std(yn, axis=1, ddof=1) # one line is all we need # plot of the ensemble average plt.figure(figsize=(10,5)) plt.errorbar(tn, ym, ysd, linewidth=2) plt.xlabel('Cycle [%]') plt.title('Plot of ensemble average') plt.show() # And here are some aesthetic variations to show the ensemble average: # In[5]: plt.rc('axes', labelsize=10, titlesize=12) plt.rc('xtick', labelsize=10) plt.rc('ytick', labelsize=10) plt.figure(figsize=(10, 7)) ax1 = plt.subplot(221) ax1.set_title('Errorbar every two data points and zero cap size') ax1.errorbar(tn, ym, ysd, color = [0, 0, 1, 0.5], capsize=0, errorevery=2, lw=4) ax2 = plt.subplot(222) ax2.set_title('Mean curve and shaded area') ax2.fill_between(tn, ym+ysd, ym-ysd, color = [0, 0, 1, 0.5]) ax2.plot(tn, ym, color = [0, 0, 1, .8], lw=2, label='Data') ax2.legend(fontsize=14, loc='best', framealpha=.8) ax3 = plt.subplot(223) ax3.set_title('Semi-transparent shaded areas overlap well') ax3.fill_between(tn, ym+ysd, ym-ysd, color=[0, 0, 1, 0.5], edgecolor=None) y2 = np.mean(ym) - ym + ym[0] ax3.fill_between(tn, y2+ysd, y2-ysd, color=[1, 0, 0, 0.5], edgecolor=None) ax3.set_xlabel('Cycle [%]') p1 = plt.Rectangle((0, 0), 1, 1, color=[0, 0, 1, 0.5]) p2 = plt.Rectangle((0, 0), 1, 1, color=[1, 0, 0, 0.5]) # fill_between() command creates a PolyCollection that is not supported by the legend() ax3.legend([p1, p2], ['Group 1', 'Group 2'], fontsize=14, loc='best', framealpha=.8) ax4 = plt.subplot(224) ax4.set_title('Combine errorbar and shaded area') ax4.errorbar(tn, ym, ysd, color = [0, 0, 1, 0.5], capsize=0, lw=1.5) y2 = np.mean(ym) - ym + ym[0] ax4.fill_between(tn, y2+ysd, y2-ysd, color=[1, 0, 0, 0.5], edgecolor=None) ax4.set_xlabel('Cycle [%]') p1 = plt.Line2D((0, 1), (1, 1), color=[0, 0, 1, 1], lw=1.5) p2 = plt.Rectangle((0, 0), 1, 1, color=[1, 0, 0, 0.5]) # fill_between() command creates a PolyCollection that is not supported by the legend() ax4.legend([p1, p2], ['Group 1', 'Group 2'], fontsize=14, loc='best', framealpha=.8) plt.suptitle(r'$\mathrm{Aesthetic\,variations\,for\,the\,ensemble\,average}$', fontsize=18, y=1.04) plt.tight_layout() plt.show() # Instead of mean and standard deviation we can use median and the first and third quartiles: # In[6]: ym = np.median(yn, axis=1) yq1, yq3 = np.abs(ym-np.percentile(yn, 25, 1)), np.abs(np.percentile(yn, 75, 1)-ym) # plot of the ensemble average plt.figure(figsize=(9, 5)) plt.errorbar(tn, ym, np.vstack((yq1, yq3)), linewidth=2) plt.xlabel('Cycle [%]') plt.title('Plot of ensemble average (median and quartiles)') plt.show() # In[ ]: