Notebook

Ensemble average¶

Marcos Duarte
Laboratory of Biomechanics and Motor Control
Federal University of ABC, Brazil

A usual procedure employed to present the average pattern of a variable as a function of time or movement cycle across trials or across subjects is to show the ensemble average curve, which is a fancy name for (typically) the mean $\pm$ 1 standard-deviation curve.

Let's simulate some data and explore different aesthetic variations to present the ensemble average.

In [1]:

import numpy as np
import matplotlib.pyplot as plt
import sys
from tnorma import tnorma

In [2]:

# slighly different data with slightly different duration (number of points)
from scipy.signal import butter, filtfilt
# Butterworth filter
b, a = butter(1, (5/(100/2)), btype = 'low')
N = 10
y = np.empty([120, N]) * np.NaN
for i in range(N):
    t = np.arange(0, 100 + np.random.randint(-20, high=20)) / 100
    y[0: t.size, i] = 2*np.sin(2 * np.pi * t) + np.random.randn(t.size) / 2 + t.size / 20
    y[0: t.size, i] = filtfilt(b, a,  y[0: t.size, i])

plt.figure(figsize=(9, 4))
plt.plot(y)
plt.xlabel('data points')
plt.title('Plot of trials')
plt.show()

To calculate the mean and standard deviation across these trials the different trials must have the same number of points.
We can do this with the time normalization of data, where we will normalize each trial to the same percent cycle (from 0 to 100%):

In [3]:

# yn, tn, indie = tnorma(y, axis=0, step=1, k=3, smooth=0, mask=None,
#                       nan_at_ext='delete', show=False, ax=None)
yn, tn, indie = tnorma(y)
# plot of the normalized data
plt.figure(figsize=(10, 5))
plt.plot(yn)
plt.xlabel('Cycle [%]')
plt.title('Plot of normalized trials')
plt.show()

So, the trials have the same number of points, now we can calculate the mean and standard deviation curves and plot the ensemble average

In [4]:

ym, ysd = np.mean(yn, axis=1), np.std(yn, axis=1, ddof=1) # one line is all we need
# plot of the ensemble average
plt.figure(figsize=(10,5))
plt.errorbar(tn, ym, ysd, linewidth=2)
plt.xlabel('Cycle [%]')
plt.title('Plot of ensemble average')
plt.show()

And here are some aesthetic variations to show the ensemble average:

In [5]:

plt.rc('axes', labelsize=10,  titlesize=12)
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
plt.figure(figsize=(10, 7))

ax1 = plt.subplot(221)
ax1.set_title('Errorbar every two data points and zero cap size')
ax1.errorbar(tn, ym, ysd, color = [0, 0, 1, 0.5], capsize=0, errorevery=2, lw=4)

ax2 = plt.subplot(222)
ax2.set_title('Mean curve and shaded area')
ax2.fill_between(tn, ym+ysd, ym-ysd, color = [0, 0, 1, 0.5])
ax2.plot(tn, ym, color = [0, 0, 1, .8], lw=2, label='Data')
ax2.legend(fontsize=14, loc='best', framealpha=.8)

ax3 = plt.subplot(223)
ax3.set_title('Semi-transparent shaded areas overlap well')
ax3.fill_between(tn, ym+ysd, ym-ysd, color=[0, 0, 1, 0.5], edgecolor=None)
y2 = np.mean(ym) - ym + ym[0]
ax3.fill_between(tn, y2+ysd, y2-ysd, color=[1, 0, 0, 0.5], edgecolor=None)
ax3.set_xlabel('Cycle [%]')
p1 = plt.Rectangle((0, 0), 1, 1, color=[0, 0, 1, 0.5])
p2 = plt.Rectangle((0, 0), 1, 1, color=[1, 0, 0, 0.5])
# fill_between() command creates a PolyCollection that is not supported by the legend()
ax3.legend([p1, p2], ['Group 1', 'Group 2'], fontsize=14, loc='best', framealpha=.8)

ax4 = plt.subplot(224)
ax4.set_title('Combine errorbar and shaded area')
ax4.errorbar(tn, ym, ysd, color = [0, 0, 1, 0.5], capsize=0, lw=1.5)
y2 = np.mean(ym) - ym + ym[0]
ax4.fill_between(tn, y2+ysd, y2-ysd, color=[1, 0, 0, 0.5], edgecolor=None)
ax4.set_xlabel('Cycle [%]')
p1 = plt.Line2D((0, 1), (1, 1), color=[0, 0, 1, 1], lw=1.5)
p2 = plt.Rectangle((0, 0), 1, 1, color=[1, 0, 0, 0.5])
# fill_between() command creates a PolyCollection that is not supported by the legend()
ax4.legend([p1, p2], ['Group 1', 'Group 2'], fontsize=14, loc='best', framealpha=.8)

plt.suptitle(r'$\mathrm{Aesthetic\,variations\,for\,the\,ensemble\,average}$', fontsize=18, y=1.04)
plt.tight_layout()
plt.show()

Instead of mean and standard deviation we can use median and the first and third quartiles:

In [6]:

ym = np.median(yn, axis=1)
yq1, yq3 = np.abs(ym-np.percentile(yn, 25, 1)), np.abs(np.percentile(yn, 75, 1)-ym)
# plot of the ensemble average
plt.figure(figsize=(9, 5))
plt.errorbar(tn, ym, np.vstack((yq1, yq3)), linewidth=2)
plt.xlabel('Cycle [%]')
plt.title('Plot of ensemble average (median and quartiles)')
plt.show()

In [ ]: