#!/usr/bin/env python # coding: utf-8 # ## EMD Demo # # [EnergyFlow website](https://energyflow.network) # # In this tutorial, we demonstrate how to compute EMD values for particle physics events. The core of the computation is done using the [Python Optimal Transport](https://pot.readthedocs.io) library with EnergyFlow providing a convenient interface to particle physics events. Batching functionality is also provided using the builtin multiprocessing library to distribute computations to worker processes. # # ### Energy Mover's Distance # # The Energy Mover's Distance was introduced in [1902.02346](https://arxiv.org/abs/1902.02346) as a metric between particle physics events. Closely related to the Earth Mover's Distance, the EMD solves an optimal transport problem between two distributions of energy (or transverse momentum), and the associated distance is the "work" required to transport supply to demand according to the resulting flow. Mathematically, we have # $$\text{EMD}(\mathcal E, \mathcal E') = \min_{\{f_{ij}\ge0\}}\sum_{ij} f_{ij} \frac{\theta_{ij}}{R} + \left|\sum_i E_i - \sum_j E'_j\right|,$$ # $$\sum_{j} f_{ij} \le E_i,\,\,\, \sum_i f_{ij} \le E'_j,\,\,\,\sum_{ij}f_{ij}= \min\Big(\sum_iE_i,\,\sum_jE'_j\Big).$$ # ### Imports # In[1]: import numpy as np get_ipython().run_line_magic('load_ext', 'wurlitzer') get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import energyflow as ef # ### Plot Style # In[2]: plt.rcParams['figure.figsize'] = (4,4) plt.rcParams['figure.dpi'] = 120 plt.rcParams['text.usetex'] = True plt.rcParams['font.family'] = 'serif' # ### Load EnergyFlow Quark/Gluon Jet Samples # In[3]: # load quark and gluon jets X, y = ef.qg_jets.load(2000, pad=False) num = 750 # the jet radius for these jets R = 0.4 # process jets Gs, Qs = [], [] for arr,events in [(Gs, X[y==0]), (Qs, X[y==1])]: for i,x in enumerate(events): if i >= num: break # ignore padded particles and removed particle id information x = x[x[:,0] > 0,:3] # center jet according to pt-centroid yphi_avg = np.average(x[:,1:3], weights=x[:,0], axis=0) x[:,1:3] -= yphi_avg # mask out any particles farther than R=0.4 away from center (rare) x = x[np.linalg.norm(x[:,1:3], axis=1) <= R] # add to list arr.append(x) # ### Event Display with EMD Flow # In[4]: # choose interesting events ev0, ev1 = Gs[0], Gs[15] # calculate the EMD and the optimal transport flow R = 0.4 emdval, G = ef.emd.emd(ev0, ev1, R=R, return_flow=True) # plot the two events colors = ['red', 'blue'] labels = ['Gluon Jet 1', 'Gluon Jet 2'] for i,ev in enumerate([ev0, ev1]): pts, ys, phis = ev[:,0], ev[:,1], ev[:,2] plt.scatter(ys, phis, marker='o', s=2*pts, color=colors[i], lw=0, zorder=10, label=labels[i]) # plot the flow mx = G.max() xs, xt = ev0[:,1:3], ev1[:,1:3] for i in range(xs.shape[0]): for j in range(xt.shape[0]): if G[i, j] > 0: plt.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], alpha=G[i, j]/mx, lw=1.25, color='black') # plot settings plt.xlim(-R, R); plt.ylim(-R, R) plt.xlabel('Rapidity'); plt.ylabel('Azimuthal Angle') plt.xticks(np.linspace(-R, R, 5)); plt.yticks(np.linspace(-R, R, 5)) plt.text(0.6, 0.03, 'EMD: {:.1f} GeV'.format(emdval), fontsize=10, transform=plt.gca().transAxes) plt.legend(loc=(0.1, 1.0), frameon=False, ncol=2, handletextpad=0) plt.show() # ### Intrinsic Dimension of Quark and Gluon Jets # The correlation dimension of a dataset is a type of fractal dimension which quantifies the dimensionality of the space of events at different energy scales $Q$. # # It is motivated by the fact that the number of neighbors a point has in a ball of radius $Q$ grows as $Q^\mathrm{dim}$, giving rise to the definition: # # $$ \dim (Q) = Q\frac{\partial}{\partial Q} \ln \sum_{i 0], bins=bins)[0]) dims.append((np.log(counts[1:] + reg) - np.log(counts[:-1] + reg))/dmidbins) # In[7]: # plot the correlation dimensions plt.plot(midbins2, dims[0], '-', color='blue', label='Quarks') plt.plot(midbins2, dims[1], '-', color='red', label='Gluons') # labels plt.legend(loc='center right', frameon=False) # plot style plt.xscale('log') plt.xlabel('Energy Scale Q/pT'); plt.ylabel('Correlation Dimension') plt.xlim(0.02, 1); plt.ylim(0, 5) plt.show() # ## Using Built-In Correlation Dimension # In[9]: # create external EMD handlers that will compute the correlation dimensions on the fly gcorrdim = ef.emd.wasserstein.CorrelationDimension(0.01, 1, 60) qcorrdim = ef.emd.wasserstein.CorrelationDimension(0.01, 1, 60) # compute pairwise EMDs between all jets (takes about 3 minutes, can change n_jobs if you have more cores) ef.emd.emds(Gs, R=R, norm=True, verbose=1, n_jobs=-1, print_every=-10, external_emd_handler=gcorrdim) ef.emd.emds(Qs, R=R, norm=True, verbose=1, n_jobs=-1, print_every=-10, external_emd_handler=qcorrdim) # In[20]: # plot the correlation dimensions plt.plot(qcorrdim.corrdim_bins(), qcorrdim.corrdims()[0], '-', color='blue', label='Quarks') plt.plot(gcorrdim.corrdim_bins(), gcorrdim.corrdims()[0], '-', color='red', label='Gluons') # labels plt.legend(loc='center right', frameon=False) # plot style plt.xscale('log') plt.xlabel('Energy Scale Q/pT'); plt.ylabel('Correlation Dimension') plt.xlim(0.02, 1); plt.ylim(0, 5) plt.show() # In[ ]: