#!/usr/bin/env python
# coding: utf-8

# ## Plotly plot of chord diagrams ##

# Circular layout or [Chord diagram](https://en.wikipedia.org/wiki/Chord_diagram) is a method of
# visualizing data that describe relationships. It was intensively promoted through
# [Circos](http://circos.ca/), a software package in Perl that was initially designed for
# displaying genomic data.

# In 2013 on [stackoverflow](http://stackoverflow.com/questions/19105801/chord-diagram-in-python)
# the question was asked whether there is a Python package for plotting chord diagrams, but it
# was closed as being considered *off topic*.

# After two years, in 2015, I presented in the initial version of this Jupyter Notebook a method
# to generate a chord diagram via Python Plotly.
# This Jupyter Notebook is an update of the initial one, using Python 3.6, and Plotly 3.+.

# We illustrate the method of generating a chord diagram from data recorded in a square matrix.
# The rows and columns represent the same entities.

# Suppose that for a community of 5 friends on Facebook we record the number of comments posted
# by each member on the other friends' walls. The data table is given in the next cell:

# In:

from IPython.display import Image

Image(filename='Data/Data-table.png')

# The aim of our visualization is to illustrate the total number of posts by each community
# member, and the flows of posts between pairs of friends.
# In:

import numpy as np
from numpy import pi
import platform
import plotly

# In:

print(f'Python version: {platform.python_version()}')
print(f'Plotly version: {plotly.__version__}')

# Define the array of data:

# In:

matrix = np.array([[16,  3, 28,  0, 18],
                   [18,  0, 12,  5, 29],
                   [ 9, 11, 17, 27,  0],
                   [19,  0, 31, 11, 12],
                   [23, 17, 10,  0, 34]], dtype=int)

# In:


def check_data(data_matrix):
    """Return n for an (n, n) data array.

    Raises:
        ValueError: if data_matrix is not two-dimensional and square.
    """
    shape = np.shape(data_matrix)
    # Anything that is not 2-D cannot be square; report it with the same message
    # instead of letting tuple unpacking fail with an unrelated one.
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError('Data array must have a (n,n) shape')
    return shape[0]


# In:

L = check_data(matrix)

# A chord diagram encodes information in two graphical objects:
# - ideograms, represented by distinctly colored arcs of circles;
# - ribbons, that are planar shapes bounded by two quadratic Bezier curves and two arcs of
#   circle, that can degenerate to a point;

# ### Ideograms ###

# Summing up the entries on each matrix row, one gets a value (in our example this value is
# equal to the number of posts by a community member).
# Let us denote by total_comments the total number of posts recorded in this community.
#
# Theoretically the interval [0, total_comments) is mapped linearly onto the unit circle,
# identified with the interval $[0,2\pi)$.
#
# For a better looking plot one proceeds as follows: starting from the angular position $0$,
# in counter-clockwise direction, one draws successively, around the unit circle, two parallel
# arcs of length equal to a mapped row sum value, minus a fixed gap. Click the image below:
#
#

# In:

from IPython.display import HTML

# In:

HTML('')

# Now we define the functions that process data in order to get ideogram ends.
#
# As we pointed out, the unit circle is oriented counter-clockwise.
# In order to get an arc of circle of end angular coordinates $\theta_0<\theta_1$, we define a
# function moduloAB that resolves the case when an arc contains the point of angular
# coordinate $0$ (for example $\theta_0=2\pi-\pi/12$, $\theta_1=\pi/9$).
# The function corresponding to $a=-\pi, b=\pi$ allows to map the interval $[0,2\pi)$ onto
# $[-\pi, \pi)$. Via this transformation we have:
#
# $\theta_0\mapsto \theta'_0=-\pi/12$, and
#
# $\theta_1\mapsto \theta'_1=\pi/9$,
#
# and now $\theta'_0<\theta'_1$.

# In:


def moduloAB(x, a, b):
    """Map the real number x into the interval [a, b).

    The interval is identified with the unit circle, i.e. b - a is expected to be 2*pi.

    Raises:
        ValueError: if a >= b.
    """
    if a >= b:
        raise ValueError('Incorrect interval ends')
    # With a positive modulus Python's % never returns a negative value,
    # so the remainder only has to be shifted back by a.
    return (x - a) % (b - a) + a


# In:


def test_2PI(x):
    """Return True when x lies in [0, 2*pi)."""
    return 0 <= x < 2*pi


# Compute the row sums and the lengths of corresponding ideograms:

# In:

row_sum = [np.sum(matrix[k, :]) for k in range(L)]

# set the gap between two consecutive ideograms
gap = 2*pi*0.005
ideogram_length = 2*pi * np.asarray(row_sum) / sum(row_sum) - gap*np.ones(L)

# The next function returns the list of end angular coordinates for each ideogram arc:

# In:


def get_ideogram_ends(ideogram_len, gap):
    """Return the list of [left, right] angular ends of consecutive ideograms.

    The first ideogram starts at angle 0; each following one starts `gap` after the
    right end of the previous one.
    """
    ideo_ends = []
    left = 0
    for arc_len in ideogram_len:
        right = left + arc_len
        ideo_ends.append([left, right])
        left = right + gap
    return ideo_ends


# In:

ideo_ends = get_ideogram_ends(ideogram_length, gap)

# The function make_ideogram_arc returns equally spaced points on an ideogram arc, expressed as
# complex numbers in polar form:

# In:


def make_ideogram_arc(R, phi, a=50):
    """Return equally spaced points on a circle arc, as complex numbers.

    R is the circle radius.
    phi is the list of angle coordinates of the two arc ends.
    a is a parameter that controls the number of points to be evaluated on an arc.
    """
    if not test_2PI(phi[0]) or not test_2PI(phi[1]):
        phi = [moduloAB(t, 0, 2*pi) for t in phi]
    # Parenthesise the modulus: `x % 2*pi` is evaluated as `(x % 2) * pi`.
    length = (phi[1] - phi[0]) % (2*pi)
    nr = 5 if length <= pi/4 else int(a*length/pi)

    if phi[0] >= phi[1]:
        # The arc crosses the angle 0: move both ends to [-pi, pi) so that they are increasing.
        phi = [moduloAB(t, -pi, pi) for t in phi]
    theta = np.linspace(phi[0], phi[1], nr)
    return R * np.exp(1j*theta)


# The real and imaginary parts of these complex numbers will be used to define the ideogram as a
# [Plotly shape](https://plot.ly/python/shapes/) bounded by a SVG path.
# In:

make_ideogram_arc(1.3, [11*pi/6, pi/17])

# Set ideograms labels and colors:

# In:

labels = ['Emma', 'Isabella', 'Ava', 'Olivia', 'Sophia']
ideo_colors = ['rgba(244, 109, 67, 0.75)',
               'rgba(253, 174, 97, 0.75)',
               'rgba(254, 224, 139, 0.75)',
               'rgba(217, 239, 139, 0.75)',
               'rgba(166, 217, 106, 0.75)']  # brewer colors with alpha set on 0.75

# ### Ribbons in a chord diagram ###

# While ideograms illustrate how many comments each member of the Facebook community posted,
# ribbons give a comparative information on the flows of comments from one friend to another.
#
# To illustrate this flow we map data onto the unit circle. More precisely, for each matrix
# row, $k$, the application:
#
# t$\mapsto$ t*ideogram_length[k]/row_sum[k]
#
# maps the interval [0, row_sum[k]] onto the interval [0, ideogram_length[k]]. Hence each entry
# matrix[k][j] in the $k^{th}$ row is mapped to matrix[k][j] * ideogram_length[k] / row_sum[k].
#
# The function map_data maps all matrix entries to the corresponding values in the intervals
# associated to ideograms:

# In:


def map_data(data_matrix, row_value, ideogram_length):
    """Scale each row k of data_matrix by ideogram_length[k] / row_value[k].

    Returns an array of the same shape as data_matrix whose entry [k][j] is
    ideogram_length[k] * data_matrix[k][j] / row_value[k].
    """
    data = np.asarray(data_matrix)
    # Column vectors broadcast over the columns of the data, so the function no longer
    # depends on a module-level matrix size.
    lengths = np.asarray(ideogram_length)[:, np.newaxis]
    totals = np.asarray(row_value)[:, np.newaxis]
    return lengths * data / totals


# In:

mapped_data = map_data(matrix, row_sum, ideogram_length)
mapped_data

# - To each pair of values (mapped_data[k][j], mapped_data[j][k]), $k<=j$, one associates a
# ribbon, that is a curvilinear filled rectangle (that can be degenerate), having as opposite
# sides two subarcs of the $k^{th}$ ideogram, respectively $j^{th}$ ideogram, and two arcs of
# quadratic Bézier curves.
# Here we illustrate the ribbons associated to pairs (mapped_data[0][j], mapped_data[j][0]),
# $j=\overline{0,4}$, that illustrate the flow of comments between Emma and all other friends,
# and herself:

# In:

HTML('')

# - For a better looking chord diagram,
# [Circos documentation](http://circos.ca/presentations/articles/vis_tables1/) recommends to
# sort increasingly each row of the mapped_data.
# The array idx_sort, defined below, has on each row the indices that sort the corresponding
# row in mapped_data:

# In:

idx_sort = np.argsort(mapped_data, axis=1)
idx_sort

# In the following we call ribbon ends, the lists l=[l[0], l[1]], r=[r[0], r[1]] having as
# elements the angular coordinates of the ends of arcs that are opposite sides in a ribbon.
# These arcs are sub-arcs in the internal boundaries of the ideograms, connected by the ribbon
# (see the image above).

# - Compute the ribbon ends and store them as tuples in a list of lists ($L\times L$):

# In:


def make_ribbon_ends(mapped_data, ideo_ends, idx_sort):
    """Return, for each ideogram, the angular ends of its sub-arcs.

    Row k of the result lists the (start, end) tuples of the consecutive sub-arcs of the
    k-th ideogram, taken in the increasing order given by idx_sort[k]; the first sub-arc
    starts at the left end of the ideogram, ideo_ends[k][0].
    """
    L = mapped_data.shape[0]
    ribbon_boundary = np.zeros((L, L+1))
    for k in range(L):
        start = ideo_ends[k][0]
        ribbon_boundary[k][0] = start
        for j in range(1, L+1):
            J = idx_sort[k][j-1]
            ribbon_boundary[k][j] = start + mapped_data[k][J]
            start = ribbon_boundary[k][j]
    return [[(ribbon_boundary[k][j], ribbon_boundary[k][j+1]) for j in range(L)]
            for k in range(L)]


# In:

ribbon_ends = make_ribbon_ends(mapped_data, ideo_ends, idx_sort)
print('ribbon ends starting from the ideogram\n', ribbon_ends)

# We note that ribbon_ends[k][j] corresponds to mapped_data[k][idx_sort[k][j]], i.e. the length
# of the arc of ends in ribbon_ends[k][j] is equal to mapped_data[k][idx_sort[k][j]].

# Now we define a few functions that compute the control points for Bézier ribbon sides.

# The function control_pts returns the cartesian coordinates of the control points,
# $b_0, b_1, b_2$, supposed as being initially located on the unit circle, and thus defined
# only by their angular coordinate.
# The angular coordinate of the point $b_1$ is the mean of angular coordinates of the points
# $b_0, b_2$.
#
# Since for a Bézier ribbon side only $b_0, b_2$ are placed on the unit circle, one gives
# radius as a parameter that controls position of $b_1$. radius is the distance of $b_1$ to
# the circle center.

# In:


def control_pts(angle, radius):
    """Return the cartesian coordinates of the control points b0, b1, b2.

    angle is a 3-list containing angular coordinates of the control points b0, b1, b2.
    radius is the distance from b1 to the origin O(0,0).

    Raises:
        ValueError: if angle does not have exactly 3 elements.
    """
    if len(angle) != 3:
        raise ValueError('angle must have len =3')
    b_cplx = np.array([np.exp(1j*angle[k]) for k in range(3)])
    # Only the middle control point leaves the unit circle; b0 and b2 stay on it.
    b_cplx[1] = radius * b_cplx[1]
    return list(zip(b_cplx.real, b_cplx.imag))


# In:


def ctrl_rib_chords(l, r, radius):
    """Return a 2-list with the control polygons of the two Bezier sides of a ribbon.

    l (r) is the list of angular variables of the ribbon arc ends defining the ribbon
    starting (ending) arc.
    radius is a common parameter for both control polygons.

    Raises:
        ValueError: if l or r does not have exactly 2 elements.
    """
    if len(l) != 2 or len(r) != 2:
        raise ValueError('the arc ends must be elements in a list of len 2')
    return [control_pts([l[j], (l[j]+r[j])/2, r[j]], radius) for j in range(2)]


# Each ribbon is colored with the color of one of the two ideograms it connects.
# We define an L-list of L-lists of colors for ribbons. Denote it by ribbon_color.
#
# ribbon_color[k][j] is the Plotly color string for the ribbon associated to mapped_data[k][j]
# and mapped_data[j][k], i.e. the ribbon connecting two subarcs in the $k^{th}$, respectively,
# $j^{th}$ ideogram. Hence this structure is symmetric.

# Initially we define:

# In:

ribbon_color = [L * [ideo_colors[k]] for k in range(L)]

# and then eventually we are changing the color in a few positions.
#
# For our example we are performing the following color change:

# In:

# NOTE(review): the subscripts of these four assignments were lost in extraction (the text
# read `ribbon_color=ideo_colors`, which would replace the whole color matrix by a flat list).
# The positions below follow the pattern ribbon_color[k][j] = ideo_colors[j] with k < j;
# confirm them against the original notebook.
ribbon_color[0][4] = ideo_colors[4]
ribbon_color[1][2] = ideo_colors[2]
ribbon_color[2][3] = ideo_colors[3]
ribbon_color[2][4] = ideo_colors[4]

# The symmetric locations are not modified, because we do not access
# ribbon_color[k][j], $k>j$, when drawing the ribbons.

# Functions that return the Plotly SVG paths that are ribbon boundaries:

# In:


def make_q_bezier(b):
    """Return the Plotly SVG path of a quadratic Bezier curve.

    b is the list of the three control points, each given as an (x, y) pair.

    Raises:
        ValueError: if b does not have exactly 3 points.
    """
    if len(b) != 3:
        raise ValueError('control poligon must have 3 points')
    A, B, C = b
    return f'M {A[0]}, {A[1]} Q {B[0]}, {B[1]} {C[0]}, {C[1]}'


# In:

b = [(1, 4), (-0.5, 2.35), (3.745, 1.47)]

# In:

make_q_bezier(b)

# make_ribbon_arc returns the Plotly SVG path corresponding to an arc represented by its end
# angular coordinates, theta0, theta1.

# In:


def make_ribbon_arc(theta0, theta1):
    """Return the SVG 'L x, y ' segments approximating the unit-circle arc theta0 -> theta1.

    Raises:
        ValueError: if an angle is outside [0, 2*pi), or if theta0 < theta1 and, after
            mapping both angles to [-pi, pi), their product is positive.
    """
    if not (test_2PI(theta0) and test_2PI(theta1)):
        raise ValueError('the angle coordinates for an arc side of a ribbon must be in [0, 2*pi]')

    if theta0 < theta1:
        theta0 = moduloAB(theta0, -pi, pi)
        theta1 = moduloAB(theta1, -pi, pi)
        if theta0 * theta1 > 0:
            raise ValueError('incorrect angle coordinates for ribbon')

    # Use at least 3 points, however short the arc is.
    nr = max(int(40 * (theta0 - theta1) / pi), 3)
    theta = np.linspace(theta0, theta1, nr)
    pts = np.exp(1j*theta)  # points in polar complex form, on the given arc
    return ''.join(f'L {p.real}, {p.imag} ' for p in pts)


# In:

make_ribbon_arc(np.pi/3, np.pi/6)

# Finally we are ready to define data and layout for the Plotly plot of the chord diagram.
# In:

# NOTE(review): the introduction states this notebook targets Plotly 3.+; confirm that
# `plotly.plotly` is still importable with the installed Plotly version.
import plotly.plotly as py
import plotly.graph_objs as go

# In:


def make_layout(title, plot_size):
    """Return the layout dict of a square plot of side plot_size with hidden axes."""
    return dict(title=title,
                xaxis=dict(visible=False),
                yaxis=dict(visible=False),
                showlegend=False,
                width=plot_size,
                height=plot_size,
                margin=dict(t=25, b=25, l=25, r=25),
                hovermode='closest',
                )


# Function that returns the Plotly shape of an ideogram:

# In:


def make_ideo_shape(path, line_color, fill_color):
    """Return the Plotly shape dict of an ideogram bounded by the SVG path.

    line_color is the color of the shape boundary.
    fill_color is the color assigned to an ideogram.
    """
    return dict(line=dict(color=line_color, width=0.45),
                path=path,
                layer='below',
                type='path',
                fillcolor=fill_color)


# We generate two types of ribbons: a ribbon connecting subarcs in two distinct ideograms,
# respectively a ribbon from one ideogram to itself (it corresponds to mapped_data[k][k], i.e.
# it gives the flow of comments from a community member to herself).

# In:


def make_ribbon(l, r, line_color, fill_color, radius=0.2):
    """Return the Plotly shape dict of a ribbon connecting two distinct arcs.

    l=[l[0], l[1]], r=[r[0], r[1]] represent the opposite arcs in the ribbon.
    line_color is the color of the shape boundary.
    fill_color is the fill color for the ribbon shape.
    radius is the distance to the origin of the middle Bezier control points.
    """
    poligon = ctrl_rib_chords(l, r, radius)
    b, c = poligon
    # Boundary: Bezier side l[0] -> r[0], arc r[0] -> r[1],
    # Bezier side r[1] -> l[1] (c reversed), arc l[1] -> l[0].
    return dict(line=dict(color=line_color, width=0.5),
                path=make_q_bezier(b) + make_ribbon_arc(r[0], r[1]) +
                     make_q_bezier(c[::-1]) + make_ribbon_arc(l[1], l[0]),
                type='path',
                layer='below',
                fillcolor=fill_color,
                )


def make_self_rel(l, line_color, fill_color, radius):
    """Return the Plotly shape dict of a ribbon from an ideogram to itself.

    l=[l[0], l[1]] is the arc of the ideogram covered by the ribbon.
    radius is the radius of Bezier control point b_1.
    """
    b = control_pts([l[0], (l[0]+l[1])/2, l[1]], radius)
    return dict(line=dict(color=line_color, width=0.5),
                path=make_q_bezier(b) + make_ribbon_arc(l[1], l[0]),
                type='path',
                layer='below',
                fillcolor=fill_color
                )


# In:


def invPerm(perm):
    """Return the inverse of the permutation perm, as a list."""
    inv = [0] * len(perm)
    for i, s in enumerate(perm):
        inv[s] = i
    return inv


# In:

layout = make_layout('Chord diagram', 400)

# Now let us explain the key point of associating ribbons to right data:
#
# From the definition of ribbon_ends we notice that
# ribbon_ends[k][j] corresponds to data stored in matrix[k][sigma[j]], where sigma is the
# permutation of indices $0, 1, \ldots L-1$, that sort the row k in mapped_data.
# If sigma_inv is the inverse permutation of sigma, we get that to matrix[k][j] corresponds the
# ribbon_ends[k][sigma_inv[j]].

# ribbon_info is a list of dicts setting the information that is displayed when hovering the
# mouse over the ribbon ends.

# Set the radius of Bézier control point, $b_1$, for each ribbon associated to a diagonal data
# entry:

# In:

radii_sribb = [0.4, 0.30, 0.35, 0.39, 0.12]  # these values are set after a few trials

# In:

ribbon_info = []
shapes = []
for k in range(L):
    sigma = idx_sort[k]
    sigma_inv = invPerm(sigma)
    for j in range(k, L):
        if matrix[k][j] == 0 and matrix[j][k] == 0:
            continue
        eta = idx_sort[j]
        eta_inv = invPerm(eta)
        l = ribbon_ends[k][sigma_inv[j]]

        if j == k:
            shapes.append(make_self_rel(l, 'rgb(175,175,175)',
                                        ideo_colors[k], radius=radii_sribb[k]))
            z = 0.9*np.exp(1j*(l[0]+l[1])/2)
            # the text below will be displayed when hovering the mouse over the ribbon
            text = f'{labels[k]} commented on {int(matrix[k][k])} of herself Fb posts'
            ribbon_info.append(go.Scatter(x=[z.real],
                                          y=[z.imag],
                                          mode='markers',
                                          marker=dict(size=0.5, color=ideo_colors[k]),
                                          text=text,
                                          hoverinfo='text'
                                          )
                               )
        else:
            r = ribbon_ends[j][eta_inv[k]]
            zi = 0.9 * np.exp(1j*(l[0]+l[1])/2)
            zf = 0.9 * np.exp(1j*(r[0]+r[1])/2)
            # texti and textf are the strings that will be displayed when hovering the mouse
            # over the two ribbon ends
            texti = f'{labels[k]} commented on {int(matrix[k][j])} of {labels[j]} Fb posts'
            textf = f'{labels[j]} commented on {int(matrix[j][k])} of {labels[k]} Fb posts'

            ribbon_info.append(go.Scatter(x=[zi.real],
                                          y=[zi.imag],
                                          mode='markers',
                                          marker=dict(size=0.5, color=ribbon_color[k][j]),
                                          text=texti,
                                          hoverinfo='text'
                                          )
                               )
            ribbon_info.append(go.Scatter(x=[zf.real],
                                          y=[zf.imag],
                                          mode='markers',
                                          marker=dict(size=0.5, color=ribbon_color[k][j]),
                                          text=textf,
                                          hoverinfo='text'
                                          )
                               )
            # IMPORTANT!!! Reverse these arc ends because otherwise you get a twisted ribbon
            r = (r[1], r[0])
            # append the ribbon shape
            shapes.append(make_ribbon(l, r, 'rgb(175,175,175)', ribbon_color[k][j]))

# `ideograms` is a list of dicts that set the position, and color of ideograms, as well as the
# information associated to each ideogram.
#

# In:

ideograms = []
for k in range(len(ideo_ends)):
    z = make_ideogram_arc(1.1, ideo_ends[k])   # outer boundary of the ideogram
    zi = make_ideogram_arc(1.0, ideo_ends[k])  # inner boundary of the ideogram
    m = len(z)
    n = len(zi)
    ideograms.append(go.Scatter(x=z.real,
                                y=z.imag,
                                mode='lines',
                                line=dict(color=ideo_colors[k], shape='spline', width=0.25),
                                # <br> is the line break in Plotly hover text
                                text=f'{labels[k]}<br>{int(row_sum[k])} comments',
                                hoverinfo='text'
                                )
                     )

    # Closed SVG path: outer arc, then the inner arc traversed backwards, then back to the
    # first outer point.
    Zi = zi[::-1]
    outer = ''.join(f'{z.real[s]}, {z.imag[s]} L ' for s in range(m))
    inner = ''.join(f'{Zi.real[s]}, {Zi.imag[s]} L ' for s in range(n))
    path = 'M ' + outer + inner + f'{z.real[0]} ,{z.imag[0]}'

    shapes.append(make_ideo_shape(path, 'rgb(150,150,150)', ideo_colors[k]))

# In:

data = ideograms + ribbon_info
layout['shapes'] = shapes
fig = go.Figure(data=data, layout=layout)

from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
init_notebook_mode(connected=True)
iplot(fig)

# Here is a chord diagram associated to a community of 8 Facebook friends:

# In:

HTML('')

# In:

from IPython.core.display import HTML


def css_styling():
    """Return the content of ./custom.css wrapped in an IPython HTML object."""
    # The context manager closes the file even if reading fails.
    with open("./custom.css", "r") as css_file:
        styles = css_file.read()
    return HTML(styles)


css_styling()

# In[ ]: