import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import networkx as nx
%matplotlib inline
import collections
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings("ignore")
# Load the edge list from "chess/out.chess" into a directed multigraph.
# The first line of the file is a header ('% asym multisigned\n') and is
# skipped; every other row is read as "<source> <target> <weight> <timestamp>".
# NOTE(review): presumably a KONECT-format dump -- confirm against the dataset.
G = nx.MultiDiGraph()
edges = []
timestamps = []
weights = []
# The context manager closes the file even when a malformed row raises.
with open("chess/out.chess") as f:
    f.readline()  # header line: '% asym multisigned\n'
    for line in f:
        x = line.split()
        # Skip blank lines and any further '%'-prefixed header/comment lines
        # rather than crashing on them or parsing them as edges.
        if not x or x[0].startswith("%"):
            continue
        source, target = x[0], x[1]
        weight, timestamp = int(x[2]), float(x[3])
        G.add_edge(source, target, weight=weight, timestamp=timestamp)
        edges.append((source, target))
        weights.append(weight)
        timestamps.append(timestamp)
# Number of nodes in the full graph; asks the graph directly instead of
# materialising a throwaway list of all nodes.
G.number_of_nodes()
7301
# Number of edges in the full graph (parallel edges are counted separately);
# avoids materialising a throwaway list of all edges.
G.number_of_edges()
65053
# Is the graph connected when edge directions are ignored?  Weak connectivity
# of the directed graph is the same property as connectivity of its undirected
# version, and it avoids deep-copying the whole graph just for this check.
nx.is_weakly_connected(G)
False
# Connected components with edge directions ignored, largest first.
# weakly_connected_components yields the same node sets as
# connected_components on an undirected copy, without building that copy.
comps = sorted(nx.weakly_connected_components(G), key=len, reverse=True)
# Print the node count of every component on a single line.
for comp in comps:
    print(len(comp), end=' ')
7115 7 6 5 5 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
# Print the edge count of every component (same order as `comps`) on one line.
for comp in comps:
    G_ = G.subgraph(comp)
    # Ask the subgraph for its edge count instead of listing all its edges.
    print(G_.number_of_edges(), end=' ')
64926 11 7 4 5 3 3 4 3 3 2 2 2 2 3 4 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 2 1 2 1 1 1 1 1 1 1 2 1 1 1
Действительно, почти вся информация сосредоточена в одной компоненте связности: 7115 из 7301 вершин и 64926 из 65053 рёбер.
# Restrict all further analysis to the first component in `comps`
# (the list is sorted by size in descending order, so this is the largest).
G = G.subgraph(comps[0])
# Number of nodes left after restricting G; no throwaway node list is built.
G.number_of_nodes()
7115
# Number of edges left after restricting G (parallel edges counted
# separately); no throwaway edge list is built.
G.number_of_edges()
64926
# Distribution plot of node degrees in G, saved to degree.pdf.
degree_sequence = sorted((deg for _, deg in G.degree()), reverse=True)
fig, ax = plt.subplots(figsize=(7, 7))
_ = sns.distplot(degree_sequence, bins=20, ax=ax)
ax.set_xlabel("Degree")
ax.set_ylabel("Nodes rate")
fig.savefig("degree.pdf")
# Distribution plot of node in-degrees in G, saved to degree_in.pdf.
in_degree_sequence = sorted((deg for _, deg in G.in_degree()), reverse=True)
fig, ax = plt.subplots(figsize=(7, 7))
_ = sns.distplot(in_degree_sequence, bins=20, ax=ax)
ax.set_xlabel("In_Degree")
ax.set_ylabel("Nodes rate")
fig.savefig("degree_in.pdf")
# Distribution plot of node out-degrees in G, saved to degree_out.pdf.
out_degree_sequence = sorted((deg for _, deg in G.out_degree()), reverse=True)
fig, ax = plt.subplots(figsize=(7, 7))
_ = sns.distplot(out_degree_sequence, bins=20, ax=ax)
ax.set_xlabel("Out_Degree")
ax.set_ylabel("Nodes rate")
fig.savefig("degree_out.pdf")
# Histogram of hop distances over all unordered node pairs of the
# undirected version of G.
G_ = G.to_undirected()
nodes = list(G_.nodes())
from tqdm import tqdm

# Position of each node in `nodes`; used to count every unordered pair once.
node_index = {node: i for i, node in enumerate(nodes)}
pair_counts = collections.Counter()
for i, source in enumerate(tqdm(nodes)):
    # One breadth-first search per source gives the distance to every
    # reachable node, instead of a separate search for each pair.
    hops_from_source = nx.single_source_shortest_path_length(G_, source)
    for target, hops in hops_from_source.items():
        # Only count targets that come later in `nodes`: this skips the
        # source itself (distance 0) and avoids counting a pair twice.
        if node_index[target] > i:
            pair_counts[hops] += 1

# d maps hop distance -> number of node pairs.  Keys are inserted in ascending
# order so that code iterating over d sees distances sorted.
# NOTE: unreachable pairs are simply not counted here (the per-pair lookup
# would raise for them instead).
d = collections.defaultdict(int, sorted(pair_counts.items()))
100%|██████████| 7115/7115 [35:06<00:00, 3.38it/s]
# Display the hop-distance histogram (distance -> number of node pairs).
d
defaultdict(int, {1: 55779, 2: 1184701, 3: 6950816, 4: 10011625, 5: 5094235, 6: 1516817, 7: 380233, 8: 89956, 9: 19818, 10: 3513, 11: 492, 12: 64, 13: 6})
# Parallel arrays for the hop-distance histogram: dist[i] is a hop distance and
# p[i] is the number of node pairs at that distance.
# A dict iterates in insertion order, which need not be ascending distance, so
# sort explicitly: order-sensitive consumers (cumulative sums, line plots)
# depend on it.
dist = np.array(sorted(d))
p = np.array([d[hops] for hops in dist.tolist()])
Средняя длина кратчайшего пути между парами вершин.
# Mean hop distance: distances weighted by the number of pairs at each one.
np.dot(p, dist) / p.sum()
4.010471567253983
# Number of node pairs at each hop distance, saved to hop_distance.pdf.
# The line plot connects points in the order given, and dict order is not
# guaranteed to be ascending distance, so sort by distance first.
hop_histogram = sorted(d.items())
hop_values = [hops for hops, _ in hop_histogram]
pair_totals = [count for _, count in hop_histogram]
plt.figure(figsize=(7,5))
plt.plot(hop_values, pair_totals)
plt.scatter(hop_values, pair_totals)
plt.xlabel("Hop distance")
plt.ylabel("Number of pairs with given distance")
plt.savefig("hop_distance.pdf")
# Cumulative number of node pairs per hop distance, saved to
# hop_distance_cum.pdf.
# NOTE(review): the running sum only means "less or equal distance" if `dist`
# is in ascending order -- confirm where `dist`/`p` are built.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(dist, np.cumsum(p))
ax.scatter(dist, np.cumsum(p))
ax.set_xlabel("Hop distance")
ax.set_ylabel("Number of pairs with less or equal distance")
fig.savefig("hop_distance_cum.pdf")
# Build a simple undirected graph from the endpoints of G_'s edges: repeated
# (u, v) pairs collapse into one edge and no edge attributes are carried over.
_G_ = nx.Graph()
_G_.add_edges_from(G_.edges())
Глобальный коэффициент кластеризации (транзитивность).
# Transitivity (global clustering coefficient) of the simple graph _G_.
nx.transitivity(_G_)
0.1258356478709473
Среднее по локальным коэффициентам кластеризации вершин.
# Average clustering coefficient over the nodes of the simple graph _G_.
nx.average_clustering(_G_)
0.17939575805920835
# Number of nodes in G.
G.number_of_nodes()
7115
Средняя степень вершины в мультиграфе
# Mean node degree in G (parallel edges each contribute to the degree).
np.mean([deg for _, deg in G.degree()])
18.250456781447646
Средняя степень вершины в простом графе (без кратных рёбер).
# Mean node degree in the simple graph _G_.
np.mean([deg for _, deg in _G_.degree()])
15.679269149683767
# Run label-propagation community detection on G_.  `c` keeps the communities
# with more than 100 members; `lengths` records community sizes.
# NOTE(review): the source's indentation was lost; this assumes the size of
# EVERY community is recorded in `lengths`, not only the large ones -- confirm.
coms = nx.algorithms.community.label_propagation_communities(G_)
c, lengths = [], []
for com in coms:
    size = len(com)
    lengths.append(size)
    if size > 100:
        c.append(com)
# Number of communities kept in `c`.
len(c)
2
# Scatter of how many communities have each size (log-scaled size axis),
# saved to community_scatter.pdf.
x, y = np.unique(lengths, return_counts=True)
fig, ax = plt.subplots(figsize=(7, 6))
ax.scatter(x, y)
ax.set_xscale('log')
ax.set_xlabel('Members in communities')
ax.set_ylabel('Communities with given number of members')
fig.savefig('community_scatter.pdf')
# Draw the subgraph of G induced by the first kept community and save it to
# community_1.pdf.
# NOTE(review): this drawing uses G while the drawing of c[1] uses G_ --
# confirm the difference is intended.
plt.figure(figsize=(7, 7))
first_community_graph = G.subgraph(c[0])
nx.draw(first_community_graph, node_size=50)
plt.savefig("community_1.pdf")
# Draw the subgraph of G_ induced by the second kept community and save it to
# community_2.pdf.
plt.figure(figsize=(10, 10))
second_community_graph = G_.subgraph(c[1])
nx.draw(second_community_graph, node_size=10)
plt.savefig("community_2.pdf")
# Re-run label propagation inside the second kept community to see whether it
# splits further.  `c_new` keeps sub-communities with more than 100 members.
# NOTE(review): the source's indentation was lost; this assumes the size of
# EVERY sub-community is recorded in `lengths` -- confirm.
coms = nx.algorithms.community.label_propagation_communities(G_.subgraph(c[1]))
c_new, lengths = [], []
for com in coms:
    size = len(com)
    lengths.append(size)
    if size > 100:
        c_new.append(com)
# Number of sub-communities kept in `c_new`.
len(c_new)
1
# Size of the first kept sub-community.
len(c_new[0])
6310
# Run label propagation once more, now inside the first kept sub-community;
# `c_new` and `lengths` are rebuilt from scratch.
# NOTE(review): the source's indentation was lost; this assumes the size of
# EVERY sub-community is recorded in `lengths` -- confirm.
coms = nx.algorithms.community.label_propagation_communities(G_.subgraph(c_new[0]))
c_new, lengths = [], []
for com in coms:
    size = len(com)
    lengths.append(size)
    if size > 100:
        c_new.append(com)
# Size of the first kept sub-community after the repeated run.
len(c_new[0])
6310
# Draw the subgraph of G_ induced by both kept communities together.
# NOTE(review): "mished" in the output file name looks like a typo; it is left
# unchanged because other artefacts may reference the file.
plt.figure(figsize=(10, 10))
both_communities = c[0].union(c[1])
nx.draw(G_.subgraph(both_communities), node_size=10)
plt.savefig("community_mished.pdf")
# Degree centrality of every node in G, plotted in ascending order and saved
# to Degree_centrality.pdf.  `nod_deg[ind]` lists nodes from least to most
# central.
d = nx.degree_centrality(G)
nod_deg = np.array(list(d))
deg = np.array([d[node] for node in d])
ind = np.argsort(deg)
fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(deg[ind])
ax.set_ylabel("Degree centrality")
ax.set_xlabel("Sorted nodes")
fig.savefig("Degree_centrality.pdf")
# Draw G with a spring layout: every node as a small red dot, the 50 nodes
# with the highest degree centrality as larger blue dots on top, then the
# edges.  Saved to Centrality_points.pdf.
pos = nx.spring_layout(G)
plt.figure(figsize=(10, 10))
node_layers = (
    (list(G), 'r', 10),
    # `ind` sorts ascending, so its last 50 entries are the most central nodes.
    (list(nod_deg[ind[-50:]]), 'b', 50),
)
for layer_nodes, layer_color, layer_size in node_layers:
    nx.draw_networkx_nodes(
        G,
        pos,
        nodelist=layer_nodes,
        node_color=layer_color,
        node_size=layer_size,
        alpha=0.8,
    )
nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.5)
plt.savefig("Centrality_points.pdf")
# Closeness centrality of every node in G, plotted in ascending order and
# saved to Close_centrality.pdf.
# NOTE(review): this is computed on G, while the eigenvector and betweenness
# centralities use the simple graph _G_ -- confirm that is intended.
d = nx.closeness_centrality(G)
nod_close = np.array(list(d))
close = np.array([d[node] for node in d])
ind_close = np.argsort(close)
fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(close[ind_close])
ax.set_ylabel("Closeness centrality")
ax.set_xlabel("Sorted nodes")
fig.savefig("Close_centrality.pdf")
# Eigenvector centrality of every node in the simple graph _G_, plotted in
# ascending order and saved to Eigen_centrality.pdf.
d = nx.eigenvector_centrality(_G_)
nod_eigen = np.array(list(d))
eigen = np.array([d[node] for node in d])
ind_eigen = np.argsort(eigen)
fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(eigen[ind_eigen])
ax.set_ylabel("Eigen centrality")
ax.set_xlabel("Sorted nodes")
fig.savefig("Eigen_centrality.pdf")
# Draw G with a freshly computed spring layout: every node as a small red dot,
# the 50 nodes with the highest eigenvector centrality as larger blue dots on
# top, then the edges.  Saved to Centrality_points_eigen.pdf.
pos = nx.spring_layout(G)
plt.figure(figsize=(10, 10))
node_layers = (
    (list(G), 'r', 10),
    # `ind_eigen` sorts ascending, so its last 50 entries are the most central.
    (list(nod_eigen[ind_eigen[-50:]]), 'b', 50),
)
for layer_nodes, layer_color, layer_size in node_layers:
    nx.draw_networkx_nodes(
        G,
        pos,
        nodelist=layer_nodes,
        node_color=layer_color,
        node_size=layer_size,
        alpha=0.8,
    )
nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.5)
plt.savefig("Centrality_points_eigen.pdf")
# Betweenness centrality of every node in the simple graph _G_, plotted in
# ascending order and saved to Betweenness_centrality.pdf.
d = nx.betweenness_centrality(_G_)
nod_betw = np.array(list(d))
betw = np.array([d[node] for node in d])
ind_betw = np.argsort(betw)
fig, ax = plt.subplots(figsize=(7, 7))
ax.plot(betw[ind_betw])
ax.set_ylabel("Betweenness centrality")
ax.set_xlabel("Sorted nodes")
fig.savefig("Betweenness_centrality.pdf")