%matplotlib inline
import networkx as nx
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
G=nx.Graph() # G = nx.DiGraph() # directed network
# Add an (isolated) node
G.add_node("spam")
# Add an edge; its endpoint nodes are added together with the link
G.add_edge(1,2)
print(G.nodes())
print(G.edges())
[1, 2, 'spam'] [(1, 2)]
# Draw the network
nx.draw(G, with_labels = True)
http://www3.nd.edu/~networks/resources.htm
https://pan.baidu.com/s/1o86ZaTc
World-Wide-Web: [README] [DATA] Réka Albert, Hawoong Jeong and Albert-László Barabási: Diameter of the World Wide Web Nature 401, 130 (1999) [ PDF ]
# Build an undirected graph from the WWW edge list: one "source target" pair
# per line, separated by a single space. Node ids are kept as strings.
# NOTE(review): absolute, machine-specific path — point this at your local copy of the data.
G = nx.Graph()
with open ('/Users/datalab/bigdata/cjc/www.dat.gz.txt') as f:
    for line in f:
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the two-value unpack below.
            continue
        x, y = line.split(' ')
        G.add_edge(x,y)
# NOTE(review): nx.info() was removed in NetworkX 3.0; on newer versions use print(G).
nx.info(G)
'Name: \nType: Graph\nNumber of nodes: 325729\nNumber of edges: 1117563\nAverage degree: 6.8619'
我们从karate_club_graph开始,探索网络的基本性质。
# Load Zachary's karate club network and colour each member by faction.
G = nx.karate_club_graph()
# Use G.nodes[...] for attribute access: the G.node alias was removed in NetworkX 2.4.
clubs = [G.nodes[i]['club'] for i in G.nodes()]
# Red for members of the 'Mr. Hi' club, green for everyone else.
colors = ['r' if club == 'Mr. Hi' else 'g' for club in clubs]
nx.draw(G, with_labels = True, node_color = colors)
G.nodes[1], G.nodes[9] # attribute dicts of node 1 and node 9 (G.node was removed in NetworkX 2.4)
({'club': 'Mr. Hi'}, {'club': 'Officer'})
G.edges# view of all edges in the graph, each given as a pair of node ids
#dir(G)
EdgeView([(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 10), (0, 11), (0, 12), (0, 13), (0, 17), (0, 19), (0, 21), (0, 31), (1, 17), (1, 2), (1, 3), (1, 21), (1, 19), (1, 7), (1, 13), (1, 30), (2, 3), (2, 32), (2, 7), (2, 8), (2, 9), (2, 27), (2, 28), (2, 13), (3, 7), (3, 12), (3, 13), (4, 10), (4, 6), (5, 16), (5, 10), (5, 6), (6, 16), (8, 32), (8, 30), (8, 33), (9, 33), (13, 33), (14, 32), (14, 33), (15, 32), (15, 33), (18, 32), (18, 33), (19, 33), (20, 32), (20, 33), (22, 32), (22, 33), (23, 32), (23, 25), (23, 27), (23, 29), (23, 33), (24, 25), (24, 27), (24, 31), (25, 31), (26, 33), (26, 29), (27, 33), (28, 33), (28, 31), (29, 32), (29, 33), (30, 33), (30, 32), (31, 32), (31, 33), (32, 33)])
nx.info(G)
"Name: Zachary's Karate Club\nType: Graph\nNumber of nodes: 34\nNumber of edges: 78\nAverage degree: 4.5882"
list(G.nodes())[:3]
[0, 1, 2]
list(G.edges())[:3]
[(0, 1), (0, 2), (0, 3)]
print(*G.neighbors(1))
0 17 2 3 21 19 7 13 30
nx.average_shortest_path_length(G)
2.408199643493761
nx.diameter(G)# diameter of graph G (the length of the longest shortest path)
5
nx.density(G)
0.13903743315508021
# Density by hand: the number of edges divided by the number of possible
# node pairs, N*(N-1)/2. This should agree with nx.density(G).
nodeNum = G.number_of_nodes()
edgeNum = G.number_of_edges()
2.0 * edgeNum / (nodeNum * (nodeNum - 1))
0.13903743315508021
cc = nx.clustering(G)
cc.items()
dict_items([(0, 0.15), (1, 0.3333333333333333), (2, 0.24444444444444444), (3, 0.6666666666666666), (4, 0.6666666666666666), (5, 0.5), (6, 0.5), (7, 1.0), (8, 0.5), (9, 0), (10, 0.6666666666666666), (11, 0), (12, 1.0), (13, 0.6), (14, 1.0), (15, 1.0), (16, 1.0), (17, 1.0), (18, 1.0), (19, 0.3333333333333333), (20, 1.0), (21, 1.0), (22, 1.0), (23, 0.4), (24, 0.3333333333333333), (25, 0.3333333333333333), (26, 1.0), (27, 0.16666666666666666), (28, 0.3333333333333333), (29, 0.6666666666666666), (30, 0.5), (31, 0.2), (32, 0.19696969696969696), (33, 0.11029411764705882)])
# Histogram of the per-node clustering coefficients.
# The dict view is materialised as a list so plt.hist receives a plain sequence
# (consistent with the other cells that wrap .values() in list()).
plt.hist(list(cc.values()), bins = 15)
# Raw strings keep the mathtext thin-space command \, from being read as an
# (invalid) Python escape sequence.
plt.xlabel(r'$Clustering \, Coefficient, \, C$', fontsize = 20)
plt.ylabel(r'$Frequency, \, F$', fontsize = 20)
plt.show()
In a math environment, LaTeX ignores the spaces you type and puts in the spacing that it thinks is best. LaTeX formats mathematics the way it's done in mathematics texts. If you want different spacing, LaTeX provides the following four commands for use in math mode:
\; - a thick space
\: - a medium space
\, - a thin space
\! - a negative thin space
# M. E. J. Newman, Mixing patterns in networks Physical Review E, 67 026126, 2003
nx.degree_assortativity_coefficient(G) # compute the degree assortativity of the graph
-0.47561309768461457
# Toy example: two disconnected edges whose endpoints share the same 'size'
# attribute, used to demonstrate numeric assortativity on that attribute.
Ge=nx.Graph()
Ge.add_nodes_from([0,1],size=2)
Ge.add_nodes_from([2,3],size=3)
Ge.add_edges_from([(0,1),(2,3)])
# Look the attribute up by name (rather than "first value in the attribute dict")
# and via G.nodes, since the G.node alias was removed in NetworkX 2.4.
node_size = [Ge.nodes[i]['size']*1000 for i in Ge.nodes()]
nx.draw(Ge, with_labels = True, node_size = node_size)
print(nx.numeric_assortativity_coefficient(Ge,'size'))
1.0
# plot degree correlation
from collections import defaultdict
import numpy as np
# Degree correlation: for every degree k, the average over all nodes of degree k
# of the mean degree of their neighbours, <knn(k)>.
l=defaultdict(list)
g = nx.karate_club_graph()
for i in g.nodes():
    k = [g.degree(j) for j in g.neighbors(i)]
    if k:
        # Skip isolated nodes: they have no neighbours to average over.
        l[g.degree(i)].append(np.mean(k))
x = list(l.keys())
y = [np.mean(i) for i in l.values()]
# Raw strings keep mathtext commands such as \; from being read as Python escapes.
plt.plot(x, y, 'ro', label = r'$Karate\;Club$')
plt.legend(loc=1,fontsize=10, numpoints=1)
plt.xscale('log'); plt.yscale('log')
# The closing '>' belongs inside the math expression.
plt.ylabel(r'$<knn(k)>$', fontsize = 20)
plt.xlabel('$k$', fontsize = 20)
plt.show()
# Compute three centrality measures for every node of G and show their
# distributions side by side.
dc = nx.degree_centrality(G)
closeness = nx.closeness_centrality(G)
betweenness= nx.betweenness_centrality(G)
fig = plt.figure(figsize=(15, 4),facecolor='white')
# Dict views are materialised as lists so plt.hist receives plain sequences;
# raw strings keep the mathtext thin-space command \, intact.
ax = plt.subplot(1, 3, 1)
plt.hist(list(dc.values()), bins = 20)
plt.xlabel(r'$Degree \, Centrality$', fontsize = 20)
plt.ylabel(r'$Frequency, \, F$', fontsize = 20)
ax = plt.subplot(1, 3, 2)
plt.hist(list(closeness.values()), bins = 20)
plt.xlabel(r'$Closeness \, Centrality$', fontsize = 20)
ax = plt.subplot(1, 3, 3)
plt.hist(list(betweenness.values()), bins = 20)
plt.xlabel(r'$Betweenness \, Centrality$', fontsize = 20)
plt.tight_layout()
plt.show()
# One marker per node: x = degree centrality, y = closeness centrality,
# marker size scaled by betweenness centrality.
fig = plt.figure(figsize=(15, 8),facecolor='white')
for k in betweenness:
    plt.scatter(dc[k], closeness[k], s = betweenness[k]*10000)
    # Label each marker with its node id, nudged upwards so it does not overlap the marker.
    plt.text(dc[k], closeness[k]+0.02, str(k))
# Raw strings keep the mathtext thin-space command \, intact.
plt.xlabel(r'$Degree \, Centrality$', fontsize = 20)
plt.ylabel(r'$Closeness \, Centrality$', fontsize = 20)
plt.show()
from collections import defaultdict
import numpy as np
def plotDegreeDistribution(G):
    """Plot the degree distribution P(K) of a graph on log-log axes.

    Parameters
    ----------
    G : graph-like
        Any object whose ``degree()`` result can be passed to ``dict`` to
        obtain a node -> degree mapping.

    Returns
    -------
    None
        The figure is drawn with pyplot and shown. Nothing is drawn when the
        graph has no nodes.
    """
    degrees = list(dict(G.degree()).values())
    if not degrees:
        # An empty graph has no distribution to plot.
        return
    # Sorted distinct degrees and the number of nodes having each of them.
    x, counts = np.unique(degrees, return_counts=True)
    # Normalise the counts to relative frequencies so the y values sum to 1.
    y = counts / float(counts.sum())
    plt.plot(x, y, 'b-o')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P(K)$', fontsize = 20)
    # Raw string keeps the mathtext thin-space command \, intact.
    plt.title(r'$Degree\,Distribution$', fontsize = 20)
    plt.show()
G = nx.karate_club_graph()
plotDegreeDistribution(G)
import networkx as nx
import matplotlib.pyplot as plt
RG = nx.random_graphs.random_regular_graph(3,200)
# Generate a regular graph RG with 200 nodes, each having 3 neighbours
pos = nx.spectral_layout(RG)
# Define a layout; the spectral layout is used here. Other layouts are used in later cells — note how the drawings differ
nx.draw(RG,pos,with_labels=False,node_size = range(1, 201))
# Draw the regular graph; with_labels decides whether nodes carry labels (ids), node_size sets the size of each node marker
plt.show() # display the figure
plotDegreeDistribution(RG)
/Users/datalab/Applications/anaconda/lib/python3.5/site-packages/matplotlib/axes/_base.py:3443: UserWarning: Attempting to set identical bottom==top results in singular transformations; automatically expanding. bottom=1.0, top=1.0 'bottom=%s, top=%s') % (bottom, top))
import networkx as nx
import matplotlib.pyplot as plt
ER = nx.random_graphs.erdos_renyi_graph(200,0.1)
# Generate a random graph with 200 nodes, connecting node pairs with probability 0.1
pos = nx.spring_layout(ER)
# Define a layout; the spring layout is used here
nx.draw(ER,pos,with_labels=False,node_size = 30)
plt.show()
#ER = nx.random_graphs.erdos_renyi_graph(2000,0.1)
plotDegreeDistribution(ER)
import networkx as nx
import matplotlib.pyplot as plt
WS = nx.random_graphs.watts_strogatz_graph(200,4,0.3)
# Generate a small-world network with 200 nodes, 4 nearest neighbours per node and rewiring probability 0.3
pos = nx.spring_layout(WS)
# Define a layout; the spring layout is used here
nx.draw(WS,pos,with_labels=False,node_size = 30)
# Draw the graph
plt.show()
plotDegreeDistribution(WS)
nx.diameter(WS)
8
# Distribution of the per-node clustering coefficients of the small-world graph.
cc = nx.clustering(WS)
# Materialise the dict view as a list so plt.hist receives a plain sequence
# (consistent with the list(cc.values()) used when averaging the same values).
plt.hist(list(cc.values()), bins = 10)
# Raw strings keep the mathtext thin-space command \, intact.
plt.xlabel(r'$Clustering \, Coefficient, \, C$', fontsize = 20)
plt.ylabel(r'$Frequency, \, F$', fontsize = 20)
plt.show()
import numpy as np
np.mean(list(cc.values()))
0.15130952380952378
import networkx as nx
import matplotlib.pyplot as plt
BA= nx.random_graphs.barabasi_albert_graph(200,2)
# Generate a BA scale-free network with n=200, m=2
pos = nx.spring_layout(BA)
# Define a layout; the spring layout is used here
nx.draw(BA,pos,with_labels=False,node_size = 30)
# Draw the graph
plt.show()
plotDegreeDistribution(BA)
BA= nx.random_graphs.barabasi_albert_graph(20000,2)
# Generate a larger BA scale-free network with n=20000, m=2
plotDegreeDistribution(BA)
import networkx as nx
import matplotlib.pyplot as plt
BA= nx.random_graphs.barabasi_albert_graph(500,1)
# Generate a BA scale-free network with n=500, m=1
pos = nx.spring_layout(BA)
# Define a layout; the spring layout is used here
nx.draw(BA,pos,with_labels=False,node_size = 30)
# Draw the graph
plt.show()
nx.degree_histogram(BA)[:3]
[0, 334, 82]
list(dict(BA.degree()).items())[:3]
[(0, 4), (1, 20), (2, 48)]
plt.hist( list(dict(BA.degree()).values()) )
plt.show()
from collections import defaultdict
import numpy as np
def plotDegreeDistributionLongTail(G):
    """Plot the degree distribution of a graph on linear axes.

    On linear axes a heavy-tailed distribution shows up as a long tail.

    Parameters
    ----------
    G : graph-like
        Any object whose ``degree()`` result can be passed to ``dict`` to
        obtain a node -> degree mapping.

    Returns
    -------
    None
        The figure is drawn with pyplot and shown. Nothing is drawn when the
        graph has no nodes.
    """
    degrees = list(dict(G.degree()).values())
    if not degrees:
        # An empty graph has no distribution to plot.
        return
    # Sorted distinct degrees and the number of nodes having each of them.
    x, counts = np.unique(degrees, return_counts=True)
    # Normalise the counts to relative frequencies so the y values sum to 1.
    y = counts / float(counts.sum())
    plt.plot(x, y, 'b-o')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P_K$', fontsize = 20)
    # Raw string keeps the mathtext thin-space command \, intact.
    plt.title(r'$Degree\,Distribution$', fontsize = 20)
    plt.show()
BA= nx.random_graphs.barabasi_albert_graph(5000,2)
# Generate a BA scale-free network with n=5000, m=2
plotDegreeDistributionLongTail(BA)
def plotDegreeDistribution(G):
    """Plot the degree distribution P(K) of a graph on log-log axes.

    The y values are relative frequencies (count of nodes with degree K
    divided by the total number of nodes), matching the ``P(K)`` axis label.

    Parameters
    ----------
    G : graph-like
        Any object whose ``degree()`` result can be passed to ``dict`` to
        obtain a node -> degree mapping.

    Returns
    -------
    None
        The figure is drawn with pyplot and shown. Nothing is drawn when the
        graph has no nodes.
    """
    degrees = list(dict(G.degree()).values())
    if not degrees:
        # An empty graph has no distribution to plot.
        return
    # Sorted distinct degrees and the number of nodes having each of them.
    x, counts = np.unique(degrees, return_counts=True)
    # Normalise: the total used to be computed but never applied, so raw
    # counts ended up on an axis labelled as a probability.
    y = counts / float(counts.sum())
    plt.plot(x, y, 'b-o')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P(K)$', fontsize = 20)
    # Raw string keeps the mathtext thin-space command \, intact.
    plt.title(r'$Degree\,Distribution$', fontsize = 20)
    plt.show()
BA= nx.random_graphs.barabasi_albert_graph(50000,2)
# Generate a BA scale-free network with n=50000, m=2
plotDegreeDistribution(BA)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-3bcb20280fb4> in <module>() 15 plt.show() 16 ---> 17 BA= nx.random_graphs.barabasi_albert_graph(50000,2) 18 #生成n=20、m=1的BA无标度网络 19 plotDegreeDistribution(BA) NameError: name 'nx' is not defined
# Average shortest-path length <d> of BA networks (m=2) of increasing size N.
Ns = [i*10 for i in [1, 10, 100, 1000]]
ds = []
for N in Ns:
    # Print the current size as a progress indicator.
    print(N)
    BA= nx.random_graphs.barabasi_albert_graph(N,2)
    d = nx.average_shortest_path_length(BA)
    ds.append(d)
10 100 1000 10000
# Average path length against network size, with N on a logarithmic axis.
plt.gca().plot(Ns, ds, 'r-o')
plt.gca().set_xlabel('$N$', fontsize = 20)
plt.gca().set_ylabel('$<d>$', fontsize = 20)
plt.gca().set_xscale('log')
plt.show()