# Exploratory look at the row-sum ("outlinks") and column-sum ("inlinks")
# distributions of the matrix stored under key 'A' in A.mat.
# NOTE(review): A is presumably a graph adjacency matrix -- confirm.
#
# numpy and pyplot are imported explicitly so the script does not depend on
# an interactive pylab namespace for log/linspace/step/ylim.
# NOTE(review): when run as a plain script, successive plots land on the
# current axes; add plt.figure() between sections if separate figures are
# wanted.
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
from numpy import array
from pandas import Series
from scipy.io import loadmat

A = loadmat('A.mat')['A'].astype('int32')

# ravel() flattens the (n, 1) / (1, n) result that a sparse matrix's sum()
# returns, and is a no-op on the already 1-D result of a dense array.
outlinks = Series(array(A.sum(axis=1)).ravel())
inlinks = Series(array(A.sum(axis=0)).ravel())

# ---- outlinks ---------------------------------------------------------------
outlinks.describe()
outlinks.hist(bins=20)

# log(count + 1) keeps zero counts finite; log=True puts the bar heights on a
# log scale as well.
np.log(outlinks + 1).hist(bins=20, log=True)

# Normalised cumulative histogram, zoomed in on the upper tail.
# `density` replaces the `normed` keyword removed from Matplotlib.
outlinks.hist(density=True, cumulative=True, bins=30, histtype='step')
plt.ylim(.94, 1)

# Empirical CDF evaluated on an evenly spaced grid, drawn against
# log(count + 1).
ecdf = sm.distributions.ECDF(outlinks)
x = np.linspace(outlinks.min(), outlinks.max())
y = ecdf(x)
plt.step(np.log(x + 1), y)
plt.ylim(.9, 1)

# ---- inlinks ----------------------------------------------------------------
inlinks.describe()
inlinks.hist(bins=20)
np.log(inlinks + 1).hist(bins=20, log=True)

ecdf = sm.distributions.ECDF(inlinks)
x = np.linspace(inlinks.min(), inlinks.max())
y = ecdf(x)
plt.step(np.log(x + 1), y)
plt.ylim(.95, 1)