def pl(myloc='upper center', mylog=None):
    """Plot the power laws y = x^2, y = x and y = sqrt(x) on the current axes.

    myloc -- legend location string passed to legend(loc=...).
    mylog -- if equal to 'log', switch both axes to logarithmic scale.
    """
    # Exponents in descending order so they match the legend labels below.
    for k in (2, 1, .5):
        plot(power(arange(101), k))
    grid('on')
    ylim(1, 1e2)
    # Raw strings: '\s' in '$y=\sqrt{x}$' is an invalid string escape
    # (SyntaxWarning on modern Python); r'...' keeps the LaTeX intact.
    legend([r'$y=x^2$', r'$y=x$', r'$y=\sqrt{x}$'], loc=myloc)
    if mylog == 'log':
        xscale('log')
        yscale('log')
def nl(myloc='upper center', mylog=None):
    """Plot the decaying power laws y = 100/sqrt(x), y = 100/x and y = 100/x^2.

    myloc -- legend location string passed to legend(loc=...).
    mylog -- if equal to 'log', switch both axes to logarithmic scale.
    """
    x = arange(1, 101)
    # Exponent/colour pairs in ascending order to match the legend labels.
    for k, c in zip((.5, 1., 2.), ('r', 'g', 'b')):
        plot(x, 100 * power(x, -k), color=c)
    grid('on')
    ylim(1, 100)
    # Raw strings so the LaTeX escape \sqrt is not treated as a string escape.
    legend([r'$y=100/\sqrt{x}$', r'$y=100/x$', r'$y=100/x^2$'], loc=myloc)
    if mylog == 'log':
        xscale('log')
        yscale('log')
# 2x2 grid: increasing power laws (top row) and decaying ones (bottom row),
# each shown on linear axes (left) and log-log axes (right).
figure(figsize=(10, 10))
for position, draw, draw_args in (
    (221, pl, ()),
    (222, pl, ('upper left', 'log')),
    (223, nl, ()),
    (224, nl, ('upper right', 'log')),
):
    subplot(position)
    draw(*draw_args)
# Power-law weights p(x) ~ 1/x for x = 1..999, then their normalized
# cumulative sum (a CDF) used as bin edges for inverse-transform sampling:
# a uniform draw falls into bin k with probability plaw[k] / sum(plaw).
plaw = power(arange(1., 1000.), -1)
# REPL echo of plaw[:10], kept as a comment (the original pasted it as code):
# array([ 1. , 0.5 , 0.33333333, 0.25 , 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111, 0.1 ])
bins = [0.] + list(cumsum(plaw) / sum(plaw))
# REPL echo of bins[:10], kept as a comment:
# [0.0, 0.13360997973428795, 0.20041496960143193, 0.24495162951286123, 0.2783541244464332, 0.3050761203932908, 0.32734445034900544, 0.34643159031104659, 0.36313283777783256, 0.37797839108164233]
from collections import Counter
from scipy import stats
# Draw 6000 uniform samples and map each onto a power-law bin
# (inverse-transform sampling), then count occurrences per bin index.
counts = Counter(digitize(random.random(6000), bins, right=True))
x, y = zip(*counts.most_common())
# Fit a straight line in log-log space over the 32 most common bins;
# for a 1/x law the slope should come out close to -1.
xa, ya = array(x[:32]), array(y[:32])
slope, intercept, r_value, p_value, std_err = stats.linregress(log(xa), log(ya))
# print() call: the original Python-2 `print a, b` statement is a
# SyntaxError under Python 3.
print(slope, r_value, p_value, std_err)
# Sample output from one run, kept as a comment (the original pasted it
# as code, where it silently evaluated as -a - b - c - d):
# -0.99102945277 -0.987688784914 9.87508012194e-26 0.028656982924
# Log-log scatter of the empirical bin counts together with the fitted line.
figure(figsize=(5, 5))
axis((.8, 1e3, .8, 1e3))
grid('on')
loglog(x, y, 'o')
# Evaluate the fit on an evenly spaced grid in log space, then map both
# coordinates back through exp() for plotting on the log-log axes.
log_grid = arange(0, log(32) + .5, .5)
fit_line = exp(slope * log_grid + intercept)
plot(exp(log_grid), fit_line, 'r-', linewidth=1.5)
legend(['data', 'fit, slope=' + str(round(slope, 2))]);
The r_value returned by stats.linregress is also known as the
"Pearson correlation coefficient",
defined as
$${E[(X-\mu_X)(Y-\mu_Y)]\over \sigma_X\sigma_Y}$$