def pl(myloc='upper center', mylog=None):
    """Plot the power laws y = x^2, y = x and y = sqrt(x) on the current axes.

    myloc -- legend location string passed to legend(loc=...).
    mylog -- if equal to 'log', switch both axes to logarithmic scale.
    """
    # Exponents in descending order so they match the legend labels below.
    for k in (2, 1, .5):
        plot(power(arange(101), k))
    grid('on')
    ylim(1, 1e2)
    # Raw strings: '\s' in '$y=\sqrt{x}$' is an invalid string escape
    # (SyntaxWarning on modern Python); r'...' keeps the LaTeX intact.
    legend([r'$y=x^2$', r'$y=x$', r'$y=\sqrt{x}$'], loc=myloc)
    if mylog == 'log':
        xscale('log')
        yscale('log')
def nl(myloc='upper center', mylog=None):
    """Plot the decaying power laws y = 100/sqrt(x), y = 100/x and y = 100/x^2.

    myloc -- legend location string passed to legend(loc=...).
    mylog -- if equal to 'log', switch both axes to logarithmic scale.
    """
    x = arange(1, 101)
    # Exponent/colour pairs in ascending order to match the legend labels.
    for k, c in zip((.5, 1., 2.), ('r', 'g', 'b')):
        plot(x, 100 * power(x, -k), color=c)
    grid('on')
    ylim(1, 100)
    # Raw strings so the LaTeX escape \sqrt is not treated as a string escape.
    legend([r'$y=100/\sqrt{x}$', r'$y=100/x$', r'$y=100/x^2$'], loc=myloc)
    if mylog == 'log':
        xscale('log')
        yscale('log')
# 2x2 grid: increasing power laws (top row) and decaying ones (bottom row),
# each shown on linear axes (left) and log-log axes (right).
figure(figsize=(10, 10))
for position, draw, draw_args in (
    (221, pl, ()),
    (222, pl, ('upper left', 'log')),
    (223, nl, ()),
    (224, nl, ('upper right', 'log')),
):
    subplot(position)
    draw(*draw_args)
# Power-law weights p(x) ~ 1/x for x = 1..999, then their normalized
# cumulative sum (a CDF) used as bin edges for inverse-transform sampling:
# a uniform draw falls into bin k with probability plaw[k] / sum(plaw).
plaw = power(arange(1., 1000.), -1)
# REPL echo of plaw[:10], kept as a comment (the original pasted it as code):
# array([ 1. , 0.5 , 0.33333333, 0.25 , 0.2 , 0.16666667, 0.14285714, 0.125 , 0.11111111, 0.1 ])
bins = [0.] + list(cumsum(plaw) / sum(plaw))
# REPL echo of bins[:10], kept as a comment:
# [0.0, 0.13360997973428795, 0.20041496960143193, 0.24495162951286123, 0.2783541244464332, 0.3050761203932908, 0.32734445034900544, 0.34643159031104659, 0.36313283777783256, 0.37797839108164233]
from collections import Counter
from scipy import stats
# Draw 6000 uniform samples and map each onto a power-law bin
# (inverse-transform sampling), then count occurrences per bin index.
counts = Counter(digitize(random.random(6000), bins, right=True))
x, y = zip(*counts.most_common())
# Fit a straight line in log-log space over the 32 most common bins;
# for a 1/x law the slope should come out close to -1.
xa, ya = array(x[:32]), array(y[:32])
slope, intercept, r_value, p_value, std_err = stats.linregress(log(xa), log(ya))
# print() call: the original Python-2 `print a, b` statement is a
# SyntaxError under Python 3.
print(slope, r_value, p_value, std_err)
# Sample output from one run, kept as a comment (the original pasted it
# as code, where it silently evaluated as -a - b - c - d):
# -0.99102945277 -0.987688784914 9.87508012194e-26 0.028656982924
# Log-log scatter of the empirical bin counts together with the fitted line.
figure(figsize=(5, 5))
axis((.8, 1e3, .8, 1e3))
grid('on')
loglog(x, y, 'o')
# Evaluate the fit on an evenly spaced grid in log space, then map both
# coordinates back through exp() for plotting on the log-log axes.
log_grid = arange(0, log(32) + .5, .5)
fit_line = exp(slope * log_grid + intercept)
plot(exp(log_grid), fit_line, 'r-', linewidth=1.5)
legend(['data', 'fit, slope=' + str(round(slope, 2))]);
The r_value returned by stats.linregress is also known as the
"Pearson correlation coefficient",
defined as
$${E[(X-\mu_X)(Y-\mu_Y)]\over \sigma_X\sigma_Y}$$