"""Least-squares line fit, Pearson correlation and logistic-curve demo.

Generates noisy linear data, fits a line with the closed-form least-squares
formulas, cross-checks the result against ``scipy.stats``, shows how shuffling
``y`` destroys the correlation, and finally plots two logistic curves.
"""
import random
from urllib.request import urlopen

import numpy as np
from scipy.stats import linregress, pearsonr


def least_squares_line(x, y):
    """Return ``(a, b)`` for the least-squares line ``y = a*x + b``.

    Uses the moment form ``a = (E[xy] - E[x]E[y]) / (E[x^2] - E[x]^2)`` and
    ``b = E[y] - a*E[x]``.  The result is undefined (division by zero) when
    every ``x`` value is identical.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    xbar = np.mean(x)
    ybar = np.mean(y)
    a = (x.dot(y) / n - xbar * ybar) / (x.dot(x) / n - xbar * xbar)
    b = ybar - a * xbar
    return a, b


def slope_std_err(x, y, a, b):
    """Return the standard error of the slope ``a`` of the line ``a*x + b``.

    Computed as ``sqrt(SSR / (n - 2) / sum((x - mean(x))**2))`` where ``SSR``
    is the sum of squared residuals; requires more than two points.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    residuals = a * x + b - y
    centered = x - np.mean(x)
    return np.sqrt(
        (1. / (n - 2)) * residuals.dot(residuals) / centered.dot(centered)
    )


def pearson_r(x, y):
    """Return Pearson's correlation coefficient of ``x`` and ``y``.

    Computed as the covariance divided by the product of the standard
    deviations, all taken with the ``1/n`` normalisation.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    xbar = np.mean(x)
    ybar = np.mean(y)
    covariance = x.dot(y) / n - xbar * ybar
    var_x = x.dot(x) / n - xbar * xbar
    var_y = y.dot(y) / n - ybar * ybar
    return covariance / np.sqrt(var_x * var_y)


def logit(x):
    """Return ``1 / (1 + exp(-x))`` element-wise.

    NOTE: despite its name this is the logistic (sigmoid) function, i.e. the
    inverse of the logit; the name is kept for compatibility.
    """
    return 1 / (1 + np.exp(-x))


if __name__ == "__main__":
    # Plotting/display packages are only needed for the demo itself, so they
    # are imported here and the helpers above stay importable without them.
    import matplotlib.pyplot as plt
    from IPython.display import Image, display

    # --- Synthetic data: y = 1.5*x plus Gaussian noise ---------------------
    n = 100
    x = 10 * np.random.rand(n)
    y = 1.5 * x + np.random.normal(0, 2.5, size=n)
    plt.figure(figsize=(6, 5))
    plt.plot(x, y, 'o')

    # --- Closed-form least-squares fit -------------------------------------
    xbar = np.mean(x)
    ybar = np.mean(y)
    a, b = least_squares_line(x, y)
    print(a, b)

    plt.figure(figsize=(6, 5))
    plt.plot(x, y, 'o')
    xr = np.arange(11)
    plt.plot(xr, a * xr + b, linewidth=3)

    # --- Cross-check against scipy -----------------------------------------
    slope, intercept, r_value, p_value, std_err = linregress(x, y)
    print('a =', slope, ', b=', intercept, ', \u03c3=', std_err)
    print("Pearson's r =", r_value, ', p-value=', p_value)

    # Hand-computed counterparts of std_err and r_value; printed explicitly
    # because a bare expression is discarded outside a notebook cell.
    print(slope_std_err(x, y, a, b))
    print(pearson_r(x, y))

    # --- Illustration of correlation coefficients --------------------------
    url = 'http://upload.wikimedia.org/wikipedia/commons/8/86/Correlation_coefficient.gif'
    with urlopen(url) as response:  # close the HTTP response once read
        display(Image(response.read(), width=425))

    print(pearsonr(x, y))

    # --- Shuffling y destroys the correlation ------------------------------
    # random.sample needs a real sequence (not an ndarray) on Python 3;
    # sampling all n items without replacement yields a random permutation.
    yr = random.sample(list(y), n)
    plt.figure(figsize=(6, 5))
    plt.plot(x, yr, 'o')
    print(pearsonr(x, yr))

    # --- Logistic curves ----------------------------------------------------
    # Open a fresh figure so the curves are not drawn over the scatter plot.
    plt.figure()
    f1 = np.arange(-10, 10, .1)
    plt.plot(f1, logit(f1), label='logit($f_1$)')
    plt.plot(f1, logit(10 * f1 - 30), label='logit($10f_1-30$)')
    plt.yticks(np.arange(0, 1.1, .1))
    plt.grid(True)
    plt.legend(loc='upper left')

    # Needed to actually show the figures when run as a plain script.
    plt.show()