# Synthetic data set: n points whose y values follow the line y = 1.5*x
# plus additive noise drawn by normal(0, 2.5, size=n).
# NOTE(review): rand/normal/figure/plot are not imported in this file; they
# are presumably provided by a pylab-style star import -- confirm.
n = 100
x = rand(n) * 10
y = normal(0, 2.5, size=n) + 1.5 * x

# Scatter plot of the raw sample.
figure(figsize=(6, 5))
plot(x, y, 'o');

# Closed-form least-squares fit of the line y = a*x + b:
#   a = (mean(x*y) - xbar*ybar) / (mean(x*x) - xbar**2)
#   b = ybar - a*xbar
xbar, ybar = mean(x), mean(y)
a = (
    (x.dot(y) / n - xbar * ybar)      # covariance of x and y (1/n form)
    / (x.dot(x) / n - xbar * xbar)    # variance of x (1/n form)
)
b = ybar - xbar * a
# Single pre-formatted argument: prints "a b" exactly like the old
# two-item print statement, and is also valid syntax on Python 3.
print('%s %s' % (a, b))

# Redraw the scatter plot and overlay the fitted line a*x + b.
figure(figsize=(6, 5))
plot(x, y, 'o')
xr = arange(0, 11)  # integer abscissae 0..10 at which the line is drawn
plot(xr, b + a * xr, linewidth=3);

# Repeat the linear fit with SciPy and report its results.
from scipy.stats import linregress

slope, intercept, r_value, p_value, std_err = linregress(x, y)

# '\xcf\x83' is the UTF-8 byte sequence of the Greek letter sigma; it is
# kept verbatim so the printed text is unchanged.
print('a = %s , b= %s , \xcf\x83= %s' % (slope, intercept, std_err))
print("Pearson's r = %s , p-value= %s" % (r_value, p_value))

# Standard error of the slope computed by hand:
#   sqrt( sum(residual**2) / (n - 2) / sum((x - xbar)**2) )
# Presumably meant to be compared with std_err from linregress.
sqrt(
    (a * x + b - y).dot(a * x + b - y) * (1. / (n - 2))
    / (x - xbar).dot(x - xbar)
)

# Pearson correlation coefficient computed by hand from raw moments:
#   cov(x, y) / sqrt(var(x) * var(y))
# Presumably meant to be compared with r_value from linregress.
(x.dot(y) / n - xbar * ybar) / sqrt(
    (y.dot(y) / n - ybar * ybar) * (x.dot(x) / n - xbar * xbar)
)

# Download Correlation_coefficient.gif from Wikimedia Commons and display
# it inline (425 px wide) through IPython.
from contextlib import closing

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3
from IPython.display import Image

url='http://upload.wikimedia.org/wikipedia/commons/8/86/Correlation_coefficient.gif'
# closing() guarantees the HTTP response is released even if read() raises,
# and works whether or not the response object is itself a context manager.
# The timeout keeps a stalled connection from hanging the session forever.
with closing(urlopen(url, timeout=30)) as response:
    image_bytes = response.read()
Image(image_bytes, width=425)

# Pearson correlation of the original sample, followed by a control
# experiment: the same y values in random order, plotted against x and
# passed to pearsonr again.
from scipy.stats import pearsonr
pearsonr(x,y)

# NOTE: this rebinds the name `random` to the standard-library module for
# the rest of the file.
import random
# Draw every element exactly once, i.e. a random permutation of y.
# - len(y) instead of a literal 100 keeps the permutation the same length
#   as x when the sample size n is changed.
# - list(y) hands random.sample a real sequence; newer Python versions
#   reject non-sequence populations such as arrays.
yr = random.sample(list(y), len(y))
figure(figsize=(6,5))
plot(x, yr, 'o');
# Parenthesised single argument: same output on Python 2, valid on Python 3.
print(pearsonr(x, yr))

def logit(x):
    """Return the logistic sigmoid of x, i.e. 1 / (1 + exp(-x)).

    NOTE: despite its name, this is the logistic function (the inverse of
    the logit transform); the name is kept because the code below calls it.
    Whether x may be an array depends on which `exp` is in scope
    (presumably the NumPy one from pylab -- confirm).
    """
    denominator = 1 + exp(-x)
    return 1 / denominator

# Plot logit() on the raw input f1 and on the rescaled input 10*f1 - 30,
# which gives a much steeper transition centred at f1 = 3.
f1 = arange(-10, 10, .1)
plot(f1, logit(f1), label='logit($f_1$)')
plot(f1, logit(10*f1-30), label='logit($10f_1-30$)')
yticks(arange(0, 1.1, .1))
# Pass a real boolean: the string form 'on' depended on Matplotlib's
# deprecated string-to-bool coercion for grid visibility.
grid(True)
legend(loc='upper left');