%pylab inline

xdata = array([0.0,1.0,2.0,3.0,4.0,5.0])  # Observed value of x
ydata = array([1.0,3.0,2.0,5.0,5.0,6.0])  # Observed value of y
a,b = polyfit(xdata,ydata,1)
print 'fitted slope: ',a
print 'fitted y-intercept: ',b
plot(xdata, ydata, 'bo', label='observed')
yfit = a*xdata + b
error = ydata - yfit  # Error
plot(xdata, yfit, 'r', label='fit')
xlabel('xdata')
ylabel('ydata')
legend(loc='best')

from pandas import read_csv
w = read_csv('douglas_data.csv',skiprows=[1],skipinitialspace=True)

a,b = polyfit(w.Estat,w.Edyn,1)
xfit = linspace(w.Estat.min(),w.Estat.max(),100)
yfit = a * xfit + b
plot(w.Estat,w.Edyn,'bo',label='observed')
plot(xfit,yfit,'r',label='best-fit line')
xlabel('Estat')
ylabel('Edyn')
legend(loc='best')

from scipy.stats import norm
error = w.Edyn - (a*w.Estat + b)
mu = mean(error)
sig = std(error)
hist(error,normed=True)
x = linspace(-6000,6000,100)
y = norm.pdf(x,loc=mu,scale=sig)
plot(x,y,'r')
x025 = norm.ppf(0.025,loc=mu,scale=sig)
x975 = norm.ppf(0.975,loc=mu,scale=sig)
axvline(x025,color='k')
axvline(x975,color='k')

print 'number of points outside 95 percentile: ',sum(abs(error)>x975)
outside = abs(error)>x975
plot(w.Estat[~outside],w.Edyn[~outside],'ko')
plot(w.Estat[outside],w.Edyn[outside],'ro')

a1,b1 = polyfit(w.Estat,w.Edyn,1)
a2,b2 = polyfit(w.Edyn,w.Estat,1)
print 'a1,b1: ',a1,b1
print 'a2,b2: ',1.0/a2,-b2/a2
plot(w.Estat,w.Edyn,'bo',label='observed')
x1 = array([w.Estat.min(),w.Estat.max()])
y1 = a1*x1 + b1
plot(x1,y1,'r',label='line 1')
y2 = 1.0/a2 * x1 - b2/a2
plot(x1,y2,'g',label='line 2')
xlabel('Estat')
ylabel('Edyn')
legend(loc='best')

error1 = w.Edyn - a1 * w.Estat - b1
error2 = w.Edyn - 1/a2 * w.Estat + b2/a2
print 'mean error line 1: ',mean(error1)
print 'mean error line 2: ',mean(error2)
print 'mean squared error line 1: ',sqrt(mean(error1**2))
print 'mean squared error line 2: ',sqrt(mean(error2**2))
#
plot(w.Estat,error1,'ro')
plot(w.Estat,error2,'go')
title('Line 2 (green dots) shows a trend')