%pylab inline xdata = array([0.0,1.0,2.0,3.0,4.0,5.0]) # Observed value of x ydata = array([1.0,3.0,2.0,5.0,5.0,6.0]) # Observed value of y a,b = polyfit(xdata,ydata,1) print 'fitted slope: ',a print 'fitted y-intercept: ',b plot(xdata, ydata, 'bo', label='observed') yfit = a*xdata + b error = ydata - yfit # Error plot(xdata, yfit, 'r', label='fit') xlabel('xdata') ylabel('ydata') legend(loc='best') from pandas import read_csv w = read_csv('douglas_data.csv',skiprows=[1],skipinitialspace=True) a,b = polyfit(w.Estat,w.Edyn,1) xfit = linspace(w.Estat.min(),w.Estat.max(),100) yfit = a * xfit + b plot(w.Estat,w.Edyn,'bo',label='observed') plot(xfit,yfit,'r',label='best-fit line') xlabel('Estat') ylabel('Edyn') legend(loc='best') from scipy.stats import norm error = w.Edyn - (a*w.Estat + b) mu = mean(error) sig = std(error) hist(error,normed=True) x = linspace(-6000,6000,100) y = norm.pdf(x,loc=mu,scale=sig) plot(x,y,'r') x025 = norm.ppf(0.025,loc=mu,scale=sig) x975 = norm.ppf(0.975,loc=mu,scale=sig) axvline(x025,color='k') axvline(x975,color='k') print 'number of points outside 95 percentile: ',sum(abs(error)>x975) outside = abs(error)>x975 plot(w.Estat[~outside],w.Edyn[~outside],'ko') plot(w.Estat[outside],w.Edyn[outside],'ro') a1,b1 = polyfit(w.Estat,w.Edyn,1) a2,b2 = polyfit(w.Edyn,w.Estat,1) print 'a1,b1: ',a1,b1 print 'a2,b2: ',1.0/a2,-b2/a2 plot(w.Estat,w.Edyn,'bo',label='observed') x1 = array([w.Estat.min(),w.Estat.max()]) y1 = a1*x1 + b1 plot(x1,y1,'r',label='line 1') y2 = 1.0/a2 * x1 - b2/a2 plot(x1,y2,'g',label='line 2') xlabel('Estat') ylabel('Edyn') legend(loc='best') error1 = w.Edyn - a1 * w.Estat - b1 error2 = w.Edyn - 1/a2 * w.Estat + b2/a2 print 'mean error line 1: ',mean(error1) print 'mean error line 2: ',mean(error2) print 'mean squared error line 1: ',sqrt(mean(error1**2)) print 'mean squared error line 2: ',sqrt(mean(error2**2)) # plot(w.Estat,error1,'ro') plot(w.Estat,error2,'go') title('Line 2 (green dots) shows a trend')