import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.optimize
# Take a look at the data: file columns are x, y, and the standard deviation s
# of each y measurement (first row is a header, hence skiprows=1).
x, y, s = np.loadtxt('data/Chisqfitdata.txt', skiprows=1).T
print len(x), 'data points'
plt.errorbar(x, y, s)
20 data points
<Container object of 3 artists>
def chisq(b, ymodel):
    """Chi-square statistic for the model against the global data (x, y, s).

    b is the parameter vector passed to ymodel; each residual is weighted
    by the corresponding measurement uncertainty s before squaring.
    """
    weighted_residuals = (y - ymodel(x, b)) / s
    return np.sum(weighted_residuals ** 2)
def plot_best_fit(ymodel, guess=(0, 0)):
# Find and print the best-fit parameters and plot the fit.
result = scipy.optimize.minimize(chisq, guess, args=(ymodel,))
print 'b_min', result.x
print 'chi-sq min', result.fun
covar = np.linalg.inv(0.5 * np.linalg.inv(result.hess_inv))
print 'covar:'
print covar
print 'stderrs', np.sqrt(np.diag(covar))
xs = np.arange(0, 3, 0.01)
ys = ymodel(xs, result.x)
plt.plot(xs, ys)
plt.errorbar(x, y, s, fmt='.')
# Raised parabola: y = b0 + b1 * x**2 (no linear term).
plot_best_fit(lambda x, b: b[0] + b[1] * x**2)
b_min [ 3.24206703 2.33685347] chi-sq min 35.4413840536 covar: [[ 0.01315661 -0.00169065] [-0.00169065 0.0002232 ]] stderrs [ 0.11470227 0.01493985]
# Linear fit: y = b0 + b1 * x.
plot_best_fit(lambda x, b: b[0] + b[1] * x)
b_min [ -7.00653275 10.20895879] chi-sq min 881.536817089 covar: [[ 0.0334224 -0.01208027] [-0.01208027 0.00441259]] stderrs [ 0.18281794 0.06642737]
# Exponential fit: y = b0 * exp(b1 * x).
plot_best_fit(lambda x, b: b[0] * np.exp(b[1] * x))
b_min [ 3.61086845 0.63738496] chi-sq min 104.313769901 covar: [[ 1.08192266e-07 -7.16625157e-08] [ -7.16625157e-08 4.77478469e-08]] stderrs [ 0.00032893 0.00021851]
# General quadratic.
def quad(x, b):
    """General quadratic model: b[0] + b[1]*x + b[2]*x**2."""
    c0, c1, c2 = b[0], b[1], b[2]
    # Horner form of the same polynomial.
    return c0 + x * (c1 + c2 * x)
plot_best_fit(quad, guess=(0, 0, 0))  # three parameters, so the initial guess needs length 3
b_min [ 1.63038888 1.49828665 2.00809415] chi-sq min 10.9251784017 covar: [[ 0.11910539 -0.09849463 0.01992139] [-0.09849463 0.09156492 -0.0200915 ] [ 0.01992139 -0.0200915 0.00463175]] stderrs [ 0.34511649 0.30259696 0.06805693]
The four models above in order of decreasing $\chi^2$: linear, exponential, raised parabola, general quad. This ordering generally matches what I would expect based on the plots, though I'm a little surprised about the linear (881.536817) and exponential (104.313771). Based on the plots, the linear looks like it fits the points a bit better than the exponential, but the $\chi^2$ statistic greatly favors the exponential. Looking more closely at the plots and the definition of the $\chi^2$ statistic, I believe this is because the exponential provides a better fit to the points in the upper right that have much smaller standard deviations.
For the general quadratic, $b_1$ = 1.49828665 +/- 0.30259696. This is nearly 5 standard deviations away from 0, so we can be confident that it makes sense to include this parameter. Also, adding this term decreased $\chi^2$ from 35.44 to 10.93. With 20 data points and 3 fitted parameters, the expected value of $\chi^2$ is about $20 - 3 = 17$ (the number of degrees of freedom), so 10.93 is still in the right ballpark.