import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.optimize
# Take a look at the data: file columns are x, y, and the standard deviation s
# of each y measurement (first row is a header, hence skiprows=1).
x, y, s = np.loadtxt('data/Chisqfitdata.txt', skiprows=1).T
print len(x), 'data points'
plt.errorbar(x, y, s)
20 data points
<Container object of 3 artists>
def chisq(b, ymodel):
    """Chi-square statistic for the model against the global data (x, y, s).

    b is the parameter vector passed to ymodel; each residual is weighted
    by the corresponding measurement uncertainty s before squaring.
    """
    weighted_residuals = (y - ymodel(x, b)) / s
    return np.sum(weighted_residuals ** 2)
def plot_best_fit(ymodel, guess=(0, 0)):
# Find and print the best-fit parameters and plot the fit.
result = scipy.optimize.minimize(chisq, guess, args=(ymodel,))
print 'b_min', result.x
print 'chi-sq min', result.fun
covar = np.linalg.inv(0.5 * np.linalg.inv(result.hess_inv))
print 'covar:'
print covar
print 'stderrs', np.sqrt(np.diag(covar))
xs = np.arange(0, 3, 0.01)
ys = ymodel(xs, result.x)
plt.plot(xs, ys)
plt.errorbar(x, y, s, fmt='.')
# Raised parabola: y = b0 + b1 * x**2 (no linear term).
plot_best_fit(lambda x, b: b[0] + b[1] * x**2)
b_min [ 3.24206703 2.33685347] chi-sq min 35.4413840536 covar: [[ 0.01315661 -0.00169065] [-0.00169065 0.0002232 ]] stderrs [ 0.11470227 0.01493985]
# Linear fit: y = b0 + b1 * x.
plot_best_fit(lambda x, b: b[0] + b[1] * x)
b_min [ -7.00653275 10.20895879] chi-sq min 881.536817089 covar: [[ 0.0334224 -0.01208027] [-0.01208027 0.00441259]] stderrs [ 0.18281794 0.06642737]
# Exponential fit: y = b0 * exp(b1 * x).
plot_best_fit(lambda x, b: b[0] * np.exp(b[1] * x))
b_min [ 3.61086845 0.63738496] chi-sq min 104.313769901 covar: [[ 1.08192266e-07 -7.16625157e-08] [ -7.16625157e-08 4.77478469e-08]] stderrs [ 0.00032893 0.00021851]
# General quadratic.
def quad(x, b):
    """General quadratic model: b[0] + b[1]*x + b[2]*x**2."""
    c0, c1, c2 = b[0], b[1], b[2]
    # Horner form of the same polynomial.
    return c0 + x * (c1 + c2 * x)
plot_best_fit(quad, guess=(0, 0, 0))  # three parameters, so the initial guess needs length 3
b_min [ 1.63038888 1.49828665 2.00809415] chi-sq min 10.9251784017 covar: [[ 0.11910539 -0.09849463 0.01992139] [-0.09849463 0.09156492 -0.0200915 ] [ 0.01992139 -0.0200915 0.00463175]] stderrs [ 0.34511649 0.30259696 0.06805693]
The four models above in order of decreasing $\chi^2$: linear, exponential, raised parabola, general quad. This ordering generally matches what I would expect based on the plots, though I'm a little surprised about the linear (881.536817) and exponential (104.313771). Based on the plots, the linear looks like it fits the points a bit better than the exponential, but the $\chi^2$ statistic greatly favors the exponential. Looking more closely at the plots and the definition of the $\chi^2$ statistic, I believe this is because the exponential provides a better fit to the points in the upper right that have much smaller standard deviations.
For the general quadratic, $b_1$ = 1.49828665 +/- 0.30259696. This is nearly 5 standard deviations away from 0, so we can be confident that it makes sense to include this parameter. Also, adding this term decreased $\chi^2$ from 35.44 to 10.93. With 20 data points and 3 fitted parameters, the expected value of $\chi^2$ is about $20 - 3 = 17$ (the number of degrees of freedom), so 10.93 is still in the right ballpark.