# NOTE: The real data for these problems wasn't posted before class (and these
# problems were not the class activity as the description suggests), so I
# generated my own simple data and used it for the problem.
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize
class RealModel(object):
def __init__(self):
self.params = (2, 3, -3, 1)
def predict(self, x):
b0, b1, b2, b3 = self.params
return b0 + (b1 * x) + (b2 * x**2) + (b3 * x**3)
REAL_MODEL = RealModel()
np.random.seed(7)
DATA_X = np.linspace(0, 2, num=10)
REAL_Y = REAL_MODEL.predict(DATA_X)
DATA_SIGMA = 0.15
DATA_Y = REAL_Y + np.random.normal(scale=DATA_SIGMA, size=len(REAL_Y))
xs = np.linspace(0, 2, num=500)
plt.plot(xs, REAL_MODEL.predict(xs), label='Real')
plt.scatter(DATA_X, DATA_Y, label='Noisy Measurement')
plt.legend()
# Notebook output: <matplotlib.legend.Legend at 0xa35824c>
class LinearModel(object):
FEATURE_FUNCS = [
np.vectorize(lambda v: 1),
lambda v: v,
lambda v: v**2,
lambda v: v**3,
lambda v: v**4,
lambda v: v**5,
lambda v: v**6,
lambda v: v**7,
lambda v: v**8,
lambda v: v**9,
lambda v: v**10,
lambda v: v**11,
lambda v: v**12,
lambda v: v**13,
lambda v: v**14,
lambda v: v**15,
lambda v: v**16,
lambda v: v**17,
lambda v: v**18,
lambda v: v**19,
lambda v: v**20]
def __init__(self, criterion=None):
assert criterion in ('aic', 'bic', None)
self.criterion = criterion
self.feature_funcs = []
self.params = None
def fit(self, x, y, sigma, param_0_guess=1):
# Try adding each of the parameters to the current model. Keep only those
# that pass the AIC/BIC criterion.
param_guess = []
last_chisq = float('inf')
for i, feature_func in enumerate(self.FEATURE_FUNCS):
# Try adding another parameter
self.feature_funcs.append(feature_func)
if len(param_guess) == 0:
param_guess = [param_0_guess]
else:
param_guess = list(param_guess) + [0]
params_fit, chisq = self._fit(x, y, sigma, param_guess)
delta_chisq = last_chisq - chisq
#print 'params_fit', params_fit
delta_loglikelihood = delta_chisq / 2.0
#print 'chisq:', chisq, '--', 'delta_loglikelihood', delta_loglikelihood
# Decide if the parameter is worth keeping.
use_param = True
if self.criterion == 'aic' and delta_loglikelihood <= 1:
use_param = False
if self.criterion == 'bic' and delta_loglikelihood <= (0.5 * np.log(len(x))):
use_param = False
if use_param:
#print("Using parameter {0}".format(i))
best_params = params_fit.copy()
best_chisq = chisq
param_guess = params_fit
last_chisq = chisq
else:
#print("Not using parameter {0}".format(i))
self.feature_funcs.pop()
param_guess = params_fit[:-1]
self.params = best_params
self.chisq = best_chisq
return self
def predict(self, x):
return self._predict(self.params, x)
def _fit(self, x, y, sigma, params_guess):
func = lambda params: self._chisquare(params, x, y, sigma)
fit = scipy.optimize.fmin(func, params_guess, maxfun=10000, maxiter=10000, disp=False)
chisq = func(fit)
return fit, chisq
def _chisquare(self, params, x, y, sigma):
return np.sum(((self._predict(params, x) - y) / sigma)**2)
def _predict(self, params, x):
featurized_x = (np.array([f(x) for f in self.feature_funcs]).T)
params_matrix = np.tile(params, (len(x), 1))
prediction = (params_matrix * featurized_x).sum(axis=1)
return prediction
def plot_model(model, name='Model', *args, **kwargs):
xs = np.linspace(0, 2, num=500)
chisq = model.chisq
plt.scatter(DATA_X, DATA_Y, label='Data')
plt.plot(xs, REAL_MODEL.predict(xs), label='Real')
plt.plot(xs, model.predict(xs), color='red', label='Fit', *args, **kwargs)
plt.legend()
plt.title("{0} ($\\chi^2 {1}$ -- {2} params)".format(name, chisq, len(model.params)))
# Fit a model with the AIC criterion.
aic_model = LinearModel('aic').fit(DATA_X, DATA_Y, DATA_SIGMA)
plot_model(aic_model)
# Fit a model with the BIC criterion.
bic_model = LinearModel('bic').fit(DATA_X, DATA_Y, DATA_SIGMA)
plot_model(bic_model)
# Just for fun, fit a model with all parameters (i.e., no criterion for rejecting parameters).
overfit_model = LinearModel(None).fit(DATA_X, DATA_Y, DATA_SIGMA)
plot_model(overfit_model)