In [1]:

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import pandas
import scipy
import scipy.optimize

The original data has 44 measurements, but 4 are duplicates where the same temperature was recorded at each corner. We remove these duplicate measurements and then vizualize the measurements.

In [2]:

data = pandas.read_table('data/volcanodata.txt', names=('x', 'y', 't'), sep='\s+')
data = data.drop_duplicates()

im = np.zeros((11, 11))
for _, (x, y, t) in data.iterrows():
    im[x / 10, y / 10] = t
plt.matshow(im.T, origin='lower')
ticks = [''] + [str(10 * i * 2) for i in xrange(6)]
plt.gca().set_xticklabels(ticks)
plt.gca().set_yticklabels(ticks)
plt.colorbar()
plt.title('Temperature Measurements')

Out[2]:

<matplotlib.text.Text at 0xa10114c>

Define the model.

In [3]:

def temperature(x, y, b_params):
    """T(x, y | t0, t1, x0, y0, lamda)"""
    t0, t1, x0, y0, lam = b_params
    return t0 + (t1 - t0) * np.exp(-(((x - x0)**2 + (y - y0)**2) / (2 * lam**2)))

sigma = 30.0

Find the optimal parameters.

In [4]:

def chisquare_temperature(b_params):
    return np.sum(((data.t - temperature(data.x, data.y, b_params)) / sigma)**2)

b_params_guess = [0, 200, 60, 60, 30]
results = scipy.optimize.minimize(chisquare_temperature, b_params_guess)
b_params_opt = results.x
b_opt_cov = np.linalg.inv(0.5 * np.linalg.inv(results.hess_inv))

print "Optimal parameters:", zip(['t0', 't1', 'x0', 'y0', 'lambda'], b_params_opt)
print "Min function value:", results.fun
print "Standard errors of each parameter separately:", np.sqrt(np.diag(b_opt_cov))
print "Parameter covariance:"
print b_opt_cov

Optimal parameters: [('t0', 17.673484842250879), ('t1', 195.30201364892139), ('x0', 56.066149683291705), ('y0', 79.098303551787154), ('lambda', 38.891319341124706)]
Min function value: 24.5336951143
Standard errors of each parameter separately: [ 20.13250369  39.78433256   3.47624678   9.81651262   7.17102511]
Parameter covariance:
[[  405.31770485   -37.3178964     18.29330954    71.70887784
    -87.35883056]
 [  -37.3178964   1582.79311745   -71.90147235  -332.81252471
   -207.91406254]
 [   18.29330954   -71.90147235    12.08429165    19.83609197
      5.70954513]
 [   71.70887784  -332.81252471    19.83609197    96.36392008
     30.24770931]
 [  -87.35883056  -207.91406254     5.70954513    30.24770931
     51.42360115]]

Plot confidence regions around the maximim likelihood estimate of the volcano's position.

In [5]:

# Marginalize out everything except x0 and y0.
b_cov_x0_yo = b_opt_cov[2:4, 2:4]
b_cov_x0_yo

Out[5]:

array([[ 12.08429165,  19.83609197],
       [ 19.83609197,  96.36392008]])

In [6]:

# Extract the parameters of the MVN distribution.
_, _, x0, y0, _ = b_params_opt
sqrt_b_cov_x0_yo = np.sqrt(b_cov_x0_yo)
(sigma_x, sigma_xy), (_, sigma_y) = sqrt_b_cov_x0_yo
x0, y0, sigma_x, sigma_y, sigma_xy

Out[6]:

(56.066149683291705,
 79.098303551787154,
 3.4762467759197113,
 9.8165126231358961,
 4.4537727798602615)

In [7]:

def plot_error_ellipse(mu, sigma, std=2, ax=None, **kwargs):
    if ax is None:
        ax = plt.gca()
    num_points = 1000
    L = np.linalg.cholesky(sigma)
    points = np.linspace(0, 1, num=num_points)
    circle = np.array([np.cos(2 * np.pi * points),
                       np.sin(2 * np.pi * points)]) * std
    ellipse = L.dot(circle) + np.tile(mu, (num_points, 1)).T
    ax.plot(ellipse[0, :], ellipse[1, :], **kwargs)

In [8]:

mu = [x0, y0]
covar = [[sigma_x, sigma_xy], [sigma_xy, sigma_y]]
plt.xlim(0, 100)
plt.ylim(0, 100)
for std, alpha in zip((1, 2, 3), (1.0, 0.6, 0.3)):
    plot_error_ellipse(mu, covar, linewidth=2, std=std, color='red', alpha=alpha)
plt.gca().set_aspect('equal')
plt.title('Volcano location: $1/2/3-\sigma$ confidence regions')

Out[8]:

<matplotlib.text.Text at 0xa396a0c>