import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import pandas
import scipy
import scipy.optimize
The original data has 44 measurements, but 4 are duplicates where the same temperature was recorded at each corner. We remove these duplicate measurements and then vizualize the measurements.
data = pandas.read_table('data/volcanodata.txt', names=('x', 'y', 't'), sep='\s+')
data = data.drop_duplicates()
im = np.zeros((11, 11))
for _, (x, y, t) in data.iterrows():
im[x / 10, y / 10] = t
plt.matshow(im.T, origin='lower')
ticks = [''] + [str(10 * i * 2) for i in xrange(6)]
plt.gca().set_xticklabels(ticks)
plt.gca().set_yticklabels(ticks)
plt.colorbar()
plt.title('Temperature Measurements')
<matplotlib.text.Text at 0xa10114c>
Define the model.
def temperature(x, y, b_params):
"""T(x, y | t0, t1, x0, y0, lamda)"""
t0, t1, x0, y0, lam = b_params
return t0 + (t1 - t0) * np.exp(-(((x - x0)**2 + (y - y0)**2) / (2 * lam**2)))
sigma = 30.0
Find the optimal parameters.
def chisquare_temperature(b_params):
return np.sum(((data.t - temperature(data.x, data.y, b_params)) / sigma)**2)
b_params_guess = [0, 200, 60, 60, 30]
results = scipy.optimize.minimize(chisquare_temperature, b_params_guess)
b_params_opt = results.x
b_opt_cov = np.linalg.inv(0.5 * np.linalg.inv(results.hess_inv))
print "Optimal parameters:", zip(['t0', 't1', 'x0', 'y0', 'lambda'], b_params_opt)
print "Min function value:", results.fun
print "Standard errors of each parameter separately:", np.sqrt(np.diag(b_opt_cov))
print "Parameter covariance:"
print b_opt_cov
Optimal parameters: [('t0', 17.673484842250879), ('t1', 195.30201364892139), ('x0', 56.066149683291705), ('y0', 79.098303551787154), ('lambda', 38.891319341124706)] Min function value: 24.5336951143 Standard errors of each parameter separately: [ 20.13250369 39.78433256 3.47624678 9.81651262 7.17102511] Parameter covariance: [[ 405.31770485 -37.3178964 18.29330954 71.70887784 -87.35883056] [ -37.3178964 1582.79311745 -71.90147235 -332.81252471 -207.91406254] [ 18.29330954 -71.90147235 12.08429165 19.83609197 5.70954513] [ 71.70887784 -332.81252471 19.83609197 96.36392008 30.24770931] [ -87.35883056 -207.91406254 5.70954513 30.24770931 51.42360115]]
Plot confidence regions around the maximim likelihood estimate of the volcano's position.
# Marginalize out everything except x0 and y0.
b_cov_x0_yo = b_opt_cov[2:4, 2:4]
b_cov_x0_yo
array([[ 12.08429165, 19.83609197], [ 19.83609197, 96.36392008]])
# Extract the parameters of the MVN distribution.
_, _, x0, y0, _ = b_params_opt
sqrt_b_cov_x0_yo = np.sqrt(b_cov_x0_yo)
(sigma_x, sigma_xy), (_, sigma_y) = sqrt_b_cov_x0_yo
x0, y0, sigma_x, sigma_y, sigma_xy
(56.066149683291705, 79.098303551787154, 3.4762467759197113, 9.8165126231358961, 4.4537727798602615)
def plot_error_ellipse(mu, sigma, std=2, ax=None, **kwargs):
if ax is None:
ax = plt.gca()
num_points = 1000
L = np.linalg.cholesky(sigma)
points = np.linspace(0, 1, num=num_points)
circle = np.array([np.cos(2 * np.pi * points),
np.sin(2 * np.pi * points)]) * std
ellipse = L.dot(circle) + np.tile(mu, (num_points, 1)).T
ax.plot(ellipse[0, :], ellipse[1, :], **kwargs)
mu = [x0, y0]
covar = [[sigma_x, sigma_xy], [sigma_xy, sigma_y]]
plt.xlim(0, 100)
plt.ylim(0, 100)
for std, alpha in zip((1, 2, 3), (1.0, 0.6, 0.3)):
plot_error_ellipse(mu, covar, linewidth=2, std=std, color='red', alpha=alpha)
plt.gca().set_aspect('equal')
plt.title('Volcano location: $1/2/3-\sigma$ confidence regions')
<matplotlib.text.Text at 0xa396a0c>