import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize
import scipy.stats
# Load the data.
events = [float(line.strip()) for line in open('data/Events20130204.txt').readlines()]
print '{0} independent measurements. First 10:'.format(len(events))
print events[:10]
# Take a peek.
plt.hist(events, bins=100)
sample_mean = np.mean(events)
sample_stdev = np.std(events)
print 'Sample mean: {}'.format(sample_mean)
print 'Sample stdev: {}'.format(sample_stdev)
1000 independent measurements. First 10: [6.1355644, 2.9098908, 3.9025586, 2.8688229, 2.6472936, 5.9071974, 0.21227957, 1.293517, 2.9736624, -0.22960726] Sample mean: 2.23679165769 Sample stdev: 2.23531606892
# Get a PDF for a normal distribution with the sample mean/stdev.
normal_pdf = scipy.stats.norm(loc=sample_mean, scale=sample_stdev).pdf
print 'Probability of the first data point: p({0}) = {1}'.format(events[0], normal_pdf(events[0]))
Probability of the first data point: p(6.1355644) = 0.0389924654454
def log_prob_data(data, pdf):
    """Total log-probability of the dataset under the given PDF.

    Applies `pdf` to the whole dataset at once, takes elementwise logs,
    and sums them (i.e. the log of the product of the pointwise densities).
    """
    return np.log(pdf(data)).sum()
# Total log-likelihood of all events under the moment-matched normal PDF.
normal_prob = log_prob_data(events, normal_pdf)
print 'Log probability of the data (normal): {0}'.format(normal_prob)
Log probability of the data (normal): -2223.32116913
t_mu = 2.0
t_sigma = 1.2
t_nu = 5.0
student_t_pdf = scipy.stats.t(t_nu, loc=t_mu, scale=t_sigma).pdf
student_t_prob = log_prob_data(events, student_t_pdf)
print 'Log probability of the data (Student t): {0}'.format(student_t_prob)
print '{0} times more probable than with the normal distribution'.format(np.exp(student_t_prob - normal_prob))
Log probability of the data (Student t): -2208.17899194 3768460.75583 times more probable than with the normal distribution
def func_to_minimize(params):
    """Negative log-likelihood of `events` under a Student t distribution.

    `params` unpacks as (mu, sigma, nu); negated so a minimizer finds the
    maximum-likelihood parameters.
    """
    mu, sigma, nu = params
    t_dist = scipy.stats.t(nu, loc=mu, scale=sigma)
    return -log_prob_data(events, t_dist.pdf)
# Minimize the negative log-likelihood starting from the hand-picked
# parameters; fmin is the Nelder-Mead simplex optimizer.
# NOTE: fmin lives in scipy.optimize, which must be imported explicitly --
# `import scipy.stats` alone is not guaranteed to make it available.
initial_guess = [t_mu, t_sigma, t_nu]
best_params = scipy.optimize.fmin(func_to_minimize, initial_guess)
Optimization terminated successfully. Current function value: 2133.607855 Iterations: 58 Function evaluations: 109
t_map_mu, t_map_sigma, t_map_nu = best_params
for name, value in [('mu', t_map_mu), ('sigma', t_map_sigma), ('nu', t_map_nu)]:
print 'MAP {0}: {1}'.format(name, value)
map_student_t_pdf = scipy.stats.t(t_map_nu, loc=t_map_mu, scale=t_map_sigma).pdf
map_student_t_prob = log_prob_data(events, map_student_t_pdf)
print 'Log probability of the data (Student t w/ MAP params): {0}'.format(map_student_t_prob)
print '{0} times more probable than with our initial student t parameters'.format(np.exp(map_student_t_prob - student_t_prob))
MAP mu: 2.22092512454 MAP sigma: 1.52345938155 MAP nu: 3.59467140035 Log probability of the data (Student t w/ MAP params): -2133.60785489 2.43127079087e+32 times more probable than with our initial student t parameters
xs = np.linspace(1, 5, num=1000)
def scaled_posterior(nu, mu, sigma):
    """Posterior-like score of the data under a Student t distribution.

    Returns exp(log_likelihood - map_student_t_prob), i.e. the data's
    likelihood rescaled so the MAP parameters score 1.0 -- avoids
    underflow from exponentiating a large negative log-probability.
    """
    t_pdf = scipy.stats.t(nu, loc=mu, scale=sigma).pdf
    log_p = log_prob_data(events, t_pdf)
    return np.exp(log_p - map_student_t_prob)
# np.vectorize lets us broadcast the scalar function over the xs grid:
# sweep one parameter across xs while pinning the other two at their
# fitted ("MAP") values.
vectorized_scaled_posterior = np.vectorize(scaled_posterior)
y_varied_sigma = vectorized_scaled_posterior(t_map_nu, t_map_mu, xs)
y_varied_mu = vectorized_scaled_posterior(t_map_nu, xs, t_map_sigma)
y_varied_nu = vectorized_scaled_posterior(xs, t_map_mu, t_map_sigma)
# Overlay the three 1-D parameter sweeps on one set of axes.
# Order matters for matplotlib's color cycle, so keep nu, mu, sigma.
for ys, series_label in ((y_varied_nu, 'nu'), (y_varied_mu, 'mu'), (y_varied_sigma, 'sigma')):
    plt.plot(xs, ys, label=series_label)
plt.legend()
<matplotlib.legend.Legend at 0x99443cc>