Generate the data¶

In [1]:

from __future__ import division
import numpy as np
from scipy.stats import distributions
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pymc as pm

In [2]:

nsamples = 1000
mu1_true = 0.3
mu2_true = 0.55
sig1_true = 0.08
sig2_true = 0.12
a_true = 0.4

In [3]:

np.random.seed(3)  # for repeatability
s1 = distributions.norm.rvs(mu1_true, sig1_true, size=round(a_true*nsamples))
s2 = distributions.norm.rvs(mu2_true, sig2_true, size=round((1-a_true)*nsamples))
samples = np.hstack([s1, s2])
bins = np.arange(-0.2, 1.2, 0.01)
data, _ = np.histogram(samples, bins=bins, density=True)
x_data = bins[:-1] + 0.5*(bins[1] - bins[0])

In [4]:

plt.plot(x_data, data, '-o');

Least-squares histogram fit¶

Model definition¶

In [5]:

import lmfit

peak1 = lmfit.models.GaussianModel(prefix='p1_')
peak2 = lmfit.models.GaussianModel(prefix='p2_')
model = peak1 + peak2

model.set_param_hint('p1_center', value=0.2, min=-1, max=2)
model.set_param_hint('p2_center', value=0.5, min=-1, max=2)
model.set_param_hint('p1_sigma', value=0.1, min=0.01, max=0.3)
model.set_param_hint('p2_sigma', value=0.1, min=0.01, max=0.3)
model.set_param_hint('p1_amplitude', value=1, min=0.0, max=1)
model.set_param_hint('p2_amplitude', expr='1 - p1_amplitude')
name = '2-gaussians'

Fit¶

In [6]:

fit_res = model.fit(data, x=x_data, method='nelder')
print fit_res.fit_report()

 - Adding parameter "p1_fwhm"
 - Adding parameter "p2_fwhm"
[[Model]]
 Composite Model:
    gaussian(prefix='p1_')
    gaussian(prefix='p2_')
[[Fit Statistics]]
    # function evals   = 503
    # data points      = 139
    # variables        = 5
    chi-square         = 8.886
    reduced chi-square = 0.066
[[Variables]]
    p1_amplitude:   0.40397393 (init= 1)
    p1_center:      0.30130651 (init= 0.2)
    p1_fwhm:        0.19268109  == '2.3548200*p1_sigma'
    p1_sigma:       0.08182412 (init= 0.1)
    p2_amplitude:   0.59602606  == '1 - p1_amplitude'
    p2_center:      0.55926808 (init= 0.5)
    p2_fwhm:        0.28679204  == '2.3548200*p2_sigma'
    p2_sigma:       0.12178937 (init= 0.1)
[[Correlations]] (unreported correlations are <  0.100)

In [7]:

fig, ax = plt.subplots()
x = x_data
ax.plot(x, model.eval(x=x, **fit_res.values), 'k', alpha=0.8)
plt.plot(x_data, data, 'o');
if  fit_res.model.components is not None:
    for component in fit_res.model.components:
        ax.plot(x, component.eval(x=x, **fit_res.values), '--k',
                alpha=0.8)
for param in ['p1_center', 'p2_center']:
    ax.axvline(fit_res.params[param].value, ls='--', color='r')
ax.axvline(mu1_true, color='k', alpha=0.5)
ax.axvline(mu2_true, color='k', alpha=0.5)

Out[7]:

<matplotlib.lines.Line2D at 0x19d21128>

Bayesian Inference: MCMC¶

In [8]:

samples.size, nsamples

Out[8]:

(1000, 1000)

In [9]:

sigmas = pm.Normal('sigmas', mu=0.1, tau=1000, size=2)
centers = pm.Normal('centers', [0.3, 0.7], [1/(0.1)**2, 1/(0.1)**2], size=2)
#centers = pm.Uniform('centers', 0, 1, size=2)

alpha  = pm.Beta('alpha', alpha=2, beta=3)
category = pm.Categorical("category", [alpha, 1 - alpha], size=nsamples)

@pm.deterministic
def mu(category=category, centers=centers):
    return centers[category]

@pm.deterministic
def tau(category=category, sigmas=sigmas):
    return 1/(sigmas[category]**2)

observations = pm.Normal('samples_model', mu=mu, tau=tau, value=samples, observed=True)

In [10]:

gen_model = pm.Normal('gen_model', mu=mu, tau=tau)  # generative model

Some debug plots:

In [11]:

t3 = pm.rbeta(alpha=6, beta=2, size=1e5)
t4 = pm.rbeta(alpha=2, beta=3, size=1e5)
plt.hist(t3, bins=bins, alpha=0.5);
plt.hist(t4, bins=bins, alpha=0.5);

In [12]:

t1 = pm.rnormal(0.3, 1/(0.1)**2, size=1e4)
t2 = pm.rnormal(0.55, 1/(0.1)**2, size=1e4)
plt.hist(t1, bins=bins, alpha=0.5)
plt.hist(t2, bins=bins, alpha=0.5);

In [13]:

model = pm.Model([observations, mu, tau, category, alpha, sigmas, centers])

In [14]:

mcmc = pm.MCMC(model)

In [15]:

mcmc.sample(100000, 30000)

 [-----------------100%-----------------] 100000 of 100000 complete in 62.9 sec

In [16]:

pm.Matplot.plot(mcmc)

Plotting alpha
Plotting centers_0
Plotting centers_1
Plotting sigmas_0
Plotting sigmas_1

In [16]: