PyMC is a python module that implements Bayesian statistical models and fitting algorithms, including Markov chain Monte Carlo. Its flexibility and extensibility make it applicable to a large suite of problems. Along with core sampling functionality, PyMC includes methods for summarizing output, plotting, goodness-of-fit and convergence diagnostics.
Library documentation: https://pymc-devs.github.io/pymc/
# objective is to model the number of coal mining disasters per year in the UK from 1851 to 1962
# start with some imports
%matplotlib inline
import numpy as np
from pymc import *
from pymc.Matplot import plot
# number of mining disasters per year
disasters_array = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
# variable representing the presumed "switch point" at which disasters became less frequent
switchpoint = DiscreteUniform('switchpoint', lower=0, upper=110, doc='Switchpoint[year]')
switchpoint
<pymc.distributions.DiscreteUniform 'switchpoint' at 0x7f69c67d4310>
# variables for the pre and post-switchpoint mean
early_mean = Exponential('early_mean', beta=1.)
late_mean = Exponential('late_mean', beta=1.)
early_mean, late_mean
(<pymc.distributions.Exponential 'early_mean' at 0x7f69c67d4490>, <pymc.distributions.Exponential 'late_mean' at 0x7f69c67d44d0>)
# deterministic rate parameter of the distribution of disasters
@deterministic(plot=False)
def rate(s=switchpoint, e=early_mean, l=late_mean):
''' Concatenate Poisson means '''
out = np.empty(len(disasters_array))
out[:s] = e
out[s:] = l
return out
# actual distribution of disasters (observed)
disasters = Poisson('disasters', mu=rate, value=disasters_array, observed=True)
disasters
<pymc.distributions.Poisson 'disasters' at 0x7f69c67d41d0>
# take a look at the parents/children for these variables to see relationships
switchpoint.parents, disasters.parents, rate.children
({'lower': 0, 'upper': 110}, {'mu': <pymc.PyMCObjects.Deterministic 'rate' at 0x7f69c67d4a50>}, {<pymc.distributions.Poisson 'disasters' at 0x7f69c67d41d0>})
# create the model
model = Model([switchpoint, early_mean, late_mean, disasters])
# Markov Chain Monte-Carlo sampling
M = MCMC(model)
M.sample(iter=10000, burn=1000, thin=10)
[-----------------100%-----------------] 10000 of 10000 complete in 3.7 sec
# examine a slice of the output
print M.trace('switchpoint')[:]
[91 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 46 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 37 48 40 40 40 40 40 40 40 40 40 40 40 40 45 45 46 43 43 43 37 43 43 43 37 41 41 41 41 41 41 41 41 41 41 41 41 36 37 41 41 41 41 41 41 41 41 41 40 40 40 40 40 40 40 43 38 38 36 38 38 38 39 39 46 38 43 43 39 41 39 39 39 39 39 39 39 39 39 37 38 38 36 36 39 39 39 39 39 40 40 37 37 44 39 39 44 44 37 37 37 38 37 37 37 37 46 40 40 40 36 46 39 39 39 45 43 46 47 39 39 46 36 40 39 44 39 39 39 39 39 39 41 41 46 42 43 43 44 39 39 39 39 39 39 41 40 40 40 40 40 40 40 40 46 46 41 41 41 41 41 39 41 41 41 41 41 41 36 36 41 41 41 41 41 41 41 41 41 41 36 46 37 37 41 41 41 41 41 41 41 39 39 39 39 44 39 39 39 39 39 37 43 43 39 39 39 40 40 37 41 41 41 41 41 41 41 41 41 37 44 37 37 38 38 37 37 39 39 44 44 39 39 39 41 41 46 39 39 39 39 39 39 39 39 39 39 39 39 39 44 39 39 36 41 36 39 39 39 39 39 39 39 40 40 37 39 39 39 39 37 36 36 46 37 41 41 43 39 39 39 44 39 39 39 37 40 40 40 47 36 39 39 42 42 42 42 42 40 40 40 40 40 40 40 39 36 40 40 40 40 40 39 39 41 41 41 41 41 37 41 36 36 41 37 37 40 40 40 38 41 46 41 38 42 36 39 39 39 43 40 40 40 40 40 39 36 42 43 42 41 40 36 39 39 40 40 40 39 39 40 40 40 40 40 43 41 41 41 42 41 41 36 42 38 42 38 38 41 41 40 40 40 40 40 39 35 36 40 42 40 40 40 40 40 38 38 41 41 41 41 41 36 36 46 40 43 39 39 39 39 44 37 37 41 42 35 36 40 40 39 39 39 39 39 39 44 42 40 40 36 40 40 36 36 40 42 37 37 40 40 40 36 44 42 36 41 41 41 38 36 36 37 40 40 40 44 40 36 36 49 39 36 46 41 41 37 41 41 40 40 36 40 40 40 40 40 40 40 40 41 44 43 41 41 41 41 38 40 36 40 41 42 37 39 37 41 39 39 40 39 39 41 41 46 40 40 41 37 39 42 40 36 43 39 36 40 42 37 41 43 37 40 42 40 36 35 36 41 41 37 41 41 42 40 41 41 41 41 41 46 37 40 46 39 44 41 42 41 37 36 40 46 40 40 38 39 39 39 41 41 41 41 41 41 46 40 36 40 42 42 36 40 40 39 39 43 37 39 38 39 39 39 39 41 36 39 36 40 40 40 42 35 35 41 41 45 39 41 41 41 39 46 38 40 36 39 43 32 34 36 40 40 41 41 38 41 40 39 40 37 41 44 42 40 38 39 40 42 37 40 36 40 42 43 36 39 39 37 37 39 42 39 39 39 37 37 42 42 36 41 36 42 40 41 37 40 37 39 41 41 39 41 43 41 37 36 41 36 39 41 39 40 40 40 32 39 39 40 42 40 39 39 39 39 39 40 40 37 42 46 41 41 43 38 44 41]
# generate a composite figure for the model
plot(M)
Plotting switchpoint Plotting late_mean Plotting early_mean
/home/john/anaconda/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2499: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`. VisibleDeprecationWarning)