import numpy as np, pymc as pm, pandas
# set random seed for reproducibility
np.random.seed(12345)
# Generate the synthetic data
a = 2.0
b = 8.0
c = 6.0
d = c + (b-a)
d1 = np.random.uniform(a, b, 100)
d2 = np.random.uniform(c, d, 100)
data = d1 + d2
#Setup the priors
pa = pm.Normal("pa", 0.0, .01, value=0)
pb = pm.Normal("pb", 0.0, .01, value=10)
pc = pm.Normal("pc", 0.0, .01, value=0)
pd = pm.Normal("pd", 0.0, .01, value=10)
# if data was just d1, this would be more familiar
data = d1
# here is how you could fit it with an "observed" stochastic
@pm.observed
def pdata(value=data, pa=pa, pb=pb, pc=pc, pd=pd):
return pm.uniform_like(value, pa, pb)
m = pm.MCMC(dict(pa=pa, pb=pb, pc=pc, pd=pd, pdata=pdata))
m.sample(100000, 50000, 50)
[-----------------100%-----------------] 100000 of 100000 complete in 16.1 sec
print pandas.DataFrame(m.stats()).loc[['mean', 'standard deviation']]
pa pb pc pd mean 1.992644 8.023612 -0.3094084 0.09822238 standard deviation 0.0577922 0.05885509 10.1256 10.20577
#Setup the priors
pa = pm.Normal("pa", 0.0, .01, value=0)
pb = pm.Normal("pb", 0.0, .01, value=10)
pc = pm.Normal("pc", 0.0, .01, value=0)
pd = pm.Normal("pd", 0.0, .01, value=10)
# In general, you can use this pattern with a more complicated
# calculation of the log-likelihood, e.g.
@pm.observed
def pdata(value=data, pa=pa, pb=pb, pc=pc, pd=pd):
logpr = 0.
# code to calculate logpr from data and parameters
return logpr
# Use MCMC to sample and obtain traces
m = pm.MCMC(dict(pa=pa, pb=pb, pc=pc, pd=pd, pdata=pdata))
m.sample(100000, 50000, 50)
[-----------------100%-----------------] 100000 of 100000 complete in 12.7 sec
To keep this example simple, let us restrict the model to have $c-a = b-d$. Then $d1_i+d2_i$ has a symmetric triangular distribution, with support $[a+c, b+d]$. In other words, $$ p(data_i=x|a,b,c,d) \propto \begin{cases} x - (a+c) &\quad \text{if } a+c \leq x \leq \frac{a+b+c+d}{2};\\ (b+d) - x &\quad \text{if } \frac{a+b+c+d}{2} \leq x \leq b+d;\\ 0 &\quad {\text{otherwise.}} \end{cases} $$
Did I say "simple"?
# set random seed for reproducibility
np.random.seed(12345)
# Generate the synthetic data
a = 2.0
b = 8.0
c = 6.0
d = c + (b-a)
d1 = np.random.uniform(a, b, 100)
d2 = np.random.uniform(c, d, 100)
data = d1 + d2
#Setup the priors
pa = pm.Normal("pa", 0.0, .01, value=0)
pb = pm.Normal("pb", 0.0, .01, value=10)
pc = pm.Uniform("pc", 5, 10, value=5) # <-- changed prior to break symmetry
pd = pm.Normal("pd", 0.0, .01, value=10)
@pm.observed
def pdata(value=data, pa=pa, pb=pb, pc=pc, pd=pd):
pd = pc + (pb - pa) # don't use pd value
# make sure order is acceptible
if pb < pa or pd < pc:
return -np.inf
x = value
pr = \
np.where(x < pa+pc, 0,
np.where(x <= (pa+pb+pc+pd)/2, x - (pa+pc),
np.where(x <= (pb+pd), (pb+pd) - x,
0))) \
/ (.5 * ((pb+pd) - (pa+pc)) * ((pb-pa) + (pd-pc))/2)
return np.sum(np.log(pr))
m = pm.MCMC(dict(pa=pa, pb=pb, pc=pc, pd=pd, pdata=pdata))
m.use_step_method(pm.AdaptiveMetropolis, [m.pa, m.pb, m.pc])
m.sample(20000, 10000, 10)
[-----------------100%-----------------] 20000 of 20000 complete in 5.9 sec
print pandas.DataFrame(m.stats()).loc[['mean', 'standard deviation']]
pa pb pc pd mean 1.068921 6.471076 7.74488 -0.4584424 standard deviation 1.477786 1.451357 1.439883 9.85236