!date
import matplotlib.pyplot as plt, numpy as np, seaborn as sns, pandas as pd
%matplotlib inline
# seaborn styling for all plots in this notebook
sns.set_context("poster")
sns.set_style('whitegrid')
Wed Dec 31 09:55:52 PST 2014
# set random seed for reproducibility
# (so re-running the notebook draws the same initial Stochastic values)
np.random.seed(12345)
To teach myself PyMC I am trying to define a simple logistic regression. But I get a ZeroProbability error, and I do not understand exactly why this happens or how to avoid it.
Here is my code:
import pymc
import numpy as np

# Toy data: x is a single predictor, y the observed binary outcomes.
x = np.array([85, 95, 70, 65, 70, 90, 75, 85, 80, 85])
y = np.array([1., 1., 0., 0., 0., 1., 1., 0., 0., 1.])

# pymc.Normal is parameterized as (mu, tau) where tau is the PRECISION
# (1/variance), so tau=1e-6 means sd=1000: a very diffuse prior.
# NOTE: no value= is supplied, so the initial value is a random draw from
# this diffuse prior and can be hundreds in magnitude (see below).
w0 = pymc.Normal('w0', 0, 0.000001) # uninformative prior (any real number)
w1 = pymc.Normal('w1', 0, 0.000001) # uninformative prior (any real number)

# Deterministic node: in PyMC2 the keyword defaults (w0=w0, w1=w1, x=x)
# declare the node's parents, so these parameter names must not change.
@pymc.deterministic
def logistic(w0=w0, w1=w1, x=x):
    # NOTE(review): this is 1 - sigmoid(w0 + w1*x); conventional logistic
    # regression uses exp(-(w0 + w1*x)). The model is still valid, but the
    # signs of w0/w1 are flipped relative to the usual convention.
    return 1.0 / (1. + np.exp(w0 + w1 * x))

# With an extreme initial w0/w1 the probabilities underflow to exactly
# 0 or 1, making the observed Bernoulli's log-probability -inf, which
# raises ZeroProbability at construction time (traceback below).
observed = pymc.Bernoulli('observed', logistic, value=y, observed=True)
--------------------------------------------------------------------------- ZeroProbability Traceback (most recent call last) <ipython-input-3-e2203c482542> in <module>() 12 return 1.0 / (1. + np.exp(w0 + w1 * x)) 13 ---> 14 observed = pymc.Bernoulli('observed', logistic, value=y, observed=True) /homes/abie/anaconda/lib/python2.7/site-packages/pymc/distributions.pyc in __init__(self, *args, **kwds) 269 random = debug_wrapper(random) 270 else: --> 271 Stochastic.__init__(self, logp=logp, random=random, logp_partial_gradients = logp_partial_gradients, dtype=dtype, **arg_dict_out) 272 273 new_class.__name__ = name /homes/abie/anaconda/lib/python2.7/site-packages/pymc/PyMCObjects.pyc in __init__(self, logp, doc, name, parents, random, trace, value, dtype, rseed, observed, cache_depth, plot, verbose, isdata, check_logp, logp_partial_gradients) 757 if check_logp: 758 # Check initial value --> 759 if not isinstance(self.logp, float): 760 raise ValueError( 761 "Stochastic " + /homes/abie/anaconda/lib/python2.7/site-packages/pymc/PyMCObjects.pyc in get_logp(self) 914 (self._value, self._parents.value)) 915 else: --> 916 raise ZeroProbability(self.errmsg) 917 918 return logp ZeroProbability: Stochastic observed's value is outside its support, or it forbids its parents' current values.
What has gone wrong?
# when you initialize the Normal Stochastics
# their values are drawn from the prior
w0 = pymc.Normal('w0', 0, 0.000001) # uninformative prior (any real number)
# inspect the randomly drawn initial value: with tau=1e-6 (sd=1000)
# it can easily be hundreds away from zero (output below)
w0.value
array(-519.4387150567381)
Since you have a diffuse prior, this can lead to a value that has such small posterior probability that it is effectively zero. The solution is simple: start from a better value:
# value=0 overrides the random prior draw, so the sampler starts
# from a point with non-zero likelihood
w0 = pymc.Normal('w0', 0, 0.000001, value=0) # uninformative prior (any real number)
w0.value
array(0.0)
This can also make MCMC convergence faster, although any starting point that does not raise a ZeroProbability error should yield the same posterior distribution if your burn-in period is long enough.
# Same model as before, but both weights start at 0, where every
# predicted probability is 0.5 -- strictly inside (0, 1) -- so the
# observed Bernoulli has finite log-probability and no ZeroProbability
# error is raised.
w0 = pymc.Normal('w0', 0, 0.000001, value=0) # uninformative prior (any real number)
w1 = pymc.Normal('w1', 0, 0.000001, value=0) # uninformative prior (any real number)

# In PyMC2 the keyword defaults (w0=w0, w1=w1, x=x) declare this
# deterministic node's parents, so these parameter names must not change.
@pymc.deterministic
def logistic(w0=w0, w1=w1, x=x):
    return 1.0 / (1. + np.exp(w0 + w1 * x))

observed = pymc.Bernoulli('observed', logistic, value=y, observed=True)