Plot the posterior predictive distribution from a hierarchical model

In [74]:
# Import libraries needed
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import scipy
import scipy.stats as stats
import statsmodels.api as sm
import theano.tensor as tt

from datetime import datetime
from sklearn import preprocessing

%matplotlib inline
plt.style.use('bmh')
colors = ['#348ABD', '#A60628', '#7A68A6', '#467821', '#D55E00', 
          '#CC79A7', '#56B4E9', '#009E73', '#F0E442', '#0072B2']
In [75]:
# Simulate 100 observations from a Poisson(10) distribution as stand-in data
data = pd.DataFrame(np.random.poisson(10, 100))
In [76]:
with pm.Model() as model:
    # Weakly informative Gamma priors on the negative binomial parameters
    alpha = pm.Gamma('alpha', alpha=.1, beta=.1)
    mu = pm.Gamma('mu', alpha=.1, beta=.1)
    
    # Unobserved posterior predictive node and likelihood of the observed data
    y_pred = pm.NegativeBinomial('y_pred', mu=mu, alpha=alpha)
    y_est = pm.NegativeBinomial('y_est', mu=mu, alpha=alpha, observed=data)
    
    start = pm.find_MAP()
    step = pm.Metropolis(start=start)
    trace = pm.sample(20000, step, start=start, progressbar=True)
 [-----------------100%-----------------] 20000 of 20000 complete in 5.3 sec
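The Metropolis chain starts from the MAP estimate, but the early draws can still wander before settling. As a hedged refinement (the burn-in length of 10,000 draws is an assumption, not something the original run used), the posterior predictive samples could be pulled with the first part of the chain discarded:

# Drop the first 10,000 draws as burn-in (assumed length) before plotting
y_pred_burned = trace.get_values('y_pred', burn=10000)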
In [77]:
_ = pm.traceplot(trace, vars=['alpha', 'mu'])
In [78]:
x_lim = 60
y_pred = trace.get_values('y_pred')

fig = plt.figure(figsize=(14,6))
fig.add_subplot(211)

_ = plt.hist(y_pred, range=[0, x_lim], bins=x_lim, histtype='stepfilled', color=colors[1])   
_ = plt.xlim(1, x_lim)
_ = plt.ylabel('Frequency')
_ = plt.title('Posterior predictive distribution')

fig.add_subplot(212)

_ = plt.hist(data.values, range=[0, x_lim], bins=x_lim, histtype='stepfilled')
_ = plt.xlabel('Response time in seconds')
_ = plt.ylabel('Frequency')
_ = plt.title('Distribution of observed data')

plt.tight_layout()
In [79]:
people = np.array(['Amy','Peter','Paul','Tom']*25)

# Convert the categorical participant labels to integer indices
le = preprocessing.LabelEncoder()
people_idx = le.fit_transform(people)
n_participants = len(np.unique(people))
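As a quick illustrative check (not part of the original run), LabelEncoder assigns indices in alphabetical order of the names, so the mapping can be inspected directly:

print(le.classes_)     # ['Amy' 'Paul' 'Peter' 'Tom']
print(people_idx[:8])  # [0 2 1 3 0 2 1 3]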
In [80]:
with pm.Model() as model:
    # One (mu, alpha) pair per participant
    alpha = pm.Gamma('alpha', alpha=.1, beta=.1, shape=n_participants)
    mu = pm.Gamma('mu', alpha=.1, beta=.1, shape=n_participants)
    
    # Index the participant-level parameters so each observation uses its own pair
    y_est = pm.NegativeBinomial('y_est', 
                                mu=mu[people_idx], 
                                alpha=alpha[people_idx], 
                                observed=data.values)
    
    start = pm.find_MAP()
    step = pm.Metropolis(start=start)
    trace = pm.sample(2000, step, start=start, progressbar=True)
 [-----------------100%-----------------] 2000 of 2000 complete in 5.6 sec
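Since each participant now has their own mu and alpha, a quick way to compare them (an illustrative snippet, not in the original notebook) is to average the posterior draws per participant:

# Posterior mean of mu for each participant, in the order given by le.classes_
print(dict(zip(le.classes_, trace['mu'].mean(axis=0))))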
In [81]:
with pm.Model() as model:
    alpha = pm.Gamma('alpha', alpha=.1, beta=.1, shape=n_participants)
    mu = pm.Gamma('mu', alpha=.1, beta=.1, shape=n_participants)
    
    y_est = pm.NegativeBinomial('y_est', 
                                mu=mu[people_idx], 
                                alpha=alpha[people_idx], 
                                observed=data.values)
    
    # Attempt to add an unobserved posterior predictive node using the
    # vector-valued, participant-indexed parameters
    y_pred = pm.NegativeBinomial('y_pred', 
                                 mu=mu[people_idx], 
                                 alpha=alpha[people_idx])
    
    start = pm.find_MAP()
    step = pm.Metropolis(start=start)
    trace = pm.sample(2000, step, start=start, progressbar=True)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-81-d049367a98c4> in <module>()
     10     y_pred = pm.NegativeBinomial('y_pred', 
     11                                  mu=mu[people_idx],
---> 12                                  alpha=alpha[people_idx])
     13 
     14     start = pm.find_MAP()

/Library/Python/2.7/site-packages/pymc3/distributions/distribution.pyc in __new__(cls, name, *args, **kwargs)
     17             data = kwargs.pop('observed', None)
     18             dist = cls.dist(*args, **kwargs)
---> 19             return model.Var(name, dist, data)
     20         elif name is None:
     21             return object.__new__(cls) #for pickle

/Library/Python/2.7/site-packages/pymc3/model.pyc in Var(self, name, dist, data)
    148         if data is None:
    149             if getattr(dist, "transform", None) is None:
--> 150                 var = FreeRV(name=name, distribution=dist, model=self)
    151                 self.free_RVs.append(var)
    152             else:

/Library/Python/2.7/site-packages/pymc3/model.pyc in __init__(self, type, owner, index, name, distribution, model)
    360             self.tag.test_value = np.ones(
    361                 distribution.shape, distribution.dtype) * distribution.default()
--> 362             self.logp_elemwiset = distribution.logp(self)
    363             self.model = model
    364 

/Library/Python/2.7/site-packages/pymc3/distributions/discrete.pyc in logp(self, value)
    194 
    195         # Return Poisson when alpha gets very large
--> 196         pois = bound(logpow(mu, value) - factln(value) - mu,
    197                      mu > 0,
    198                      value >= 0)

/Library/Python/2.7/site-packages/pymc3/distributions/dist_math.pyc in logpow(x, m)
     54     Calculates log(x**m) since m*log(x) will fail when m, x = 0.
     55     """
---> 56     return switch(eq(x, 0) & eq(m, 0), 0, m * log(x))
     57 
     58 

/Library/Python/2.7/site-packages/theano/gof/op.pyc in __call__(self, *inputs, **kwargs)
    515             for i, ins in enumerate(node.inputs):
    516                 try:
--> 517                     storage_map[ins] = [self._get_test_value(ins)]
    518                     compute_map[ins] = [True]
    519                 except AttributeError:

/Library/Python/2.7/site-packages/theano/gof/op.pyc in _get_test_value(cls, v)
    452             # ensure that the test value is correct
    453             try:
--> 454                 ret = v.type.filter(v.tag.test_value)
    455             except Exception, e:
    456                 # Better error message.

/Library/Python/2.7/site-packages/theano/tensor/type.pyc in filter(self, data, strict, allow_downcast)
    167             raise TypeError("Wrong number of dimensions: expected %s,"
    168                             " got %s with shape %s." % (self.ndim, data.ndim,
--> 169                                                         data.shape))
    170         if not data.flags.aligned:
    171             try:

TypeError: For compute_test_value, one input test value does not have the requested type.

The error when converting the test value to that variable type:
Wrong number of dimensions: expected 0, got 1 with shape (100,).
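The failure comes from the unobserved y_pred node: mu[people_idx] and alpha[people_idx] are length-100 vectors, so PyMC3 needs an explicit shape for the new free variable in order for its test value to have matching dimensions. A minimal sketch of the likely fix (the shape argument below is an assumption; the original notebook stops at the error):

y_pred = pm.NegativeBinomial('y_pred', 
                             mu=mu[people_idx], 
                             alpha=alpha[people_idx], 
                             shape=len(people_idx))  # one predictive draw per observation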