Using Cochrane's meta analysis data.
Bjelakovic G, Gluud LL, Nikolova D, Whitfield K, Wetterslev J, Simonetti RG, Bjelakovic M, Gluud C. Vitamin D supplementation for prevention of mortality in adults. Cochrane Database of Systematic Reviews 2014, Issue 1. Art. No.: CD007470. DOI: 10.1002/14651858.CD007470.pub3.
from lxml import etree
from lxml import objectify
cr = etree.parse("CD007470StatsDataOnly.rm5")
#pulls out all the studies with low risk of bias
root = cr.getroot()
ad = root.getchildren()[7]
a = ad.getchildren()[0].getchildren()[1].getchildren()
studies_xml = a[5].getchildren()[1:]#+ a[6].getchildren()[1:]
def toNumber(v):
try:
return float(v)
except Exception:
return v
def asNumbers(d):
return { k : toNumber(v) for k,v in d.items()}
stds = [asNumbers(dict(s.attrib)) for s in studies_xml]
import pandas as pd
studies = pd.DataFrame(stds)
from pymc import *
def inv_logit(p):
return 1 /(1+ exp(-p))
with Model() as model:
nstudies = len(studies)
hm = Normal('hazard_mean', -2.5, 3) #mean hazard rate
sdh = Tpos('sd_hazard', 15, 1, .5**-2)
#hyperparameters for effect size distribution
em = T('effect_mean', 8, 0, .05**-2) #mean effect size
sd = Tpos('sd_effect', 15, .05, .05**-2)
h = Normal('hazard', hm, sd = sdh, shape = nstudies)
e = Normal('effect', em, sd = sd, shape = nstudies)
controls = Binomial('controls', p = inv_logit(h), n = studies.TOTAL_2.values, observed = studies.EVENTS_2.values)
treatment = Binomial('treatment', p = inv_logit(h+e), n = studies.TOTAL_1.values, observed = studies.EVENTS_1.values)
start = find_MAP(vars =[hm, em, h, e])
step = NUTS(scaling=start)
trace = sample(3000, step, progressbar=False)
traceplot(trace);
esamples = pd.DataFrame(trace[em])
print "probability mean effect is negative", (esamples < 0).mean()
print "mean effect estimate", esamples.mean()