!date
The plan with this notebook is to work through the RStan Getting Started Guide to see what Python wrappers would be necessary for equivalent functionality.
This is an example in Section 5.5 of Gelman et al (2003), which studied coaching effects from eight schools. For simplicity, we call this example "eight schools."
Translating it to Python is a matter of converting single quotes to triple quotes, and a handful of other, similarly simple substitutions.
def stan(model_code, data, iter, chains):
return # TODO: plumbing
schools_code = """
data {
int<lower=0> J; // number of schools
real y[J]; // estimated treatment effects
real<lower=0> sigma[J]; // s.e. of effect estimates
}
parameters {
real mu;
real<lower=0> tau;
real eta[J];
}
transformed parameters {
real theta[J];
for (j in 1:J)
theta[j] <- mu + tau * eta[j];
}
model {
eta ~ normal(0, 1);
y ~ normal(theta, sigma);
}
"""
schools_dat = dict(J = 8,
y = (28, 8, -3, 7, -1, 1, 18, 12),
sigma = (15, 10, 16, 11, 9, 11, 10, 18))
fit = stan(model_code = schools_code, data = schools_dat,
iter = 1000, chains = 4)
Of course, there is some plumbing necessary to make that do anything...
tmp_dir = '/tmp/'
def save_temp(txt, fname):
fname = tmp_dir + fname # TODO: avoid collisions, plan for cleanup
with file(fname, 'w') as f:
f.write(txt)
return fname
def rdump(params):
# If a full implementation of this does not yet exist,
# I suggest implementing json on the C++ side of Stan
txt = ''
for key, val in params.items():
if type(val) == int:
txt += '%s <- %d\n' % (key, val)
elif type(val) == tuple:
txt += '%s <- c%s\n' % (key, val)
return txt
import pandas as pd, StringIO
def load_samples(sample_path):
comments = ''
samples = ''
with file(sample_path) as f:
for line in f:
if line[0] == '#':
comments += line
else:
samples += line
df = pd.read_csv(StringIO.StringIO(samples))
df.comments = comments
return df
def stan(model_code, data, iter, chains):
code_path = save_temp(model_code, 'model.stan')
data_path = save_temp(rdump(data), 'model.Rdata')
model_path = code_path.replace('.stan', '')
%cd ~/notebook/stan-src-1.1.1/
!time make $model_path
%cd $tmp_dir
!time $model_path --data=$data_path
sample_path = code_path.replace('model.stan', 'samples.csv')
return load_samples(sample_path)
fit = stan(model_code = schools_code, data = schools_dat,
iter = 1000, chains = 4)
print round_(fit.describe(95).T, 1)
print fit.comments
!date
Patches welcome!