import numpy as np, pymc as pm, pandas as pd 
from scipy import stats

true_alpha = -0.1
true_beta = 0.8

n_obs = 100
x = np.repeat(np.linspace(start=-3, stop=3, num=9), n_obs)

Normal = stats.norm
p = Normal.cdf(true_alpha + true_beta*x)

Bernoulli = stats.bernoulli
y = Bernoulli.rvs(p)

with pm.Model() as model:
    
    # priors
    alpha = pm.Normal('alpha', mu=0, tau=0.001)
    beta = pm.Normal('beta', mu=0, tau=0.001)
    
    # linear predictor
    theta_p = (alpha + beta * x)
        
    # logit transform: this seems to work ok
#     def invlogit(x):
#         import theano.tensor as t
#         return t.exp(x) / (1 + t.exp(x))
#     theta = invlogit(theta_p)
    
    
    # Probit transform: this doesn't work
    def phi(x):
        import theano.tensor as t
        return 0.5 * (1 + t.erf(x / t.sqr(2)))
    theta = phi(theta_p)
    
    
    # likelihood
    y = pm.Bernoulli('y', p=theta, observed=y)

with model:
    # Inference
    start = pm.find_MAP() # Find starting value by optimization
    
    print("MAP found:")
    print("alpha:", start['alpha'])
    print("beta:", start['beta'])

    print("Compare with true values:")
    print("true_alpha", true_alpha)
    print("true_beta", true_beta)

with model:
        step = pm.NUTS()
        trace = pm.sample(2000,
                           step, 
                           start=start, 
                           progressbar=True) # draw posterior samples

pm.summary(trace)

%load_ext rmagic
from __future__ import division

## push data to R
%Rpush x y

%%R
library(R2jags)
model <- "
model {
    # likelihood
    for (i in 1:length(y)) {
        probit(theta[i]) <- a + b * x[i]
        y[i] ~ dbern(theta[i])
    }
    # priors
    a ~ dnorm(0, 1e-4)
    b ~ dnorm(0, 1e-4)
}"

data <- list(x=x, y=y)

parameters <- c("a", "b")

samples = jags(data = data,
               # inits = inits,
               parameters.to.save = parameters,
               model.file = textConnection(model),
               n.chains = 1,
               n.iter = 1000,
               n.burnin = 200,
               n.thin = 1,
               DIC = T)

%%R
samples