#!/usr/bin/env python
# coding: utf-8

# # 線形回帰モデル - pystan

# In[2]:


from __future__ import division
import os
import sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('precision', '4')
#plt.style.use('ggplot')
import seaborn as sns
sns.set_style('white')
sns.set_context('paper')

np.random.seed(1234)
import pystan
import scipy.stats as stats

import scipy.stats as stats


# ## 線形回帰モデルのパラメータ推定
# 
# 普通の伝統的な統計学では以下の線形回帰式を用いて回帰パラメータ$a$, $b$を推定する．
# 
# $$
# y \sim ax + b
# $$
# 
# 確率論的プログラミングにおいては，，
# 
# $$y = ax + b + \epsilon$$
# 
# という線型モデルを次の形式に解釈することでモデリングを行う．
# 
# $$
# y \sim \mathcal{N}(ax + b, \sigma^2)
# $$
# 
# 推定するパラメータは$a$, $b$ と $\sigma$で，それぞれの事前分布は次のように決めるとする．
# 
# $$
# a \sim \mathcal{N}(0, 100) \\
# b \sim \mathcal{N}(0, 100) \\
# \sigma \sim \mathcal{U}(0, 20)
# $$

# In[3]:


lin_reg_code = """
data {
    int<lower=0> n; 
    real x[n];
    real y[n]; 
}
transformed data {}
parameters {
    real a;
    real b;
    real sigma;
}
transformed parameters {
    real mu[n];
    for (i in 1:n) {
        mu[i] <- a*x[i] + b;
        }
}
model {
    sigma ~ uniform(0, 20);
    y ~ normal(mu, sigma);
}
generated quantities {}
"""

n = 11
_a = 6
_b = 2
x = np.linspace(0, 1, n)
y = _a*x + _b + np.random.randn(n)

lin_reg_dat = {
             'n': n,
             'x': x,
             'y': y
            }

fit = pystan.stan(model_code=lin_reg_code, data=lin_reg_dat, iter=1000, chains=1)


# In[4]:


print(fit)


# In[5]:


fit.plot(['a', 'b']);
plt.tight_layout()