#!/usr/bin/env python
# coding: utf-8

# # 第3部 モデリング入門

# ## 3.1 確率論的プログラマの仕事
# 
# ![alt text](fig/infer2.png "infer2")
# 
# 結局，確率論的プログラミングにおいて，プログラマがやることは，
# 
# - データを用意する
# - パラメータの種類を決め，確率分布を決める
# - パラメータ込みで事前分布を数式で表す
# - パラメータ込みで尤度を数式で表す
# - MCMCサンプリング法を選ぶ
# 
# あとは計算機がMCMCサンプリングをして，パラメータの推定値を返す．

# 結局，事前分布や尤度の確率分布を上手にモデリングするには，確率分布とその確率分布の関係をよく知っていなければならない．
# 
# ![alt text](fig/distrib.png "distrib") [distrib.png](fig/distrib.png)
# 
# "Univariate Distribution Relationships" [2008amstat.pdf](doc/2008amstat.pdf)

# ## 3.2 コイン投げ - pymc3

# In[1]:


# coding: utf-8
from __future__ import division
import os
import sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('precision', '4')
#plt.style.use('ggplot')
import seaborn as sns
sns.set_style('white')
sns.set_context('paper')

np.random.seed(1234)
import pymc3 as pm
import scipy.stats as stats


# In[5]:


# データを用意する
n = 100
h = 61

# パラメータをきめる
alpha = 2
beta = 2

niter = 1000
with pm.Model() as model: # context management
    # 事前分布をモデリング
    p = pm.Beta('p', alpha=alpha, beta=beta)

    # 尤度をモデリング
    y = pm.Binomial('y', n=n, p=p, observed=h)

    #-- パラメータ推定 --
    
    # MAP推定を使って初期値をきめる．MAP推定とMCMCは相性がいい
    start = pm.find_MAP() 
    
    # MCMCの遷移ステップを選ぶ
    step = pm.NUTS(scaling=start) 
    
    trace = pm.sample(niter, step, start, random_seed=123, progressbar=True)
    
    pm.traceplot(trace)


# In[6]:


plt.hist(trace['p'], 15, histtype='step', normed=True, label='post');
x = np.linspace(0, 1, 100)
plt.plot(x, stats.beta.pdf(x, alpha, beta), label='prior');
plt.legend(loc='best');


# ## 3.3 コイン投げ - pystan

# In[12]:


import pystan

# pystanではstanのコードは日本語を許しません．
# それはstanに渡す直前で強制的にasciiに変換しているからです．

coin_code = """
// The order of following declarations matters.
//
// data declaration here
data {
    int<lower=0> n; // number of tosses
    int<lower=0> y; // number of heads
}

// in case you want to tweak data...
transformed data {}

// parameters declaration here
parameters {
    real<lower=0, upper=1> p;
}

// in case you want to tweak parameters...
transformed parameters {}

// your model here
model {
    p ~ beta(2, 2);
    y ~ binomial(n, p);
}

// tweaking samples from posterior
generated quantities {}
"""

# データを用意
coin_dat = {
             'n': 100,
             'y': 61,
            }

# パラメータ推定．デフォルトでNUTSが使われる
fit = pystan.stan(model_code=coin_code, data=coin_dat, iter=1000, chains=1)
#fit = pystan.stan(file='coin_code.stan', data=coin_dat, iter=1000, chains=1)

print(fit)


# In[13]:


fit.plot('p');
plt.tight_layout()


# In[7]:


coin_dict = fit.extract()
coin_dict.keys()


# ## 3.4 モデリングの素振り練習
# 
# - 正規分布 [normal_pymc](normal_pymc.ipynb) [normal_pystan](normal_pystan.ipynb)
# - 線形回帰 [linreg_pymc](linreg_pymc.ipynb) [linreg_pystan](linreg_pystan.ipynb)
# - ロジステック回帰 [gender_pymc](gender_pymc.ipynb) [gender_pystan](gender_pystan.ipynb)
# - 階層ベイズ [gelmanradon_pymc](gelmanradon_pymc.ipynb)