from __future__ import division
import os
import sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
%precision 4
#plt.style.use('ggplot')
import seaborn as sns
sns.set_style('white')
sns.set_context('paper')
np.random.seed(1234)
import pystan
import scipy.stats as stats
import scipy.stats as stats
普通の伝統的な統計学では以下の線形回帰式を用いて回帰パラメータ$a$, $b$を推定する.
$$ y \sim ax + b $$確率論的プログラミングにおいては,,
$$y = ax + b + \epsilon$$という線型モデルを次の形式に解釈することでモデリングを行う.
$$ y \sim \mathcal{N}(ax + b, \sigma^2) $$推定するパラメータは$a$, $b$ と $\sigma$で,それぞれの事前分布は次のように決めるとする.
$$ a \sim \mathcal{N}(0, 100) \\ b \sim \mathcal{N}(0, 100) \\ \sigma \sim \mathcal{U}(0, 20) $$lin_reg_code = """
data {
int<lower=0> n;
real x[n];
real y[n];
}
transformed data {}
parameters {
real a;
real b;
real sigma;
}
transformed parameters {
real mu[n];
for (i in 1:n) {
mu[i] <- a*x[i] + b;
}
}
model {
sigma ~ uniform(0, 20);
y ~ normal(mu, sigma);
}
generated quantities {}
"""
n = 11
_a = 6
_b = 2
x = np.linspace(0, 1, n)
y = _a*x + _b + np.random.randn(n)
lin_reg_dat = {
'n': n,
'x': x,
'y': y
}
fit = pystan.stan(model_code=lin_reg_code, data=lin_reg_dat, iter=1000, chains=1)
print(fit)
Inference for Stan model: anon_model_4bdbb0aeaf5fafee91f7aa8b257093f2. 1 chains, each with iter=1000; warmup=500; thin=1; post-warmup draws per chain=500, total post-warmup draws=500. mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat a 5.74 0.15 1.3 3.03 4.9 5.82 6.59 8.25 80 1.0 b 2.06 0.09 0.78 0.57 1.54 2.03 2.52 3.77 78 1.0 sigma 1.38 0.04 0.4 0.81 1.09 1.3 1.58 2.33 89 1.0 mu[0] 2.06 0.09 0.78 0.57 1.54 2.03 2.52 3.77 78 1.0 mu[1] 2.63 0.08 0.68 1.34 2.19 2.6 3.02 4.08 80 1.0 mu[2] 3.2 0.06 0.58 2.09 2.83 3.17 3.53 4.44 85 1.0 mu[3] 3.78 0.05 0.5 2.73 3.47 3.76 4.08 4.82 95 1.0 mu[4] 4.35 0.04 0.44 3.49 4.07 4.35 4.63 5.22 115 1.0 mu[5] 4.93 0.03 0.42 4.1 4.64 4.92 5.2 5.78 150 1.0 mu[6] 5.5 0.03 0.43 4.7 5.22 5.51 5.78 6.41 163 1.0 mu[7] 6.07 0.04 0.49 5.15 5.75 6.08 6.39 7.09 156 1.0 mu[8] 6.65 0.05 0.56 5.63 6.28 6.63 7.02 7.8 143 1.0 mu[9] 7.22 0.06 0.66 5.94 6.81 7.21 7.66 8.54 131 1.0 mu[10] 7.8 0.07 0.76 6.34 7.29 7.76 8.28 9.32 116 1.0 lp__ -8.09 0.16 1.47 -11.87 -8.82 -7.75 -7.03 -6.32 88 1.01 Samples were drawn using NUTS(diag_e) at Tue Oct 6 10:44:55 2015. For each parameter, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence, Rhat=1).
fit.plot(['a', 'b']);
plt.tight_layout()