# -*- coding: utf-8 -*-
"""
Created on Sat Dec 20 12:01:13 2014

Author: Josef Perktold
License for Code: BSD-3

"""

import numpy as np
from statsmodels.regression.linear_model import OLS, WLS

from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.regression._prediction import get_prediction
from statsmodels.genmod._prediction import params_transform_univariate

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (10.0, 8.0)

# from example wls.py
np.random.seed(123456)
nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5)**2))
from statsmodels.tools.tools import add_constant
X = add_constant(X)
beta = [5., 0.5, -0.01]
sig = 0.5
w = np.ones(nsample)
w[nsample * 6/10:] = 3
y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e
X = X[:,[0,1]]

mod_wls = WLS(y, X, weights=1./w)
res_wls = mod_wls.fit()

pred_res = get_prediction(res_wls)
pf = pred_res.summary_frame()
pf.head()

# ### WLS knowing the true variance ratio of heteroscedasticity

mod_wls = WLS(y, X, weights=1./w)
res_wls = mod_wls.fit()


prstd, iv_l, iv_u = wls_prediction_std(res_wls)
pred_res = get_prediction(res_wls)
ci = pred_res.conf_int(obs=True)

from numpy.testing import assert_allclose
assert_allclose(pred_res.se_obs, prstd, rtol=1e-13)
assert_allclose(ci, np.column_stack((iv_l, iv_u)), rtol=1e-13)

print pred_res.summary_frame().head()

pred_res2 = res_wls.get_prediction()
ci2 = pred_res2.conf_int(obs=True)

from numpy.testing import assert_allclose
assert_allclose(pred_res2.se_obs, prstd, rtol=1e-13)
assert_allclose(ci2, np.column_stack((iv_l, iv_u)), rtol=1e-13)

print pred_res2.summary_frame().head()

pf = pred_res2.summary_frame()
c = pf.columns
dir(c)
cols = pf.columns.tolist()
cols.remove('mean_se')
ax = pf[cols].plot()
ax.plot(y, 'o')
#ax.fill_between(np.asarray(pf.index), pf['obs_ci_upper'].values, pf['obs_ci_lower'].values, facecolor='blue', alpha=0.1)
#ax.fill_between(np.asarray(pf.index), pf['mean_ci_upper'], pf['mean_ci_lower'].values, facecolor='red', alpha=0.1)
ax.fill_between(np.asarray(pf.index), pf['obs_ci_upper'].values, pf['obs_ci_lower'].values, alpha=0.1)
ax.fill_between(np.asarray(pf.index), pf['mean_ci_upper'], pf['mean_ci_lower'].values, alpha=0.1)

# there should be a pretty plotting method in `get_prediction` results

pf01 = pred_res2.summary_frame(alpha=0.1)
print(pred_res2.conf_int(alpha=0.1)[:5])
print(pred_res2.conf_int(obs=True, alpha=0.1)[:5])
pf01.head()


res_wls_n = mod_wls.fit(use_t=False)
pred_wls_n = res_wls_n.get_prediction()
print(pred_wls_n.summary_frame().head())

mod_ols = OLS(y, X)
res_ols = mod_ols.fit(cov_type='HC3')
pf = res_ols.get_prediction().summary_frame()
ax = pf[cols].plot()  # cols defined before
ax.plot(y, 'o')
print(pf.head())

res_ols = mod_ols.fit()
pf = res_ols.get_prediction().summary_frame()
ax = pf[cols].plot()  # cols defined before
ax.plot(y, 'o')
print(pf.head())

from statsmodels.genmod.generalized_linear_model import GLM

w_sqrt = np.sqrt(w)
mod_glm = GLM(y/w_sqrt, X/w_sqrt[:,None])
res_glm = mod_glm.fit()
pred_glm = res_glm.get_prediction()
print(pred_glm.summary_frame().head())

res_glm_t = mod_glm.fit(use_t=True)
pred_glm_t = res_glm_t.get_prediction()
print(pred_glm_t.summary_frame().head())

rates = params_transform_univariate(res_glm.params, res_glm.cov_params())
print('\nRates exp(params)')
print(rates.summary_frame())

rates2 = np.column_stack((np.exp(res_glm.params),
                          res_glm.bse * np.exp(res_glm.params),
                          np.exp(res_glm.conf_int())))
assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

from statsmodels.genmod.families import links

# with identity transform
pt = params_transform_univariate(res_glm.params, res_glm.cov_params(), link=links.identity())
print(pt.tvalues)

assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
ptt = pt.t_test()
assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)