#!/usr/bin/env python
# coding: utf-8

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

import pandas as pd
import seaborn as sns
import statsmodels.api as sm
pd.options.display.max_rows = 10


# In[2]:


df = (sns.load_dataset('titanic')
         .dropna(subset=['survived', 'age']))
df.head()


# In[3]:


mod = sm.Logit.from_formula('survived ~ age', df)
res = mod.fit()
me = res.get_margeff()


# In[4]:


res.summary()


# In[5]:


me.summary()


# Using Greene's notation here (~page 736). We've got a few things
# 
# - Exogenous: $X$, a single row is $x.T$
# - estimated coef $\hat{\beta}$
# - predicted prob $\hat{F} = \Lambda(\bf(x)^T\hat{\bf{\beta}})$
# where $\Lambda$ is the logistic CDF
# - Variance of $\hat{\beta} = V$, from statsmodels
# 
# We want a prediction interval for the predicted probability around the CEF. We have
# 
# $Asy. Var[\hat{F}] = [\partial \hat{F} / \partial \hat{\beta}]\bf{V} [\partial \hat{F} / \partial \hat{\beta}]$
# 
# And the vector of derivatives is
# 
# $[\partial \hat{F} / \partial \hat{\beta}] = [d\hat{F}/dz][\partial z / \partial \hat{\beta}] = \hat{f} x$
# 
# where $\hat{f} = \hat{\Lambda}(1 - \hat{\Lambda})$ (the logistic pdf I beleive).
# 
# And so
# 
# $Asy.Var[\hat{F}] = \hat{f}^2 x^T\bf{V}x$
# 
# Note that the $x$ here is a single *row* from the matrix. But vectors are always column vectors so $x$ is $Kx1$, where $K$ is the number of predictors (2 for us).

# In[6]:


from scipy import stats

Λ = lambda x: stats.logistic().cdf(x)
λ = lambda x: stats.logistic().pdf(x)

β_ = res.params.values.reshape(-1, 1)
V_ = res.cov_params().values


# In[7]:


# this is the equation from above for Var(F)
# only works for a single observation though
def var_π(x, β, V_):
    #      λ(z)**s * x.T @ V_ @ x
    prob = λ(x.T.dot(β))**2 * x.T.dot(V_).dot(x)  # maybe this is it?
    return prob


# In[8]:


# for multiple observations, but not quite vectorized fully

def var_πs(xx, β, V_):
    α = λ(xx.dot(β))**2
    out = np.empty((500, 1))
    for i, x in enumerate(xx):
        out[i] = x.T.dot(V_).dot(x)
    return α * out


# In[9]:


# Making some fake data.
xx = sm.add_constant(np.linspace(df.age.min(), df.age.max(), 500).reshape(-1, 1))
πs = Λ(xx.dot(β_))
vv = np.sqrt(var_πs(xx, β_, V_))


# In[10]:


plt.plot(xx[:, 1], πs)
plt.fill_between(xx[:, 1], (πs - 1.96*vv).ravel(), (πs + 1.96*vv).ravel(),
                 alpha=.25, color='g')


# In[ ]: