import pandas as pd
import numpy as np
# IPython R-magic cell: fetch the Ravens dataset in R and export it to CSV
# so the Python cells below can read it with pandas.
%load_ext rmagic
%%R
# Download the .rda file into ../data (method="curl" handles the https URL)
download.file("https://dl.dropbox.com/u/7710864/data/ravensData.rda",destfile="../data/ravensData.rda",method="curl")
load("../data/ravensData.rda")
# Flatten the R data frame to CSV for pandas; drop R's row names
write.csv(ravensData, "../data/ravensData.csv", row.names=FALSE)
# Read the CSV written by the R cell above; one row per game.
ravensData = pd.read_csv('../data/ravensData.csv')
# Show the first six games (the notebook echoes the last expression).
ravensData.head(6)
ravenWinNum | ravenWin | ravenScore | opponentScore | |
---|---|---|---|---|
0 | 1 | W | 24 | 9 |
1 | 1 | W | 38 | 35 |
2 | 1 | W | 28 | 13 |
3 | 1 | W | 34 | 31 |
4 | 1 | W | 44 | 13 |
5 | 0 | L | 23 | 24 |
from statsmodels.formula.api import ols
# Linear probability model: ordinary least squares of the binary win
# indicator on the Ravens' score. Shown for contrast with the logistic
# fit below — OLS fitted values are not constrained to [0, 1].
lmRavens = ols('ravenWinNum ~ ravenScore', ravensData).fit()
lmRavens.summary()
Dep. Variable: | ravenWinNum | R-squared: | 0.146 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.099 |
Method: | Least Squares | F-statistic: | 3.080 |
Date: | Wed, 20 Feb 2013 | Prob (F-statistic): | 0.0963 |
Time: | 19:46:45 | Log-Likelihood: | -11.193 |
No. Observations: | 20 | AIC: | 26.39 |
Df Residuals: | 18 | BIC: | 28.38 |
Df Model: | 1 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 0.2850 | 0.257 | 1.111 | 0.281 | -0.254 0.824 |
ravenScore | 0.0159 | 0.009 | 1.755 | 0.096 | -0.003 0.035 |
Omnibus: | 4.880 | Durbin-Watson: | 1.571 |
---|---|---|---|
Prob(Omnibus): | 0.087 | Jarque-Bera (JB): | 2.190 |
Skew: | -0.505 | Prob(JB): | 0.335 |
Kurtosis: | 1.732 | Cond. No. | 72.9 |
# Plot the linear model's fitted win "probabilities" against score.
# NOTE(review): bare plot/xlabel/ylabel assume a %pylab (or
# `from pylab import *`) namespace was loaded earlier — confirm.
plot(ravensData['ravenScore'], lmRavens.fittedvalues, 'ob')
xlabel('Raven Score')
ylabel('Prob Win');
from statsmodels.formula.api import glm
from statsmodels.api import families as f
# Logistic regression: binomial family with the default logit link.
# NOTE(review): this statsmodels version labels the coefficient tests
# "t" in the summary, where R reports z-values for a GLM — the point
# estimates agree either way.
logRegRavens = glm('ravenWinNum ~ ravenScore', ravensData, family=f.Binomial()).fit()
logRegRavens.summary()
Dep. Variable: | ravenWinNum | No. Observations: | 20 |
---|---|---|---|
Model: | GLM | Df Residuals: | 18 |
Model Family: | Binomial | Df Model: | 1 |
Link Function: | logit | Scale: | 1.0 |
Method: | IRLS | Log-Likelihood: | -10.447 |
Date: | Wed, 20 Feb 2013 | Deviance: | 20.895 |
Time: | 20:18:03 | Pearson chi2: | 17.2 |
No. Iterations: | 6 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | -1.6800 | 1.554 | -1.081 | 0.294 | -4.726 1.366 |
ravenScore | 0.1066 | 0.067 | 1.597 | 0.128 | -0.024 0.237 |
# Plot the logistic model's fitted win probabilities against score;
# unlike the OLS fit these lie in (0, 1).
# NOTE(review): bare plot/xlabel/ylabel assume a %pylab namespace was
# loaded earlier — confirm.
plot(ravensData['ravenScore'], logRegRavens.fittedvalues, 'ob')
xlabel('Score')
ylabel('Prob Ravens Win');
# Exponentiate the log-odds coefficients to get odds ratios.
np.exp(logRegRavens.params)
Intercept 0.186372 ravenScore 1.112469
# Odds-ratio confidence intervals (exponentiated coefficient CIs).
# NOTE(review): these differ slightly from R's confint() output —
# presumably because statsmodels uses a Wald interval while R's
# confint.glm profiles the likelihood; verify.
np.exp(logRegRavens.conf_int())
0 | 1 | |
---|---|---|
Intercept | 0.008862 | 3.91965 |
ravenScore | 0.976070 | 1.26793 |
# statsmodels offers no anova() for GLM fits here, so run the
# analysis-of-deviance chi-squared test in R instead.
%%R
load("../data/ravensData.rda")
# Refit the same logistic regression in R so anova() can be applied
logRegRavens <- glm(ravensData$ravenWinNum ~ ravensData$ravenScore, family="binomial")
# Sequential chi-squared test of the score term against the null model
print(anova(logRegRavens,test="Chisq"))
Analysis of Deviance Table

Model: binomial, link: logit

Response: ravensData$ravenWinNum

Terms added sequentially (first to last)

                       Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL                                      19     24.435
ravensData$ravenScore   1   3.5398        18     20.895  0.05991 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1