# load numpy and pandas for data manipulation
import numpy as np
import pandas as pd
# load statsmodels as alias ``sm``
import statsmodels.api as sm
# Load the Longley macroeconomic dataset (16 annual US observations, 1947-1962)
# into a DataFrame. Fetch over HTTPS: github.io redirects plain HTTP anyway,
# and we should not pull analysis input over cleartext.
# index_col=0 uses the first CSV column (year) as the row labels.
df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/datasets/longley.csv', index_col=0)
# Display the first five rows (notebook cell: bare expression renders the frame)
df.head()
GNP.deflator | GNP | Unemployed | Armed.Forces | Population | Year | Employed | |
---|---|---|---|---|---|---|---|
1947 | 83.0 | 234.289 | 235.6 | 159.0 | 107.608 | 1947 | 60.323 |
1948 | 88.5 | 259.426 | 232.5 | 145.6 | 108.632 | 1948 | 61.122 |
1949 | 88.2 | 258.054 | 368.2 | 161.6 | 109.773 | 1949 | 60.171 |
1950 | 89.5 | 284.599 | 335.1 | 165.0 | 110.929 | 1950 | 61.187 |
1951 | 96.2 | 328.975 | 209.9 | 309.9 | 112.075 | 1951 | 63.221 |
import statsmodels.formula.api as smf
# Specify the regression with an R-style formula: the left-hand side is the
# response (Employed), the right-hand side lists the predictors.
model = smf.ols(formula='Employed ~ GNP + Population + Year', data=df)
# Estimate the coefficients by ordinary least squares.
est = model.fit()
# Notebook cell: bare expression renders the full regression summary table.
est.summary()
Dep. Variable: | Employed | R-squared: | 0.979 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.974 |
Method: | Least Squares | F-statistic: | 190.1 |
Date: | Thu, 22 Jan 2015 | Prob (F-statistic): | 2.22e-10 |
Time: | 11:44:49 | Log-Likelihood: | -11.227 |
No. Observations: | 16 | AIC: | 30.45 |
Df Residuals: | 12 | BIC: | 33.55 |
Df Model: | 3 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 416.9465 | 740.264 | 0.563 | 0.584 | -1195.950 2029.843 |
GNP | 0.0679 | 0.015 | 4.436 | 0.001 | 0.035 0.101 |
Population | -0.3597 | 0.193 | -1.860 | 0.088 | -0.781 0.062 |
Year | -0.1718 | 0.388 | -0.443 | 0.666 | -1.016 0.673 |
Omnibus: | 1.348 | Durbin-Watson: | 1.219 |
---|---|---|---|
Prob(Omnibus): | 0.510 | Jarque-Bera (JB): | 0.640 |
Skew: | 0.489 | Prob(JB): | 0.726 |
Kurtosis: | 2.934 | Cond. No. | 1.05e+07 |