# load numpy and pandas for data manipulation
import numpy as np
import pandas as pd
# load statsmodels as alias ``sm``
import statsmodels.api as sm
# Load the Longley macroeconomic dataset (16 annual US observations, 1947-1962)
# into a DataFrame. Fetch over HTTPS: github.io redirects plain HTTP anyway,
# and we should not pull analysis input over cleartext.
# index_col=0 uses the first CSV column (year) as the row labels.
df = pd.read_csv('https://vincentarelbundock.github.io/Rdatasets/csv/datasets/longley.csv', index_col=0)
# Display the first five rows (notebook cell: bare expression renders the frame)
df.head()
GNP.deflator | GNP | Unemployed | Armed.Forces | Population | Year | Employed | |
---|---|---|---|---|---|---|---|
1947 | 83.0 | 234.289 | 235.6 | 159.0 | 107.608 | 1947 | 60.323 |
1948 | 88.5 | 259.426 | 232.5 | 145.6 | 108.632 | 1948 | 61.122 |
1949 | 88.2 | 258.054 | 368.2 | 161.6 | 109.773 | 1949 | 60.171 |
1950 | 89.5 | 284.599 | 335.1 | 165.0 | 110.929 | 1950 | 61.187 |
1951 | 96.2 | 328.975 | 209.9 | 309.9 | 112.075 | 1951 | 63.221 |
import statsmodels.formula.api as smf
# Specify the regression with an R-style formula: the left-hand side is the
# response (Employed), the right-hand side lists the predictors.
model = smf.ols(formula='Employed ~ GNP + Population + Year', data=df)
# Estimate the coefficients by ordinary least squares.
est = model.fit()
# Notebook cell: bare expression renders the full regression summary table.
est.summary()
Dep. Variable: | Employed | R-squared: | 0.979 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.974 |
Method: | Least Squares | F-statistic: | 190.1 |
Date: | Thu, 22 Jan 2015 | Prob (F-statistic): | 2.22e-10 |
Time: | 11:44:49 | Log-Likelihood: | -11.227 |
No. Observations: | 16 | AIC: | 30.45 |
Df Residuals: | 12 | BIC: | 33.55 |
Df Model: | 3 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
Intercept | 416.9465 | 740.264 | 0.563 | 0.584 | -1195.950 2029.843 |
GNP | 0.0679 | 0.015 | 4.436 | 0.001 | 0.035 0.101 |
Population | -0.3597 | 0.193 | -1.860 | 0.088 | -0.781 0.062 |
Year | -0.1718 | 0.388 | -0.443 | 0.666 | -1.016 0.673 |
Omnibus: | 1.348 | Durbin-Watson: | 1.219 |
---|---|---|---|
Prob(Omnibus): | 0.510 | Jarque-Bera (JB): | 0.640 |
Skew: | 0.489 | Prob(JB): | 0.726 |
Kurtosis: | 2.934 | Cond. No. | 1.05e+07 |