import numpy as np import statsmodels.api as sm import matplotlib import matplotlib.pyplot as plt from statsmodels.sandbox.regression.predstd import wls_prediction_std np.random.seed(9876789) nsample = 100 x = np.linspace(0, 10, 100) X = np.column_stack((x, x**2)) beta = np.array([1, 0.1, 10]) e = np.random.normal(size=nsample) X = sm.add_constant(X) y = np.dot(X, beta) + e X = sm.add_constant(X) y = np.dot(X, beta) + e model = sm.OLS(y, X) results = model.fit() print results.summary() print 'Parameters: ', results.params print 'R2: ', results.rsquared nsample = 50 sig = 0.5 x = np.linspace(0, 20, nsample) X = np.c_[x, np.sin(x), (x-5)**2, np.ones(nsample)] beta = [0.5, 0.5, -0.02, 5.] y_true = np.dot(X, beta) y = y_true + sig * np.random.normal(size=nsample) res = sm.OLS(y, X).fit() print res.summary() print 'Parameters: ', res.params print 'Standard errors: ', res.bse print 'Predicted values: ', res.predict() plt.figure() plt.plot(x, y, 'o', x, y_true, 'b-') prstd, iv_l, iv_u = wls_prediction_std(res) plt.plot(x, res.fittedvalues, 'r--.') plt.plot(x, iv_u, 'r--') plt.plot(x, iv_l, 'r--') plt.title('blue: true, red: OLS') nsample = 50 groups = np.zeros(nsample, int) groups[20:40] = 1 groups[40:] = 2 dummy = (groups[:,None] == np.unique(groups)).astype(float) x = np.linspace(0, 20, nsample) X = np.c_[x, dummy[:,1:], np.ones(nsample)] beta = [1., 3, -3, 10] y_true = np.dot(X, beta) e = np.random.normal(size=nsample) y = y_true + e print X[:5,:] print y[:5] print groups print dummy[:5,:] res2 = sm.OLS(y, X).fit() print res.summary() prstd, iv_l, iv_u = wls_prediction_std(res2) plt.figure() plt.plot(x, y, 'o', x, y_true, 'b-') plt.plot(x, res2.fittedvalues, 'r--.') plt.plot(x, iv_u, 'r--') plt.plot(x, iv_l, 'r--') plt.title('blue: true, red: OLS') R = [[0, 1, 0, 0], [0, 0, 1, 0]] print np.array(R) print res2.f_test(R) beta = [1., 0.3, -0.0, 10] y_true = np.dot(X, beta) y = y_true + np.random.normal(size=nsample) res3 = sm.OLS(y, X).fit() print res3.f_test(R) from statsmodels.datasets.longley import load y = load().endog X = load().exog X = sm.tools.add_constant(X) ols_model = sm.OLS(y, X) ols_results = ols_model.fit() print ols_results.summary() norm_x = np.ones_like(X) for i in range(int(ols_model.df_model)): norm_x[:,i] = X[:,i]/np.linalg.norm(X[:,i]) norm_xtx = np.dot(norm_x.T,norm_x) eigs = np.linalg.eigvals(norm_xtx) condition_number = np.sqrt(eigs.max() / eigs.min()) print condition_number ols_results2 = sm.OLS(y[:-1], X[:-1,:]).fit() print "Percentage change %4.2f%%\n"*7 % tuple([i for i in ols_results.params/ols_results2.params*100 - 100])