#!/usr/bin/env python # coding: utf-8 # # Statsmodels # Statsmodels is a Python module that allows users to explore data, estimate statistical models, and perform statistical tests. An extensive list of descriptive statistics, statistical tests, plotting functions, and result statistics are available for different types of data and each estimator. # # Library documentation: http://statsmodels.sourceforge.net/ # ### Linear Regression Models # In[1]: # needed to display the graphs get_ipython().run_line_magic('matplotlib', 'inline') from pylab import * # In[2]: import numpy as np import pandas as pd import statsmodels.api as sm from statsmodels.sandbox.regression.predstd import wls_prediction_std np.random.seed(9876789) # In[3]: # create some artificial data nsample = 100 x = np.linspace(0, 10, 100) X = np.column_stack((x, x**2)) beta = np.array([1, 0.1, 10]) e = np.random.normal(size=nsample) # In[4]: # add column of 1s for intercept X = sm.add_constant(X) y = np.dot(X, beta) + e # In[5]: # fit model and print the summary model = sm.OLS(y, X) results = model.fit() print(results.summary()) # In[6]: # individual results parameters can be accessed print('Parameters: ', results.params) print('R2: ', results.rsquared) # In[7]: # example with non-linear relationship nsample = 50 sig = 0.5 x = np.linspace(0, 20, nsample) X = np.column_stack((x, np.sin(x), (x-5)**2, np.ones(nsample))) beta = [0.5, 0.5, -0.02, 5.] y_true = np.dot(X, beta) y = y_true + sig * np.random.normal(size=nsample) res = sm.OLS(y, X).fit() print(res.summary()) # In[8]: # look at some quantities of interest print('Parameters: ', res.params) print('Standard errors: ', res.bse) print('Predicted values: ', res.predict()) # In[9]: # plot the true relationship vs. the prediction prstd, iv_l, iv_u = wls_prediction_std(res) fig, ax = plt.subplots(figsize=(8,6)) ax.plot(x, y, 'o', label="data") ax.plot(x, y_true, 'b-', label="True") ax.plot(x, res.fittedvalues, 'r--.', label="OLS") ax.plot(x, iv_u, 'r--') ax.plot(x, iv_l, 'r--') ax.legend(loc='best') # ### Time-Series Analysis # In[10]: from statsmodels.tsa.arima_process import arma_generate_sample # In[11]: # generate some data np.random.seed(12345) arparams = np.array([.75, -.25]) maparams = np.array([.65, .35]) # In[12]: # set parameters arparams = np.r_[1, -arparams] maparam = np.r_[1, maparams] nobs = 250 y = arma_generate_sample(arparams, maparams, nobs) # In[13]: # add some dates information dates = sm.tsa.datetools.dates_from_range('1980m1', length=nobs) y = pd.TimeSeries(y, index=dates) arma_mod = sm.tsa.ARMA(y, order=(2,2)) arma_res = arma_mod.fit(trend='nc', disp=-1) # In[14]: print(arma_res.summary())