See http://www.stata.com/manuals13/tsarima.pdf Examples 1-4
and http://www.stata.com/manuals13/tsarimapostestimation.pdf Example 1
%matplotlib inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from datetime import datetime
# Dataset
data = pd.read_stata('wpi1.dta')
data.index = data.t
# Fit the model
mod = sm.tsa.SARIMAX(data['wpi'], trend='c', order=(1,1,1))
res = mod.fit()
print res.summary()
Statespace Model Results ============================================================================== Dep. Variable: D.wpi No. Observations: 124 Model: SARIMAX(1, 1, 1) Log Likelihood -134.983 Date: Wed, 13 Aug 2014 AIC 277.965 Time: 01:01:03 BIC 289.246 Sample: 01-01-1960 HQIC 282.548 - 10-01-1990 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ intercept 0.1050 0.057 1.837 0.066 -0.007 0.217 ar.L1 0.8740 0.061 14.215 0.000 0.753 0.994 ma.L1 -0.4206 0.120 -3.508 0.000 -0.656 -0.186 sigma2 0.5226 0.067 7.840 0.000 0.392 0.653 ==============================================================================
# Dataset
data = pd.read_stata('wpi1.dta')
data.index = data.t
data['ln_wpi'] = np.log(data['wpi'])
data['D.ln_wpi'] = data['ln_wpi'].diff()
# Graph data
fig, axes = plt.subplots(1, 2, figsize=(15,4))
# Levels
axes[0].plot(data.index, data['wpi'])
axes[0].set(title='US Wholesale Price Index')
# Log difference
axes[1].plot(data.index, data['D.ln_wpi'])
axes[1].hlines(0, data.index[0], data.index[-1], 'r')
axes[1].set(title='US Wholesale Price Index - difference of logs');
# Graph data
fig, axes = plt.subplots(1, 2, figsize=(15,4))
fig = sm.graphics.tsa.plot_acf(data.ix[1:, 'D.ln_wpi'], lags=40, ax=axes[0])
fig = sm.graphics.tsa.plot_pacf(data.ix[1:, 'D.ln_wpi'], lags=40, ax=axes[1])
# Fit the model
mod = sm.tsa.SARIMAX(data['ln_wpi'], trend='c', order=(1,1,(1,0,0,1)))
res = mod.fit()
print res.summary()
Statespace Model Results ================================================================================= Dep. Variable: D.ln_wpi No. Observations: 124 Model: SARIMAX(1, 1, (1, 4)) Log Likelihood 386.151 Date: Wed, 13 Aug 2014 AIC -762.301 Time: 00:59:19 BIC -748.200 Sample: 01-01-1960 HQIC -756.573 - 10-01-1990 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ intercept 0.0025 0.001 2.063 0.039 0.000 0.005 ar.L1 0.7837 0.081 9.730 0.000 0.626 0.942 ma.L1 -0.4044 0.105 -3.855 0.000 -0.610 -0.199 ma.L4 0.3015 0.105 2.861 0.004 0.095 0.508 sigma2 0.0001 1.38e-05 7.856 0.000 8.16e-05 0.000 ==============================================================================
# Dataset
data = pd.read_stata('air2.dta')
data.index = pd.date_range(start=datetime(data.time[0], 1, 1), periods=len(data), freq='MS')
data['lnair'] = np.log(data['air'])
# Fit the model
mod = sm.tsa.SARIMAX(data['lnair'], order=(0,1,1), seasonal_order=(0,1,1,12))
res = mod.fit()
print res.summary()
Statespace Model Results ========================================================================================== Dep. Variable: D.DS12.lnair No. Observations: 144 Model: SARIMAX(0, 1, 1)x(0, 1, 1, 12) Log Likelihood 244.696 Date: Wed, 13 Aug 2014 AIC -483.393 Time: 01:35:13 BIC -474.484 Sample: 01-01-1949 HQIC -479.773 - 12-01-1960 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ ma.L1 -0.4017 0.090 -4.448 0.000 -0.579 -0.225 ma.S.L12 -0.5569 0.074 -7.551 0.000 -0.701 -0.412 sigma2 0.0013 0.000 8.070 0.000 0.001 0.002 ==============================================================================
# Dataset
data = pd.read_stata('friedman2.dta')
data.index = data.time
# Variables
endog = data.ix['1959':'1981', 'consump']
exog = sm.add_constant(data.ix['1959':'1981', 'm2'])
# Fit the model
mod = sm.tsa.SARIMAX(endog, exog, order=(1,0,1))
res = mod.fit()
print res.summary()
Statespace Model Results ============================================================================== Dep. Variable: consump No. Observations: 92 Model: SARIMAX(1, 0, 1) Log Likelihood -340.508 Date: Wed, 13 Aug 2014 AIC 691.015 Time: 00:47:41 BIC 703.624 Sample: 01-01-1959 HQIC 696.105 - 10-01-1981 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ const -36.0584 34.019 -1.060 0.289 -102.734 30.617 x1 1.1220 0.032 34.567 0.000 1.058 1.186 ar.L1 0.9348 0.040 23.369 0.000 0.856 1.013 ma.L1 0.3091 0.112 2.762 0.006 0.090 0.528 sigma2 93.2554 13.755 6.780 0.000 66.296 120.215 ==============================================================================
# Dataset
raw = pd.read_stata('friedman2.dta')
raw.index = raw.time
data = raw.ix[:'1981']
# Variables
endog = data.ix['1959':, 'consump']
exog = sm.add_constant(data.ix['1959':, 'm2'])
nobs = endog.shape[0]
# Fit the model
mod = sm.tsa.SARIMAX(endog.ix[:'1978-01-01'], exog=exog.ix[:'1978-01-01'], order=(1,0,1))
fit_res = mod.fit()
print fit_res.summary()
Statespace Model Results ============================================================================== Dep. Variable: consump No. Observations: 77 Model: SARIMAX(1, 0, 1) Log Likelihood -243.316 Date: Wed, 13 Aug 2014 AIC 496.633 Time: 01:18:26 BIC 508.352 Sample: 01-01-1959 HQIC 501.320 - 01-01-1978 ============================================================================== coef std err z P>|z| [95.0% Conf. Int.] ------------------------------------------------------------------------------ const 0.6745 14.273 0.047 0.962 -27.299 28.648 x1 1.0379 0.019 54.687 0.000 1.001 1.075 ar.L1 0.8775 0.061 14.294 0.000 0.757 0.998 ma.L1 0.2771 0.100 2.768 0.006 0.081 0.473 sigma2 31.6982 5.110 6.203 0.000 21.683 41.714 ==============================================================================
# Prediction
npredict = data.ix['1978-01-01':].shape[0]
mod = sm.tsa.SARIMAX(endog, exog=exog, order=(1,0,1))
mod.update(fit_res.params)
res = mod.filter()
# Graph
fig, ax = plt.subplots(figsize=(9,4))
npre = 4
ax.set(title='Personal consumption', xlabel='Date', ylabel='Billions of dollars')
ax.plot(data.index[-npredict-npre+1:], data.ix[-npredict-npre+1:, 'consump'], 'o', label='Observed')
# In-sample one-step-ahead predictions and 95% confidence intervals (forecast without data)
predict, cov, ci, idx = res.predict(alpha=0.05)
ax.plot(idx[-npredict-npre:], predict[0, -npredict-npre:], 'r--', label='One-step-ahead forecast');
ax.plot(idx[-npredict-npre:], ci[0, -npredict-npre:], 'r--', alpha=0.3)
# Dynamic predictions and 95% confidence intervals
predict_dy, cov_dy, ci_dy, idx_dy = res.predict(dynamic=nobs-npredict-1, alpha=0.01)
ax.plot(idx_dy[-npredict-npre:], predict_dy[0, -npredict-npre:], 'g', label='Dynamic forecast (1978)');
ax.plot(idx_dy[-npredict-npre:], ci_dy[0, -npredict-npre:], 'g:', alpha=0.3);
legend = ax.legend(loc='lower right')
legend.get_frame().set_facecolor('w')
# Prediction error
# Graph
fig, ax = plt.subplots(figsize=(9,4))
npre = 4
ax.set(title='Forecast error', xlabel='Date', ylabel='Forecast - Actual')
# In-sample one-step-ahead predictions and 95% confidence intervals (forecast without data)
predict_error = predict[0, -npredict-1:] - endog.iloc[-npredict-1:]
predict_ci = ci[0, -npredict-1:] - endog.iloc[-npredict-1:][:, None]
ax.plot(idx[-npredict-1:], predict_error, label='One-step-ahead forecast');
ax.plot(idx[-npredict-1:], predict_ci, 'b--', alpha=0.4)
# Dynamic predictions and 95% confidence intervals
predict_dy_error = predict_dy[0, -npredict-1:] - endog.iloc[-npredict-1:]
predict_dy_ci = ci_dy[0, -npredict-1:] - endog.iloc[-npredict-1:][:, None]
ax.plot(idx[-npredict-1:], predict_dy_error, 'r', label='Dynamic forecast (1978)');
ax.plot(idx[-npredict-1:], predict_dy_ci, 'r--', alpha=0.4)
legend = ax.legend(loc='lower left');
legend.get_frame().set_facecolor('w')