import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
%matplotlib inline
# Load the airline passenger counts, using the 'Month' column as a parsed
# date index.
airline = pd.read_csv('data/airline_passengers.csv', index_col='Month', parse_dates=True)
# The observations are month-start dated. Declare that frequency explicitly so
# statsmodels does not have to infer it (and emit a ValueWarning) when a model
# is later built on a series carrying this index.
airline.index.freq = 'MS'
airline.head()
| Month | Thousands of Passengers |
|---|---|
| 1949-01-01 | 112 |
| 1949-02-01 | 118 |
| 1949-03-01 | 132 |
| 1949-04-01 | 129 |
| 1949-05-01 | 121 |
Moving Average를 구하는 방법에 대해서는 앞서 rolling mean을 통해 알아본 바 있습니다.
# Simple moving averages of the passenger series over 6- and 12-row windows.
# Leading rows stay NaN until a full window of observations is available.
passengers = airline['Thousands of Passengers']
for window in (6, 12):
    airline[f'{window}M SMA'] = passengers.rolling(window=window).mean()
airline.head()
| Month | Thousands of Passengers | 6M SMA | 12M SMA |
|---|---|---|---|
| 1949-01-01 | 112 | NaN | NaN |
| 1949-02-01 | 118 | NaN | NaN |
| 1949-03-01 | 132 | NaN | NaN |
| 1949-04-01 | 129 | NaN | NaN |
| 1949-05-01 | 121 | NaN | NaN |
# Line plot of every column currently in the frame on a single set of axes.
airline.plot(kind='line', figsize=(12, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7f9479904128>
# Exponentially weighted moving averages with spans of 12 and 6 observations.
# NOTE: despite the 'WMA' column names, these are exponentially weighted means
# (Series.ewm), not linearly weighted ones.
passengers = airline['Thousands of Passengers']
for span in (12, 6):
    airline[f'WMA{span}'] = passengers.ewm(span=span).mean()
# Compare the raw series against both smoothed versions.
airline[['Thousands of Passengers', 'WMA6', 'WMA12']].plot(figsize=(12, 8))
<matplotlib.axes._subplots.AxesSubplot at 0x7f947930de48>
from statsmodels.tsa.api import SimpleExpSmoothing
# Hold out everything from 1960 onwards as the test set; everything up to and
# including 1959 is used for training.
train = airline.loc[:'1959']
test = airline.loc['1960':]
# Plot the training segment, then the test segment, to visualise the split.
train['Thousands of Passengers'].plot(figsize=(12, 8))
test['Thousands of Passengers'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f94715cd5c0>
# Fit simple exponential smoothing on the raw training values (passed as a
# plain array, so the model carries no date index).
ses_model = SimpleExpSmoothing(np.asarray(train['Thousands of Passengers']))
ses_result = ses_model.fit()
# Forecast one step per test row and store it next to a copy of the test data.
y_hat = test.assign(SES=ses_result.forecast(len(test)))
# Overlay training data, held-out data and the forecast.
plt.figure(figsize=(12, 8))
for series, label in (
    (train['Thousands of Passengers'], 'Train'),
    (test['Thousands of Passengers'], 'Test'),
    (y_hat['SES'], 'Simple Exp Smoothing'),
):
    plt.plot(series, label=label)
plt.legend()
<matplotlib.legend.Legend at 0x7f94710c4f98>
# Root-mean-squared error of the SES forecast against the held-out values.
mse = mean_squared_error(y_true=test['Thousands of Passengers'], y_pred=y_hat['SES'])
rmse = np.sqrt(mse)
rmse
102.97653454387881
from statsmodels.tsa.api import Holt
# Fit Holt exponential smoothing on the raw training values.
holt_model = Holt(np.asarray(train['Thousands of Passengers']))
holt_result = holt_model.fit()
# Forecast as many steps as there are test rows.
horizon = len(test)
y_hat['HOLT'] = holt_result.forecast(horizon)
# Overlay training data, held-out data and the forecast.
plt.figure(figsize=(12, 8))
for series, label in (
    (train['Thousands of Passengers'], 'Train'),
    (test['Thousands of Passengers'], 'Test'),
    (y_hat['HOLT'], 'Holts Exp Smoothing'),
):
    plt.plot(series, label=label)
plt.legend()
<matplotlib.legend.Legend at 0x7f947105c3c8>
# Root-mean-squared error of the Holt forecast against the held-out values.
mse = mean_squared_error(y_true=test['Thousands of Passengers'], y_pred=y_hat['HOLT'])
rmse = np.sqrt(mse)
rmse
92.6663528028185
from statsmodels.tsa.api import ExponentialSmoothing
# Holt-Winters configuration: additive trend, additive seasonality, and a
# seasonal cycle of 12 observations.
winter_model = ExponentialSmoothing(
    np.asarray(train['Thousands of Passengers']),
    trend='add',
    seasonal='add',
    seasonal_periods=12,
)
winter_result = winter_model.fit()
# Forecast as many steps as there are test rows.
horizon = len(test)
y_hat['WINTER'] = winter_result.forecast(horizon)
# Overlay training data, held-out data and the forecast.
plt.figure(figsize=(12, 8))
for series, label in (
    (train['Thousands of Passengers'], 'Train'),
    (test['Thousands of Passengers'], 'Test'),
    (y_hat['WINTER'], 'Holt-Winters Exp Smoothing'),
):
    plt.plot(series, label=label)
plt.legend()
<matplotlib.legend.Legend at 0x7f947100fe80>
# Root-mean-squared error of the Holt-Winters forecast against the held-out values.
mse = mean_squared_error(y_true=test['Thousands of Passengers'], y_pred=y_hat['WINTER'])
rmse = np.sqrt(mse)
rmse
15.57083049822115
import statsmodels.api as sm
# Seasonal ARIMA specification in ARIMA(p,d,q)(P,D,Q)m notation:
# non-seasonal order (2,1,1), seasonal order (0,1,0) with period m = 12.
sarimax_options = dict(
    order=(2, 1, 1),
    seasonal_order=(0, 1, 0, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
# Unlike the smoothing models above, the pandas Series is passed directly, so
# the model keeps the date index of the training data.
arima = sm.tsa.statespace.SARIMAX(train['Thousands of Passengers'], **sarimax_options)
/home/lyle/anaconda3/envs/tsa/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:171: ValueWarning: No frequency information was provided, so inferred frequency MS will be used. % freq, ValueWarning)
arima_result = arima.fit()
# Forecast exactly the period covered by the test set. The bounds come from the
# test index rather than hard-coded dates, so the forecast window follows the
# train/test split if that split is changed. dynamic=True makes predictions
# from `start` onwards build on earlier predictions rather than observed values.
y_hat['ARIMA'] = arima_result.predict(start=test.index[0], end=test.index[-1], dynamic=True)
# Overlay training data, held-out data and the forecast.
plt.figure(figsize=(12, 8))
plt.plot(train['Thousands of Passengers'], label='Train')
plt.plot(test['Thousands of Passengers'], label='Test')
plt.plot(y_hat['ARIMA'], label='Seasonal ARIMA')
plt.legend()
<matplotlib.legend.Legend at 0x7f94708f5c88>
# Root-mean-squared error of the seasonal ARIMA forecast against the held-out values.
mse = mean_squared_error(y_true=test['Thousands of Passengers'], y_pred=y_hat['ARIMA'])
rmse = np.sqrt(mse)
rmse
22.65096415638221