try:
import pycaret
except:
!pip install pycaret-ts-alpha
#### Import libraries ----
from pprint import pprint
from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment
/usr/local/lib/python3.7/dist-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details. defaults = yaml.load(f)
#### Get the data ---
# Load the classic monthly "airline passengers" series (1949-1960) that
# ships with pycaret; returned as a pandas Series with a PeriodIndex.
y = get_data("airline")
Period 1949-01 112.0 1949-02 118.0 1949-03 132.0 1949-04 129.0 1949-05 121.0 Freq: M, Name: Number of airline passengers, dtype: float64
#### Setup the experiment ----
# Configure the experiment:
#   fh=12              -> hold out the last 12 observations as the test set
#   seasonal_period=12 -> test for yearly seasonality in monthly data
#   session_id=42      -> fixed seed for reproducibility
exp = TimeSeriesExperiment()
exp.setup(data=y, fh=12, seasonal_period=12, session_id=42)
Description | Value | |
---|---|---|
0 | session_id | 42 |
1 | Original Data | (144, 1) |
2 | Missing Values | False |
3 | Transformed Train Set | (132,) |
4 | Transformed Test Set | (12,) |
5 | Fold Generator | ExpandingWindowSplitter |
6 | Fold Number | 3 |
7 | Enforce Prediction Interval | False |
8 | Seasonal Period Tested | 12 |
9 | Seasonality Detected | True |
10 | Target Strictly Positive | True |
11 | Target White Noise | No |
12 | Recommended d | 1 |
13 | Recommended Seasonal D | 1 |
14 | CPU Jobs | -1 |
15 | Use GPU | False |
16 | Log Experiment | False |
17 | Experiment Name | ts-default-name |
18 | USI | f18d |
19 | Imputation Type | simple |
<pycaret.internal.pycaret_experiment.time_series_experiment.TimeSeriesExperiment at 0x7f39adfe0f90>
#### Create different types of models ----
# ARIMA model from `pmdarima`
# Each create_model() call cross-validates the estimator with the fold
# generator configured in setup() and returns the fitted sktime forecaster.
arima_model = exp.create_model("arima")
# ETS and Exponential Smoothing models from `statsmodels`
ets_model = exp.create_model("ets")
exp_smooth_model = exp.create_model("exp_smooth")
# Reduced Regression model using `sklearn` Linear Regression
lr_model = exp.create_model("lr_cds_dt")
cutoff | MAE | RMSE | MAPE | SMAPE | R2 | |
---|---|---|---|---|---|---|
0 | 1956-12 | 38.6824 | 45.0820 | 0.0998 | 0.1051 | 0.3384 |
1 | 1957-12 | 28.0608 | 34.6867 | 0.0751 | 0.0734 | 0.6848 |
2 | 1958-12 | 32.1693 | 38.2681 | 0.0737 | 0.0753 | 0.6724 |
Mean | NaN | 32.9708 | 39.3456 | 0.0828 | 0.0846 | 0.5652 |
SD | NaN | 4.3731 | 4.3117 | 0.0120 | 0.0145 | 0.1604 |
#### Check model types ----
# All returned objects are sktime-compatible forecasters (adapters or pipelines).
print(type(arima_model))       # <-- sktime `pmdarima` adapter
print(type(ets_model))         # <-- sktime `statsmodels` adapter
print(type(exp_smooth_model))  # <-- sktime `statsmodels` adapter
print(type(lr_model))          # <-- Your custom sktime compatible model pipeline
<class 'sktime.forecasting.arima.ARIMA'> <class 'sktime.forecasting.ets.AutoETS'> <class 'sktime.forecasting.exp_smoothing.ExponentialSmoothing'> <class 'pycaret.containers.models.time_series.BaseCdsDtForecaster'>
#### Access internal models using `_forecaster` ----
# `_forecaster` exposes the wrapped library object underneath each sktime
# adapter (pmdarima / statsmodels model, or a TransformedTargetForecaster).
print(type(arima_model._forecaster))
print(type(ets_model._forecaster))
print(type(exp_smooth_model._forecaster))
print(type(lr_model._forecaster))
<class 'pmdarima.arima.arima.ARIMA'> <class 'statsmodels.tsa.exponential_smoothing.ets.ETSModel'> <class 'statsmodels.tsa.holtwinters.model.ExponentialSmoothing'> <class 'sktime.forecasting.compose._pipeline.TransformedTargetForecaster'>
#### What hyperparameters were used to train the model? ----
# The repr lists the hyperparameters the ARIMA model was trained with
# (order, seasonal_order, optimizer settings, etc. — see output below).
print(arima_model)
ARIMA(maxiter=50, method='lbfgs', order=(1, 0, 0), out_of_sample_size=0, scoring='mse', scoring_args=None, seasonal_order=(0, 1, 0, 12), start_params=None, suppress_warnings=False, trend=None, with_intercept=True)
#### Access statistical fit properties using underlying `pmdarima`
# Statistical fit summary straight from the underlying pmdarima ARIMA object.
arima_model._forecaster.summary()
#### Alternately, use sktime's convenient wrapper to do so ----
# Same summary, but via sktime's wrapper on the adapter itself.
arima_model.summary()
Dep. Variable: | y | No. Observations: | 132 |
---|---|---|---|
Model: | SARIMAX(1, 0, 0)x(0, 1, 0, 12) | Log Likelihood | -450.590 |
Date: | Tue, 16 Nov 2021 | AIC | 907.180 |
Time: | 11:26:51 | BIC | 915.542 |
Sample: | 0 | HQIC | 910.576 |
- 132 | |||
Covariance Type: | opg |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
intercept | 5.7982 | 2.005 | 2.892 | 0.004 | 1.869 | 9.727 |
ar.L1 | 0.8100 | 0.061 | 13.261 | 0.000 | 0.690 | 0.930 |
sigma2 | 105.9407 | 12.533 | 8.453 | 0.000 | 81.377 | 130.505 |
Ljung-Box (L1) (Q): | 2.30 | Jarque-Bera (JB): | 1.04 |
---|---|---|---|
Prob(Q): | 0.13 | Prob(JB): | 0.60 |
Heteroskedasticity (H): | 1.34 | Skew: | -0.07 |
Prob(H) (two-sided): | 0.36 | Kurtosis: | 3.43 |
You can now start correlating these properties to the forecasts that you see. I will write about it in a subsequent post.
#### What hyperparameters were used to train the model? ----
# The repr lists the AutoETS hyperparameters (error/trend/seasonal components,
# sp=12, optimizer settings — see output below).
print(ets_model)
AutoETS(additive_only=False, allow_multiplicative_trend=False, auto=False, bounds=None, callback=None, damped_trend=False, dates=None, disp=False, error='add', freq=None, full_output=True, ignore_inf_ic=True, information_criterion='aic', initial_level=None, initial_seasonal=None, initial_trend=None, initialization_method='estimated', maxiter=1000, missing='none', n_jobs=None, restrict=True, return_params=False, seasonal='mul', sp=12, start_params=None, trend='add')
#### Access statistical fit properties using the underlying statsmodels model
# Here `_forecaster` is the (unfitted) statsmodels ETSModel specification,
# so it must be fit() before a results summary is available.
ets_model._forecaster.fit().summary()
#### Alternatively, use sktime's convenient wrapper to do so ----
# Same summary via sktime's wrapper, without refitting manually.
ets_model.summary()
Dep. Variable: | Number of airline passengers | No. Observations: | 132 |
---|---|---|---|
Model: | ETS(AAM) | Log Likelihood | -488.626 |
Date: | Tue, 16 Nov 2021 | AIC | 1013.253 |
Time: | 11:26:51 | BIC | 1065.143 |
Sample: | 01-31-1949 | HQIC | 1034.339 |
- 12-31-1959 | Scale | 96.116 | |
Covariance Type: | approx |
coef | std err | z | P>|z| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
smoothing_level | 0.3734 | 0.067 | 5.550 | 0.000 | 0.242 | 0.505 |
smoothing_trend | 3.734e-05 | nan | nan | nan | nan | nan |
smoothing_seasonal | 0.6265 | 0.067 | 9.296 | 0.000 | 0.494 | 0.759 |
initial_level | 109.3470 | nan | nan | nan | nan | nan |
initial_trend | 2.6555 | nan | nan | nan | nan | nan |
initial_seasonal.0 | 0.9773 | nan | nan | nan | nan | nan |
initial_seasonal.1 | 0.8482 | nan | nan | nan | nan | nan |
initial_seasonal.2 | 0.9508 | nan | nan | nan | nan | nan |
initial_seasonal.3 | 1.0885 | nan | nan | nan | nan | nan |
initial_seasonal.4 | 1.1927 | nan | nan | nan | nan | nan |
initial_seasonal.5 | 1.2076 | nan | nan | nan | nan | nan |
initial_seasonal.6 | 1.1092 | nan | nan | nan | nan | nan |
initial_seasonal.7 | 1.0129 | nan | nan | nan | nan | nan |
initial_seasonal.8 | 1.0970 | nan | nan | nan | nan | nan |
initial_seasonal.9 | 1.1541 | nan | nan | nan | nan | nan |
initial_seasonal.10 | 1.0517 | nan | nan | nan | nan | nan |
initial_seasonal.11 | 1.0000 | nan | nan | nan | nan | nan |
Ljung-Box (Q): | 41.34 | Jarque-Bera (JB): | 1.25 |
---|---|---|---|
Prob(Q): | 0.02 | Prob(JB): | 0.54 |
Heteroskedasticity (H): | 2.21 | Skew: | 0.11 |
Prob(H) (two-sided): | 0.01 | Kurtosis: | 3.42 |
#### sktime pipelines are similar to sklearn.
#### Access steps using `named_steps` attribute
# The reduced-regression model wraps a TransformedTargetForecaster, which —
# like an sklearn Pipeline — exposes its steps through `named_steps`.
print(lr_model._forecaster.named_steps.keys(), "\n\n")
#### Details about the steps ----
pprint(lr_model._forecaster.named_steps)
dict_keys(['conditional_deseasonalise', 'detrend', 'forecast']) {'conditional_deseasonalise': ConditionalDeseasonalizer(model='additive', seasonality_test=None, sp=1), 'detrend': Detrender(forecaster=PolynomialTrendForecaster(degree=1, regressor=None, with_intercept=True)), 'forecast': RecursiveTabularRegressionForecaster(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=-1, normalize=False, positive=False), window_length=10)}