x = AirPassengers
plot(x)

plot(log(x))

y = log(x)

Let’s work with y = log(x).

First, note that a simple ARIMA(0,1,0) model implies the forecast for next month is this months value. Not too good…

fit01=arima(y, order=c(0, 1, 0), seasonal=list(order=c(0, 0, 0), period=12) )
f01  =fitted( fit01 )
ts.plot( cbind(y, f01), col=c(1,2))

Now let’s try ARIMA(0,1,0)x12. For this one the forecast is last year’s value. This would actually be quite good if we could include a constant to get year-on-year growth. But I can’t find any way to do that.

fit02=arima(y, order=c(0, 0, 0), seasonal=list(order=c(0, 1, 0), period=12), xreg=time(x) )
f02  =fitted( fit02 )
ts.plot( cbind(y, f02), col=c(1,2))

ARIMA(0,1,0)x(0,1,0)x12 gets the year-on-year growth and is a very good model! But, we are missing some autocorrelation in the residuals at 1 month and 1 year.

fit03=arima(y, order=c(0, 1, 0), seasonal=list(order=c(0, 1, 0), period=12) )
f03  =fitted( fit03 )
ts.plot( cbind(y, f03), col=c(1,2))

acf(resid(fit03))

So, let’s try adding some MA terms to capture that:

fit04=arima(y, order=c(0, 1, 1), seasonal=list(order=c(0, 1, 0), period=12) )
f04  =fitted( fit04 )
fit05=arima(y, order=c(0, 1, 0), seasonal=list(order=c(0, 1, 1), period=12) )
f05  =fitted( fit05 )
fit06=arima(y, order=c(0, 1, 1), seasonal=list(order=c(0, 1, 1), period=12) )
f06  =fitted( fit06 )
ts.plot( cbind(y, f03, f04, f05, f06), col=1:5)

Visually, these all look pretty similar. Let’s look at AIC/BIC

AIC(fit03,fit04,fit05,fit06)
##       df       AIC
## fit03  1 -434.8300
## fit04  2 -449.9794
## fit05  2 -467.5581
## fit06  3 -483.3991
BIC(fit03,fit04,fit05,fit06)
##       df       BIC
## fit03  1 -431.9548
## fit04  2 -444.2290
## fit05  2 -461.8077
## fit06  3 -474.7735

ARIMA(0,1,1)x(0,1,1)x12 definitely wins. Let’s check the diagnostics.

sarima( y, 0,1,1, 0,1,1, S=12, details=FALSE)

## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), include.mean = !no.constant, optim.control = list(trace = trc, 
##     REPORT = 1, reltol = tol))
## 
## Coefficients:
##           ma1     sma1
##       -0.4018  -0.5569
## s.e.   0.0896   0.0731
## 
## sigma^2 estimated as 0.001348:  log likelihood = 244.7,  aic = -483.4
## 
## $degrees_of_freedom
## [1] 142
## 
## $ttable
##      Estimate     SE t.value p.value
## ma1   -0.4018 0.0896 -4.4825       0
## sma1  -0.5569 0.0731 -7.6190       0
## 
## $AIC
## [1] -5.58133
## 
## $AICc
## [1] -5.56625
## 
## $BIC
## [1] -6.540082

NICE!