plot(rec)

acf(rec)

pacf(rec)

Looks like an AR(2), maybe…

(fit10 = arima( rec, order=c(1,0,0) ))
## 
## Call:
## arima(x = rec, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9249    61.2531
## s.e.  0.0177     6.4941
## 
## sigma^2 estimated as 113.6:  log likelihood = -1715.64,  aic = 3437.27
(fit20 = arima( rec, order=c(2,0,0) ))
## 
## Call:
## arima(x = rec, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1      ar2  intercept
##       1.3512  -0.4612    61.8585
## s.e.  0.0416   0.0417     4.0039
## 
## sigma^2 estimated as 89.33:  log likelihood = -1661.51,  aic = 3331.02

AR(2) is definitely better than AR(1).

plot(resid(fit20))

acf(resid(fit20))

The residuals look pretty good, but there is some autocorrelation at lag 12 (1 year). Let’s see if we can do a little better

Maybe add a time trend?

trend=time(rec)
fit00_t = lm( rec ~ trend )
summary(fit00_t)
## 
## Call:
## lm(formula = rec ~ trend)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.864 -22.696   5.365  23.036  45.855 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1035.5774   232.0641  -4.462 1.02e-05 ***
## trend           0.5576     0.1179   4.731 3.00e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.34 on 451 degrees of freedom
## Multiple R-squared:  0.04728,    Adjusted R-squared:  0.04517 
## F-statistic: 22.38 on 1 and 451 DF,  p-value: 2.996e-06
plot(resid(fit00_t), type="l")

Trend term is significant, but do we really believe this? Is there really a linear downward trend or is it just a small sample artifact?

Let’s try fitting AR(p) (p=1,2,3) with time trend.

(fit10 = arima( rec, order=c(1,0,0), xreg=trend )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 0), xreg = trend)
## 
## Coefficients:
##          ar1  intercept   trend
##       0.9230  -495.7544  0.2829
## s.e.  0.0182  1094.5964  0.5560
## 
## sigma^2 estimated as 113.5:  log likelihood = -1715.51,  aic = 3439.02
(fit20 = arima( rec, order=c(2,0,0), xreg=trend )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = trend)
## 
## Coefficients:
##          ar1      ar2  intercept   trend
##       1.3498  -0.4649  -846.4388  0.4614
## s.e.  0.0415   0.0417   680.8166  0.3458
## 
## sigma^2 estimated as 89.01:  log likelihood = -1660.67,  aic = 3331.33
(fit30 = arima( rec, order=c(3,0,0), xreg=trend )) 
## 
## Call:
## arima(x = rec, order = c(3, 0, 0), xreg = trend)
## 
## Coefficients:
##          ar1      ar2      ar3  intercept   trend
##       1.3277  -0.4006  -0.0478  -866.2729  0.4715
## s.e.  0.0469   0.0758   0.0470   650.4578  0.3304
## 
## sigma^2 estimated as 88.8:  log likelihood = -1660.15,  aic = 3332.3

Trend is no longer significant.

Let’s drop the trend term and try fitting a few more ARMA(p,q).

(fit01 = arima( rec, order=c(0,0,1) )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.8632    62.2155
## s.e.  0.0179     1.4444
## 
## sigma^2 estimated as 272.8:  log likelihood = -1913.86,  aic = 3833.72
(fit02 = arima( rec, order=c(0,0,2) )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 2))
## 
## Coefficients:
##          ma1     ma2  intercept
##       1.2105  0.6089    62.1938
## s.e.  0.0383  0.0310     1.6823
## 
## sigma^2 estimated as 161.9:  log likelihood = -1795.86,  aic = 3599.71
(fit03 = arima( rec, order=c(0,0,3) )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 3))
## 
## Coefficients:
##          ma1     ma2     ma3  intercept
##       1.2549  1.0024  0.5188    62.1364
## s.e.  0.0458  0.0440  0.0362     1.9309
## 
## sigma^2 estimated as 119.1:  log likelihood = -1726.52,  aic = 3463.03
(fit10 = arima( rec, order=c(1,0,0) )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9249    61.2531
## s.e.  0.0177     6.4941
## 
## sigma^2 estimated as 113.6:  log likelihood = -1715.64,  aic = 3437.27
(fit11 = arima( rec, order=c(1,0,1) )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1     ma1  intercept
##       0.8783  0.4187    61.6072
## s.e.  0.0234  0.0407     5.2219
## 
## sigma^2 estimated as 93.82:  log likelihood = -1672.55,  aic = 3353.1
(fit12 = arima( rec, order=c(1,0,2) )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 2))
## 
## Coefficients:
##         ar1     ma1     ma2  intercept
##       0.848  0.4658  0.1684    61.7511
## s.e.  0.028  0.0495  0.0482     4.7682
## 
## sigma^2 estimated as 91.43:  log likelihood = -1666.72,  aic = 3343.44
(fit20 = arima( rec, order=c(2,0,0) )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1      ar2  intercept
##       1.3512  -0.4612    61.8585
## s.e.  0.0416   0.0417     4.0039
## 
## sigma^2 estimated as 89.33:  log likelihood = -1661.51,  aic = 3331.02
(fit21 = arima( rec, order=c(2,0,1) )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 1))
## 
## Coefficients:
##          ar1      ar2      ma1  intercept
##       1.4257  -0.5301  -0.0949    61.9230
## s.e.  0.0855   0.0801   0.1005     3.8184
## 
## sigma^2 estimated as 89.16:  log likelihood = -1661.08,  aic = 3332.16
(fit30 = arima( rec, order=c(3,0,0) )) 
## 
## Call:
## arima(x = rec, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1      ar2      ar3  intercept
##       1.3318  -0.4043  -0.0421    61.9256
## s.e.  0.0469   0.0759   0.0469     3.8411
## 
## sigma^2 estimated as 89.17:  log likelihood = -1661.11,  aic = 3332.22
AIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
##       df      AIC
## fit01  3 3833.724
## fit02  4 3599.712
## fit03  5 3463.032
## fit10  3 3437.273
## fit11  4 3353.097
## fit12  5 3343.441
## fit20  4 3331.019
## fit21  5 3332.165
## fit30  5 3332.215
BIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
##       df      BIC
## fit01  3 3846.072
## fit02  4 3616.176
## fit03  5 3483.612
## fit10  3 3449.621
## fit11  4 3369.560
## fit12  5 3364.020
## fit20  4 3347.483
## fit21  5 3352.744
## fit30  5 3352.795

Both AIC and BIC prefer AR(2).

Now, Let’s try adding seasonal dummies (monthly). (Notice that I like to include a mean and drop one of the dummies.) First, let’s look at a model with dummies only, an AR(2) and an AR(2) with dummies.

seas = array(dim=c(length(rec),11))
for (i in 1:11) seas[,i] = ifelse( cycle(rec)==i, 1, 0)
(fit00 = arima( rec, order=c(0,0,0), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 0), xreg = seas)
## 
## Coefficients:
##       intercept    seas1    seas2    seas3    seas4     seas5     seas6
##         73.7049  -0.2341  -1.5522  -4.0364  -8.0020  -12.6104  -19.0096
## s.e.     4.2878   6.0238   6.0238   6.0238   6.0238    6.0238    6.0238
##          seas7     seas8     seas9   seas10   seas11
##       -26.6012  -29.5096  -22.0251  -8.9019  -4.2657
## s.e.    6.0238    6.0238    6.0238   6.0639   6.0639
## 
## sigma^2 estimated as 680.3:  log likelihood = -2120.12,  aic = 4266.24
(fit20 = arima( rec, order=c(2,0,0), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = seas)
## 
## Coefficients:
##          ar1      ar2  intercept   seas1    seas2    seas3    seas4
##       1.2488  -0.3318    72.7134  0.0125  -1.2027  -3.5948  -7.4873
## s.e.  0.0443   0.0444     5.1191  1.4476   2.2502   2.7903   3.1385
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.0305  -18.3773  -25.9247  -28.7965  -21.2839  -9.0909  -4.3754
## s.e.    3.3347    3.3996    3.3399    3.1494    2.8081   2.2635   1.4479
## 
## sigma^2 estimated as 73.26:  log likelihood = -1616.54,  aic = 3263.08
(fit20 = arima( rec, order=c(2,0,0))) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1      ar2  intercept
##       1.3512  -0.4612    61.8585
## s.e.  0.0416   0.0417     4.0039
## 
## sigma^2 estimated as 89.33:  log likelihood = -1661.51,  aic = 3331.02

The best model includes seasonality and AR(2).
Let’s try some more models with seasonality and see if we can do any better.

(fit01 = arima( rec, order=c(0,0,1), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 1), xreg = seas)
## 
## Coefficients:
##          ma1  intercept    seas1    seas2    seas3    seas4     seas5
##       0.8598    73.5986  -0.1326  -1.4527  -3.9443  -7.8995  -12.4940
## s.e.  0.0187     3.3328   3.3513   4.6952   4.6952   4.6952    4.6952
##          seas6     seas7     seas8     seas9   seas10   seas11
##       -18.9093  -26.4952  -29.3987  -21.9193  -9.3419  -4.1720
## s.e.    4.6952    4.6952    4.6952    4.6952   4.7132   3.3518
## 
## sigma^2 estimated as 239:  log likelihood = -1883.84,  aic = 3795.68
(fit02 = arima( rec, order=c(0,0,2), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 2), xreg = seas)
## 
## Coefficients:
##          ma1     ma2  intercept    seas1    seas2    seas3    seas4
##       1.1925  0.5925    73.6314  -0.1618  -1.4850  -3.9743  -7.9387
## s.e.  0.0395  0.0304     3.2556   2.5921   4.0755   4.5929   4.5929
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.5403  -18.9331  -26.5235  -29.4383  -21.9619  -9.5234  -4.4872
## s.e.    4.5929    4.5929    4.5929    4.5929    4.5929   4.0732   2.5859
## 
## sigma^2 estimated as 143.8:  log likelihood = -1769.02,  aic = 3568.03
(fit03 = arima( rec, order=c(0,0,3), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(0, 0, 3), xreg = seas)
## 
## Coefficients:
##          ma1     ma2     ma3  intercept   seas1    seas2    seas3    seas4
##       1.2009  0.9562  0.5215    73.3287  0.1434  -1.1756  -3.6624  -7.6316
## s.e.  0.0514  0.0438  0.0388     3.1813  2.0943   3.3773   4.1584   4.4929
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.2448  -18.6470  -26.2390  -29.1442  -21.6546  -9.4049  -4.5190
## s.e.    4.4929    4.4929    4.4929    4.4929    4.1584   3.3781   2.0915
## 
## sigma^2 estimated as 105.4:  log likelihood = -1698.78,  aic = 3429.57
(fit10 = arima( rec, order=c(1,0,0), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 0), xreg = seas)
## 
## Coefficients:
##          ar1  intercept    seas1    seas2    seas3    seas4     seas5
##       0.9372    72.3660  -0.0289  -1.2740  -3.6914  -7.5936  -12.1440
## s.e.  0.0161     6.7528   1.4705   1.9678   2.2795   2.4779    2.5901
##          seas6     seas7     seas8     seas9   seas10   seas11
##       -18.4907  -26.0335  -28.8966  -21.3718  -9.0683  -4.3448
## s.e.    2.6275    2.5936    2.4853    2.2913   1.9774   1.4706
## 
## sigma^2 estimated as 82.35:  log likelihood = -1642.91,  aic = 3313.82
(fit11 = arima( rec, order=c(1,0,1), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 1), xreg = seas)
## 
## Coefficients:
##          ar1     ma1  intercept    seas1    seas2    seas3    seas4
##       0.9044  0.3118    72.6211  -0.0058  -1.2379  -3.6449  -7.5425
## s.e.  0.0208  0.0446     5.7209   1.4440   2.1930   2.6271   2.8956
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.0891  -18.4344  -25.9800  -28.8481  -21.3289  -9.1135  -4.3647
## s.e.    3.0453    3.0949    3.0499    2.9053    2.6427   2.2053   1.4441
## 
## sigma^2 estimated as 74.55:  log likelihood = -1620.48,  aic = 3270.96
(fit12 = arima( rec, order=c(1,0,2), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(1, 0, 2), xreg = seas)
## 
## Coefficients:
##          ar1     ma1     ma2  intercept   seas1    seas2    seas3    seas4
##       0.8894  0.3299  0.1020    72.6776  0.0031  -1.2219  -3.6228  -7.5184
## s.e.  0.0239  0.0497  0.0502     5.4366  1.4403   2.2087   2.7134   3.0193
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.0624  -18.4072  -25.9518  -28.8235  -21.3111  -9.0998  -4.3770
## s.e.    3.1882    3.2439    3.1931    3.0295    2.7300   2.2212   1.4402
## 
## sigma^2 estimated as 73.88:  log likelihood = -1618.43,  aic = 3268.87
(fit20 = arima( rec, order=c(2,0,0), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = seas)
## 
## Coefficients:
##          ar1      ar2  intercept   seas1    seas2    seas3    seas4
##       1.2488  -0.3318    72.7134  0.0125  -1.2027  -3.5948  -7.4873
## s.e.  0.0443   0.0444     5.1191  1.4476   2.2502   2.7903   3.1385
##          seas5     seas6     seas7     seas8     seas9   seas10   seas11
##       -12.0305  -18.3773  -25.9247  -28.7965  -21.2839  -9.0909  -4.3754
## s.e.    3.3347    3.3996    3.3399    3.1494    2.8081   2.2635   1.4479
## 
## sigma^2 estimated as 73.26:  log likelihood = -1616.54,  aic = 3263.08
(fit21 = arima( rec, order=c(2,0,1), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(2, 0, 1), xreg = seas)
## 
## Coefficients:
##          ar1      ar2      ma1  intercept   seas1    seas2    seas3
##       1.4774  -0.5456  -0.2618    72.6714  0.0177  -1.1940  -3.5779
## s.e.  0.1424   0.1326   0.1692     4.6757  1.4327   2.2076   2.7709
##         seas4     seas5    seas6     seas7     seas8     seas9   seas10
##       -7.4593  -11.9958  -18.335  -25.8807  -28.7540  -21.2426  -9.0553
## s.e.   3.1563    3.3820    3.458    3.3877    3.1681    2.7899   2.2214
##        seas11
##       -4.3649
## s.e.   1.4330
## 
## sigma^2 estimated as 72.88:  log likelihood = -1615.38,  aic = 3262.75
(fit30 = arima( rec, order=c(3,0,0), xreg=seas )) 
## 
## Call:
## arima(x = rec, order = c(3, 0, 0), xreg = seas)
## 
## Coefficients:
##          ar1      ar2      ar3  intercept   seas1    seas2    seas3
##       1.2295  -0.2595  -0.0578    72.7035  0.0168  -1.1921  -3.5816
## s.e.  0.0469   0.0735   0.0469     4.8881  1.4420   2.2353   2.8024
##         seas4     seas5     seas6     seas7     seas8     seas9   seas10
##       -7.4662  -12.0069  -18.3524  -25.8997  -28.7729  -21.2621  -9.0717
## s.e.   3.1803    3.3976    3.4701    3.4031    3.1917    2.8208   2.2488
##        seas11
##       -4.3703
## s.e.   1.4423
## 
## sigma^2 estimated as 73.01:  log likelihood = -1615.78,  aic = 3263.56
AIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
##       df      AIC
## fit01 14 3795.684
## fit02 15 3568.030
## fit03 16 3429.570
## fit10 14 3313.825
## fit11 15 3270.959
## fit12 16 3268.866
## fit20 15 3263.077
## fit21 16 3262.750
## fit30 16 3263.562
BIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
##       df      BIC
## fit01 14 3853.307
## fit02 15 3629.769
## fit03 16 3495.424
## fit10 14 3371.447
## fit11 15 3332.698
## fit12 16 3334.720
## fit20 15 3324.815
## fit21 16 3328.605
## fit30 16 3329.416

With seasonality, AIC slightly prefers ARMA(2,1) but BIC still likes ARMA(2,0). This is fairly common. AIC tends to overfit. One could argue for either ARMA(2,1) or AR(2). I’ll go for AR(2).

plot(resid(fit20))

acf( resid(fit20))

pacf(resid(fit20))

Diagnostics look good!