plot(rec)
acf(rec)
pacf(rec)
Looks like an AR(2), maybe…
(fit10 = arima( rec, order=c(1,0,0) ))
##
## Call:
## arima(x = rec, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9249 61.2531
## s.e. 0.0177 6.4941
##
## sigma^2 estimated as 113.6: log likelihood = -1715.64, aic = 3437.27
(fit20 = arima( rec, order=c(2,0,0) ))
##
## Call:
## arima(x = rec, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 1.3512 -0.4612 61.8585
## s.e. 0.0416 0.0417 4.0039
##
## sigma^2 estimated as 89.33: log likelihood = -1661.51, aic = 3331.02
AR(2) is definitely better than AR(1).
plot(resid(fit20))
acf(resid(fit20))
The residuals look pretty good, but there is some autocorrelation at lag 12 (1 year). Let’s see if we can do a little better
Maybe add a time trend?
trend=time(rec)
fit00_t = lm( rec ~ trend )
summary(fit00_t)
##
## Call:
## lm(formula = rec ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.864 -22.696 5.365 23.036 45.855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1035.5774 232.0641 -4.462 1.02e-05 ***
## trend 0.5576 0.1179 4.731 3.00e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27.34 on 451 degrees of freedom
## Multiple R-squared: 0.04728, Adjusted R-squared: 0.04517
## F-statistic: 22.38 on 1 and 451 DF, p-value: 2.996e-06
plot(resid(fit00_t), type="l")
Trend term is significant, but do we really believe this? Is there really a linear downward trend or is it just a small sample artifact?
Let’s try fitting AR(p) (p=1,2,3) with time trend.
(fit10 = arima( rec, order=c(1,0,0), xreg=trend ))
##
## Call:
## arima(x = rec, order = c(1, 0, 0), xreg = trend)
##
## Coefficients:
## ar1 intercept trend
## 0.9230 -495.7544 0.2829
## s.e. 0.0182 1094.5964 0.5560
##
## sigma^2 estimated as 113.5: log likelihood = -1715.51, aic = 3439.02
(fit20 = arima( rec, order=c(2,0,0), xreg=trend ))
##
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = trend)
##
## Coefficients:
## ar1 ar2 intercept trend
## 1.3498 -0.4649 -846.4388 0.4614
## s.e. 0.0415 0.0417 680.8166 0.3458
##
## sigma^2 estimated as 89.01: log likelihood = -1660.67, aic = 3331.33
(fit30 = arima( rec, order=c(3,0,0), xreg=trend ))
##
## Call:
## arima(x = rec, order = c(3, 0, 0), xreg = trend)
##
## Coefficients:
## ar1 ar2 ar3 intercept trend
## 1.3277 -0.4006 -0.0478 -866.2729 0.4715
## s.e. 0.0469 0.0758 0.0470 650.4578 0.3304
##
## sigma^2 estimated as 88.8: log likelihood = -1660.15, aic = 3332.3
Trend is no longer significant.
Let’s drop the trend term and try fitting a few more ARMA(p,q).
(fit01 = arima( rec, order=c(0,0,1) ))
##
## Call:
## arima(x = rec, order = c(0, 0, 1))
##
## Coefficients:
## ma1 intercept
## 0.8632 62.2155
## s.e. 0.0179 1.4444
##
## sigma^2 estimated as 272.8: log likelihood = -1913.86, aic = 3833.72
(fit02 = arima( rec, order=c(0,0,2) ))
##
## Call:
## arima(x = rec, order = c(0, 0, 2))
##
## Coefficients:
## ma1 ma2 intercept
## 1.2105 0.6089 62.1938
## s.e. 0.0383 0.0310 1.6823
##
## sigma^2 estimated as 161.9: log likelihood = -1795.86, aic = 3599.71
(fit03 = arima( rec, order=c(0,0,3) ))
##
## Call:
## arima(x = rec, order = c(0, 0, 3))
##
## Coefficients:
## ma1 ma2 ma3 intercept
## 1.2549 1.0024 0.5188 62.1364
## s.e. 0.0458 0.0440 0.0362 1.9309
##
## sigma^2 estimated as 119.1: log likelihood = -1726.52, aic = 3463.03
(fit10 = arima( rec, order=c(1,0,0) ))
##
## Call:
## arima(x = rec, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9249 61.2531
## s.e. 0.0177 6.4941
##
## sigma^2 estimated as 113.6: log likelihood = -1715.64, aic = 3437.27
(fit11 = arima( rec, order=c(1,0,1) ))
##
## Call:
## arima(x = rec, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.8783 0.4187 61.6072
## s.e. 0.0234 0.0407 5.2219
##
## sigma^2 estimated as 93.82: log likelihood = -1672.55, aic = 3353.1
(fit12 = arima( rec, order=c(1,0,2) ))
##
## Call:
## arima(x = rec, order = c(1, 0, 2))
##
## Coefficients:
## ar1 ma1 ma2 intercept
## 0.848 0.4658 0.1684 61.7511
## s.e. 0.028 0.0495 0.0482 4.7682
##
## sigma^2 estimated as 91.43: log likelihood = -1666.72, aic = 3343.44
(fit20 = arima( rec, order=c(2,0,0) ))
##
## Call:
## arima(x = rec, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 1.3512 -0.4612 61.8585
## s.e. 0.0416 0.0417 4.0039
##
## sigma^2 estimated as 89.33: log likelihood = -1661.51, aic = 3331.02
(fit21 = arima( rec, order=c(2,0,1) ))
##
## Call:
## arima(x = rec, order = c(2, 0, 1))
##
## Coefficients:
## ar1 ar2 ma1 intercept
## 1.4257 -0.5301 -0.0949 61.9230
## s.e. 0.0855 0.0801 0.1005 3.8184
##
## sigma^2 estimated as 89.16: log likelihood = -1661.08, aic = 3332.16
(fit30 = arima( rec, order=c(3,0,0) ))
##
## Call:
## arima(x = rec, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 1.3318 -0.4043 -0.0421 61.9256
## s.e. 0.0469 0.0759 0.0469 3.8411
##
## sigma^2 estimated as 89.17: log likelihood = -1661.11, aic = 3332.22
AIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
## df AIC
## fit01 3 3833.724
## fit02 4 3599.712
## fit03 5 3463.032
## fit10 3 3437.273
## fit11 4 3353.097
## fit12 5 3343.441
## fit20 4 3331.019
## fit21 5 3332.165
## fit30 5 3332.215
BIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
## df BIC
## fit01 3 3846.072
## fit02 4 3616.176
## fit03 5 3483.612
## fit10 3 3449.621
## fit11 4 3369.560
## fit12 5 3364.020
## fit20 4 3347.483
## fit21 5 3352.744
## fit30 5 3352.795
Both AIC and BIC prefer AR(2).
Now, Let’s try adding seasonal dummies (monthly). (Notice that I like to include a mean and drop one of the dummies.) First, let’s look at a model with dummies only, an AR(2) and an AR(2) with dummies.
seas = array(dim=c(length(rec),11))
for (i in 1:11) seas[,i] = ifelse( cycle(rec)==i, 1, 0)
(fit00 = arima( rec, order=c(0,0,0), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(0, 0, 0), xreg = seas)
##
## Coefficients:
## intercept seas1 seas2 seas3 seas4 seas5 seas6
## 73.7049 -0.2341 -1.5522 -4.0364 -8.0020 -12.6104 -19.0096
## s.e. 4.2878 6.0238 6.0238 6.0238 6.0238 6.0238 6.0238
## seas7 seas8 seas9 seas10 seas11
## -26.6012 -29.5096 -22.0251 -8.9019 -4.2657
## s.e. 6.0238 6.0238 6.0238 6.0639 6.0639
##
## sigma^2 estimated as 680.3: log likelihood = -2120.12, aic = 4266.24
(fit20 = arima( rec, order=c(2,0,0), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = seas)
##
## Coefficients:
## ar1 ar2 intercept seas1 seas2 seas3 seas4
## 1.2488 -0.3318 72.7134 0.0125 -1.2027 -3.5948 -7.4873
## s.e. 0.0443 0.0444 5.1191 1.4476 2.2502 2.7903 3.1385
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.0305 -18.3773 -25.9247 -28.7965 -21.2839 -9.0909 -4.3754
## s.e. 3.3347 3.3996 3.3399 3.1494 2.8081 2.2635 1.4479
##
## sigma^2 estimated as 73.26: log likelihood = -1616.54, aic = 3263.08
(fit20 = arima( rec, order=c(2,0,0)))
##
## Call:
## arima(x = rec, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 1.3512 -0.4612 61.8585
## s.e. 0.0416 0.0417 4.0039
##
## sigma^2 estimated as 89.33: log likelihood = -1661.51, aic = 3331.02
The best model includes seasonality and AR(2).
Let’s try some more models with seasonality and see if we can do any better.
(fit01 = arima( rec, order=c(0,0,1), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(0, 0, 1), xreg = seas)
##
## Coefficients:
## ma1 intercept seas1 seas2 seas3 seas4 seas5
## 0.8598 73.5986 -0.1326 -1.4527 -3.9443 -7.8995 -12.4940
## s.e. 0.0187 3.3328 3.3513 4.6952 4.6952 4.6952 4.6952
## seas6 seas7 seas8 seas9 seas10 seas11
## -18.9093 -26.4952 -29.3987 -21.9193 -9.3419 -4.1720
## s.e. 4.6952 4.6952 4.6952 4.6952 4.7132 3.3518
##
## sigma^2 estimated as 239: log likelihood = -1883.84, aic = 3795.68
(fit02 = arima( rec, order=c(0,0,2), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(0, 0, 2), xreg = seas)
##
## Coefficients:
## ma1 ma2 intercept seas1 seas2 seas3 seas4
## 1.1925 0.5925 73.6314 -0.1618 -1.4850 -3.9743 -7.9387
## s.e. 0.0395 0.0304 3.2556 2.5921 4.0755 4.5929 4.5929
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.5403 -18.9331 -26.5235 -29.4383 -21.9619 -9.5234 -4.4872
## s.e. 4.5929 4.5929 4.5929 4.5929 4.5929 4.0732 2.5859
##
## sigma^2 estimated as 143.8: log likelihood = -1769.02, aic = 3568.03
(fit03 = arima( rec, order=c(0,0,3), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(0, 0, 3), xreg = seas)
##
## Coefficients:
## ma1 ma2 ma3 intercept seas1 seas2 seas3 seas4
## 1.2009 0.9562 0.5215 73.3287 0.1434 -1.1756 -3.6624 -7.6316
## s.e. 0.0514 0.0438 0.0388 3.1813 2.0943 3.3773 4.1584 4.4929
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.2448 -18.6470 -26.2390 -29.1442 -21.6546 -9.4049 -4.5190
## s.e. 4.4929 4.4929 4.4929 4.4929 4.1584 3.3781 2.0915
##
## sigma^2 estimated as 105.4: log likelihood = -1698.78, aic = 3429.57
(fit10 = arima( rec, order=c(1,0,0), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(1, 0, 0), xreg = seas)
##
## Coefficients:
## ar1 intercept seas1 seas2 seas3 seas4 seas5
## 0.9372 72.3660 -0.0289 -1.2740 -3.6914 -7.5936 -12.1440
## s.e. 0.0161 6.7528 1.4705 1.9678 2.2795 2.4779 2.5901
## seas6 seas7 seas8 seas9 seas10 seas11
## -18.4907 -26.0335 -28.8966 -21.3718 -9.0683 -4.3448
## s.e. 2.6275 2.5936 2.4853 2.2913 1.9774 1.4706
##
## sigma^2 estimated as 82.35: log likelihood = -1642.91, aic = 3313.82
(fit11 = arima( rec, order=c(1,0,1), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(1, 0, 1), xreg = seas)
##
## Coefficients:
## ar1 ma1 intercept seas1 seas2 seas3 seas4
## 0.9044 0.3118 72.6211 -0.0058 -1.2379 -3.6449 -7.5425
## s.e. 0.0208 0.0446 5.7209 1.4440 2.1930 2.6271 2.8956
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.0891 -18.4344 -25.9800 -28.8481 -21.3289 -9.1135 -4.3647
## s.e. 3.0453 3.0949 3.0499 2.9053 2.6427 2.2053 1.4441
##
## sigma^2 estimated as 74.55: log likelihood = -1620.48, aic = 3270.96
(fit12 = arima( rec, order=c(1,0,2), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(1, 0, 2), xreg = seas)
##
## Coefficients:
## ar1 ma1 ma2 intercept seas1 seas2 seas3 seas4
## 0.8894 0.3299 0.1020 72.6776 0.0031 -1.2219 -3.6228 -7.5184
## s.e. 0.0239 0.0497 0.0502 5.4366 1.4403 2.2087 2.7134 3.0193
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.0624 -18.4072 -25.9518 -28.8235 -21.3111 -9.0998 -4.3770
## s.e. 3.1882 3.2439 3.1931 3.0295 2.7300 2.2212 1.4402
##
## sigma^2 estimated as 73.88: log likelihood = -1618.43, aic = 3268.87
(fit20 = arima( rec, order=c(2,0,0), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(2, 0, 0), xreg = seas)
##
## Coefficients:
## ar1 ar2 intercept seas1 seas2 seas3 seas4
## 1.2488 -0.3318 72.7134 0.0125 -1.2027 -3.5948 -7.4873
## s.e. 0.0443 0.0444 5.1191 1.4476 2.2502 2.7903 3.1385
## seas5 seas6 seas7 seas8 seas9 seas10 seas11
## -12.0305 -18.3773 -25.9247 -28.7965 -21.2839 -9.0909 -4.3754
## s.e. 3.3347 3.3996 3.3399 3.1494 2.8081 2.2635 1.4479
##
## sigma^2 estimated as 73.26: log likelihood = -1616.54, aic = 3263.08
(fit21 = arima( rec, order=c(2,0,1), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(2, 0, 1), xreg = seas)
##
## Coefficients:
## ar1 ar2 ma1 intercept seas1 seas2 seas3
## 1.4774 -0.5456 -0.2618 72.6714 0.0177 -1.1940 -3.5779
## s.e. 0.1424 0.1326 0.1692 4.6757 1.4327 2.2076 2.7709
## seas4 seas5 seas6 seas7 seas8 seas9 seas10
## -7.4593 -11.9958 -18.335 -25.8807 -28.7540 -21.2426 -9.0553
## s.e. 3.1563 3.3820 3.458 3.3877 3.1681 2.7899 2.2214
## seas11
## -4.3649
## s.e. 1.4330
##
## sigma^2 estimated as 72.88: log likelihood = -1615.38, aic = 3262.75
(fit30 = arima( rec, order=c(3,0,0), xreg=seas ))
##
## Call:
## arima(x = rec, order = c(3, 0, 0), xreg = seas)
##
## Coefficients:
## ar1 ar2 ar3 intercept seas1 seas2 seas3
## 1.2295 -0.2595 -0.0578 72.7035 0.0168 -1.1921 -3.5816
## s.e. 0.0469 0.0735 0.0469 4.8881 1.4420 2.2353 2.8024
## seas4 seas5 seas6 seas7 seas8 seas9 seas10
## -7.4662 -12.0069 -18.3524 -25.8997 -28.7729 -21.2621 -9.0717
## s.e. 3.1803 3.3976 3.4701 3.4031 3.1917 2.8208 2.2488
## seas11
## -4.3703
## s.e. 1.4423
##
## sigma^2 estimated as 73.01: log likelihood = -1615.78, aic = 3263.56
AIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
## df AIC
## fit01 14 3795.684
## fit02 15 3568.030
## fit03 16 3429.570
## fit10 14 3313.825
## fit11 15 3270.959
## fit12 16 3268.866
## fit20 15 3263.077
## fit21 16 3262.750
## fit30 16 3263.562
BIC(fit01,fit02,fit03,fit10,fit11,fit12,fit20,fit21,fit30)
## df BIC
## fit01 14 3853.307
## fit02 15 3629.769
## fit03 16 3495.424
## fit10 14 3371.447
## fit11 15 3332.698
## fit12 16 3334.720
## fit20 15 3324.815
## fit21 16 3328.605
## fit30 16 3329.416
With seasonality, AIC slightly prefers ARMA(2,1) but BIC still likes ARMA(2,0). This is fairly common. AIC tends to overfit. One could argue for either ARMA(2,1) or AR(2). I’ll go for AR(2).
plot(resid(fit20))
acf( resid(fit20))
pacf(resid(fit20))
Diagnostics look good!