Q = factor( cycle( jj ))
trend = time(jj) - 1970
fit01 = lm( log(jj) ~ 0 + trend + Q )
summary(fit01)
##
## Call:
## lm(formula = log(jj) ~ 0 + trend + Q)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29318 -0.09062 -0.01180 0.08460 0.27644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## trend 0.167172 0.002259 74.00 <2e-16 ***
## Q1 1.052793 0.027359 38.48 <2e-16 ***
## Q2 1.080916 0.027365 39.50 <2e-16 ***
## Q3 1.151024 0.027383 42.03 <2e-16 ***
## Q4 0.882266 0.027412 32.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1254 on 79 degrees of freedom
## Multiple R-squared: 0.9935, Adjusted R-squared: 0.9931
## F-statistic: 2407 on 5 and 79 DF, p-value: < 2.2e-16
Average increase in log earnings is the coefficient on trend, 16.7%.
The average change from Q3 to Q4 is the difference in the coefficients on Q3 and Q4 plus 0.25 times the annual increase (from (b)), 0.882 - 1.151 + 0.25*0.167 = -0.227
Multicollinearity (because of quarterly dummies).
ts.plot( cbind(log(jj), fitted(fit01)), col=1:2, type="o", main="JJ, quarterly earnings per share", ylab="Dollars")
plot.ts(residuals(fit01), type="o", main="JJ earnings, residuals", ylab="Dollars")
acf(residuals(fit01), main="ACF, JJ residuals")
The residuals do not look like white noise. There is too much autocorrelation.
temp = tempr-mean(tempr)
temp2 = temp^2
trend = time(cmort)
part4 = lag(part,-4)
plot( cbind(Mortality=cmort, Temperature=temp, Particulate=part), main="LA mortality")
A = ts.intersect( cmort, trend, temp, temp2, part, part4, dframe=TRUE )
fit01 = lm( cmort ~ trend + temp + temp2 + part + part4, data=A )
summary(fit01)
##
## Call:
## lm(formula = cmort ~ trend + temp + temp2 + part + part4, data = A)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.228 -4.314 -0.614 3.713 27.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.808e+03 1.989e+02 14.123 < 2e-16 ***
## trend -1.385e+00 1.006e-01 -13.765 < 2e-16 ***
## temp -4.058e-01 3.528e-02 -11.503 < 2e-16 ***
## temp2 2.155e-02 2.803e-03 7.688 8.02e-14 ***
## part 2.029e-01 2.266e-02 8.954 < 2e-16 ***
## part4 1.030e-01 2.485e-02 4.147 3.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.287 on 498 degrees of freedom
## Multiple R-squared: 0.608, Adjusted R-squared: 0.6041
## F-statistic: 154.5 on 5 and 498 DF, p-value: < 2.2e-16
Lagged particulates appears to be significant.
pairs( cbind( Mort=A$cmort, Temp=A$temp, Part=A$part, Lag_part=A$part4 ))
cor( cbind( Mort=A$cmort, Temp=A$temp, Part=A$part, Lag_part=A$part4 ))
## Mort Temp Part Lag_part
## Mort 1.0000000 -0.4369648 0.4422896 0.5209993
## Temp -0.4369648 1.0000000 -0.0148241 -0.3990848
## Part 0.4422896 -0.0148241 1.0000000 0.5340505
## Lag_part 0.5209993 -0.3990848 0.5340505 1.0000000
Both are positively correlated and have very significant positive regression coefficients. Although “lagged particulates” is more highly correlated with “cmort” than is contemporaneous “particulates,” the regression coefficient is smaller. There must be some interactions with “Temp” or “Temp^2”.
plot( cbind(varve,log(varve)), main="Varve" )
n = length(varve)
var(varve[1:(n/2)])
## [1] 133.4574
var(varve[(n/2+1):n])
## [1] 594.4904
var(log(varve[1:(n/2)]))
## [1] 0.2707217
var(log(varve[(n/2+1):n]))
## [1] 0.451371
hist(varve)
hist(log(varve))
Taking log helps, but we really need to log at log differences.
t=seq(260,357)
plot(t,log(varve[t]),type="l", main="Log Varve", ylab="Log Varve")
t=seq(500,572)
plot(t,log(varve[t]),type="l", main="Log Varve", ylab="Log Varve")
Observations 260-357 and 500-572 both exhibit behavior similar to recent global warming.
acf(log(varve), main="Log Varve")
Lots of autocorrelation.
u = diff(log(varve))
plot(u, main="Change in Log Varve")
acf(u, main="Change in Log Varve")
Much better! Log differences correspond to percentage changes from one year to the next.
The ACF cuts off after 1 lag, so a moving average with one lag may be appropriate.
(gamma0 = var(u))
## [1] 0.3322131
(rho1 = cor( ts.intersect(u,lag(u)))[1,2])
## [1] -0.39751
(gamma1 = rho1*gamma0)
## [1] -0.132058
From \(\rho_1=\gamma_1/\gamma_0=\theta/(1+\theta^2)\), we get \(\rho_1\theta^2-\theta+\rho_1=0\). Using the quadratic equation to solve, we get \[ \theta = (1\pm \sqrt{1-4\rho_1^2})/(2\rho_1). \] Then, from \(\gamma_1 = \theta\sigma^2_w\), we get \[ \sigma^2_w = \gamma_1/\theta. \] Plugging our estimates for \(\rho_1\) and \(\gamma_1=\rho_1\gamma_0\), we get
( theta1 = (1+sqrt(1-4*rho1^2))/(2*rho1) )
## [1] -2.020809
( theta2 = (1-sqrt(1-4*rho1^2))/(2*rho1) )
## [1] -0.4948514
(sigmaw2 = gamma1/theta2)
## [1] 0.266864
where we have chosen \(\theta_2\) to get an invertible model (we will discuss this in more detail in Chapter 3).
plot(soi, main="SOI")
trend=time(soi)
fit01 = lm( soi~trend )
summary(fit01)
##
## Call:
## lm(formula = soi ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.04140 -0.24183 0.01935 0.27727 0.83866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.70367 3.18873 4.298 2.12e-05 ***
## trend -0.00692 0.00162 -4.272 2.36e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3756 on 451 degrees of freedom
## Multiple R-squared: 0.0389, Adjusted R-squared: 0.03677
## F-statistic: 18.25 on 1 and 451 DF, p-value: 2.359e-05
y = residuals(fit01)
n=length(soi)
The trend is significantly downwards.
f = seq(1,(n-1)/2)/n
A = rep(0,length(f))
for (i in 1:length(f) ) {
fit = lm( y ~ cos(2*pi*f[i]*seq(1,n)) + sin(2*pi*f[i]*seq(1,n)))
A[i] = sqrt(fit$coefficients[2]^2 + fit$coefficients[3]^2)
}
plot( f, A, type="l", ylab="Amplitude", xlab="Frequency", main="SOI, Periodogram")
s = sort( A, index.return=TRUE, decreasing=TRUE )
ii = s$ix[1:10]
f[ii]
## [1] 0.083885210 0.081677704 0.024282561 0.019867550 0.013245033
## [6] 0.022075055 0.004415011 0.039735099 0.028697572 0.249448124
1/f[ii]
## [1] 11.92105 12.24324 41.18182 50.33333 75.50000 45.30000 226.50000
## [8] 25.16667 34.84615 4.00885
Or,
f = seq(1,(n-1)/2)/n
I = fft(y)[1+seq(1,(n-1)/2)]
A = 2/n*abs(I)
plot(f, A, type="l", ylab="Amplitude", xlab="Frequency", main="SOI, Periodogram")
s = sort( A, index.return=TRUE, decreasing=TRUE)
ii = s$ix[1:10]
f[ii]
## [1] 0.083885210 0.081677704 0.024282561 0.019867550 0.013245033
## [6] 0.022075055 0.004415011 0.039735099 0.028697572 0.249448124
1/f[ii]
## [1] 11.92105 12.24324 41.18182 50.33333 75.50000 45.30000 226.50000
## [8] 25.16667 34.84615 4.00885
The main peak occurs at a period of about 1 year. The second peak occurs at a period of about 4 years.
plot( cbind(oil,gas), main="Oil and gas" )
plot(cbind(log(oil),log(gas)), main="Log oil and gas" )
Maybe a random walk? They don’t look stationary.
poil = diff(log(oil))
pgas = diff(log(gas))
plot( cbind( Oil=poil, Gas=pgas), main="Oil and gas, log differences" )
acf( poil, lag.max=100, main="Oil, log differences")
acf( pgas, lag.max=100, main="Gas, log differences")
These look pretty much like white noise . ## 2.11 (d)
ccf( poil, pgas, main="Oil and gas, log differences" )
lag2.plot( poil, pgas, 3 )
The relationships look pretty weak except at lag 0.
indi = ifelse(poil<0,0,1)
mess = ts.intersect( pgas, poil, poilL = lag(poil,-1), indi )
summary(fit <- lm( pgas ~ poil + poilL + indi, data=mess ))
##
## Call:
## lm(formula = pgas ~ poil + poilL + indi, data = mess)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18451 -0.02161 -0.00038 0.02176 0.34342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.006445 0.003464 -1.860 0.06338 .
## poil 0.683127 0.058369 11.704 < 2e-16 ***
## poilL 0.111927 0.038554 2.903 0.00385 **
## indi 0.012368 0.005516 2.242 0.02534 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04169 on 539 degrees of freedom
## Multiple R-squared: 0.4563, Adjusted R-squared: 0.4532
## F-statistic: 150.8 on 3 and 539 DF, p-value: < 2.2e-16
plot( residuals(fit), type="l" )
acf( residuals(fit) )
There is weak evidence of asymmetry. The residuals look pretty good, but there are a couple of big outliers and possibly volatility clustering. (They don’t look much different if the asymmetry is omitted.)