6 Time series models
Packages required (plus any dependencies)
DAAG ggsci latticeExtra ggplot2 mice car forecast mgcv tseries
Additionally, knitr and Hmisc are required in order to process the Rmd source file.
Section 6.1: Time series – some basic ideas
Subsection 6.1.1: Time series objects
class("lakeHuron")
## Use `time()` to extract the `time` attribute
range(time(LakeHuron))
## Use `window()` to subset a time series
<- window(LakeHuron, from=1875, to=1925) LHto1925
<- DAAG::jobs
jobs names(jobs)
<- ts(jobs[, -7]) # Create multivariate time series
allRegions time(allRegions) # Times run from 1 to 24
<- ts(jobs[, -7], start=c(1995,1), frequency=12)
allRegions "BC"] # Extract jobs data for British Columbia
allRegions[,<- ts(jobs[, "BC"], start=c(1995,1), frequency=12)
jobsBC # Obtain equivalent of `allRegions[,"BC"]` directly from `jobs` dataset
Subsection 6.1.2: Preliminary graphical exploration
## Plot depth measurements: ts object LakeHuron (datasets)
plot(LakeHuron, ylab="depth (in feet)", xlab = "Time (in years)", fg="gray")
lag.plot(LakeHuron, lags=3, do.lines=FALSE)
Subsection 6.1.3: The autocorrelation and partial autocorrelation function
par(oma=c(0,0,1.5,0))
par(pty="s")
lag.plot(LakeHuron, set.lags=1:4,do.lines=F, oma=c(0,1.5,1.5,1.5),
fg="gray", layout=c(1,4), cex.lab=1.15, asp=1)
mtext(side=3, line=0.5, "A: Lag plots", adj=0, cex=0.85, outer=TRUE)
library(lattice)
<- c("gray80",rev(ggsci::pal_npg()(2)))
col3 <- 15
lag.max <- 0.18
offset <- 2/sqrt(length(LakeHuron))
ci95 <- ar(LakeHuron)
ar2 <- list(x=0.975, y=0.965, corner=c(1,1), columns=1, cex=0.85,
gph.key text=list(c("Lake Huron data","AR1 process","AR2 process")),
lines=list(lwd=c(3,1.5,1.5), col=col3,lend=2),
padding.text=1)
<- list(fontsize=list(text=8, points=5),
parsetBC superpose.line=list(col=col3, lty=rep(1,3),
lwd=c(3,1.5,1.5)))
<- modifyList(parsetBC,list(grid.pars = list(lineend = "butt")))
parsetBC <- factor(c("acfData","acfAR1","acfAR2"),
lev3 levels=c("acfData","acfAR1","acfAR2"))
<- acf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
acfData <- pacf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
pacfData <- ARMAacf(ar=0.8, lag.max=lag.max)
acfAR1 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max)
acfAR2 <- ARMAacf(ar=0.8, lag.max=lag.max, pacf=TRUE)
pacfAR1 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max, pacf=TRUE)
pacfAR2 <- data.frame(acf=c(acfData,acfAR1,acfAR2),
xy Lag=c(rep(0:lag.max,3))+rep(c(0,-offset,offset),
rep(lag.max+1,3)),
gp=rep(lev3, rep(lag.max+1,3)))
<- xyplot(acf ~ Lag, data = xy, groups=gp, type=c("h"),
gphB par.strip.text = list(cex = 0.85), lend=2,origin=0,
ylim=c(-0.325, 1.04),key=gph.key, par.settings=parsetBC,
panel=function(x,y,...){
panel.xyplot(x,y,...)
panel.abline(h=0, lwd=0.8)
panel.abline(h=ci95, lwd=0.8, lty=2)
panel.abline(h=-ci95, lwd=0.8, lty=2) } )
<- data.frame(pacf=c(pacfData,pacfAR1,pacfAR2),
xyp Lag=c(rep(1:lag.max,3))+c(rep(c(0,-offset,offset),
rep(lag.max,3))), gp=rep(lev3, rep(lag.max,3)))
<- xyplot(pacf ~ Lag, data = xyp, groups=gp, type=c("h"),
gphC par.strip.text = list(cex = 0.85), lend=2,
ylab = "Partial correlation", origin=0, ylim=c(-0.325, 1.04),
key=gph.key, par.settings=parsetBC,
panel=function(x,y,...){
panel.xyplot(x,y,...)
panel.abline(h=0, lwd=0.8)
panel.abline(h=ci95, lwd=0.8, lty=2)
panel.abline(h=-ci95, lwd=0.8, lty=2) } )
print(update(gphB, scales=list(alternating=FALSE, tck=0.5),
ylab = "Autocorrelation",
main=list("B: Autocorelation -- Data vs AR processes",
font=1, x=0, y=0.25, just="left", cex=1)),
pos=c(0,0,0.5,0.9))
print(update(gphC,
scales=list(x=list(at=c(1,5,10,15)), alternating=FALSE, tck=0.5),
ylab = "Partial autocorrelation",
main=list("C: Partial autocorrelation -- Data vs AR processes",
font=1, x=0, y=0.25, just="left", cex=1)),
pos=c(0.5,0,1,0.9),newpage=FALSE)
acf(LakeHuron)
## pacf(LakeHuron) gives the plot of partial autocorrelations
Subsection 6.1.4: Autoregressive (AR) models
The AR(1)
model
## Yule-Walker autocorrelation estimate
<- ar(LakeHuron, order.max = 1, method = "yw") # autocorrelation estimate
LH.yw # order.max = 1 for AR(1) model
$ar # autocorrelation estimate of alpha
LH.yw## Maximum likelihood estimate
<- ar(LakeHuron, order.max = 1, method = "mle")
LH.mle $ar # maximum likelihood estimate of alpha
LH.mle$x.mean # estimated series mean
LH.mle$var.pred # estimated innovation variance LH.mle
The general AR(p) model
ar(LakeHuron, method="mle")
~Moving average (MA) processes
Subsection 6.1.5: ~Autoregressive moving average (ARMA) models – theory
Subsection 6.1.6: Automatic model selection?
library(forecast, quietly=TRUE)
<- auto.arima(LakeHuron, approximation=F, stepwise=F)) (aaLH
## Check that model removes most of the correlation structure
acf(resid(aaLH, type="innovation")) # `type="innovation"` is the default
auto.arima(LakeHuron)
<- auto.arima(LakeHuron, d=0, approximation=F, stepwise=F)) (aaLH0
plot(forecast(aaLH0, h=12)) ## `level=c(80,95)` is the default
<- forecast(LakeHuron, h=12)
fcETS plot(fcETS)
plot(forecast(aaLH, h=12, level=c(80,95))) # Panel B; ARIMA(2,1,1)
auto.arima(LakeHuron, d=0, max.Q=0, approximation=F, stepwise=F)
Use of simulation as a check
<- par(mfrow=c(2,2), mar=c(3.1,4.6,2.6, 1.1))
oldpar for(i in 1:2){
<- arima.sim(model=list(order=c(0,0,3), ma=c(0,0,0.25*i)), n=98)
simts acf(simts, main="", xlab="")
mtext(side=3, line=0.5, paste("ma3 =", 0.25*i), adj=0)
}par(oldpar)
set.seed(29) # Ensure that results are reproducible
<- matrix(0, nrow=4, ncol=20)
estMAord for(i in 1:4){
for(j in 1:20){
<- arima.sim(n=98, model=list(ma=c(0,0,0.125*i)))
simts <- auto.arima(simts, start.q=3)$arma[2] }
estMAord[i,j]
}<- table(row(estMAord), estMAord)
detectedAs dimnames(detectedAs) <- list(ma3=paste(0.125*(1:4)),
Order=paste(0:(dim(detectedAs)[2]-1)))
print(detectedAs)
Subsection 6.1.7: Seasonal effects
suppressPackageStartupMessages(library(ggplot2))
<- window(DAAG::mdbAVtJtoD, c(1980,1))
mdb12AVt1980on <- ets(mdb12AVt1980on)
AVt.ets autoplot(AVt.ets, main="", fg="gray") +
::ggtitle("A: Components of ETS fit") +
ggplot2theme(plot.title = element_text(hjust=0, vjust=0.5, size=11))
monthplot(mdb12AVt1980on, col.base=2, fg="gray")
title("B: Seasonal component, SI ratio",
font.main=1, line=1, adj=0, cex=1.25)
<- DAAG::bomregions2021
bomreg ## Plot time series mdbRain, SOI, and IOD: ts object bomregions2021 (DAAG)
<- xyplot(ts(bomreg[, c("mdbRain", "mdbAVt", "SOI", "IOD")], start=1900),
gph xlab="", type=c('p','smooth'), scales=list(alternating=rep(1,3)))
update(gph, layout=c(4,1), par.settings=DAAG::DAAGtheme(color=F))
suppressPackageStartupMessages(library(mgcv))
<- within(DAAG::bomregions2021, mdbrtRain <- mdbRain^0.5)
bomreg ## Check first for a sequential correlation structure, after
## fitting smooth terms s(CO2), s(SOI), and s(IOD)
library(mgcv)
<- gam(mdbrtRain~s(CO2) + s(SOI) + s(IOD), data=bomreg)
mdbrtRain.gam auto.arima(resid(mdbrtRain.gam))
plot(mdbrtRain.gam, residuals=T, cex=2, fg="gray")
## Do also `gam.check(mdbrtRain.gam)` (Output looks fine)
anova(mdbrtRain.gam)
Box.test(resid(mdbrtRain.gam), lag=10, type="Ljung")
## Examine normality of estimates of "residuals"
qqnorm(resid(mdbrtRain.gam))
The mdbAVt series
<- gam(mdbAVt ~ s(CO2)+s(SOI)+s(IOD), data=bomreg)
mdbAVt.gam auto.arima(resid(mdbAVt.gam))
anova(mdbAVt.gam)
<- gam(mdbAVt ~ s(CO2)+s(SOI), data=bomreg) mdbAVt1.gam
plot(mdbAVt1.gam, residuals=TRUE)
<- c("A: Rainfall", expression("B: Average Temp ("^o*"C)"))
faclevs <- fitted(mdbrtRain.gam)
fitrain <- c(rep(NA,10), fitted(mdbAVt1.gam))
fitAVt <- xyplot(mdbrtRain+mdbAVt~Year,data=bomreg, outer=T, xlab="", ylab="",
gph scales=list(y=list(relation='free',
at=list(sqrt((3:8)*100),(33:39)/2),
labels=list((3:8)*100,(33:39)/2)), x=list(alternating=rep(1,2))),
strip=strip.custom(factor.levels=faclevs))
+ latticeExtra::as.layer(xyplot(fitrain+fitAVt~Year, outer=T,
gph scales=list(y=list(relation='free')),
data=bomreg, pch=3, col=2))
## Use `auto.arima()` to choose the ARIMA order:
<- with(bomreg[-(1:10),], auto.arima(mdbAVt, xreg=cbind(CO2,SOI)))
aaFitCO2 ## Try including a degree 2 polynomial term
<- with(bomreg[-(1:10),],
aaFitpol2CO2 auto.arima(mdbAVt, xreg=cbind(poly(CO2,2),SOI)))
cbind(AIC(aaFitCO2, aaFitpol2CO2), BIC=BIC(aaFitCO2, aaFitpol2CO2))
Subsection 6.1.9: A calibration problem with time series errors
<- DAAG::frostedflakes
flakes <- with(flakes, auto.arima(IA400, xreg=Lab))
calib.arima calib.arima
with(flakes, coef(auto.arima(IA400/Lab, approximation=F, stepwise=F)))
with(flakes, coef(auto.arima(IA400-Lab, approximation=F, stepwise=F)))
Section 6.2: Nonlinear time series
<- numeric(999) # x will contain the ARCH(1) errors
x <- rnorm(1)
x0 for (i in 1:999){
<- rnorm(1, sd=sqrt(.25 + .95*x0^2))
x0 <- x0
x[i] }
suppressPackageStartupMessages(library(tseries))
garch(x, order = c(0, 1), trace=FALSE)
Section 6.3: Further reading
Section 6.4: Exercises
6.4
<- matrix(x, ncol=1000) xx
6.7
library(tseries)
data(ice.river)
<- diff(log(ice.river[, 1])) river1
6.9
library(forecast)
<- window(EuStockMarkets[,1], end = c(1996, 260))
Eu1 <- nnetar(Eu1)
Eu1nn <- forecast(Eu1nn, end=end(EuStockMarkets[,1]))
Eu1f plot(Eu1f, ylim=c(1400, 7000))
lines(EuStockMarkets[,1])
6.10a
<- cbind(airquality[, 1:4], day=1:nrow(airquality))
airq # Column 5 ('day' starting May 1) replaces columns 'Month' & 'Day')
library(mgcv)
<- gam(Temp~s(day), data=airq)
temp.gam <- gamm(Temp~s(day), data=airq, correlation=corAR1())
tempAR1.gamm plot(temp.gam, res=T, cex=2)
plot(tempAR1.gamm$gam, res=T, cex=2)
6.10b
<- coef(tempAR1.gamm$lme$modelStruct$corStruct, unconstrained = FALSE) )
(Phi <- sqrt(tempAR1.gamm$gam$sig2)
Sigma ## Simulate an AR1 process with this parameter
<- arima.sim(model=list(ar=Phi), n=nrow(airq), sd=Sigma)
AR1.sim <- AR1.sim+fitted(tempAR1.gamm$gam)
simSeries plot(I(1:nrow(airq)), simSeries)
## Compare with initial series
plot(I(1:nrow(airq)), airq$Temp)
if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
<- knitr::knit_code$get()
code <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
txt writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/ch6.R")
}