6 Time series models

Packages required (plus any dependencies)

DAAG ggsci latticeExtra ggplot2 mice car forecast mgcv tseries

Additionally, knitr and Hmisc are required in order to process the Rmd source file.

Section 6.1: Time series – some basic ideas

Subsection 6.1.1: Time series objects

class("lakeHuron")
## Use `time()` to extract the `time` attribute
range(time(LakeHuron))
## Use `window()` to subset a time series
LHto1925 <- window(LakeHuron, from=1875, to=1925)

jobs <- DAAG::jobs
names(jobs)
allRegions <- ts(jobs[, -7])   # Create multivariate time series
time(allRegions)               # Times run from 1 to 24
allRegions <- ts(jobs[, -7], start=c(1995,1), frequency=12)
allRegions[,"BC"]              # Extract jobs data for British Columbia
jobsBC <- ts(jobs[, "BC"], start=c(1995,1), frequency=12)
  # Obtain equivalent of `allRegions[,"BC"]` directly from `jobs` dataset

Subsection 6.1.2: Preliminary graphical exploration

## Plot depth measurements: ts object LakeHuron (datasets)
plot(LakeHuron, ylab="depth (in feet)", xlab = "Time (in years)", fg="gray")

lag.plot(LakeHuron, lags=3, do.lines=FALSE)

Subsection 6.1.3: The autocorrelation and partial autocorrelation function

par(oma=c(0,0,1.5,0))
par(pty="s")
lag.plot(LakeHuron, set.lags=1:4,do.lines=F, oma=c(0,1.5,1.5,1.5),
fg="gray", layout=c(1,4), cex.lab=1.15, asp=1)
mtext(side=3, line=0.5, "A: Lag plots", adj=0, cex=0.85, outer=TRUE)

library(lattice)
col3 <- c("gray80",rev(ggsci::pal_npg()(2)))
lag.max <- 15
offset <- 0.18
ci95 <- 2/sqrt(length(LakeHuron))
ar2 <- ar(LakeHuron)
gph.key <- list(x=0.975, y=0.965, corner=c(1,1), columns=1, cex=0.85,
                 text=list(c("Lake Huron data","AR1 process","AR2 process")),
                 lines=list(lwd=c(3,1.5,1.5), col=col3,lend=2),
                 padding.text=1)
parsetBC <- list(fontsize=list(text=8, points=5), 
                 superpose.line=list(col=col3, lty=rep(1,3),
                 lwd=c(3,1.5,1.5)))
parsetBC <- modifyList(parsetBC,list(grid.pars = list(lineend = "butt")))
lev3 <- factor(c("acfData","acfAR1","acfAR2"),
               levels=c("acfData","acfAR1","acfAR2"))
acfData <- acf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
pacfData <- pacf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
acfAR1 <- ARMAacf(ar=0.8, lag.max=lag.max)
acfAR2 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max)
pacfAR1 <- ARMAacf(ar=0.8, lag.max=lag.max, pacf=TRUE)
pacfAR2 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max, pacf=TRUE)
xy <- data.frame(acf=c(acfData,acfAR1,acfAR2),
Lag=c(rep(0:lag.max,3))+rep(c(0,-offset,offset),
rep(lag.max+1,3)),
gp=rep(lev3, rep(lag.max+1,3)))
gphB <- xyplot(acf ~ Lag, data = xy, groups=gp, type=c("h"),
              par.strip.text = list(cex = 0.85), lend=2,origin=0, 
              ylim=c(-0.325, 1.04),key=gph.key, par.settings=parsetBC, 
          panel=function(x,y,...){
            panel.xyplot(x,y,...)
            panel.abline(h=0, lwd=0.8)
            panel.abline(h=ci95, lwd=0.8, lty=2)
            panel.abline(h=-ci95, lwd=0.8, lty=2) } )
xyp <- data.frame(pacf=c(pacfData,pacfAR1,pacfAR2),
                  Lag=c(rep(1:lag.max,3))+c(rep(c(0,-offset,offset),
                  rep(lag.max,3))), gp=rep(lev3, rep(lag.max,3)))
gphC <- xyplot(pacf ~ Lag, data = xyp, groups=gp, type=c("h"),
               par.strip.text = list(cex = 0.85), lend=2,
               ylab = "Partial correlation", origin=0, ylim=c(-0.325, 1.04),
               key=gph.key, par.settings=parsetBC, 
               panel=function(x,y,...){
                 panel.xyplot(x,y,...)
                 panel.abline(h=0, lwd=0.8)
                 panel.abline(h=ci95, lwd=0.8, lty=2)
                 panel.abline(h=-ci95, lwd=0.8, lty=2) } )
print(update(gphB, scales=list(alternating=FALSE, tck=0.5),
             ylab = "Autocorrelation",
             main=list("B: Autocorelation -- Data vs AR processes", 
                       font=1, x=0, y=0.25, just="left", cex=1)), 
                       pos=c(0,0,0.5,0.9))
print(update(gphC, 
        scales=list(x=list(at=c(1,5,10,15)), alternating=FALSE, tck=0.5),
        ylab = "Partial autocorrelation",
        main=list("C: Partial autocorrelation -- Data vs AR processes", 
                  font=1, x=0, y=0.25, just="left", cex=1)),
        pos=c(0.5,0,1,0.9),newpage=FALSE)

acf(LakeHuron)
## pacf(LakeHuron) gives the plot of partial autocorrelations

Subsection 6.1.4: Autoregressive (AR) models

The `AR(1)` model

## Yule-Walker autocorrelation estimate
LH.yw <- ar(LakeHuron, order.max = 1, method = "yw")  # autocorrelation estimate
# order.max = 1 for AR(1) model
LH.yw$ar                  # autocorrelation estimate of alpha
## Maximum likelihood estimate
LH.mle <- ar(LakeHuron, order.max = 1, method = "mle")
LH.mle$ar                 # maximum likelihood estimate of alpha
LH.mle$x.mean             # estimated series mean
LH.mle$var.pred           # estimated innovation variance

The general AR(p) model

ar(LakeHuron, method="mle")

~Moving average (MA) processes

Subsection 6.1.5: ~Autoregressive moving average (ARMA) models – theory

Subsection 6.1.6: Automatic model selection?

library(forecast, quietly=TRUE)
(aaLH <- auto.arima(LakeHuron, approximation=F, stepwise=F))

## Check that model removes most of the correlation structure
acf(resid(aaLH, type="innovation"))   # `type="innovation"` is the default

auto.arima(LakeHuron)

(aaLH0 <- auto.arima(LakeHuron, d=0, approximation=F, stepwise=F))

plot(forecast(aaLH0,  h=12))  ## `level=c(80,95)` is the default
fcETS <- forecast(LakeHuron, h=12)
plot(fcETS)
plot(forecast(aaLH,  h=12, level=c(80,95)))  # Panel B; ARIMA(2,1,1)

auto.arima(LakeHuron, d=0, max.Q=0, approximation=F, stepwise=F)

Use of simulation as a check

oldpar <- par(mfrow=c(2,2), mar=c(3.1,4.6,2.6, 1.1))
for(i in 1:2){
  simts <- arima.sim(model=list(order=c(0,0,3), ma=c(0,0,0.25*i)), n=98)
  acf(simts, main="", xlab="")
  mtext(side=3, line=0.5, paste("ma3 =", 0.25*i), adj=0)
}
par(oldpar)

set.seed(29)         # Ensure that results are reproducible
estMAord <- matrix(0, nrow=4, ncol=20)
for(i in 1:4){
  for(j in 1:20){
    simts <- arima.sim(n=98, model=list(ma=c(0,0,0.125*i)))
    estMAord[i,j] <- auto.arima(simts, start.q=3)$arma[2] }
}
detectedAs <- table(row(estMAord), estMAord)
dimnames(detectedAs) <- list(ma3=paste(0.125*(1:4)),
Order=paste(0:(dim(detectedAs)[2]-1)))

print(detectedAs)

Subsection 6.1.7: Seasonal effects

suppressPackageStartupMessages(library(ggplot2))
mdb12AVt1980on <- window(DAAG::mdbAVtJtoD, c(1980,1))
AVt.ets <- ets(mdb12AVt1980on)
autoplot(AVt.ets, main="", fg="gray") + 
  ggplot2::ggtitle("A: Components of ETS fit") +
    theme(plot.title = element_text(hjust=0, vjust=0.5, size=11))
monthplot(mdb12AVt1980on, col.base=2,  fg="gray")
title("B: Seasonal component, SI ratio", 
      font.main=1, line=1, adj=0, cex=1.25)

bomreg <- DAAG::bomregions2021
## Plot time series mdbRain, SOI, and IOD: ts object bomregions2021 (DAAG)
gph <- xyplot(ts(bomreg[, c("mdbRain", "mdbAVt", "SOI", "IOD")], start=1900), 
              xlab="", type=c('p','smooth'), scales=list(alternating=rep(1,3)))
update(gph, layout=c(4,1), par.settings=DAAG::DAAGtheme(color=F))

suppressPackageStartupMessages(library(mgcv))
bomreg <- within(DAAG::bomregions2021, mdbrtRain <- mdbRain^0.5)
## Check first for a sequential correlation structure, after
## fitting smooth terms s(CO2), s(SOI), and s(IOD)
library(mgcv)
mdbrtRain.gam <- gam(mdbrtRain~s(CO2) + s(SOI) + s(IOD), data=bomreg)
auto.arima(resid(mdbrtRain.gam))

plot(mdbrtRain.gam, residuals=T, cex=2, fg="gray")
## Do also `gam.check(mdbrtRain.gam)` (Output looks fine)

anova(mdbrtRain.gam)

Box.test(resid(mdbrtRain.gam), lag=10, type="Ljung")

## Examine normality of estimates of "residuals" 
qqnorm(resid(mdbrtRain.gam))

The mdbAVt series

mdbAVt.gam <- gam(mdbAVt ~ s(CO2)+s(SOI)+s(IOD), data=bomreg)
auto.arima(resid(mdbAVt.gam))
anova(mdbAVt.gam)

mdbAVt1.gam <- gam(mdbAVt ~ s(CO2)+s(SOI), data=bomreg)

plot(mdbAVt1.gam, residuals=TRUE)

faclevs <- c("A: Rainfall", expression("B: Average Temp ("^o*"C)"))
fitrain <- fitted(mdbrtRain.gam) 
fitAVt <- c(rep(NA,10), fitted(mdbAVt1.gam))
gph <- xyplot(mdbrtRain+mdbAVt~Year,data=bomreg, outer=T, xlab="", ylab="",
  scales=list(y=list(relation='free', 
              at=list(sqrt((3:8)*100),(33:39)/2), 
              labels=list((3:8)*100,(33:39)/2)), x=list(alternating=rep(1,2))),
  strip=strip.custom(factor.levels=faclevs))
gph + latticeExtra::as.layer(xyplot(fitrain+fitAVt~Year, outer=T,
                                    scales=list(y=list(relation='free')), 
                                    data=bomreg, pch=3, col=2))

## Use `auto.arima()` to choose the ARIMA order:
aaFitCO2 <- with(bomreg[-(1:10),], auto.arima(mdbAVt, xreg=cbind(CO2,SOI)))
## Try including a degree 2 polynomial term
aaFitpol2CO2 <- with(bomreg[-(1:10),], 
                     auto.arima(mdbAVt, xreg=cbind(poly(CO2,2),SOI)))
cbind(AIC(aaFitCO2, aaFitpol2CO2), BIC=BIC(aaFitCO2, aaFitpol2CO2))

Subsection 6.1.8: The `gamm()` function, with a correlated errors model

SOI.gam <- gam(SOI~s(Year), data=bomreg)
auto.arima(resid(SOI.gam))             # sigma^2 = 43.4
## The following breaks the model into two parts -- gam and lme
SOI.gamm <- gamm(SOI~s(Year), data=bomreg)       
res <- resid(SOI.gamm$lme, type="normalized")
auto.arima(res)                        # sigma^2 = 0.945
## Extract scale estimate for `gam` component of SOI.gamm
summary(SOI.gamm$gam)[['scale']]       # 45.98
  # Note that 45.98 x .945 ~= 43.4

SOIma2.gamm <- gamm(SOI~s(Year), data=bomreg, correlation=corARMA(q=2))
coef(SOIma2.gamm$lme$modelStruct$corStruct, unconstrained = FALSE)  # MA2 ests
SOIar2.gamm <- gamm(SOI~s(Year), data=bomreg, correlation=corARMA(p=2))
coef(SOIar2.gamm$lme$modelStruct$corStruct, unconstrained = FALSE)  # AR2 ests
cbind(AIC(SOI.gam, SOIma2.gamm$lme, SOIar2.gamm$lme), 
      BIC=BIC(SOI.gam, SOIma2.gamm$lme, SOIar2.gamm$lme)[,2])

The dataset `airquality` (153 days, New York, 1972)

## Add time in days from May 1 to data. 
airq <- cbind(airquality[, 1:4], day=1:nrow(airquality))
  # Column 5 ('day' starting May 1) replaces columns 'Month' & 'Day')
## Check numbers of missing values   # Solar.R:7; Ozone:37
mice::md.pattern(airq, plot=FALSE)   # Final row has totals missing.

smoothPars <- list(col.smooth='red', lty.smooth=2, spread=0)
car::spm(airq, cex.labels=1.2, regLine=FALSE, oma=c(1.95,3,4,3), gap=.15,
         col=adjustcolor('blue', alpha.f=0.3), smooth=smoothPars, fg="gray")

car::powerTransform(gam(Ozone ~ s(Solar.R)+s(Wind)+s(Temp)+s(day), data=airq))
airq$rt4Ozone <- airq$Ozone^0.25

Ozone.gam <- gam(rt4Ozone ~ s(Solar.R)+s(Wind)+s(Temp)+s(day), data=airq)
auto.arima(resid(Ozone.gam))  # Independent errors model appears OK
## Check model terms
anova(Ozone.gam)   # For GAM models, this leaves out terms one at a time
## The term in `day` has no explanatory, and will be removed
Ozone1.gam <- update(Ozone.gam, formula=rt4Ozone ~ s(Solar.R)+s(Wind)+s(Temp))

Subsection 6.1.9: A calibration problem with time series errors

flakes <- DAAG::frostedflakes
calib.arima <- with(flakes, auto.arima(IA400, xreg=Lab))
calib.arima

with(flakes, coef(auto.arima(IA400/Lab, approximation=F, stepwise=F)))
with(flakes, coef(auto.arima(IA400-Lab, approximation=F, stepwise=F)))

Section 6.2: Nonlinear time series

x <- numeric(999)  # x will contain the ARCH(1) errors
x0 <- rnorm(1)
for (i in 1:999){
x0 <- rnorm(1, sd=sqrt(.25 + .95*x0^2))
x[i] <- x0
}

suppressPackageStartupMessages(library(tseries))
garch(x, order = c(0, 1), trace=FALSE)

Section 6.3: Further reading

Section 6.4: Exercises

6.4

xx <- matrix(x, ncol=1000)

6.7

library(tseries)
data(ice.river)
river1 <- diff(log(ice.river[, 1]))

6.9

library(forecast)
Eu1 <- window(EuStockMarkets[,1], end = c(1996, 260))
Eu1nn <- nnetar(Eu1)
Eu1f <- forecast(Eu1nn, end=end(EuStockMarkets[,1]))
plot(Eu1f, ylim=c(1400, 7000))
lines(EuStockMarkets[,1])

6.10a

airq <- cbind(airquality[, 1:4], day=1:nrow(airquality))
  # Column 5 ('day' starting May 1) replaces columns 'Month' & 'Day')
library(mgcv)
temp.gam <- gam(Temp~s(day), data=airq)
tempAR1.gamm <- gamm(Temp~s(day), data=airq, correlation=corAR1())
plot(temp.gam, res=T, cex=2)
plot(tempAR1.gamm$gam, res=T, cex=2)

6.10b

(Phi <- coef(tempAR1.gamm$lme$modelStruct$corStruct, unconstrained = FALSE) )
Sigma <- sqrt(tempAR1.gamm$gam$sig2)
## Simulate an AR1 process with this parameter
AR1.sim <- arima.sim(model=list(ar=Phi), n=nrow(airq), sd=Sigma)
simSeries <- AR1.sim+fitted(tempAR1.gamm$gam)
plot(I(1:nrow(airq)), simSeries)
## Compare with initial series
plot(I(1:nrow(airq)), airq$Temp)

if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
code <- knitr::knit_code$get()
txt <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/ch6.R")
}

Packages required (plus any dependencies)

Section 6.1: Time series – some basic ideas

Subsection 6.1.1: Time series objects

Subsection 6.1.2: Preliminary graphical exploration

Subsection 6.1.3: The autocorrelation and partial autocorrelation function

Subsection 6.1.4: Autoregressive (AR) models

The AR(1) model

The general AR(p) model

~Moving average (MA) processes

Subsection 6.1.5: ~Autoregressive moving average (ARMA) models – theory

Subsection 6.1.6: Automatic model selection?

Use of simulation as a check

Subsection 6.1.7: Seasonal effects

The mdbAVt series

Subsection 6.1.8: The gamm() function, with a correlated errors model

The dataset airquality (153 days, New York, 1972)

Subsection 6.1.9: A calibration problem with time series errors

Section 6.2: Nonlinear time series

Section 6.3: Further reading

Section 6.4: Exercises

The `AR(1)` model

Subsection 6.1.8: The `gamm()` function, with a correlated errors model

The dataset `airquality` (153 days, New York, 1972)