• Main
    • Code and Supplements
    • Answers to selected exercises
    • Afterword
  • Practical Guide
  • Blog
  • R code
  • Resources
    • Ways to Use Code
    • Code for Selected Figures
    • Download PDF
    • Download Docx
  • Download PDF
  • Download Docx
  1. 6  Time series models
  • Preface
  • 1  Learning from data
  • 2  Generalizing from models
  • 3  Multiple linear regression
  • 4  Exploiting the linear model framework
  • 5  Generalized linear models and survival analysis
  • 6  Time series models
  • 7  Multilevel models, and repeated measures
  • 8  Tree-based Classification and Regression
  • 9  Multivariate data exploration and discrimination
  • 10  Appendix A: The R System – A Brief Overview

Table of contents

  • Packages required (plus any dependencies)
  • Section 6.1: Time series – some basic ideas
  • Section 6.2: Nonlinear time series
  • Section 6.3: Further reading
  • Section 6.4: Exercises

6  Time series models

Packages required (plus any dependencies)

DAAG ggsci latticeExtra ggplot2 mice car forecast mgcv tseries

Additionally, knitr and Hmisc are required in order to process the Rmd source file.

Section 6.1: Time series – some basic ideas

Subsection 6.1.1: Time series objects

class("lakeHuron")
## Use `time()` to extract the `time` attribute
range(time(LakeHuron))
## Use `window()` to subset a time series
LHto1925 <- window(LakeHuron, from=1875, to=1925)
jobs <- DAAG::jobs
names(jobs)
allRegions <- ts(jobs[, -7])   # Create multivariate time series
time(allRegions)               # Times run from 1 to 24
allRegions <- ts(jobs[, -7], start=c(1995,1), frequency=12)
allRegions[,"BC"]              # Extract jobs data for British Columbia
jobsBC <- ts(jobs[, "BC"], start=c(1995,1), frequency=12)
  # Obtain equivalent of `allRegions[,"BC"]` directly from `jobs` dataset

Subsection 6.1.2: Preliminary graphical exploration

## Plot depth measurements: ts object LakeHuron (datasets)
plot(LakeHuron, ylab="depth (in feet)", xlab = "Time (in years)", fg="gray")
lag.plot(LakeHuron, lags=3, do.lines=FALSE)

Subsection 6.1.3: The autocorrelation and partial autocorrelation function

par(oma=c(0,0,1.5,0))
par(pty="s")
lag.plot(LakeHuron, set.lags=1:4,do.lines=F, oma=c(0,1.5,1.5,1.5),
fg="gray", layout=c(1,4), cex.lab=1.15, asp=1)
mtext(side=3, line=0.5, "A: Lag plots", adj=0, cex=0.85, outer=TRUE)
library(lattice)
col3 <- c("gray80",rev(ggsci::pal_npg()(2)))
lag.max <- 15
offset <- 0.18
ci95 <- 2/sqrt(length(LakeHuron))
ar2 <- ar(LakeHuron)
gph.key <- list(x=0.975, y=0.965, corner=c(1,1), columns=1, cex=0.85,
                 text=list(c("Lake Huron data","AR1 process","AR2 process")),
                 lines=list(lwd=c(3,1.5,1.5), col=col3,lend=2),
                 padding.text=1)
parsetBC <- list(fontsize=list(text=8, points=5), 
                 superpose.line=list(col=col3, lty=rep(1,3),
                 lwd=c(3,1.5,1.5)))
parsetBC <- modifyList(parsetBC,list(grid.pars = list(lineend = "butt")))
lev3 <- factor(c("acfData","acfAR1","acfAR2"),
               levels=c("acfData","acfAR1","acfAR2"))
acfData <- acf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
pacfData <- pacf(LakeHuron, main="", plot=FALSE, lag.max=lag.max)$acf
acfAR1 <- ARMAacf(ar=0.8, lag.max=lag.max)
acfAR2 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max)
pacfAR1 <- ARMAacf(ar=0.8, lag.max=lag.max, pacf=TRUE)
pacfAR2 <- ARMAacf(ar=ar2$ar, ma=0, lag.max=lag.max, pacf=TRUE)
xy <- data.frame(acf=c(acfData,acfAR1,acfAR2),
Lag=c(rep(0:lag.max,3))+rep(c(0,-offset,offset),
rep(lag.max+1,3)),
gp=rep(lev3, rep(lag.max+1,3)))
gphB <- xyplot(acf ~ Lag, data = xy, groups=gp, type=c("h"),
              par.strip.text = list(cex = 0.85), lend=2,origin=0, 
              ylim=c(-0.325, 1.04),key=gph.key, par.settings=parsetBC, 
          panel=function(x,y,...){
            panel.xyplot(x,y,...)
            panel.abline(h=0, lwd=0.8)
            panel.abline(h=ci95, lwd=0.8, lty=2)
            panel.abline(h=-ci95, lwd=0.8, lty=2) } )
xyp <- data.frame(pacf=c(pacfData,pacfAR1,pacfAR2),
                  Lag=c(rep(1:lag.max,3))+c(rep(c(0,-offset,offset),
                  rep(lag.max,3))), gp=rep(lev3, rep(lag.max,3)))
gphC <- xyplot(pacf ~ Lag, data = xyp, groups=gp, type=c("h"),
               par.strip.text = list(cex = 0.85), lend=2,
               ylab = "Partial correlation", origin=0, ylim=c(-0.325, 1.04),
               key=gph.key, par.settings=parsetBC, 
               panel=function(x,y,...){
                 panel.xyplot(x,y,...)
                 panel.abline(h=0, lwd=0.8)
                 panel.abline(h=ci95, lwd=0.8, lty=2)
                 panel.abline(h=-ci95, lwd=0.8, lty=2) } )
print(update(gphB, scales=list(alternating=FALSE, tck=0.5),
             ylab = "Autocorrelation",
             main=list("B: Autocorelation -- Data vs AR processes", 
                       font=1, x=0, y=0.25, just="left", cex=1)), 
                       pos=c(0,0,0.5,0.9))
print(update(gphC, 
        scales=list(x=list(at=c(1,5,10,15)), alternating=FALSE, tck=0.5),
        ylab = "Partial autocorrelation",
        main=list("C: Partial autocorrelation -- Data vs AR processes", 
                  font=1, x=0, y=0.25, just="left", cex=1)),
        pos=c(0.5,0,1,0.9),newpage=FALSE)
acf(LakeHuron)
## pacf(LakeHuron) gives the plot of partial autocorrelations

Subsection 6.1.4: Autoregressive (AR) models

The AR(1) model
## Yule-Walker autocorrelation estimate
LH.yw <- ar(LakeHuron, order.max = 1, method = "yw")  # autocorrelation estimate
# order.max = 1 for AR(1) model
LH.yw$ar                  # autocorrelation estimate of alpha
## Maximum likelihood estimate
LH.mle <- ar(LakeHuron, order.max = 1, method = "mle")
LH.mle$ar                 # maximum likelihood estimate of alpha
LH.mle$x.mean             # estimated series mean
LH.mle$var.pred           # estimated innovation variance
The general AR(p) model
ar(LakeHuron, method="mle")
~Moving average (MA) processes

Subsection 6.1.5: ~Autoregressive moving average (ARMA) models – theory

Subsection 6.1.6: Automatic model selection?

library(forecast, quietly=TRUE)
(aaLH <- auto.arima(LakeHuron, approximation=F, stepwise=F))
## Check that model removes most of the correlation structure
acf(resid(aaLH, type="innovation"))   # `type="innovation"` is the default
auto.arima(LakeHuron)
(aaLH0 <- auto.arima(LakeHuron, d=0, approximation=F, stepwise=F))
plot(forecast(aaLH0,  h=12))  ## `level=c(80,95)` is the default
fcETS <- forecast(LakeHuron, h=12)
plot(fcETS)
plot(forecast(aaLH,  h=12, level=c(80,95)))  # Panel B; ARIMA(2,1,1)
auto.arima(LakeHuron, d=0, max.Q=0, approximation=F, stepwise=F)
Use of simulation as a check
oldpar <- par(mfrow=c(2,2), mar=c(3.1,4.6,2.6, 1.1))
for(i in 1:2){
  simts <- arima.sim(model=list(order=c(0,0,3), ma=c(0,0,0.25*i)), n=98)
  acf(simts, main="", xlab="")
  mtext(side=3, line=0.5, paste("ma3 =", 0.25*i), adj=0)
}
par(oldpar)
set.seed(29)         # Ensure that results are reproducible
estMAord <- matrix(0, nrow=4, ncol=20)
for(i in 1:4){
  for(j in 1:20){
    simts <- arima.sim(n=98, model=list(ma=c(0,0,0.125*i)))
    estMAord[i,j] <- auto.arima(simts, start.q=3)$arma[2] }
}
detectedAs <- table(row(estMAord), estMAord)
dimnames(detectedAs) <- list(ma3=paste(0.125*(1:4)),
Order=paste(0:(dim(detectedAs)[2]-1)))
print(detectedAs)

Subsection 6.1.7: Seasonal effects

suppressPackageStartupMessages(library(ggplot2))
mdb12AVt1980on <- window(DAAG::mdbAVtJtoD, c(1980,1))
AVt.ets <- ets(mdb12AVt1980on)
autoplot(AVt.ets, main="", fg="gray") + 
  ggplot2::ggtitle("A: Components of ETS fit") +
    theme(plot.title = element_text(hjust=0, vjust=0.5, size=11))
monthplot(mdb12AVt1980on, col.base=2,  fg="gray")
title("B: Seasonal component, SI ratio", 
      font.main=1, line=1, adj=0, cex=1.25)
bomreg <- DAAG::bomregions2021
## Plot time series mdbRain, SOI, and IOD: ts object bomregions2021 (DAAG)
gph <- xyplot(ts(bomreg[, c("mdbRain", "mdbAVt", "SOI", "IOD")], start=1900), 
              xlab="", type=c('p','smooth'), scales=list(alternating=rep(1,3)))
update(gph, layout=c(4,1), par.settings=DAAG::DAAGtheme(color=F))
suppressPackageStartupMessages(library(mgcv))
bomreg <- within(DAAG::bomregions2021, mdbrtRain <- mdbRain^0.5)
## Check first for a sequential correlation structure, after
## fitting smooth terms s(CO2), s(SOI), and s(IOD)
library(mgcv)
mdbrtRain.gam <- gam(mdbrtRain~s(CO2) + s(SOI) + s(IOD), data=bomreg)
auto.arima(resid(mdbrtRain.gam))
plot(mdbrtRain.gam, residuals=T, cex=2, fg="gray")
## Do also `gam.check(mdbrtRain.gam)` (Output looks fine)
anova(mdbrtRain.gam)
Box.test(resid(mdbrtRain.gam), lag=10, type="Ljung")
## Examine normality of estimates of "residuals" 
qqnorm(resid(mdbrtRain.gam))
The mdbAVt series
mdbAVt.gam <- gam(mdbAVt ~ s(CO2)+s(SOI)+s(IOD), data=bomreg)
auto.arima(resid(mdbAVt.gam))
anova(mdbAVt.gam)
mdbAVt1.gam <- gam(mdbAVt ~ s(CO2)+s(SOI), data=bomreg)
plot(mdbAVt1.gam, residuals=TRUE)
faclevs <- c("A: Rainfall", expression("B: Average Temp ("^o*"C)"))
fitrain <- fitted(mdbrtRain.gam) 
fitAVt <- c(rep(NA,10), fitted(mdbAVt1.gam))
gph <- xyplot(mdbrtRain+mdbAVt~Year,data=bomreg, outer=T, xlab="", ylab="",
  scales=list(y=list(relation='free', 
              at=list(sqrt((3:8)*100),(33:39)/2), 
              labels=list((3:8)*100,(33:39)/2)), x=list(alternating=rep(1,2))),
  strip=strip.custom(factor.levels=faclevs))
gph + latticeExtra::as.layer(xyplot(fitrain+fitAVt~Year, outer=T,
                                    scales=list(y=list(relation='free')), 
                                    data=bomreg, pch=3, col=2))
## Use `auto.arima()` to choose the ARIMA order:
aaFitCO2 <- with(bomreg[-(1:10),], auto.arima(mdbAVt, xreg=cbind(CO2,SOI)))
## Try including a degree 2 polynomial term
aaFitpol2CO2 <- with(bomreg[-(1:10),], 
                     auto.arima(mdbAVt, xreg=cbind(poly(CO2,2),SOI)))
cbind(AIC(aaFitCO2, aaFitpol2CO2), BIC=BIC(aaFitCO2, aaFitpol2CO2))

Subsection 6.1.8: The gamm() function, with a correlated errors model

SOI.gam <- gam(SOI~s(Year), data=bomreg)
auto.arima(resid(SOI.gam))             # sigma^2 = 43.4
## The following breaks the model into two parts -- gam and lme
SOI.gamm <- gamm(SOI~s(Year), data=bomreg)       
res <- resid(SOI.gamm$lme, type="normalized")
auto.arima(res)                        # sigma^2 = 0.945
## Extract scale estimate for `gam` component of SOI.gamm
summary(SOI.gamm$gam)[['scale']]       # 45.98
  # Note that 45.98 x .945 ~= 43.4
SOIma2.gamm <- gamm(SOI~s(Year), data=bomreg, correlation=corARMA(q=2))
coef(SOIma2.gamm$lme$modelStruct$corStruct, unconstrained = FALSE)  # MA2 ests
SOIar2.gamm <- gamm(SOI~s(Year), data=bomreg, correlation=corARMA(p=2))
coef(SOIar2.gamm$lme$modelStruct$corStruct, unconstrained = FALSE)  # AR2 ests
cbind(AIC(SOI.gam, SOIma2.gamm$lme, SOIar2.gamm$lme), 
      BIC=BIC(SOI.gam, SOIma2.gamm$lme, SOIar2.gamm$lme)[,2])
The dataset airquality (153 days, New York, 1972)
## Add time in days from May 1 to data. 
airq <- cbind(airquality[, 1:4], day=1:nrow(airquality))
  # Column 5 ('day' starting May 1) replaces columns 'Month' & 'Day')
## Check numbers of missing values   # Solar.R:7; Ozone:37
mice::md.pattern(airq, plot=FALSE)   # Final row has totals missing.
smoothPars <- list(col.smooth='red', lty.smooth=2, spread=0)
car::spm(airq, cex.labels=1.2, regLine=FALSE, oma=c(1.95,3,4,3), gap=.15,
         col=adjustcolor('blue', alpha.f=0.3), smooth=smoothPars, fg="gray")
car::powerTransform(gam(Ozone ~ s(Solar.R)+s(Wind)+s(Temp)+s(day), data=airq))
airq$rt4Ozone <- airq$Ozone^0.25
Ozone.gam <- gam(rt4Ozone ~ s(Solar.R)+s(Wind)+s(Temp)+s(day), data=airq)
auto.arima(resid(Ozone.gam))  # Independent errors model appears OK
## Check model terms
anova(Ozone.gam)   # For GAM models, this leaves out terms one at a time
## The term in `day` has no explanatory, and will be removed
Ozone1.gam <- update(Ozone.gam, formula=rt4Ozone ~ s(Solar.R)+s(Wind)+s(Temp))

Subsection 6.1.9: A calibration problem with time series errors

flakes <- DAAG::frostedflakes
calib.arima <- with(flakes, auto.arima(IA400, xreg=Lab))
calib.arima
with(flakes, coef(auto.arima(IA400/Lab, approximation=F, stepwise=F)))
with(flakes, coef(auto.arima(IA400-Lab, approximation=F, stepwise=F)))

Section 6.2: Nonlinear time series

x <- numeric(999)  # x will contain the ARCH(1) errors
x0 <- rnorm(1)
for (i in 1:999){
x0 <- rnorm(1, sd=sqrt(.25 + .95*x0^2))
x[i] <- x0
}
suppressPackageStartupMessages(library(tseries))
garch(x, order = c(0, 1), trace=FALSE)

Section 6.3: Further reading

Section 6.4: Exercises

6.4

xx <- matrix(x, ncol=1000)

6.7

library(tseries)
data(ice.river)
river1 <- diff(log(ice.river[, 1]))

6.9

library(forecast)
Eu1 <- window(EuStockMarkets[,1], end = c(1996, 260))
Eu1nn <- nnetar(Eu1)
Eu1f <- forecast(Eu1nn, end=end(EuStockMarkets[,1]))
plot(Eu1f, ylim=c(1400, 7000))
lines(EuStockMarkets[,1])

6.10a

airq <- cbind(airquality[, 1:4], day=1:nrow(airquality))
  # Column 5 ('day' starting May 1) replaces columns 'Month' & 'Day')
library(mgcv)
temp.gam <- gam(Temp~s(day), data=airq)
tempAR1.gamm <- gamm(Temp~s(day), data=airq, correlation=corAR1())
plot(temp.gam, res=T, cex=2)
plot(tempAR1.gamm$gam, res=T, cex=2)

6.10b

(Phi <- coef(tempAR1.gamm$lme$modelStruct$corStruct, unconstrained = FALSE) )
Sigma <- sqrt(tempAR1.gamm$gam$sig2)
## Simulate an AR1 process with this parameter
AR1.sim <- arima.sim(model=list(ar=Phi), n=nrow(airq), sd=Sigma)
simSeries <- AR1.sim+fitted(tempAR1.gamm$gam)
plot(I(1:nrow(airq)), simSeries)
## Compare with initial series
plot(I(1:nrow(airq)), airq$Temp)
if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
code <- knitr::knit_code$get()
txt <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/ch6.R")
}
5  Generalized linear models and survival analysis
7  Multilevel models, and repeated measures