10 Appendix A: The R System – A Brief Overview

On options for working with the code see the vignettes Ch1-Learning and UsingCode.

Packages required (plus any dependencies)

DAAG dplyr tidyr tibble MASS gplots plotrix latticeExtra RColorBrewer

Additionally, knitr and Hmisc are required in order to process the qmd source file.

Section 10.1: Getting started with R

Subsection 10.1.1: Learn by typing code at the command line

> ## Arithmetic calculations.  See the help page `?Arithmetic` {-}
> 2*3+10            # The symbol `*` denotes 'multiply'

> ## Use the `c()` function to join results into a numeric vector
> c(sqrt(10), 2*3+10, sqrt(10), 2^3)  # 2^3 is 2 to the power of 3
> ## R knows about pi
> 2*pi*6378         # Approximate circumference of earth at equator (km)

?help              # Get information on the use of `help()`
?sqrt              # Or, type help(sqrt)
?Arithmetic        # See, in similar vein ?Syntax
?'<'               # `?Comparison` finds the same help page

## Two commands on one line; Use ';' as separator
2*3*4+10; sqrt(10)        ## Try also `cat(2*3*4+10, sqrt(10), sep='n')
## Convert CO2 carbon emissions from tonnes of carbon to tonnes of CO2 
3.664*c(.53, 2.56, 9.62)  ## Data are for 1900, 1960 & 2020

## Use `cat()` to print several items, with control of formatting
cat(2*3*4+10, sqrt(10), '\n')

Assignment

## Convert from amounts of carbon to amounts of CO2 (billions of tonnes)
## and assign result to a named object
fossilCO2vals <- c(.53, 2.56, 9.62)*3.664  # Amounts in 1900, 1960, and 2020
  # Equivalently `fossilCO2vals <- c(.53, 2.56, 9.62)*rep(3.664,3)`
## To assign and print, enclose in round brackets
(fossilCO2vals <- c(.53, 2.56, 9.62)*3.664)

3.664*c(.53,2.56, 9.62) -> fossilCO2vals

Entry of data at the command line, a graphics formula, and a graph

Year <- c(1900, 1920, 1940, 1960, 1980, 2000, 2020)
CO2 <- c(.53,.96,1.32,2.56,5.32,6.95,9.62)*3.664
## Now plot Carbon Dioxide emissions as a function of Year
plot(CO2 ~ Year, pch=16, fg="gray")

Grouping vectors togeher into data frames

CO2byYear <- data.frame(year=Year, co2gas=CO2)
CO2byYear         # Display the contents of the data frame.
rm(Year, CO2)     # Optionally, remove `Year` and `Carbon` from the workspace
plot(co2gas ~ year, data=CO2byYear, pch=16)

sqrt(10)           # Number of digits is determined by current seting
options(digits=2)  # Change until further notice,
sqrt(10)

Section 10.2: R data structures

Subsection 10.2.1: Vectors, dates, and arrays

vehicles <- c("Compact", "Large", "Midsize", "Small", "Sporty", "Van")
c(T, F, F, F, T, T, F)   # A logical vector, assuming F=FALSE and T=TRUE

## Character vector
mammals <- c("Rat", "Pig", "Rat", "Mouse", "Pig")
## Logical vector
rodent <- c("TRUE", "FALSE", "TRUE", "FALSE", "TRUE", "FALSE")
## From character vector `mammals`, create factor
mfac <- factor(mammals)
levels(mfac)  
table(mfac)

Dates

day1 <- as.Date(c("2022-01-01", "2022-02-01", "2022-03-01"))
as.numeric(day1)   # Days since 1 January 1970
day1[3] - day1[2]

The use of square brackets to extract subsets of vectors

## Specify the indices of the elements that are to be extracted
x <- c(3, 11, 8, 15, 12,18)
x[c(1,4:6)]        # Elements in positions 1, 4, 5, and 6
## Use negative indices to identify elements for omission
x[-c(2,3)]         # Positive and negative indices cannot be mixed
## Specify a vector of logical values. 
x > 10             # This generates a vector of logical values
x[x > 10]

bodywt <- c(Cow=465, Goat=28, Donkey=187, Pig=192)
bodywt[c("Goat", "Pig")]

Matrices and arrays

arr123 <- array(1:24, dim=c(2,4,3))
## This prints as three 2 by 4 matrices.  Print just the first of the three.
arr123[, 2, 1]     # Column 2 and index 1 of 3rd dimension
attributes(arr123)

Subsection 10.2.2: Factors

gender <- c(rep("male",691), rep("female",692))
gender <- factor(gender)  # From character vector, create factor
levels(gender)            # Notice that `female` comes first

Gender <- factor(gender, levels=c("male", "female"))

mf1 <- factor(rep(c('male','female'),c(2,3)), labels=c("f", "m"))
## The following has the same result
mf2 <- factor(rep(c('male','female'), c(2,3)))
levels(mf2) <- c("f","m")   # Assign new levels
if(all(mf1==mf2))print(mf1)

sum(gender=="male")

table(chickwts$feed)  # feed is a factor
source <- chickwts$feed  
levels(source) <- c("milk","plant","plant","meat","plant","plant")
table(source)

Ordered factors

stress <- rep(c("low","medium","high"), 2)
ord.stress <- ordered(stress, levels=c("low", "medium", "high"))
ord.stress
ord.stress >= "medium"

Subsection 10.2.3: Operations with data frames

Cars93sum <- DAAG::Cars93.summary  # Create copy in workspace
Cars93sum

Cars93sum[4:6, 2:3]   # Extract rows 4 to 6 and columns 2 and 3
Cars93sum[6:4, ]      # Extract rows in the order 6, 5, 4
Cars93sum[, 2:3]      # Extract columns 2 and 3
## Or, use negative integers to specify rows and/or columns to be omitted
Cars93sum[-(1:3), -c(1,4)]  # In each case, numbers must be all +ve or all -ve
## Specify row and/or column names
Cars93sum[c("Small","Sporty","Van"), c("Max.passengers","No.of.cars")]

Data frames vs matrices

names(Cars93sum)[3] <- "numCars"
names(Cars9sum) <- c("minPass","maxPass","numCars","code")

Using a data frame as a database – with() and within()

## trees (datasets) has data on Black Cherry Trees
with(trees, round(c(mean(Girth), median(Girth), sd(Girth)),1))

with(DAAG::pair65,       # stretch of rubber bands
  {lenchange = heated-ambient
   c(mean(lenchange), median(lenchange))
})

## Add variables `mph` and `gradient` to `DAAG::nihills`
nihr <- within(DAAG::nihills, {mph <- dist/time; gradient <- climb/dist})

Extracting rows from data frames

unlist(Cars93sum[1, ])

Subsection 10.2.4: Data manipulation functions used in earlier chapters

## For columns of `DAAG::jobs`, show the range of values
sapply(DAAG::jobs, range)
## Split egg lengths by species, calculate mean, sd, and number for each
with(DAAG::cuckoos, sapply(split(length,species), 
                           function(x)c(av=mean(x), sd=sd(x), nobs=length(x))))

apply(UCBAdmissions, 3, function(x)(x[1,2]/(x[1,2]+x[2,2]))*100) # Females
apply(UCBAdmissions, 3, function(x)(x[1,1]/(x[1,1]+x[2,1]))*100) # Males

UCBAdmissions[, , 1]

DAAG::cricketer |> dplyr::count(year, left, name="Freq") -> handYear
names(handYear)[2] <- "hand"
byYear <- tidyr::pivot_wider(handYear, names_from='hand', values_from="Freq")

Subsection 10.2.5: Writing data to a file, and reading data from a file

CO2byYear <- data.frame(year = seq(from=1900, to=2020, by=20),
                        co2gas = c(1.94, 3.52, 4.84, 9.38, 19.49, 25.46, 35.25))
write.table(CO2byYear, file='gas.txt')    # Write data frame to file
CO2byYear <- read.table(file="gas.txt")   # Read data back in
write.csv(CO2byYear, file='gas.csv')                  # Write data frame
CO2byYear <- read.csv(file="gas.csv", row.names=1)    # Read data back in

Subsection 10.2.6: Issues for working with data frames and tibbles

Extraction of columns from data frames and tibbles

sites <- DAAG::possumsites    # sites is then a data frame
sites[,3]                     # returns a vector
sites[,3, drop=FALSE]         # returns a 1-column data frame

dplyr::as_tibble(sites)[,3]   # returns a 1-column tibble
dplyr::as_tibble(sites)[[3]]  # returns a vector
sites[[3]]                    # returns a vector

Conversion between data frames and tibbles

attributes(DAAG::possumsites)[['row.names']]

possumSites <- tibble::as_tibble(DAAG::possumsites, rownames="Site")
possumSites

Subsection 10.2.7: Lists

## Summary statistics for 31 felled black cherry tree
## Median (middle value), range, number, units
htstats <- list(med=76, range=c(low=63,high=87), n=31, units="ft")
htstats[1:2]       # Show first two list elements only

## The following are alternative ways to extract the second list element
htstats[2]          # First list element (Can replace `2` by 'range')
htstats[2][1]       # A subset of a list is a list

htstats[[2]]; htstats$range; htstats[["range"]]

unlist(htstats[2])  # Contents of second list element, with composite names
unlist(htstats[2], use.names=F)   # Elements have no names

tstats <- with(MASS::shoes, t.test(B, A, paired=TRUE))
names(tstats)        ## Names of list elements. See `?t.test` for details.
tstats[1]            ## Type tstats[1] to see the first list element
## Compact listing of contents list elements 1 to 5, which are all numeric
unlist(tstats[1:5])  ## With `unlist(tstats)` all elements become character

Section 10.3: Functions and operators

Subsection 10.3.1: Common useful built-in functions

## Data indices
length()       # number of elements in a vector or a list
order()        # x[order(x)] sorts x (by default, NAs are last)
which()        # which indices of a logical vector are `TRUE`
which.max()    # locates (first) maximum (NB, also: `which.min()`)

## Data manipulation
c()            # join together (`concatenate`) elements or vectors or lists
diff()         # vector of first differences
sort()         # sort elements into order, by default omitting NAs
rev()          # reverse the order of vector elements
t()            # transpose matrix or data frame 
               # (a data frame is first coerced to a matrixwith() 
with()         # do computation using columns of specified data frame

## Data summary
mean()         # mean of the elements of a vector
median()       # median of the elements of a vector
range()        # minimum and maximum value elements of vector
unique()       # form the vector of distinct values
## List function arguments
args()         # information on the arguments to a function

## Obtain details
head()         # display first few rows (by default 6) of object
ls()           # list names of objects in the workspace

## Print multiple objects
cat()          # prints multiple objects, one after the other

## Functions that return TRUE or FALSE?
all()          # returns TRUE if all values are TRUE
any()          # returns TRUE if any values are TRUE
is.factor()    # returns TRUE if the argument is a factor
is.na()        # returns TRUE if the argument is an NA
               # NB also is.logical(), etc.

seq(from =1, by=2, length.out=3)  # Unabbeviated arguments
seq(from =1, by=2, length=3)      # Abbreviate `length.out` to `length`

Subsection 10.3.2: User-written functions

distance <- c(148,182,173,166,109,141,166)
mean.and.sd(distance)

## Execute the function with the  default argument:
mean.and.sd()

## Thus, to return the mean, SD and name of the input vector
## replace c(mean=av, SD=sdev) by
list(mean=av, SD=sdev, dataset = deparse(substitute(x)))

Subsection 10.3.3: Generic functions, and the class of an object

Subsection 10.3.4: Pipes — a “Do this, then this, . . .” syntax

mean(rnorm(20, sd=2))
20 |> rnorm(sd=2) |> mean()

logmammals <- MASS::mammals |> log() |> setNames(c("logbody","logbrain"))
## Alternatively, use the ability to reverse the assignment operator.
MASS::mammals |> log() |> setNames(c("logbody","logbrain")) -> logmammals 
  ## This last is more in the spirit of pipes.

MASS::mammals |>
  log() |>
  setNames(c("logbody","logbrain")) |>
  (\(d)lm(logbrain ~ logbody, data=d))() |>
  coef()

Subsection 10.3.5: Operators

## Multiple of divisor that leaves smallest non-negative remainder
c("Multiple of divisor" = 24 %/% 9, "Remainder after division" = 6)

Section 10.4: Calculations with matrices, arrays, lists, and data frames

Calculations in parallel across all elements of a vector

x <- 1:6
log(x)                 # Natural logarithm of 1, 2, ... 6
log(x, base=10)        # Common logarithm (base 10)
log(64, base=c(2,10))  # Apply different bases to one number
log(matrix(1:6, nrow=2), base=2)  # Take logarithms of all matrix elements

Patterned data

seq(from=5, to=22, by=3)  # The first value is 5.
rep(c(2,3,5), 4)          #  Repeat the sequence (2, 3, 5) four times over
rep(c("female", "male"), c(2,3))    # Use syntax with a character vector

Subsection 10.4.1: Missing values

nbranch <- subset(DAAG::rainforest, species=="Acacia mabellae")$branch
nbranch            # Number of small branches (2cm or less)

mean(nbranch, na.rm=TRUE)

nbranch == NA      # This always equals `NA`

is.na(nbranch)    # Use to check for NAs

nbranch[is.na(nbranch)] <- -999
  # `mean(nbranch)` will then be a nonsense value

NAs in modeling functions

options()$na.action # Version 3.2.2, following startup

Counting and identifying NAs – the use of table()

with(DAAG::nswdemo, table(trt, re74>0, useNA="ifany"))

Infinities and NaNs

summary(DAAG::primates)

primates <- DAAG::primates

gplots::plotCI()    # `plotCI() function in package `gplots`
plotrix::plotCI()   # `plotCI() function in package `plotrix`

sessionInfo()[['basePkgs']]

## List just the workspace and the first eight packages on the search list:
search()[1:9]

data(package="datasets")

Section 10.5: Brief notes on R graphics packages and functions

Subsection 10.5.1: Lattice graphics — a step beyond base graphics

grog <- DAAG::grog
chr <- with(grog, match(Country, c('Australia', 'NewZealand')))  
  # Australia: 1; matches 1st element of c('Australia', 'NewZealand')
  # NewZealand: 2; matches 2nd element
plot(Beer ~ Year, data=grog, ylim=c(0, max(Beer)*1.1), pch = chr)
with(grog, points(Wine ~ Year, pch=chr, col='red'))
legend("bottomright", legend=c("Australia", "New Zealand"), pch=1:2)
title(main="Beer consumption (l, pure alcohol)", line=1)

library(latticeExtra)    ## Loads both lattice and the add-on latticeExtra
gph <- xyplot(Beer+Wine ~ Year, groups=Country, data=grog)
update(gph, par.settings=simpleTheme(pch=19), auto.key=list(columns=2))

## Or, condition on `Country`
xyplot(Beer+Wine+Spirit ~ Year | Country, data=grog,
       par.settings=simpleTheme(pch=19), auto.key=list(columns=3))

tinting <- DAAG::tinting
xyplot(csoa~it | tint*target, groups=agegp, data=tinting, auto.key=list(columns=2))

cuckoos <- DAAG::cuckoos
av <- with(cuckoos, aggregate(length, list(species=species), FUN=mean))
gph <- dotplot(species ~ length, data=cuckoos, alpha=0.4) +
  as.layer(dotplot(species ~ x, pch=3, cex=1.4, col="black", data=av))
update(gph, xlab="Length of egg (mm)")

## Alternatives, using `layer()` or `as.layer()`
avg <- with(cuckoos, data.frame(nspec=1:nlevels(species), 
                             av=sapply(split(length,species),mean)))
dotplot(species ~ length, data=cuckoos) + 
  layer(lpoints(nspec~av, pch=3, cex=1.25, col="black"), data=avg)

dotplot(species ~ length, data=cuckoos) + 
  as.layer(dotplot(nspec~av, data=avg, pch=3, cex=1.25, col="black"))

## Specify panel function
dotplot(species ~ length, data=cuckoos, 
  panel=function(x,y,...){panel.dotplot(x, y, pch=1, col="gray40")
    avg <- data.frame(nspec=1:nlevels(y), av=sapply(split(x,y),mean))
    with(avg, lpoints(nspec~av, pch=3, cex=1.25, col="black")) })

Combining separately created graphics objects

cuckoos <- DAAG::cuckoos
## Panel A: Dotplot without species means added
gphA <- dotplot(species ~ length, data=cuckoos) 
## Panel B: Box and whisker plot
gphB <- bwplot(species ~ length, data=cuckoos)
update(c("A: Dotplot"=gphA, "B: Boxplot"=gphB), between=list(x=0.4),
       xlab="Length of egg (mm)", layout=c(2,1))
  ## `latticeExtra::c()` joins compatible plots together. 
  ## `layout=c(2,1)` : join horizontally; `layout=c(1,2)` : join vertically

Subsection 10.5.2: Dynamic graphics – the rgl package

vignette('plot3D', package='plot3D')

Section 10.6: Plotting characters, symbols, line types and colors

ycol <- -2.1 - (0:9) * 2.1
ftype <- c("plain", "bold", "italic", "bold italic", "symbol")
yline <- 4.2
ypmax <- 20
farleft <- -7.8
plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
chh <- par()$cxy[2]
text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
letterfont <- function(ypos = ypmax, font = 2) {
par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}
plotfont <- function(xpos = 0:31, ypos = ypmax, font = 1,
sel32 = 2:4, showfont = TRUE) {
par(font = font)
i <- 0
for (j in sel32) {
i <- i + 1
maxval <- j * 32 - 1
if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
maxval, sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}
lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}
plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
ltypes <- c("blank", "solid", "dashed", "dotted", "dotdash",
"longdash", "twodash")
lcode <- c("", "", "44", "13", "1343", "73", "2262")
for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
numchar <- paste("lty =", i, " ")
else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}

ycol <- -2.1 - (0:9) * 2.1
ftype <- c("plain", "bold", "italic", "bold italic", "symbol")
yline <- 4.2
ypmax <- 20
farleft <- -7.8
plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
chh <- par()$cxy[2]
text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
letterfont <- function(ypos = ypmax, font = 2) {
par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}
plotfont <- function(xpos = 0:31, ypos = ypmax, font = 1,
sel32 = 2:4, showfont = TRUE) {
par(font = font)
i <- 0
for (j in sel32) {
i <- i + 1
maxval <- j * 32 - 1
if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
maxval, sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}
lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}
plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
ltypes <- c("blank", "solid", "dashed", "dotted", "dotdash",
"longdash", "twodash")
lcode <- c("", "", "44", "13", "1343", "73", "2262")
for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
numchar <- paste("lty =", i, " ")
else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}

Colors

library(RColorBrewer)
palette(brewer.pal(12, "Set3"))

if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
code <- knitr::knit_code$get()
txt <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/Appendix.R")
}