10 Appendix A: The R System – A Brief Overview
On options for working with the code see the vignettes Ch1-Learning and UsingCode.
Packages required (plus any dependencies)
DAAG dplyr tidyr tibble MASS gplots plotrix latticeExtra RColorBrewer
Additionally, knitr and Hmisc are required in order to process the qmd source file.
Section 10.1: Getting started with R
Subsection 10.1.1: Learn by typing code at the command line
> ## Arithmetic calculations. See the help page `?Arithmetic` {-}
> 2*3+10 # The symbol `*` denotes 'multiply'> ## Use the `c()` function to join results into a numeric vector
> c(sqrt(10), 2*3+10, sqrt(10), 2^3) # 2^3 is 2 to the power of 3
> ## R knows about pi
> 2*pi*6378 # Approximate circumference of earth at equator (km)?help # Get information on the use of `help()`
?sqrt # Or, type help(sqrt)
?Arithmetic # See, in similar vein ?Syntax
?'<' # `?Comparison` finds the same help page## Two commands on one line; Use ';' as separator
2*3*4+10; sqrt(10) ## Try also `cat(2*3*4+10, sqrt(10), sep='n')
## Convert CO2 carbon emissions from tonnes of carbon to tonnes of CO2
3.664*c(.53, 2.56, 9.62) ## Data are for 1900, 1960 & 2020## Use `cat()` to print several items, with control of formatting
cat(2*3*4+10, sqrt(10), '\n')Assignment
## Convert from amounts of carbon to amounts of CO2 (billions of tonnes)
## and assign result to a named object
fossilCO2vals <- c(.53, 2.56, 9.62)*3.664 # Amounts in 1900, 1960, and 2020
# Equivalently `fossilCO2vals <- c(.53, 2.56, 9.62)*rep(3.664,3)`
## To assign and print, enclose in round brackets
(fossilCO2vals <- c(.53, 2.56, 9.62)*3.664)3.664*c(.53,2.56, 9.62) -> fossilCO2valsEntry of data at the command line, a graphics formula, and a graph
Year <- c(1900, 1920, 1940, 1960, 1980, 2000, 2020)
CO2 <- c(.53,.96,1.32,2.56,5.32,6.95,9.62)*3.664
## Now plot Carbon Dioxide emissions as a function of Year
plot(CO2 ~ Year, pch=16, fg="gray")Grouping vectors togeher into data frames
CO2byYear <- data.frame(year=Year, co2gas=CO2)
CO2byYear # Display the contents of the data frame.
rm(Year, CO2) # Optionally, remove `Year` and `Carbon` from the workspace
plot(co2gas ~ year, data=CO2byYear, pch=16) sqrt(10) # Number of digits is determined by current seting
options(digits=2) # Change until further notice,
sqrt(10)Section 10.2: R data structures
Subsection 10.2.1: Vectors, dates, and arrays
vehicles <- c("Compact", "Large", "Midsize", "Small", "Sporty", "Van")
c(T, F, F, F, T, T, F) # A logical vector, assuming F=FALSE and T=TRUE## Character vector
mammals <- c("Rat", "Pig", "Rat", "Mouse", "Pig")
## Logical vector
rodent <- c("TRUE", "FALSE", "TRUE", "FALSE", "TRUE", "FALSE")
## From character vector `mammals`, create factor
mfac <- factor(mammals)
levels(mfac)
table(mfac)Dates
day1 <- as.Date(c("2022-01-01", "2022-02-01", "2022-03-01"))
as.numeric(day1) # Days since 1 January 1970
day1[3] - day1[2]The use of square brackets to extract subsets of vectors
## Specify the indices of the elements that are to be extracted
x <- c(3, 11, 8, 15, 12,18)
x[c(1,4:6)] # Elements in positions 1, 4, 5, and 6
## Use negative indices to identify elements for omission
x[-c(2,3)] # Positive and negative indices cannot be mixed
## Specify a vector of logical values.
x > 10 # This generates a vector of logical values
x[x > 10]bodywt <- c(Cow=465, Goat=28, Donkey=187, Pig=192)
bodywt[c("Goat", "Pig")]Matrices and arrays
arr123 <- array(1:24, dim=c(2,4,3))
## This prints as three 2 by 4 matrices. Print just the first of the three.
arr123[, 2, 1] # Column 2 and index 1 of 3rd dimension
attributes(arr123)Subsection 10.2.2: Factors
gender <- c(rep("male",691), rep("female",692))
gender <- factor(gender) # From character vector, create factor
levels(gender) # Notice that `female` comes firstGender <- factor(gender, levels=c("male", "female"))mf1 <- factor(rep(c('male','female'),c(2,3)), labels=c("f", "m"))
## The following has the same result
mf2 <- factor(rep(c('male','female'), c(2,3)))
levels(mf2) <- c("f","m") # Assign new levels
if(all(mf1==mf2))print(mf1)sum(gender=="male")table(chickwts$feed) # feed is a factor
source <- chickwts$feed
levels(source) <- c("milk","plant","plant","meat","plant","plant")
table(source)Ordered factors
stress <- rep(c("low","medium","high"), 2)
ord.stress <- ordered(stress, levels=c("low", "medium", "high"))
ord.stress
ord.stress >= "medium"Subsection 10.2.3: Operations with data frames
Cars93sum <- DAAG::Cars93.summary # Create copy in workspace
Cars93sumCars93sum[4:6, 2:3] # Extract rows 4 to 6 and columns 2 and 3
Cars93sum[6:4, ] # Extract rows in the order 6, 5, 4
Cars93sum[, 2:3] # Extract columns 2 and 3
## Or, use negative integers to specify rows and/or columns to be omitted
Cars93sum[-(1:3), -c(1,4)] # In each case, numbers must be all +ve or all -ve
## Specify row and/or column names
Cars93sum[c("Small","Sporty","Van"), c("Max.passengers","No.of.cars")]Data frames vs matrices
names(Cars93sum)[3] <- "numCars"
names(Cars9sum) <- c("minPass","maxPass","numCars","code")Using a data frame as a database – with() and within()
## trees (datasets) has data on Black Cherry Trees
with(trees, round(c(mean(Girth), median(Girth), sd(Girth)),1))with(DAAG::pair65, # stretch of rubber bands
{lenchange = heated-ambient
c(mean(lenchange), median(lenchange))
})## Add variables `mph` and `gradient` to `DAAG::nihills`
nihr <- within(DAAG::nihills, {mph <- dist/time; gradient <- climb/dist})Extracting rows from data frames
unlist(Cars93sum[1, ])Subsection 10.2.4: Data manipulation functions used in earlier chapters
## For columns of `DAAG::jobs`, show the range of values
sapply(DAAG::jobs, range)
## Split egg lengths by species, calculate mean, sd, and number for each
with(DAAG::cuckoos, sapply(split(length,species),
function(x)c(av=mean(x), sd=sd(x), nobs=length(x))))apply(UCBAdmissions, 3, function(x)(x[1,2]/(x[1,2]+x[2,2]))*100) # Females
apply(UCBAdmissions, 3, function(x)(x[1,1]/(x[1,1]+x[2,1]))*100) # MalesUCBAdmissions[, , 1]DAAG::cricketer |> dplyr::count(year, left, name="Freq") -> handYear
names(handYear)[2] <- "hand"
byYear <- tidyr::pivot_wider(handYear, names_from='hand', values_from="Freq")Subsection 10.2.5: Writing data to a file, and reading data from a file
CO2byYear <- data.frame(year = seq(from=1900, to=2020, by=20),
co2gas = c(1.94, 3.52, 4.84, 9.38, 19.49, 25.46, 35.25))
write.table(CO2byYear, file='gas.txt') # Write data frame to file
CO2byYear <- read.table(file="gas.txt") # Read data back in
write.csv(CO2byYear, file='gas.csv') # Write data frame
CO2byYear <- read.csv(file="gas.csv", row.names=1) # Read data back inSubsection 10.2.6: Issues for working with data frames and tibbles
Extraction of columns from data frames and tibbles
sites <- DAAG::possumsites # sites is then a data frame
sites[,3] # returns a vector
sites[,3, drop=FALSE] # returns a 1-column data framedplyr::as_tibble(sites)[,3] # returns a 1-column tibble
dplyr::as_tibble(sites)[[3]] # returns a vector
sites[[3]] # returns a vectorConversion between data frames and tibbles
attributes(DAAG::possumsites)[['row.names']]possumSites <- tibble::as_tibble(DAAG::possumsites, rownames="Site")
possumSitesSubsection 10.2.7: Lists
## Summary statistics for 31 felled black cherry tree
## Median (middle value), range, number, units
htstats <- list(med=76, range=c(low=63,high=87), n=31, units="ft")
htstats[1:2] # Show first two list elements only## The following are alternative ways to extract the second list element
htstats[2] # First list element (Can replace `2` by 'range')
htstats[2][1] # A subset of a list is a list htstats[[2]]; htstats$range; htstats[["range"]]unlist(htstats[2]) # Contents of second list element, with composite names
unlist(htstats[2], use.names=F) # Elements have no nameststats <- with(MASS::shoes, t.test(B, A, paired=TRUE))
names(tstats) ## Names of list elements. See `?t.test` for details.
tstats[1] ## Type tstats[1] to see the first list element
## Compact listing of contents list elements 1 to 5, which are all numeric
unlist(tstats[1:5]) ## With `unlist(tstats)` all elements become character Section 10.3: Functions and operators
Subsection 10.3.1: Common useful built-in functions
## Data indices
length() # number of elements in a vector or a list
order() # x[order(x)] sorts x (by default, NAs are last)
which() # which indices of a logical vector are `TRUE`
which.max() # locates (first) maximum (NB, also: `which.min()`)## Data manipulation
c() # join together (`concatenate`) elements or vectors or lists
diff() # vector of first differences
sort() # sort elements into order, by default omitting NAs
rev() # reverse the order of vector elements
t() # transpose matrix or data frame
# (a data frame is first coerced to a matrixwith()
with() # do computation using columns of specified data frame## Data summary
mean() # mean of the elements of a vector
median() # median of the elements of a vector
range() # minimum and maximum value elements of vector
unique() # form the vector of distinct values
## List function arguments
args() # information on the arguments to a function## Obtain details
head() # display first few rows (by default 6) of object
ls() # list names of objects in the workspace## Print multiple objects
cat() # prints multiple objects, one after the other## Functions that return TRUE or FALSE?
all() # returns TRUE if all values are TRUE
any() # returns TRUE if any values are TRUE
is.factor() # returns TRUE if the argument is a factor
is.na() # returns TRUE if the argument is an NA
# NB also is.logical(), etc.seq(from =1, by=2, length.out=3) # Unabbeviated arguments
seq(from =1, by=2, length=3) # Abbreviate `length.out` to `length`Subsection 10.3.2: User-written functions
distance <- c(148,182,173,166,109,141,166)
mean.and.sd(distance)## Execute the function with the default argument:
mean.and.sd()## Thus, to return the mean, SD and name of the input vector
## replace c(mean=av, SD=sdev) by
list(mean=av, SD=sdev, dataset = deparse(substitute(x)))Subsection 10.3.3: Generic functions, and the class of an object
Subsection 10.3.4: Pipes — a “Do this, then this, . . .” syntax
mean(rnorm(20, sd=2))
20 |> rnorm(sd=2) |> mean()logmammals <- MASS::mammals |> log() |> setNames(c("logbody","logbrain"))
## Alternatively, use the ability to reverse the assignment operator.
MASS::mammals |> log() |> setNames(c("logbody","logbrain")) -> logmammals
## This last is more in the spirit of pipes.MASS::mammals |>
log() |>
setNames(c("logbody","logbrain")) |>
(\(d)lm(logbrain ~ logbody, data=d))() |>
coef()Subsection 10.3.5: Operators
## Multiple of divisor that leaves smallest non-negative remainder
c("Multiple of divisor" = 24 %/% 9, "Remainder after division" = 6)Section 10.4: Calculations with matrices, arrays, lists, and data frames
Calculations in parallel across all elements of a vector
x <- 1:6
log(x) # Natural logarithm of 1, 2, ... 6
log(x, base=10) # Common logarithm (base 10)
log(64, base=c(2,10)) # Apply different bases to one number
log(matrix(1:6, nrow=2), base=2) # Take logarithms of all matrix elementsPatterned data
seq(from=5, to=22, by=3) # The first value is 5.
rep(c(2,3,5), 4) # Repeat the sequence (2, 3, 5) four times over
rep(c("female", "male"), c(2,3)) # Use syntax with a character vectorSubsection 10.4.1: Missing values
nbranch <- subset(DAAG::rainforest, species=="Acacia mabellae")$branch
nbranch # Number of small branches (2cm or less)mean(nbranch, na.rm=TRUE)nbranch == NA # This always equals `NA`is.na(nbranch) # Use to check for NAsnbranch[is.na(nbranch)] <- -999
# `mean(nbranch)` will then be a nonsense valueNAs in modeling functions
options()$na.action # Version 3.2.2, following startupCounting and identifying NAs – the use of table()
with(DAAG::nswdemo, table(trt, re74>0, useNA="ifany"))Infinities and NaNs
summary(DAAG::primates)primates <- DAAG::primatesgplots::plotCI() # `plotCI() function in package `gplots`
plotrix::plotCI() # `plotCI() function in package `plotrix`sessionInfo()[['basePkgs']]## List just the workspace and the first eight packages on the search list:
search()[1:9]data(package="datasets") Section 10.5: Brief notes on R graphics packages and functions
Subsection 10.5.1: Lattice graphics — a step beyond base graphics
grog <- DAAG::grog
chr <- with(grog, match(Country, c('Australia', 'NewZealand')))
# Australia: 1; matches 1st element of c('Australia', 'NewZealand')
# NewZealand: 2; matches 2nd element
plot(Beer ~ Year, data=grog, ylim=c(0, max(Beer)*1.1), pch = chr)
with(grog, points(Wine ~ Year, pch=chr, col='red'))
legend("bottomright", legend=c("Australia", "New Zealand"), pch=1:2)
title(main="Beer consumption (l, pure alcohol)", line=1)library(latticeExtra) ## Loads both lattice and the add-on latticeExtra
gph <- xyplot(Beer+Wine ~ Year, groups=Country, data=grog)
update(gph, par.settings=simpleTheme(pch=19), auto.key=list(columns=2))## Or, condition on `Country`
xyplot(Beer+Wine+Spirit ~ Year | Country, data=grog,
par.settings=simpleTheme(pch=19), auto.key=list(columns=3))tinting <- DAAG::tinting
xyplot(csoa~it | tint*target, groups=agegp, data=tinting, auto.key=list(columns=2))cuckoos <- DAAG::cuckoos
av <- with(cuckoos, aggregate(length, list(species=species), FUN=mean))
gph <- dotplot(species ~ length, data=cuckoos, alpha=0.4) +
as.layer(dotplot(species ~ x, pch=3, cex=1.4, col="black", data=av))
update(gph, xlab="Length of egg (mm)")## Alternatives, using `layer()` or `as.layer()`
avg <- with(cuckoos, data.frame(nspec=1:nlevels(species),
av=sapply(split(length,species),mean)))
dotplot(species ~ length, data=cuckoos) +
layer(lpoints(nspec~av, pch=3, cex=1.25, col="black"), data=avg)dotplot(species ~ length, data=cuckoos) +
as.layer(dotplot(nspec~av, data=avg, pch=3, cex=1.25, col="black"))## Specify panel function
dotplot(species ~ length, data=cuckoos,
panel=function(x,y,...){panel.dotplot(x, y, pch=1, col="gray40")
avg <- data.frame(nspec=1:nlevels(y), av=sapply(split(x,y),mean))
with(avg, lpoints(nspec~av, pch=3, cex=1.25, col="black")) })Combining separately created graphics objects
cuckoos <- DAAG::cuckoos
## Panel A: Dotplot without species means added
gphA <- dotplot(species ~ length, data=cuckoos)
## Panel B: Box and whisker plot
gphB <- bwplot(species ~ length, data=cuckoos)
update(c("A: Dotplot"=gphA, "B: Boxplot"=gphB), between=list(x=0.4),
xlab="Length of egg (mm)", layout=c(2,1))
## `latticeExtra::c()` joins compatible plots together.
## `layout=c(2,1)` : join horizontally; `layout=c(1,2)` : join verticallySubsection 10.5.2: Dynamic graphics – the rgl package
vignette('plot3D', package='plot3D')Section 10.6: Plotting characters, symbols, line types and colors
ycol <- -2.1 - (0:9) * 2.1
ftype <- c("plain", "bold", "italic", "bold italic", "symbol")
yline <- 4.2
ypmax <- 20
farleft <- -7.8
plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
chh <- par()$cxy[2]
text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
letterfont <- function(ypos = ypmax, font = 2) {
par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}
plotfont <- function(xpos = 0:31, ypos = ypmax, font = 1,
sel32 = 2:4, showfont = TRUE) {
par(font = font)
i <- 0
for (j in sel32) {
i <- i + 1
maxval <- j * 32 - 1
if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
maxval, sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}
lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}
plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
ltypes <- c("blank", "solid", "dashed", "dotted", "dotdash",
"longdash", "twodash")
lcode <- c("", "", "44", "13", "1343", "73", "2262")
for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
numchar <- paste("lty =", i, " ")
else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}ycol <- -2.1 - (0:9) * 2.1
ftype <- c("plain", "bold", "italic", "bold italic", "symbol")
yline <- 4.2
ypmax <- 20
farleft <- -7.8
plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
chh <- par()$cxy[2]
text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
letterfont <- function(ypos = ypmax, font = 2) {
par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}
plotfont <- function(xpos = 0:31, ypos = ypmax, font = 1,
sel32 = 2:4, showfont = TRUE) {
par(font = font)
i <- 0
for (j in sel32) {
i <- i + 1
maxval <- j * 32 - 1
if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
maxval, sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}
lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}
plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
ltypes <- c("blank", "solid", "dashed", "dotted", "dotdash",
"longdash", "twodash")
lcode <- c("", "", "44", "13", "1343", "73", "2262")
for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
numchar <- paste("lty =", i, " ")
else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}Colors
library(RColorBrewer)
palette(brewer.pal(12, "Set3"))if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
code <- knitr::knit_code$get()
txt <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/Appendix.R")
}