10 Appendix A: The R System – A Brief Overview
On options for working with the code see the vignettes Ch1-Learning and UsingCode.
Packages required (plus any dependencies)
DAAG dplyr tidyr tibble MASS gplots plotrix latticeExtra RColorBrewer
Additionally, knitr and Hmisc are required in order to process the qmd source file.
Section 10.1: Getting started with R
Subsection 10.1.1: Learn by typing code at the command line
> ## Arithmetic calculations. See the help page `?Arithmetic` {-}
> 2*3+10 # The symbol `*` denotes 'multiply'
> ## Use the `c()` function to join results into a numeric vector
> c(sqrt(10), 2*3+10, sqrt(10), 2^3) # 2^3 is 2 to the power of 3
> ## R knows about pi
> 2*pi*6378 # Approximate circumference of earth at equator (km)
# Get information on the use of `help()`
?help # Or, type help(sqrt)
?sqrt # See, in similar vein ?Syntax
?Arithmetic '<' # `?Comparison` finds the same help page ?
## Two commands on one line; Use ';' as separator
2*3*4+10; sqrt(10) ## Try also `cat(2*3*4+10, sqrt(10), sep='n')
## Convert CO2 carbon emissions from tonnes of carbon to tonnes of CO2
3.664*c(.53, 2.56, 9.62) ## Data are for 1900, 1960 & 2020
## Use `cat()` to print several items, with control of formatting
cat(2*3*4+10, sqrt(10), '\n')
Assignment
## Convert from amounts of carbon to amounts of CO2 (billions of tonnes)
## and assign result to a named object
<- c(.53, 2.56, 9.62)*3.664 # Amounts in 1900, 1960, and 2020
fossilCO2vals # Equivalently `fossilCO2vals <- c(.53, 2.56, 9.62)*rep(3.664,3)`
## To assign and print, enclose in round brackets
<- c(.53, 2.56, 9.62)*3.664) (fossilCO2vals
3.664*c(.53,2.56, 9.62) -> fossilCO2vals
Entry of data at the command line, a graphics formula, and a graph
<- c(1900, 1920, 1940, 1960, 1980, 2000, 2020)
Year <- c(.53,.96,1.32,2.56,5.32,6.95,9.62)*3.664
CO2 ## Now plot Carbon Dioxide emissions as a function of Year
plot(CO2 ~ Year, pch=16, fg="gray")
Grouping vectors togeher into data frames
<- data.frame(year=Year, co2gas=CO2)
CO2byYear # Display the contents of the data frame.
CO2byYear rm(Year, CO2) # Optionally, remove `Year` and `Carbon` from the workspace
plot(co2gas ~ year, data=CO2byYear, pch=16)
sqrt(10) # Number of digits is determined by current seting
options(digits=2) # Change until further notice,
sqrt(10)
Section 10.2: R data structures
Subsection 10.2.1: Vectors, dates, and arrays
<- c("Compact", "Large", "Midsize", "Small", "Sporty", "Van")
vehicles c(T, F, F, F, T, T, F) # A logical vector, assuming F=FALSE and T=TRUE
## Character vector
<- c("Rat", "Pig", "Rat", "Mouse", "Pig")
mammals ## Logical vector
<- c("TRUE", "FALSE", "TRUE", "FALSE", "TRUE", "FALSE")
rodent ## From character vector `mammals`, create factor
<- factor(mammals)
mfac levels(mfac)
table(mfac)
Dates
<- as.Date(c("2022-01-01", "2022-02-01", "2022-03-01"))
day1 as.numeric(day1) # Days since 1 January 1970
3] - day1[2] day1[
The use of square brackets to extract subsets of vectors
## Specify the indices of the elements that are to be extracted
<- c(3, 11, 8, 15, 12,18)
x c(1,4:6)] # Elements in positions 1, 4, 5, and 6
x[## Use negative indices to identify elements for omission
-c(2,3)] # Positive and negative indices cannot be mixed
x[## Specify a vector of logical values.
> 10 # This generates a vector of logical values
x > 10] x[x
<- c(Cow=465, Goat=28, Donkey=187, Pig=192)
bodywt c("Goat", "Pig")] bodywt[
Matrices and arrays
<- array(1:24, dim=c(2,4,3))
arr123 ## This prints as three 2 by 4 matrices. Print just the first of the three.
2, 1] # Column 2 and index 1 of 3rd dimension
arr123[, attributes(arr123)
Subsection 10.2.2: Factors
<- c(rep("male",691), rep("female",692))
gender <- factor(gender) # From character vector, create factor
gender levels(gender) # Notice that `female` comes first
<- factor(gender, levels=c("male", "female")) Gender
<- factor(rep(c('male','female'),c(2,3)), labels=c("f", "m"))
mf1 ## The following has the same result
<- factor(rep(c('male','female'), c(2,3)))
mf2 levels(mf2) <- c("f","m") # Assign new levels
if(all(mf1==mf2))print(mf1)
sum(gender=="male")
table(chickwts$feed) # feed is a factor
<- chickwts$feed
source levels(source) <- c("milk","plant","plant","meat","plant","plant")
table(source)
Ordered factors
<- rep(c("low","medium","high"), 2)
stress <- ordered(stress, levels=c("low", "medium", "high"))
ord.stress
ord.stress>= "medium" ord.stress
Subsection 10.2.3: Operations with data frames
<- DAAG::Cars93.summary # Create copy in workspace
Cars93sum Cars93sum
4:6, 2:3] # Extract rows 4 to 6 and columns 2 and 3
Cars93sum[6:4, ] # Extract rows in the order 6, 5, 4
Cars93sum[2:3] # Extract columns 2 and 3
Cars93sum[, ## Or, use negative integers to specify rows and/or columns to be omitted
-(1:3), -c(1,4)] # In each case, numbers must be all +ve or all -ve
Cars93sum[## Specify row and/or column names
c("Small","Sporty","Van"), c("Max.passengers","No.of.cars")] Cars93sum[
Data frames vs matrices
names(Cars93sum)[3] <- "numCars"
names(Cars9sum) <- c("minPass","maxPass","numCars","code")
Using a data frame as a database – with() and within()
## trees (datasets) has data on Black Cherry Trees
with(trees, round(c(mean(Girth), median(Girth), sd(Girth)),1))
with(DAAG::pair65, # stretch of rubber bands
= heated-ambient
{lenchange c(mean(lenchange), median(lenchange))
})
## Add variables `mph` and `gradient` to `DAAG::nihills`
<- within(DAAG::nihills, {mph <- dist/time; gradient <- climb/dist}) nihr
Extracting rows from data frames
unlist(Cars93sum[1, ])
Subsection 10.2.4: Data manipulation functions used in earlier chapters
## For columns of `DAAG::jobs`, show the range of values
sapply(DAAG::jobs, range)
## Split egg lengths by species, calculate mean, sd, and number for each
with(DAAG::cuckoos, sapply(split(length,species),
function(x)c(av=mean(x), sd=sd(x), nobs=length(x))))
apply(UCBAdmissions, 3, function(x)(x[1,2]/(x[1,2]+x[2,2]))*100) # Females
apply(UCBAdmissions, 3, function(x)(x[1,1]/(x[1,1]+x[2,1]))*100) # Males
1] UCBAdmissions[, ,
::cricketer |> dplyr::count(year, left, name="Freq") -> handYear
DAAGnames(handYear)[2] <- "hand"
<- tidyr::pivot_wider(handYear, names_from='hand', values_from="Freq") byYear
Subsection 10.2.5: Writing data to a file, and reading data from a file
<- data.frame(year = seq(from=1900, to=2020, by=20),
CO2byYear co2gas = c(1.94, 3.52, 4.84, 9.38, 19.49, 25.46, 35.25))
write.table(CO2byYear, file='gas.txt') # Write data frame to file
<- read.table(file="gas.txt") # Read data back in
CO2byYear write.csv(CO2byYear, file='gas.csv') # Write data frame
<- read.csv(file="gas.csv", row.names=1) # Read data back in CO2byYear
Subsection 10.2.6: Issues for working with data frames and tibbles
Extraction of columns from data frames and tibbles
<- DAAG::possumsites # sites is then a data frame
sites 3] # returns a vector
sites[,3, drop=FALSE] # returns a 1-column data frame sites[,
::as_tibble(sites)[,3] # returns a 1-column tibble
dplyr::as_tibble(sites)[[3]] # returns a vector
dplyr3]] # returns a vector sites[[
Conversion between data frames and tibbles
attributes(DAAG::possumsites)[['row.names']]
<- tibble::as_tibble(DAAG::possumsites, rownames="Site")
possumSites possumSites
Subsection 10.2.7: Lists
## Summary statistics for 31 felled black cherry tree
## Median (middle value), range, number, units
<- list(med=76, range=c(low=63,high=87), n=31, units="ft")
htstats 1:2] # Show first two list elements only htstats[
## The following are alternative ways to extract the second list element
2] # First list element (Can replace `2` by 'range')
htstats[2][1] # A subset of a list is a list htstats[
2]]; htstats$range; htstats[["range"]] htstats[[
unlist(htstats[2]) # Contents of second list element, with composite names
unlist(htstats[2], use.names=F) # Elements have no names
<- with(MASS::shoes, t.test(B, A, paired=TRUE))
tstats names(tstats) ## Names of list elements. See `?t.test` for details.
1] ## Type tstats[1] to see the first list element
tstats[## Compact listing of contents list elements 1 to 5, which are all numeric
unlist(tstats[1:5]) ## With `unlist(tstats)` all elements become character
Section 10.3: Functions and operators
Subsection 10.3.1: Common useful built-in functions
## Data indices
length() # number of elements in a vector or a list
order() # x[order(x)] sorts x (by default, NAs are last)
which() # which indices of a logical vector are `TRUE`
which.max() # locates (first) maximum (NB, also: `which.min()`)
## Data manipulation
c() # join together (`concatenate`) elements or vectors or lists
diff() # vector of first differences
sort() # sort elements into order, by default omitting NAs
rev() # reverse the order of vector elements
t() # transpose matrix or data frame
# (a data frame is first coerced to a matrixwith()
with() # do computation using columns of specified data frame
## Data summary
mean() # mean of the elements of a vector
median() # median of the elements of a vector
range() # minimum and maximum value elements of vector
unique() # form the vector of distinct values
## List function arguments
args() # information on the arguments to a function
## Obtain details
head() # display first few rows (by default 6) of object
ls() # list names of objects in the workspace
## Print multiple objects
cat() # prints multiple objects, one after the other
## Functions that return TRUE or FALSE?
all() # returns TRUE if all values are TRUE
any() # returns TRUE if any values are TRUE
is.factor() # returns TRUE if the argument is a factor
is.na() # returns TRUE if the argument is an NA
# NB also is.logical(), etc.
seq(from =1, by=2, length.out=3) # Unabbeviated arguments
seq(from =1, by=2, length=3) # Abbreviate `length.out` to `length`
Subsection 10.3.2: User-written functions
<- c(148,182,173,166,109,141,166)
distance mean.and.sd(distance)
## Execute the function with the default argument:
mean.and.sd()
## Thus, to return the mean, SD and name of the input vector
## replace c(mean=av, SD=sdev) by
list(mean=av, SD=sdev, dataset = deparse(substitute(x)))
Subsection 10.3.3: Generic functions, and the class of an object
Subsection 10.3.4: Pipes — a “Do this, then this, . . .” syntax
mean(rnorm(20, sd=2))
20 |> rnorm(sd=2) |> mean()
<- MASS::mammals |> log() |> setNames(c("logbody","logbrain"))
logmammals ## Alternatively, use the ability to reverse the assignment operator.
::mammals |> log() |> setNames(c("logbody","logbrain")) -> logmammals
MASS## This last is more in the spirit of pipes.
::mammals |>
MASSlog() |>
setNames(c("logbody","logbrain")) |>
lm(logbrain ~ logbody, data=d))() |>
(\(d)coef()
Subsection 10.3.5: Operators
## Multiple of divisor that leaves smallest non-negative remainder
c("Multiple of divisor" = 24 %/% 9, "Remainder after division" = 6)
Section 10.4: Calculations with matrices, arrays, lists, and data frames
Calculations in parallel across all elements of a vector
<- 1:6
x log(x) # Natural logarithm of 1, 2, ... 6
log(x, base=10) # Common logarithm (base 10)
log(64, base=c(2,10)) # Apply different bases to one number
log(matrix(1:6, nrow=2), base=2) # Take logarithms of all matrix elements
Patterned data
seq(from=5, to=22, by=3) # The first value is 5.
rep(c(2,3,5), 4) # Repeat the sequence (2, 3, 5) four times over
rep(c("female", "male"), c(2,3)) # Use syntax with a character vector
Subsection 10.4.1: Missing values
<- subset(DAAG::rainforest, species=="Acacia mabellae")$branch
nbranch # Number of small branches (2cm or less) nbranch
mean(nbranch, na.rm=TRUE)
== NA # This always equals `NA` nbranch
is.na(nbranch) # Use to check for NAs
is.na(nbranch)] <- -999
nbranch[# `mean(nbranch)` will then be a nonsense value
NAs in modeling functions
options()$na.action # Version 3.2.2, following startup
Counting and identifying NAs – the use of table()
with(DAAG::nswdemo, table(trt, re74>0, useNA="ifany"))
Infinities and NaNs
summary(DAAG::primates)
<- DAAG::primates primates
::plotCI() # `plotCI() function in package `gplots`
gplots::plotCI() # `plotCI() function in package `plotrix` plotrix
sessionInfo()[['basePkgs']]
## List just the workspace and the first eight packages on the search list:
search()[1:9]
data(package="datasets")
Section 10.5: Brief notes on R graphics packages and functions
Subsection 10.5.1: Lattice graphics — a step beyond base graphics
<- DAAG::grog
grog <- with(grog, match(Country, c('Australia', 'NewZealand')))
chr # Australia: 1; matches 1st element of c('Australia', 'NewZealand')
# NewZealand: 2; matches 2nd element
plot(Beer ~ Year, data=grog, ylim=c(0, max(Beer)*1.1), pch = chr)
with(grog, points(Wine ~ Year, pch=chr, col='red'))
legend("bottomright", legend=c("Australia", "New Zealand"), pch=1:2)
title(main="Beer consumption (l, pure alcohol)", line=1)
library(latticeExtra) ## Loads both lattice and the add-on latticeExtra
<- xyplot(Beer+Wine ~ Year, groups=Country, data=grog)
gph update(gph, par.settings=simpleTheme(pch=19), auto.key=list(columns=2))
## Or, condition on `Country`
xyplot(Beer+Wine+Spirit ~ Year | Country, data=grog,
par.settings=simpleTheme(pch=19), auto.key=list(columns=3))
<- DAAG::tinting
tinting xyplot(csoa~it | tint*target, groups=agegp, data=tinting, auto.key=list(columns=2))
<- DAAG::cuckoos
cuckoos <- with(cuckoos, aggregate(length, list(species=species), FUN=mean))
av <- dotplot(species ~ length, data=cuckoos, alpha=0.4) +
gph as.layer(dotplot(species ~ x, pch=3, cex=1.4, col="black", data=av))
update(gph, xlab="Length of egg (mm)")
## Alternatives, using `layer()` or `as.layer()`
<- with(cuckoos, data.frame(nspec=1:nlevels(species),
avg av=sapply(split(length,species),mean)))
dotplot(species ~ length, data=cuckoos) +
layer(lpoints(nspec~av, pch=3, cex=1.25, col="black"), data=avg)
dotplot(species ~ length, data=cuckoos) +
as.layer(dotplot(nspec~av, data=avg, pch=3, cex=1.25, col="black"))
## Specify panel function
dotplot(species ~ length, data=cuckoos,
panel=function(x,y,...){panel.dotplot(x, y, pch=1, col="gray40")
<- data.frame(nspec=1:nlevels(y), av=sapply(split(x,y),mean))
avg with(avg, lpoints(nspec~av, pch=3, cex=1.25, col="black")) })
Combining separately created graphics objects
<- DAAG::cuckoos
cuckoos ## Panel A: Dotplot without species means added
<- dotplot(species ~ length, data=cuckoos)
gphA ## Panel B: Box and whisker plot
<- bwplot(species ~ length, data=cuckoos)
gphB update(c("A: Dotplot"=gphA, "B: Boxplot"=gphB), between=list(x=0.4),
xlab="Length of egg (mm)", layout=c(2,1))
## `latticeExtra::c()` joins compatible plots together.
## `layout=c(2,1)` : join horizontally; `layout=c(1,2)` : join vertically
Subsection 10.5.2: Dynamic graphics – the rgl package
vignette('plot3D', package='plot3D')
Section 10.6: Plotting characters, symbols, line types and colors
<- -2.1 - (0:9) * 2.1
ycol <- c("plain", "bold", "italic", "bold italic", "symbol")
ftype <- 4.2
yline <- 20
ypmax <- -7.8
farleft plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
<- par()$cxy[2]
chh text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
<- function(ypos = ypmax, font = 2) {
letterfont par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}<- function(xpos = 0:31, ypos = ypmax, font = 1,
plotfont sel32 = 2:4, showfont = TRUE) {
par(font = font)
<- 0
i for (j in sel32) {
<- i + 1
i <- j * 32 - 1
maxval if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
maxval, if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
<- c("blank", "solid", "dashed", "dotted", "dotdash",
ltypes "longdash", "twodash")
<- c("", "", "44", "13", "1343", "73", "2262")
lcode for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
<- paste("lty =", i, " ")
numchar else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}
<- -2.1 - (0:9) * 2.1
ycol <- c("plain", "bold", "italic", "bold italic", "symbol")
ftype <- 4.2
yline <- 20
ypmax <- -7.8
farleft plot(c(-4, 31), c(4.25, ypmax), type = "n", xlab = "", ylab = "",
axes = F)
<- par()$cxy[2]
chh text(0:25, rep(ypmax + 0.8 * chh, 26), paste(0:25), srt = 90,
cex = 0.75, xpd = T)
text(-1.5, ypmax + 0.8 * chh, "pch = ", cex = 0.75, xpd = T)
points(0:25, rep(ypmax, 26), pch = 0:25, lwd=0.8)
<- function(ypos = ypmax, font = 2) {
letterfont par(font = font)
text(-1.35, ypos, "64-76", cex = 0.75, adj = 1, xpd = TRUE)
text(19 - 1.35, ypos, "96-108", cex = 0.75, adj = 1)
points(c(0:12), rep(ypos, 13), pch = 64:76)
points(19:31, rep(ypos, 13), pch = 96:108)
text(farleft, ypos, paste(font), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75)
}<- function(xpos = 0:31, ypos = ypmax, font = 1,
plotfont sel32 = 2:4, showfont = TRUE) {
par(font = font)
<- 0
i for (j in sel32) {
<- i + 1
i <- j * 32 - 1
maxval if(j==4)maxval <- maxval-1
text(-1.35, ypos - i + 1, paste((j - 1) * 32, "-",
sep = ""), cex = 0.75, adj = 1, xpd = TRUE)
maxval, if(j!=4)
points(xpos, rep(ypos - i + 1, 32), pch = (j - 1) *
32 + (0:31))
else
points(xpos[-32], rep(ypos - i + 1, 31), pch = (j - 1) *
32 + (0:30))
}lines(rep(-1.05, 2), c(ypos - length(sel32) + 1, ypos) +
c(-0.4, 0.4), xpd = T, col = "grey40")
if (showfont) {
text(farleft, ypos, paste("font =", font, " "), xpd = T)
text(farleft, ypos - 0.5, ftype[font], cex = 0.75,
xpd = T)
}
}plotfont(ypos = ypmax - 1.5, font = 1, sel32 = 2:4)
for (j in 2:4) letterfont(ypos = ypmax - 2.1 - 1.4 * j, font = j)
plotfont(ypos = ypmax - 9.1, font = 5, sel32 = 3)
plotfont(xpos = c(-0.5, 1:31), ypos = ypmax - 10.1, font = 5,
sel32 = 4, showfont = FALSE)
par(font = 1)
<- c("blank", "solid", "dashed", "dotted", "dotdash",
ltypes "longdash", "twodash")
<- c("", "", "44", "13", "1343", "73", "2262")
lcode for (i in 0:6) {
lines(c(4, 31), c(yline + 4.5 - 0.8 * i, yline + 4.5 -
0.8 * i), lty = i, lwd = 2, xpd = T)
if (i == 0)
<- paste("lty =", i, " ")
numchar else numchar <- i
text(farleft, yline + 4.5 - 0.8 * i, numchar, xpd = TRUE)
text(farleft + 3.5, yline + 4.5 - 0.8 * i, ltypes[i +
1], cex = 0.85, xpd = TRUE)
text(farleft + 7.5, yline + 4.5 - 0.8 * i, lcode[i +
1], cex = 0.85, xpd = TRUE)
}
Colors
library(RColorBrewer)
palette(brewer.pal(12, "Set3"))
if(file.exists("/Users/johnm1/pkgs/PGRcode/inst/doc/")){
<- knitr::knit_code$get()
code <- paste0("\n## ", names(code),"\n", sapply(code, paste, collapse='\n'))
txt writeLines(txt, con="/Users/johnm1/pkgs/PGRcode/inst/doc/Appendix.R")
}