# 1 Building blocks

• Data: vectors, objects
• Algorithms: functions, generics, methods
x <- rnorm(1000)
y <- x + rnorm(1000)
df <- data.frame(Independent = x, Dependent = y)
fit <- lm(Dependent ~ Independent, df)
anova(fit)
## Analysis of Variance Table
##
## Response: Dependent
##              Df Sum Sq Mean Sq F value    Pr(>F)
## Independent   1 1015.5 1015.47    1001 < 2.2e-16 ***
## Residuals   998 1012.4    1.01
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Atomic vectors

• logical(), integer(), numeric(), complex(), raw(), character()

Attributes

x <- structure(list(), class="Foo")
attributes(x)
## $class ## [1] "Foo" .Internal(inspect(x)) ## @4967c40 19 VECSXP g0c0 [OBJ,NAM(2),ATT] (len=0, tl=0) ## ATTRIB: ## @4967c78 02 LISTSXP g0c0 [] ## TAG: @294ea48 01 SYMSXP g1c0 [MARK,NAM(2),LCK,gp=0x4000] "class" (has value) ## @4df9708 16 STRSXP g0c1 [NAM(2)] (len=1, tl=0) ## @451a628 09 CHARSXP g0c1 [gp=0x60] [ASCII] [cached] "Foo" • Attributes in C: pair list An implicit class – matrix() list() and environment() • recursive • environment(): reference semantics Plain-old-functions • First class objects # 2 The S3 object system Class • Definition – list (usually) or atomic vector with class attribute • Inheritance: is-a and has-a • linear hierarchy People <- function(name = character(), age = numeric()) { stopifnot( is.character(name), is.numeric(age), all(age > 0), length(name) == length(age) ) structure(list(name=name, age=age), class = "People") } Employees <- function(people = People(), job = character()) { stopifnot( identical(class(people), "People"), is.character(job), length(people) == length(job) ) employees <- people employees$job <- job
class(employees) <- c("Employees", class(employees))
employees
}

Company <- function(company_name = character(), employees = Employees()) {
stopifnot(
is.character(company_name), length(company_name) == 1L,
!is.na(company_name),
inherits(employees, "Employees")
)
structure(
list(
company_name = company_name,
employees = employees
),
class = "Company"
)
}

Generic

description <- function(object, ...)
UseMethod("description")

Method

description.People <- function(object, ...)
sprintf("%s; age %s", object$name, object$age)

description.Employees <-
function(object, ...)
{
person_description <- NextMethod()
sprintf("%s; job %s", person_description, object$job) } • ‘Next’ methods • ‘Group’ generic methods Strengths & weakneses • (+) Easy to implement structure(list(), class="Foo") • (+, usually) Simple inheritance (linear) and dispatch (single) • (-) No guarantee of structure, or separation of ‘implementation’ from ‘interface’ # 3 Exercises Exercise: name() and age() accessors The Person class has fields name and age. These are accessible by reaching in to the implementation details and extracting the list element, e.g., Person()$name. But this type of direct manipulation is considered bad practice; it is better to have ‘accessors’ that define the ‘application programming interface (API)’ for accessing information from the object. Write plain-old-functions (e.g., like name <- function(x) ...) to extract name and age.

name <- function(x)
x$name age <- function(x) x$age

Exercise: length() S3 method

?length indicates that this function is a (S3) generic. Define an S3 length method for the People class; note that we use this when validating the input to Employees().

length.People <- function(x)
length(name(x))

Exercise: S3 generics and methods

Revise name() and age() to be S3 generics and methods. Reflect on the beneifts and costs of plain-old-functions versus generic + methods.

name <- function(x)
UseMethod("name")

name.Person <- function(x)
x$name age <- function(x) UseMethod("age") age.Person <- function(x) x$age

Exercise: (Return here after S4 presentation)

Re-implement the People, Employees, and Company S3 example in S4. Compare and contrast. S4 is often viewed as verbose; is the S4 implementation longer or more complicated than the S3 implementation?

People <- setClass(
"People",
slots = c(name = "character", age = "numeric")
)

setValidity("People", function(object) {
if (!identical(length(name(object)), length(age(object)))) {
"lengths of name() and age() differ"
} else TRUE
})
## Class "People" [in ".GlobalEnv"]
##
## Slots:
##
## Name:       name       age
## Class: character   numeric
Employees <- setClass(
"Employees",
contains = "People",
slots = c(job = "character")
)

setValidity("Employees", function(object) {
if (!identical(length(employees(object)), length(jobs(object)))) {
"length of employee() and job() differ"
} else TRUE
})
## Class "Employees" [in ".GlobalEnv"]
##
## Slots:
##
## Name:        job      name       age
## Class: character character   numeric
##
## Extends: "People"
Company <- setClass(
"Company",
slots = c(company_name = "character", employee = "Employees")
)

setValidity("Company", function(object) {
if (length(company_name) != 1 || !is.na(company_name)) {
"company_name must be length 1 and not NA"
} else TRUE
})
## Class "Company" [in ".GlobalEnv"]
##
## Slots:
##
## Name:  company_name     employee
## Class:    character    Employees

# 4 Acknowledgements

Research reported in this course was supported by the National Human Genome Research Institute and the National Cancer Institute of the National Institutes of Health under award numbers U41HG004059 and U24CA180996.

This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement number 633974)