Author: Martin Morgan Date: 22 July, 2019
# R as a calculator: the result is printed at the prompt.
1 + 2
## [1] 3
# c() combines its arguments into a vector; `<-` is the idiomatic
# assignment operator and matches the rest of this file.
x <- c(1, 2, 3)
1:3 # sequence of integers from 1 to 3
## [1] 1 2 3
# Arithmetic is element-wise on vectors of equal length.
x + c(4, 5, 6) # vectorized
## [1] 5 7 9
# The length-one value 4 is reused for every element of x.
x + 4 # recycling
## [1] 5 6 7
Vectors
- numeric(), character(), logical(), integer(), complex(), …
- NA: ‘not available’
- factor(): values from restricted set of ‘levels’.
Operations
- ==, <, <=, >, >=, …
- | (or), & (and), ! (not)
- [, e.g., x[c(2, 3)]
- [<-, e.g., x[c(1, 3)] = x[c(1, 3)]
- is.na()
Functions
# Simulate 100 draws from a normal distribution (rnorm() defaults).
x <- rnorm(100)
# y is x plus an independent set of 100 normal draws, so the two are
# correlated but not identical.
y <- x + rnorm(100)
# Scatter plot of y against x.
plot(x, y)
data.frame
# Combine the two vectors into a data.frame with one column per vector:
# `Independent` holds x and `Dependent` holds y.
df <- data.frame(Independent = x, Dependent = y)
# head() prints the first rows (six, as shown in the output below).
head(df)
## Independent Dependent
## 1 -0.4338047 -0.5779168
## 2 -0.2769985 -1.0665115
## 3 -1.6966211 -1.8769578
## 4 -0.6481076 -0.9540841
## 5 -2.1015776 -1.1166887
## 6 0.7109163 -0.3363154
# Two-dimensional subsetting: rows 1 to 5, columns 1 to 2.
df[1:5, 1:2]
## Independent Dependent
## 1 -0.4338047 -0.5779168
## 2 -0.2769985 -1.0665115
## 3 -1.6966211 -1.8769578
## 4 -0.6481076 -0.9540841
## 5 -2.1015776 -1.1166887
# An empty column index keeps every column, so the result is the same here.
df[1:5, ]
## Independent Dependent
## 1 -0.4338047 -0.5779168
## 2 -0.2769985 -1.0665115
## 3 -1.6966211 -1.8769578
## 4 -0.6481076 -0.9540841
## 5 -2.1015776 -1.1166887
# Scatter plot of Dependent against Independent; the variable names in the
# formula are looked up as columns of df.
plot(Dependent ~ Independent, df) # 'formula' interface
Column access: df[, 1], df[, "Independent"], df[[1]], df[["Independent"]], df$Independent
Exercise: plot only values with Dependent > 0, Independent > 0
Select rows
# Logical row index: TRUE for rows of df where both columns are positive.
ridx <- with(df, (Dependent > 0) & (Independent > 0))
Plot subset
# Scatter plot restricted to the rows selected by the logical index ridx.
plot(Dependent ~ Independent, data = df[ridx, ])
Skin the cat another way
# Same plot without a separate index: the `subset` expression is evaluated
# against the columns of the data frame supplied as `data`.
plot(
  Dependent ~ Independent,
  data = df,
  subset = (Dependent > 0) & (Independent > 0)
)
# Fit a linear regression of Dependent on Independent using the columns of
# df, and keep the fitted model object for the steps below.
fit <- lm(Dependent ~ Independent, df) # linear model -- regression
# Print the analysis-of-variance table for the fitted model.
anova(fit) # summary table
## Analysis of Variance Table
##
## Response: Dependent
## Df Sum Sq Mean Sq F value Pr(>F)
## Independent 1 92.664 92.664 70.32 3.787e-13 ***
## Residuals 98 129.139 1.318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Redraw the scatter plot, then overlay the fitted regression line;
# abline() adds to the plot that is already open.
plot(Dependent ~ Independent, df)
abline(fit)
lm(): plain-old function
fit: an object of class “lm”
anova(): a generic with a specific method for class “lm”
class(fit)
## [1] "lm"
# List the methods available for objects of class "lm"; the exact set
# depends on which packages are attached.
methods(class = "lm")
## [1] add1 alias anova case.names
## [5] coerce confint cooks.distance deviance
## [9] dfbeta dfbetas drop1 dummy.coef
## [13] effects extractAIC family formula
## [17] hatvalues influence initialize kappa
## [21] labels logLik model.frame model.matrix
## [25] nobs plot predict print
## [29] proj qr residuals rstandard
## [33] rstudent show simulate slotsFromS3
## [37] summary variable.names vcov
## see '?methods' for accessing help and source code
# Open help pages (interactive use). Quoting the topic lets `?` look up
# names that contain a dot, such as method names.
?"plot" # plain-old-function or generic
?"plot.formula" # method
?"plot.lm" # method for object of class 'lm', plot(fit)
# Attach ggplot2, then build the plot layer by layer: map the two columns
# of df to the axes, draw the points, and add a smoother fitted with "lm".
library(ggplot2)
ggplot(df, aes(x = Independent, y = Dependent)) +
  geom_point() +
  geom_smooth(method = "lm")
(library(ggplot2), once per session)
Started 2002 as a platform for understanding analysis of microarray data
1,750 packages. Domains of expertise:
Important themes
Resources
A distinctive feature of Bioconductor – use of objects for representing data
# Attach Biostrings, which supplies DNAStringSet() and reverseComplement().
library(Biostrings)
# Build a DNAStringSet object from a character vector of two sequences.
dna <- DNAStringSet(c("AACTCC", "CTGCA"))
# Typing the object's name prints its length and each sequence's width.
dna
## A DNAStringSet instance of length 2
## width seq
## [1] 6 AACTCC
## [2] 5 CTGCA
# Reverse-complement every sequence in the set; the result is again a
# DNAStringSet, as the output below shows.
reverseComplement(dna)
## A DNAStringSet instance of length 2
## width seq
## [1] 6 GGAGTT
## [2] 5 TGCAG