Contents

1 History

2 Vectors and data frames

1 + 2           # arithmetic typed at the prompt; the result is printed
## [1] 3
# c() combines its arguments into a vector; `<-` assigns it to the name x
x <- c(1, 2, 3)
1:3             # sequence of integers from 1 to 3
## [1] 1 2 3
x + c(4, 5, 6)  # vectorized: added element by element
## [1] 5 7 9
x + 4           # recycling: the single 4 is reused for every element
## [1] 5 6 7

Vectors

Operations

Functions

# Draw 100 standard-normal values for x, then build y as x plus a second,
# independent set of 100 standard-normal values (noise).
# No seed is set here, so the numbers differ from run to run.
x <- rnorm(100)
y <- x + rnorm(100)
plot(x, y)  # scatter plot of y against x

data.frame

# Store x and y as the columns Independent and Dependent of one data frame.
# x and y are random draws (no seed is set in the visible code), so the
# numbers printed below will differ when the code is re-run.
df <- data.frame(Independent = x, Dependent = y)
# head() prints the first rows of the data frame (six are shown below)
head(df)
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
## 6  -1.4374832 -1.6424475
# Two-dimensional indexing is [rows, columns]: rows 1 to 5, columns 1 to 2
df[1:5, 1:2]
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
# Leaving the column index empty keeps every column
df[1:5, ]
##   Independent  Dependent
## 1   0.1709466 -1.9343537
## 2  -1.0432251 -1.4366633
## 3   0.1874114 -0.6666756
## 4  -1.0410644 -1.9896486
## 5   0.7765771  1.9752691
# The formula reads "Dependent as a function of Independent"; the column
# names are looked up in df
plot(Dependent ~ Independent, df)  # 'formula' interface

Exercise: plot only the rows where both Dependent > 0 and Independent > 0

  1. Select rows

    # Logical index: TRUE for each row where both columns are positive
    ridx <- with(df, Dependent > 0 & Independent > 0)
  2. Plot subset

    # Plot only the rows of df that are flagged TRUE in ridx
    plot(Dependent ~ Independent, data = df[ridx, ])

  3. Do the same thing another way: let plot() select the rows

    # Same plot, but the formula method does the row filtering itself:
    # `subset` is evaluated using the columns of `data`
    plot(
        Dependent ~ Independent,
        data = df,
        subset = Dependent > 0 & Independent > 0
    )

3 Analysis: functions, classes, methods

# Regress Dependent on Independent using the columns of df; the fitted
# model object is kept in `fit` for the calls that follow.
# The table printed below reflects one particular random draw of the data.
fit <- lm(Dependent ~ Independent, df)  # linear model -- regression
anova(fit)                              # analysis-of-variance table for the fit
## Analysis of Variance Table
## 
## Response: Dependent
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## Independent  1 118.609 118.609  118.03 < 2.2e-16 ***
## Residuals   98  98.483   1.005                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of the data, then add the fitted regression line on top
plot(Dependent ~ Independent, df)
abline(fit)

# class() reports what kind of object lm() returned
class(fit)
## [1] "lm"
# List the functions that have a method for class "lm"; the exact list
# may vary with the R version and the packages that are loaded
methods(class="lm")
##  [1] add1           alias          anova          case.names    
##  [5] coerce         confint        cooks.distance deviance      
##  [9] dfbeta         dfbetas        drop1          dummy.coef    
## [13] effects        extractAIC     family         formula       
## [17] hatvalues      influence      initialize     kappa         
## [21] labels         logLik         model.frame    model.matrix  
## [25] nobs           plot           predict        print         
## [29] proj           qr             residuals      rstandard     
## [33] rstudent       show           simulate       slotsFromS3   
## [37] summary        variable.names vcov          
## see '?methods' for accessing help and source code

4 Help!

# `?` followed by a quoted name opens the help page for that topic
?"plot"          # help for a plain-old-function or generic
?"plot.formula"  # help for one specific method (plot's 'formula' interface)

5 Packages

# Attach the ggplot2 package (it must already be installed), then build the
# figure layer by layer: the data and axis mapping, the points, and a
# straight-line ("lm") smoother
library(ggplot2)
ggplot(data = df, mapping = aes(x = Independent, y = Dependent)) +
    geom_point() +
    geom_smooth(method = "lm")