
## ----cor-scale-setup-----------------------------------------------------
## Fix the random-number seed so the runif() draws in this script are
## reproducible from one run to the next.
set.seed(123)
# Benchmark FUN on a freshly generated random matrix.
#
# dim:  length-2 vector; dim[1] is the number of rows and dim[2] the number
#       of columns of the test matrix.
# FUN:  function called with the test matrix as its only argument.
# nrep: number of times FUN is timed (default 3).
#
# Returns the mean elapsed (wall-clock) time in seconds over the nrep runs.
# The requested dimensions are printed as a side effect.
timer <- function(dim, FUN, nrep=3) {
    # Echo the dimensions so progress is visible when sweeping many sizes.
    print(dim)

    n_row <- dim[1]
    n_col <- dim[2]
    # One matrix of uniform random numbers, shared by all repetitions.
    x <- matrix(runif(n_row * n_col), nrow = n_row)

    # Keep only the "elapsed" component of each system.time() result.
    elapsed <- replicate(nrep, system.time(FUN(x))["elapsed"])
    mean(elapsed)
}
## Grid of matrix shapes to benchmark: every combination of a row count m
## (10^4 and 10^5) with an integer column count n (three evenly spaced
## values from 20 to 300).
parm <- expand.grid(
    m = 10^c(4, 5),
    n = as.integer(seq(from = 20, to = 300, length.out = 3))
)


## ----cor-scale, eval=FALSE-----------------------------------------------
## parm$cor <- apply(parm[,1:2], 1, timer, cor)
## xtabs(cor ~ m + n, parm)


## ----correlation-impl----------------------------------------------------
# Correlation matrix of the columns of m, computed with matrix algebra.
#
# m: numeric matrix; each column is treated as one variable.
#
# Returns a square matrix with one row and one column per column of m,
# holding the cross-products of the centered, unit-length columns.
fastcor <- function(m) {
    # Work on the transpose so each variable is a row: a per-row vector
    # (rowMeans, row norms) then recycles down the columns of the matrix
    # and lines up with the row it belongs to.
    x <- t(m)
    x <- x - rowMeans(x)       # subtract each variable's mean
    ss <- rowSums(x^2)         # sum of squares of each centered variable
    x <- x / sqrt(ss)          # rescale each variable to unit length
    tcrossprod(x)              # x %*% t(x): all pairwise inner products
}


## ----correlation-setup---------------------------------------------------
## Start with a 'small' data set: m rows by n columns of uniform random
## numbers.
m <- 1e5
n <- 50
mat <- matrix(runif(m * n), nrow = m, ncol = n)


## ----fastcor-timing-identity---------------------------------------------
## Time cor() and fastcor() on the same matrix. The assignments inside
## system.time() keep both results (c0, c1) so they can be compared below.
system.time(c0 <- cor(mat))
system.time(c1 <- fastcor(mat))
## all.equal() compares numerically within a small tolerance rather than
## bit-for-bit.
all.equal(c0, c1)                  # why not identical()?


## ----fastcor-scale, eval=FALSE-------------------------------------------
## parm$fastcor <- apply(parm[,1:2], 1, timer, fastcor)
## parm$crossprod <- apply(parm[,1:2], 1, timer, crossprod)


## ----cor-scale-plot,eval=FALSE-------------------------------------------
## library(lattice)
## xyplot(sqrt(cor) + sqrt(fastcor) + sqrt(crossprod) ~ n,
##     group=m, parm, type="b", pch=20, cex=2, layout=c(3, 1),
##     xlab="Columns", ylab="sqrt(Time)", main="Native",
##     key=simpleKey(text=sprintf("%d", unique(parm$m)), lines=TRUE,
##       points=FALSE, x=.02, y=.95, title="Rows", cex.title=1))


