## ----cor-scale-setup-----------------------------------------------------
set.seed(123)

# Time `FUN` on a freshly generated random matrix.
#
# Arguments:
#   dim   numeric vector; dim[1] is the number of rows and dim[2] the number
#         of columns of the uniform random matrix handed to FUN.
#   FUN   function (or the name of a function) taking the matrix as its
#         only argument.
#   nrep  number of timed evaluations to average over.
#
# Returns the mean "elapsed" value reported by system.time() across the
# `nrep` evaluations. `dim` is printed first so that progress is visible
# when the function is driven from apply().
timer <- function(dim, FUN, nrep = 3) {
  stopifnot(is.numeric(dim), length(dim) >= 2L)
  FUN <- match.fun(FUN)  # accept a function object or its name
  print(dim)
  m <- matrix(runif(dim[1] * dim[2]), dim[1])
  elapsed <- vapply(
    seq_len(nrep),
    function(i) system.time(FUN(m))[["elapsed"]],
    numeric(1)
  )
  mean(elapsed)
}

# Grid of matrix sizes to benchmark: m rows by n columns.
parm <- expand.grid(
  m = 10^(4:5),
  n = as.integer(seq(20, 300, length.out = 3))
)

## ----cor-scale, eval=FALSE-----------------------------------------------
## parm$cor <- apply(parm[,1:2], 1, timer, cor)
## xtabs(cor ~ m + n, parm)

## ----correlation-impl----------------------------------------------------
# Correlation between the columns of `m`, computed with matrix operations.
#
# The matrix is transposed so that each original column becomes a row:
# arithmetic between a matrix and a vector recycles the vector down the
# columns, so subtracting rowMeans() / dividing by the row norms then acts
# on one original column at a time. The cross-product of the centered,
# unit-length rows is the correlation matrix.
#
# A constant column has zero sum of squares after centering, so its
# entries in the result are NaN (0 / 0).
fastcor <- function(m) {
  m <- t(m)
  m <- m - rowMeans(m)          # center
  m <- m / sqrt(rowSums(m^2))   # scale
  tcrossprod(m)                 # cross-product
}

## ----correlation-setup---------------------------------------------------
## 'small' data set initially
m <- 100000
n <- 50
mat <- matrix(runif(m * n), m)

## ----fastcor-timing-identity---------------------------------------------
system.time(c0 <- cor(mat))
system.time(c1 <- fastcor(mat))
all.equal(c0, c1)               # why not identical()?

## ----fastcor-scale, eval=FALSE-------------------------------------------
## parm$fastcor <- apply(parm[,1:2], 1, timer, fastcor)
## parm$crossprod <- apply(parm[,1:2], 1, timer, crossprod)

## ----cor-scale-plot,eval=FALSE-------------------------------------------
## library(lattice)
## xyplot(sqrt(cor) + sqrt(fastcor) + sqrt(crossprod) ~ n,
##        groups=m, parm, type="b", pch=20, cex=2, layout=c(3, 1),
##        xlab="Columns", ylab="sqrt(Time)", main="Native",
##        key=simpleKey(text=sprintf("%d", unique(parm$m)), lines=TRUE,
##            points=FALSE, x=.02, y=.95, title="Rows", cex.title=1))