## ----style, echo=FALSE, results='asis'----------------------------------- BiocStyle::markdown() suppressPackageStartupMessages({ library(rtracklayer) library(BiocParallel) library(GenomicFiles) library(TxDb.Hsapiens.UCSC.hg19.knownGene) }) ## ----rtracklayer-file-classes-------------------------------------------- ## rtracklayer menagerie library(rtracklayer) names(getClass("RTLFile")@subclasses) ## ----benchmark-f0-------------------------------------------------------- f0 <- function(n) { ## inefficient! ans <- numeric() for (i in seq_len(n)) ans <- c(ans, exp(i)) ans } ## ----benchmark----------------------------------------------------------- f1 <- function(n) { ans <- numeric(n) for (i in seq_len(n)) ans[[i]] <- exp(i) ans } f2 <- function(n) sapply(seq_len(n), exp) f3 <- function(n) exp(seq_len(n)) ## ----parallel-sleep------------------------------------------------------ library(BiocParallel) fun <- function(i) { Sys.sleep(1) i } ## serial f0 <- function(n) lapply(seq_len(n), fun) ## parallel f1 <- function(n) bplapply(seq_len(n), fun) ## ----csaw-packages------------------------------------------------------- library(GenomicAlignments) library(GenomicFiles) library(BiocParallel) library(Rsamtools) library(GenomeInfoDb) ## ----olaps-chr----------------------------------------------------------- frag.len <- 110 spacing <- 50 chr <- "chr1" ## ----olaps-tileGenome---------------------------------------------------- fls <- dir("~/UseBioconductor-data/ChIPSeq/NFYA/", ".BAM$", full=TRUE) names(fls) <- sub(".fastq.*", "", basename(fls)) bfl <- BamFileList(fls, yieldSize=1000000) ## ----csaw-tiles---------------------------------------------------------- len <- seqlengths(keepStandardChromosomes(seqinfo(bfl)))[chr] tiles <- tileGenome(len, tilewidth=spacing, cut.last.tile.in.chrom=TRUE) ## ----yield--------------------------------------------------------------- yield <- function(x, ...) readGAlignments(x) ## ----map----------------------------------------------------------------- map <- function(x, tiles, frag.len, ...) { gr <- keepStandardChromosomes(granges(x)) countOverlaps(tiles, resize(gr, frag.len)) } ## ----reduce-------------------------------------------------------------- reduce <- `+` ## ----reduceByYield------------------------------------------------------- count1file <- function(bf, ...) reduceByYield(bf, yield, map, reduce, ...) ## ----count-overlaps-parallel, eval=FALSE--------------------------------- # counts <- bplapply(bfl, count1file, tiles=tiles, frag.len=frag.len) # counts <- simplify2array(counts) # dim(counts) # colSums(counts)