## ----style, echo = FALSE, results = 'asis'-------------------------------------------------------- BiocStyle::markdown() options(width=100, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE-------------------------------------------- suppressPackageStartupMessages({ library(GenomicFiles) library(BiocParallel) }) ## ----configure-test------------------------------------------------------------------------------- stopifnot( getRversion() >= '3.2' && getRversion() < '3.3', BiocInstaller::biocVersion() == "3.2" ) ## ----iteration------------------------------------------------------------------------------------ library(GenomicFiles) yield <- function(bfl) { ## input a chunk of alignments library(GenomicAlignments) readGAlignments(bfl, param=ScanBamParam(what="seq")) } map <- function(aln) { ## Count G or C nucleotides per read library(Biostrings) gc <- letterFrequency(mcols(aln)$seq, "GC") ## Summarize number of reads with 0, 1, ... G or C nucleotides tabulate(1 + gc, 73) # max. read length: 72 } reduce <- `+` ## ----iteration-doit------------------------------------------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES bf <- BamFile(fls[1], yieldSize=100000) gc <- reduceByYield(bf, yield, map, reduce) plot(gc, type="h", xlab="GC Content per Aligned Read", ylab="Number of Reads") ## ----parallel-doit-------------------------------------------------------------------------------- library(BiocParallel) gc <- bplapply(BamFileList(fls), reduceByYield, yield, map, reduce) library(ggplot2) df <- stack(as.data.frame(lapply(gc, cumsum))) df$GC <- 0:72 ggplot(df, aes(x=GC, y=values)) + geom_line(aes(colour=ind)) + xlab("Number of GC Nucleotides per Read") + ylab("Number of Reads") ## ----sessionInfo---------------------------------------------------------------------------------- sessionInfo()