### R code from vignette source 'A10_Introduction.Rnw'

###################################################
### code chunk number 1: options
###################################################


### R code from vignette source 'A20_EfficientR.Rnw'

###################################################
### code chunk number 1: options
###################################################


###################################################
### code chunk number 2: prompt
###################################################
## assign values 5, 4, 3, 2, 1 to variable 'x'
x <- c(5, 4, 3, 2, 1)
x


###################################################
### code chunk number 3: colon
###################################################
x[2:4]


###################################################
### code chunk number 4: log
###################################################
log(x)


###################################################
### code chunk number 5: types
###################################################
c(1.1, 1.2, 1.3)         # numeric
c(FALSE, TRUE, FALSE)    # logical
c("foo", "bar", "baz")   # character, single or double quote ok
as.character(x)          # convert 'x' to character
typeof(x)                # the number 5 is numeric, not integer
typeof(2L)               # append 'L' to force integer
typeof(2:4)              # ':' produces a sequence of integers


###################################################
### code chunk number 6: factor
###################################################
sex <- factor(c("Male", "Female", NA), levels=c("Female", "Male"))
sex


###################################################
### code chunk number 7: lists
###################################################
lst <- list(a=1:3, b=c("foo", "bar"), c=sex)
lst


###################################################
### code chunk number 8: list-subset
###################################################
lst[c(3, 1)]             # another list
lst[["a"]]               # the element itself, selected by name


###################################################
### code chunk number 9: data.frame
###################################################
df <- data.frame(age=c(27L, 32L, 19L),
                 sex=factor(c("Male", "Female", "Male")))
df
df[c(1, 3),]
df[df$age > 20,]


###################################################
### code chunk number 10: matrix
###################################################
m <- matrix(1:12, nrow=3)
m
m[c(1, 3), c(2, 4)]


###################################################
### code chunk number 11: matrix-subset
###################################################
m[, 3]
m[, 3, drop=FALSE]


###################################################
### code chunk number 12: lm
###################################################
x <- rnorm(1000, sd=1)
y <- x + rnorm(1000, sd=.5)
fit <- lm(y ~ x)       # formula describes linear regression 
fit                    # an 'S3' object
anova(fit)
sqrt(var(resid(fit)))  # residuals accessor and subsequent transforms
class(fit)


###################################################
### code chunk number 13: function-args
###################################################
y <- 5:1
log(y)
args(log)        # arguments 'x' and 'base'; see ?log
log(y, base=2)   # 'base' is optional, with default value
try(log())       # 'x' required; 'try' continues even on error
args(data.frame) # ... represents variable number of arguments


###################################################
### code chunk number 14: named-args
###################################################
log(base=2, y)   # match argument 'base' by name, 'x' by position


###################################################
### code chunk number 15: S3-method-args
###################################################
args(anova)
args(anova.glm)


###################################################
### code chunk number 16: rbioc-pdata
###################################################
pdataFile <- system.file(package="SequenceAnalysisData", "extdata", 
                         "pData.csv")


###################################################
### code chunk number 17: rbioc-pdata-csv
###################################################
pdata <- read.table(pdataFile)  
dim(pdata)
names(pdata)
summary(pdata)


###################################################
### code chunk number 18: rbioc-pdata-subset
###################################################
head(pdata[,"sex"], 3)
head(pdata$sex, 3)
head(pdata[["sex"]], 3)
sapply(pdata, class)


###################################################
### code chunk number 19: rbioc-pdata-sextab
###################################################
table(pdata$sex, useNA="ifany")


###################################################
### code chunk number 20: rbioc-pdata-molbiol
###################################################
with(pdata, table(mol.biol, useNA="ifany"))


###################################################
### code chunk number 21: rbbioc-pdata-bcrabl
###################################################
ridx <- pdata$mol.biol %in% c("BCR/ABL", "NEG")


###################################################
### code chunk number 22: rbioc-pdata-molbiol-selected
###################################################
table(ridx)
sum(ridx)


###################################################
### code chunk number 23: rbioc-pdata-subset
###################################################
pdata1 <- pdata[ridx,]


###################################################
### code chunk number 24: rbioc-pdata-subset-levels
###################################################
levels(pdata1$mol.biol)


###################################################
### code chunk number 25: rbioc-pdata-subset-recode
###################################################
pdata1$mol.biol <- factor(pdata1$mol.biol)
table(pdata1$mol.biol)


###################################################
### code chunk number 26: rbioc-pdata-age-molbiol
###################################################
with(pdata1, t.test(age ~ mol.biol))


###################################################
### code chunk number 27: rbioc-pdata-boxplot (eval = FALSE)
###################################################
## ## not evaluated
## boxplot(age ~ mol.biol, pdata1)


###################################################
### code chunk number 28: S4-view (eval = FALSE)
###################################################
## selectMethod(countOverlaps, c("GRanges", "GRanges"))


###################################################
### code chunk number 29: S4
###################################################
library(Biostrings)
showMethods(complement)


###################################################
### code chunk number 30: S4-showMethods (eval = FALSE)
###################################################
## showMethods(class="DNAStringSet", where=search())


###################################################
### code chunk number 31: S4-help (eval = FALSE)
###################################################
## class ? DNAStringSet
## method ? "complement,DNAStringSet"


###################################################
### code chunk number 32: io-sketch (eval = FALSE)
###################################################
## ## not evaluated
## colClasses <-
##   c("NULL", "integer", "numeric", "NULL")
## df <- read.table("myfile", colClasses=colClasses)


###################################################
### code chunk number 33: iterative
###################################################
x <- runif(100000); x2 <- x^2
m <- matrix(x2, nrow=1000); y <- rowSums(m)


###################################################
### code chunk number 34: preallocate-and-fill-sketch (eval = FALSE)
###################################################
## ## not evaluated
## result <- numeric(nrow(df))
## for (i in seq_len(nrow(df)))
##   result[[i]] <- some_calc(df[i,])


###################################################
### code chunk number 35: unnecessary-names
###################################################
unlist(list(a=1:2)) # name 'a' becomes 'a1', 'a2'
unlist(list(a=1:2), use.names=FALSE)   # no names


###################################################
### code chunk number 36: algo-poly
###################################################
x <- 1:100; s <- sample(x, 10)
inS <- x %in% s


###################################################
### code chunk number 37: system.time
###################################################
m <- matrix(runif(200000), 20000)
system.time(apply(m, 1, sum))


###################################################
### code chunk number 38: rbenchmark
###################################################
library(rbenchmark)
f0 <- function(x) apply(x, 1, sum)
f1 <- function(x) rowSums(x)
benchmark(f0(m), f1(m), 
          columns=c("test", "elapsed", "relative"), 
          replications=5)


###################################################
### code chunk number 39: identical
###################################################
res1 <- apply(m, 1, sum)
res2 <- rowSums(m)
identical(res1, res2)
identical(c(1, -1), c(x=1, y=-1))
all.equal(c(1, -1), c(x=1, y=-1),
          check.attributes=FALSE)


### R code from vignette source 'A30_SequencesReadsAlignments.Rnw'

###################################################
### code chunk number 1: options
###################################################
library(SequenceAnalysisData)
library(BSgenome.Hsapiens.UCSC.hg19)
library(RNAseqData.HeLa.bam.chr14)
library(ShortRead)
library(Rsamtools)


###################################################
### code chunk number 2: gc-genome (eval = FALSE)
###################################################
## library(BSgenome.Hsapiens.UCSC.hg19)
## Hsapiens
## Hsapiens[[1]]
## Views(Hsapiens[[1]], 1000000, 1001000)


###################################################
### code chunk number 3: gcFunction-definition
###################################################
gcContent <- 
    function(x)
{
    alf <- alphabetFrequency(x, as.prob=TRUE)
    sum(alf[c("G", "C")])
}


###################################################
### code chunk number 4: gcFunction-chr1
###################################################
gcContent(Hsapiens[["chr1"]])


###################################################
### code chunk number 5: gcFunction-all
###################################################
chrs <- paste0("chr", c(1:22, "X", "Y"))
gc <- numeric(length(chrs))             # pre-allocate...
for (chr in seq_along(chrs))            # ...and fill
    gc[[chr]] <- gcContent(Hsapiens[[chr]])
names(gc) <- chrs
gc


###################################################
### code chunk number 6: fastq-format
###################################################
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fls <- dir(file.path(bigdata, "fastq"), full=TRUE)
cat(noquote(readLines(fls[[1]], 4)), sep="\n")


###################################################
### code chunk number 7: ascii
###################################################
cat(rawToChar(as.raw(32+1:47)),
    rawToChar(as.raw(32+48:94)), sep="\n")


###################################################
### code chunk number 8: readFastq
###################################################
library(ShortRead)
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fastqDir <- file.path(bigdata, "fastq")
fastqFiles <- dir(fastqDir, full=TRUE)
fq <- readFastq(fastqFiles[1])
fq


###################################################
### code chunk number 9: sread
###################################################
head(sread(fq), 3)
head(quality(fq), 3)
head(id(fq), 3)


###################################################
### code chunk number 10: getClasse
###################################################
getClass("ShortReadQ")


###################################################
### code chunk number 11: showMethods-ShortRead (eval = FALSE)
###################################################
## showMethods(class="ShortRead", where="package:ShortRead")


###################################################
### code chunk number 12: width-ShortRead
###################################################
table(width(fq))


###################################################
### code chunk number 13: width-ShortReadQ
###################################################
abc <- alphabetByCycle(sread(fq))
abc[1:4, 1:8]


###################################################
### code chunk number 14: FastqSampler
###################################################
sampler <- FastqSampler(fastqFiles[1], 1000000)
yield(sampler) # sample of 1000000 reads


###################################################
### code chunk number 15: fastq-discovery
###################################################
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fl <- file.path(bigdata, "fastq", "SRR031724_1_subset.fastq")


###################################################
### code chunk number 16: fastq-input-gc
###################################################
rm(fq); invisible(gc())


###################################################
### code chunk number 17: fastq-input
###################################################
fq <- readFastq(fls[1])


###################################################
### code chunk number 18: gcC
###################################################
alf0 <- alphabetFrequency(sread(fq), as.prob=TRUE, collapse=TRUE)
sum(alf0[c("G", "C")])


###################################################
### code chunk number 19: gc-reads
###################################################
alf0 <- alphabetFrequency(sread(fq), as.prob=TRUE)
hist(alf0[,c("G", "C")])


###################################################
### code chunk number 20: SAM
###################################################
fl <- system.file("extdata", "ex1.sam", package="Rsamtools")
noquote(strsplit(readLines(fl, 10), "\t")[10][[1]])


###################################################
### code chunk number 21: readGappedAlignments
###################################################
library(RNAseqData.HeLa.bam.chr14)
bamfls <- RNAseqData.HeLa.bam.chr14_BAMFILES
aln <- readGappedAlignments(bamfls[1])
head(aln, 3)


###################################################
### code chunk number 22: GappedAlignments-accessors
###################################################
table(strand(aln))
table(qwidth(aln))
range(width(aln))
head(sort(table(cigar(aln)), decreasing=TRUE))


###################################################
### code chunk number 23: GappedAlignments-mcols
###################################################
param <- ScanBamParam(what="seq")
aln <- readGappedAlignments(bamfls[1], param=param)
head(mcols(aln)$seq)


###################################################
### code chunk number 24: bam-gcContent
###################################################
bamGCContent <- 
    function(bamfl)
{
    param <- ScanBamParam(what="seq")
    aln <- readGappedAlignments(bamfl, param=param)
    seq <- mcols(aln)$seq
    alf0 <- alphabetFrequency(seq, as.prob=TRUE, collapse=TRUE)
    sum(alf0[c("G", "C")])
}


###################################################
### code chunk number 25: bam-gc
###################################################
bamGCContent(bamfls[1])


###################################################
### code chunk number 26: bam-gc-2
###################################################
sapply(bamfls, bamGCContent)


### R code from vignette source 'A40_Ranges.Rnw'

###################################################
### code chunk number 1: options
###################################################
library(GenomicFeatures)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
plotRanges <- function(x, xlim = x, main = deparse(substitute(x)),
                       col = "black", sep = 0.5, ...)
{
    height <- 1
    if (is(xlim, "Ranges"))
        xlim <- c(min(start(xlim)), max(end(xlim)))
    bins <- disjointBins(IRanges(start(x), end(x) + 1))
    plot.new()
    par(mai=c(0.6, 0.2, 0.6, 0.2))
    plot.window(xlim, c(0, max(bins)*(height + sep)))
    ybottom <- bins * (sep + height) - height
    rect(start(x)-0.5, ybottom, end(x)+0.5, ybottom + height, col = col, ...)
    title(main, cex.main=2.8, font.main=1)
    axis(1)
}


###################################################
### code chunk number 2: GRanges-genes
###################################################
genes <- GRanges(seqnames=c("3R", "X"),
                 ranges=IRanges(
                     start=c(19967117, 18962306),
                     end=c(19973212, 18962925)),
                 strand=c("+", "-"),
                 seqlengths=c(`3R`=27905053L, `X`=22422827L))


###################################################
### code chunk number 3: GRanges-display
###################################################
genes


###################################################
### code chunk number 4: GRanges-help (eval = FALSE)
###################################################
## ?GRanges


###################################################
### code chunk number 5: GRanges-vignettes (eval = FALSE)
###################################################
## vignette(package="GenomicRanges")


###################################################
### code chunk number 6: GRanges-basics
###################################################
genes[2]
strand(genes)
width(genes)
length(genes)
names(genes) <- c("FBgn0039155", "FBgn0085359")
genes  # now with names


###################################################
### code chunk number 7: ranges-ir
###################################################
ir <- IRanges(start=c(7, 9, 12, 14, 22:24),
              end=c(15, 11, 12, 18, 26, 27, 28))


###################################################
### code chunk number 8: ranges-ir-plot
###################################################
png("ranges-ir-plot.png", width=800, height=160)
plotRanges(ir, xlim=c(5, 35), main="Original")
dev.off()
png("ranges-shift-ir-plot.png", width=800, height=160)
plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift")
dev.off()
png("ranges-reduce-ir-plot.png", width=800, height=160)
plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce")
dev.off()


###################################################
### code chunk number 9: ranges-shift-ir
###################################################
shift(ir, 5)


###################################################
### code chunk number 10: ranges-shift-ir-plot
###################################################
png("ranges-shift-ir-plot.png", width=800, height=160)
plotRanges(shift(ir, 5), xlim=c(5, 35), main="Shift")
dev.off()


###################################################
### code chunk number 11: ranges-reduce-ir
###################################################
reduce(ir)


###################################################
### code chunk number 12: ranges-reduce-ir-plot
###################################################
png("ranges-reduce-ir-plot.png", width=800, height=160)
plotRanges(reduce(ir), xlim=c(5, 35), main="Reduce")
dev.off()


###################################################
### code chunk number 13: coverage
###################################################
coverage(ir)


###################################################
### code chunk number 14: GRanges-mcols
###################################################
mcols(genes) <- DataFrame(EntrezId=c("42865", "2768869"),
                          Symbol=c("kal-1", "CG34330"))


###################################################
### code chunk number 15: GRanges-metadata
###################################################
metadata(genes) <-
    list(CreatedBy="A. User", Date=date())


###################################################
### code chunk number 16: GRangesList-eg-setup
###################################################
library(TxDb.Dmelanogaster.UCSC.dm3.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene # alias
fbgn <- exonsBy(txdb, "gene")["FBgn0039155"]
seqlevels(fbgn) <- "chr3R"


###################################################
### code chunk number 17: GRangesList-eg
###################################################
fbgn


### R code from vignette source 'A50_AnnotationAndVisualization.Rnw'

###################################################
### code chunk number 1: setup
###################################################
library(BSgenome.Hsapiens.UCSC.hg19)
library(org.Hs.eg.db)
library(biomaRt)
library(ensemblVEP)
library(VariantAnnotation)
library(Gviz)


###################################################
### code chunk number 2: readVcf
###################################################
library(VariantAnnotation) 
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")


###################################################
### code chunk number 3: ensemblVEP
###################################################
library(ensemblVEP)
fl <- system.file("extdata", "gl_chr1.vcf", package="VariantAnnotation")
gr <- ensemblVEP(fl)


###################################################
### code chunk number 4: show_GRanges
###################################################
head(gr, 3)


###################################################
### code chunk number 5: A50_AnnotationAndVisualization.Rnw:242-245
###################################################
fl <- system.file("extdata", "structural.vcf", package="VariantAnnotation")
param <- VEPParam(input=c(format="vcf"), output=c(vcf=TRUE))
vep <- ensemblVEP(fl, param)


###################################################
### code chunk number 6: rtn_VCF
###################################################
head(info(vep)$CSQ, 3)


###################################################
### code chunk number 7: parseCSQToGRanges
###################################################
csq <- parseCSQToGRanges(vep)
head(csq, 3)


###################################################
### code chunk number 8: select
###################################################
library(org.Hs.eg.db)
cols(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
uniprotKeys <- head(keys(org.Hs.eg.db, keytype="UNIPROT"))
cols <- c("SYMBOL", "PATH")
select(org.Hs.eg.db, keys=uniprotKeys, cols=cols, keytype="UNIPROT")


###################################################
### code chunk number 9: select-kegg
###################################################
kegg <- select(org.Hs.eg.db, "00310", c("UNIPROT", "SYMBOL"), "PATH")
nrow(kegg)
head(kegg, 3)


###################################################
### code chunk number 10: biomaRt1 (eval = FALSE)
###################################################
## library(biomaRt)
## head(listMarts(), 3)                      ## list the marts
## head(listDatasets(useMart("ensembl")), 3) ## mart datasets
## ensembl <-                                ## fully specified mart
##     useMart("ensembl", dataset = "hsapiens_gene_ensembl")
## 
## head(listFilters(ensembl), 3)             ## filters
## myFilter <- "chromosome_name"
## head(filterOptions(myFilter, ensembl), 3) ## return values
## myValues <- c("21", "22")
## head(listAttributes(ensembl), 3)          ## attributes
## myAttributes <- c("ensembl_gene_id","chromosome_name")
## 
## ## assemble and query the mart
## res <- getBM(attributes =  myAttributes, filters =  myFilter,
##              values =  myValues, mart = ensembl)


###################################################
### code chunk number 11: seqlevels_rename
###################################################
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene 

fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
vcf <- readVcf(fl, "hg19")
seqlevels(vcf, force=TRUE) <- c("22"="chr22")


###################################################
### code chunk number 12: locateVariants_find
###################################################
rd <- rowData(vcf)
loc <- locateVariants(rd, txdb, CodingVariants())
head(loc, 3)


###################################################
### code chunk number 13: locateVariants_example
###################################################
## Did any coding variants match more than one gene?
splt <- split(loc$GENEID, loc$QUERYID)
table(sapply(splt, function(x) length(unique(x)) > 1))

## Summarize the number of coding variants by gene ID
splt <- split(loc$QUERYID, loc$GENEID)
head(sapply(splt, function(x) length(unique(x))), 3)


###################################################
### code chunk number 14: Gviz
###################################################
library(Gviz)
data(cpgIslands)
chr <- "chr7"
genome <- "hg19"


###################################################
### code chunk number 15: AnnotationTrack
###################################################
atrack <- AnnotationTrack(cpgIslands, name="CpG")
plotTracks(atrack)


###################################################
### code chunk number 16: GenomeAxisTrack
###################################################
gtrack <- GenomeAxisTrack()
plotTracks(list(gtrack, atrack))


###################################################
### code chunk number 17: IdeogramTrack
###################################################
itrack <- IdeogramTrack(genome=genome, chromosome=chr)
plotTracks(list(itrack, gtrack, atrack))


###################################################
### code chunk number 18: GeneRegionTrack
###################################################
data(geneModels)
grtrack <- 
    GeneRegionTrack(geneModels, genome=genome,
                    chromosome=chr, name="Gene Model")
tracks <- list(itrack, gtrack, atrack, grtrack)
plotTracks(tracks)


###################################################
### code chunk number 19: Gviz-zoom
###################################################
plotTracks(tracks, from=2.5e7, to=2.8e7)


###################################################
### code chunk number 20: Gviz-sequence
###################################################
library(BSgenome.Hsapiens.UCSC.hg19)
strack <- SequenceTrack(Hsapiens, chromosome=chr)
plotTracks(c(tracks, strack), from=26450430, to=26450490, cex=.8)


###################################################
### code chunk number 21: Gviz-data
###################################################
## some data
lim <- c(26700000, 26900000)
coords <- seq(lim[1], lim[2], 101)
dat <- runif(length(coords) - 1, min=-10, max=10)

## DataTrack
dtrack <- 
    DataTrack(data=dat, start=coords[-length(coords)],
              end= coords[-1], chromosome=chr, genome=genome,
              name="Uniform Random")
plotTracks(c(tracks, dtrack))


###################################################
### code chunk number 22: Gviz-figure
###################################################
pdf("GvizFigure.pdf")
plotTracks(c(tracks, dtrack))
xx <- capture.output(dev.off())


### R code from vignette source 'A60_LargeData.Rnw'

###################################################
### code chunk number 1: options
###################################################
library(SequenceAnalysisData)
library(RNAseqData.HeLa.bam.chr14)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(Rsamtools)
library(ShortRead)
library(parallel); options(mc.cores=detectCores())
library(lattice)


###################################################
### code chunk number 2: restriction-what
###################################################
param <- ScanBamParam(what=c("rname", "pos", "cigar"))


###################################################
### code chunk number 3: restriction-which
###################################################
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
exByGn <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")
seqlevels(exByGn, force=TRUE) <- "chr14"
gns <- unlist(range(exByGn))
param <- ScanBamParam(which=gns)


###################################################
### code chunk number 4: restrictions-several
###################################################
param <- ScanBamParam(what=c("rname", "pos", "cigar"), which=gns)


###################################################
### code chunk number 5: bigdata
###################################################
library(RNAseqData.HeLa.bam.chr14)
bamfls <- RNAseqData.HeLa.bam.chr14_BAMFILES


###################################################
### code chunk number 6: scanBam-restriction
###################################################
param <- ScanBamParam(what="cigar")
bam <- scanBam(bamfls[1], param=param)[[1]]
tail(sort(table(bam$cigar)))


###################################################
### code chunk number 7: readGappedAlignments-restriciton
###################################################
gal <- readGappedAlignments(bamfls[1])
head(gal, 3)
param <- ScanBamParam(what="seq")  ## also input sequence
gal <- readGappedAlignments(bamfls[1], param=param)
head(mcols(gal)$seq)


###################################################
### code chunk number 8: GC-sampling
###################################################
gcFunction <- 
    function(x)
{
    alf <- alphabetFrequency(x, as.prob=TRUE)
    rowSums(alf[,c("G", "C")])
}


###################################################
### code chunk number 9: fastq-files
###################################################
bigdata <- system.file("bigdata", package="SequenceAnalysisData")
fqfl <- dir(file.path(bigdata, "fastq"), ".fastq$", full=TRUE) #$


###################################################
### code chunk number 10: fastq-sampling
###################################################
sampler <- FastqSampler(fqfl, 100000)
fq <- yield(sampler)                    # 100,000 reads
lattice::densityplot(gcFunction(sread(fq)), plot.points=FALSE)
fq <- yield(sampler)                    # a different 100,000 reads


###################################################
### code chunk number 11: fastq-paired-end (eval = FALSE)
###################################################
## ## NOT RUN
## set.seed(123)
## end1 <- yield(FastqSampler("end_1.fastq"))
## set.seed(123)
## end2 <- yield(FastqSampler("end_2.fastq"))


###################################################
### code chunk number 12: readLines-chunks (eval = FALSE)
###################################################
## ## NOT RUN
## con <- file("<hypothetical-file>.txt")
## open(f)
## while (length(x <- readLines(f, n=10000))) {
##     ## work on character vector 'x'
## }
## close(f)


###################################################
### code chunk number 13: iteration-bam
###################################################
library(RNAseqData.HeLa.bam.chr14)
bamfl <- RNAseqData.HeLa.bam.chr14_BAMFILES[1]
countBam(bamfl)
bf <- BamFile(bamfl, yieldSize=200000) # could be larger, e.g., 2 million


###################################################
### code chunk number 14: BamFile-iter (eval = FALSE)
###################################################
## ## initialize, e.g., for step 3 ...
## open(bf)
## while (length(gal <- readGappedAlignments(bf))) {
##     ## step 2: do work...
##     ## step 3: aggregate results...
## }
## close(bf)


###################################################
### code chunk number 15: counter-iter
###################################################
counter <- 
    function(query, subject, ..., ignore.strand=TRUE)
    ## query: GRanges or GRangesList
    ## subject: GappedAlignments
{
    if (ignore.strand)
        strand(subject) <- "*"
    hits <- countOverlaps(subject, query)
    countOverlaps(query, subject[hits==1])
}


###################################################
### code chunk number 16: counter-roi
###################################################
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
query <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")


###################################################
### code chunk number 17: counter-work (eval = FALSE)
###################################################
## ## initialize, e.g., for step 3 ...
## open(bf)
## while (length(gal <- readGappedAlignments(bf))) {
##     ## step 2: do work...
##     count0 <- counter(query, gal, ignore.strand=TRUE)
##     ## step 3: aggregate results...
## }
## close(bf)


###################################################
### code chunk number 18: counter-work
###################################################
counter1 <- 
    function(bf, query, ...)
{
    ## initialize, e.g., for step 3 ...
    counts <- integer(length(query))
    open(bf)
    while (length(gal <- readGappedAlignments(bf, ...))) {
        ## step 2: do work...
        count0 <- counter(query, gal, ignore.strand=TRUE)
        ## step 3: aggregate results...
        counts <- counts + count0
    }
    close(bf)
    counts
}


###################################################
### code chunk number 19: counter1
###################################################
bf <- BamFile(bamfl, yieldSize=500000)
counts <- counter1(bf, query)


###################################################
### code chunk number 20: count-bfl-make
###################################################
fls <- RNAseqData.HeLa.bam.chr14_BAMFILES # 8 BAM files
bamfls <- BamFileList(fls, yieldSize=500000) # yieldSize can be larger


###################################################
### code chunk number 21: count-bams (eval = FALSE)
###################################################
## counts <- simplify2array(lapply(bamfls, counter1, query))


###################################################
### code chunk number 22: count-bams-mclapply
###################################################
options(mc.cores=detectCores())         # use all cores
counts <- simplify2array(mclapply(bamfls, counter1, query))
head(counts[rowSums(counts) != 0,], 3)


### R code from vignette source 'A99_Supplement.Rnw'

###################################################
### code chunk number 1: run-examples-in-GRanges
###################################################
library(GenomicRanges)
example(GRanges)


###################################################
### code chunk number 2: shift
###################################################
shift(gr, 100)


###################################################
### code chunk number 3: selectMethod
###################################################
selectMethod("shift", "GRanges")


###################################################
### code chunk number 4: gal
###################################################
library(RNAseqData.HeLa.bam.chr14)
bamfiles <- RNAseqData.HeLa.bam.chr14_BAMFILES
gal <- readGappedAlignments(bamfiles[1])


###################################################
### code chunk number 5: read_ranges
###################################################
read_ranges <- granges(gal)
read_ranges


###################################################
### code chunk number 6: read_cvg
###################################################
read_cvg <- coverage(read_ranges)
read_cvg
read_cvg$chr14
mean(read_cvg$chr14)
max(read_cvg$chr14)


###################################################
### code chunk number 7: transcriptsBy
###################################################
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
tx_by_gene <- transcriptsBy(txdb, by="gene")
tx_by_gene


###################################################
### code chunk number 8: Type-of-Gene-ID
###################################################
txdb


###################################################
### code chunk number 9: transcripts-in-HNRNPC
###################################################
tx_by_gene[["3183"]]


###################################################
### code chunk number 10: tabulate-nb-of-transcripts-per-gene
###################################################
table(elementLengths(tx_by_gene))


###################################################
### code chunk number 11: gene_ranges
###################################################
gene_ranges <- range(tx_by_gene)
gene_ranges
gene_ranges[["3183"]]
table(elementLengths(gene_ranges))
gene_ranges[elementLengths(gene_ranges) == 2]
gene_ranges[elementLengths(gene_ranges) == 3]


###################################################
### code chunk number 12: unlist-gene_ranges
###################################################
gene_ranges <- gene_ranges[elementLengths(gene_ranges) == 1]
gene_ranges <- unlist(gene_ranges)


###################################################
### code chunk number 13: nhits_per_gene
###################################################
nhits_per_gene <- countOverlaps(gene_ranges, read_ranges, ignore.strand=TRUE)


###################################################
### code chunk number 14: nhits_per_gene-as-metadata-column
###################################################
mcols(gene_ranges)$nhits <- nhits_per_gene
gene_ranges


### R code from vignette source 'Genentech2013.Rnw'

###################################################
### code chunk number 1: setup
###################################################
stopifnot(BiocInstaller::biocVersion() == "2.12")