## ----style, echo=FALSE, results='asis'-----------------------------------
BiocStyle::markdown()
options(max.print=1000)
suppressPackageStartupMessages({
    library(genefilter)
    library(airway)
    library(DESeq2)
    library(GenomicAlignments)
    library(GenomicFeatures)
})

## ----sleep-t.test--------------------------------------------------------
head(sleep)
plot(extra ~ group, data = sleep)
## Traditional interface
with(sleep, t.test(extra[group == 1], extra[group == 2]))
## Formula interface
t.test(extra ~ group, sleep)
## equal variance between groups
t.test(extra ~ group, sleep, var.equal=TRUE)

## ----sleep-lm------------------------------------------------------------
## linear model; compare to t.test(var.equal=TRUE)
fit <- lm(extra ~ group, sleep)
anova(fit)

## ----sleep-model.matrix--------------------------------------------------
## underlying model, used in `lm.fit()`
model.matrix(extra ~ group, sleep)     # last column indicates group effect
model.matrix(extra ~ 0 + group, sleep) # contrast between columns

## ----sleep-diff----------------------------------------------------------
fit0 <- lm(extra ~ ID, sleep)
fit1 <- lm(extra ~ ID + group, sleep)
anova(fit0, fit1)
t.test(extra ~ group, sleep, var.equal=TRUE, paired=TRUE)

## ----airway-bam-path-----------------------------------------------------
library(airway)
path <- system.file(package="airway", "extdata")
dir(path)

## ----airway-csv----------------------------------------------------------
csvfile <- dir(path, "sample_table.csv", full=TRUE)
sampleTable <- read.csv(csvfile, row.names=1)
head(sampleTable)

## ----airway-bam----------------------------------------------------------
library(Rsamtools)
filenames <- dir(path, ".bam$", full=TRUE)
bamfiles <- BamFileList(filenames, yieldSize=1000000)
names(bamfiles) <- sub("_subset.bam", "", basename(filenames))

## ----airway-gtf-to-txdb--------------------------------------------------
library(GenomicFeatures)
gtffile <- file.path(path, "Homo_sapiens.GRCh37.75_subset.gtf")
txdb <- makeTxDbFromGFF(gtffile, format="gtf", circ_seqs=character())
genes <- exonsBy(txdb, by="gene")

## ------------------------------------------------------------------------
library(GenomicAlignments)
se <- summarizeOverlaps(features=genes, reads=bamfiles,
                        mode="Union",
                        singleEnd=FALSE,
                        ignore.strand=TRUE,
                        fragments=TRUE)
colData(se) <- as(sampleTable, "DataFrame")
se
colData(se)
rowData(se)
head(assay(se))

## ----airway-data---------------------------------------------------------
data(airway)
se <- airway

## ----airway-cell-dex-----------------------------------------------------
colData(se)

## ----airway-DESeq2-design------------------------------------------------
library(DESeq2)
dds <- DESeqDataSet(se, design = ~ cell + dex)
dds <- DESeq(dds)
res <- results(dds)

## ----plotcounts, fig.width=5, fig.height=5-------------------------------
topGene <- rownames(res)[which.min(res$padj)]
res[topGene,]
plotCounts(dds, gene=topGene, intgroup=c("dex"))

## ----plotma--------------------------------------------------------------
plotMA(res, ylim=c(-5,5))

## ----airway-DESeq2-hist--------------------------------------------------
hist(res$pvalue, breaks=50)

## ----airway-DESeq2-volcano-----------------------------------------------
plot(-log10(padj) ~ log2FoldChange, as.data.frame(res), pch=20)

