## ----setup, echo=FALSE, results="hide"---------------------------------------- knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, dev="png", message=FALSE, error=FALSE, warning=FALSE) ## ----eval=FALSE--------------------------------------------------------------- # # 'coldata.csv': sample information table # coldata <- read.csv("coldata.csv") # library(tximeta) # y <- tximeta(coldata) # reads in counts and inf reps # library(fishpond) # y <- scaleInfReps(y) # scales counts # y <- labelKeep(y) # labels features to keep # set.seed(1) # y <- swish(y, x="condition") # simplest Swish case ## ----eval=FALSE--------------------------------------------------------------- # table(mcols(y)$qvalue < .05) ## ----eval=FALSE--------------------------------------------------------------- # y <- y[mcols(y)$keep,] ## ----------------------------------------------------------------------------- library(macrophage) dir <- system.file("extdata", package="macrophage") ## ----------------------------------------------------------------------------- coldata <- read.csv(file.path(dir, "coldata.csv")) head(coldata) ## ----------------------------------------------------------------------------- coldata <- coldata[,c(1,2,3,5)] names(coldata) <- c("names","id","line","condition") ## ----------------------------------------------------------------------------- coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz") all(file.exists(coldata$files)) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(SummarizedExperiment)) ## ----include=FALSE------------------------------------------------------------ # This hidden code chunk is only needed for Bioc build machines, # so that 'fishpond' will build regardless of whether # the machine can connect to ftp.ebi.ac.uk. # Using linkedTxomes to point to a GTF that lives in the macrophage pkg. # The chunk can be skipped if you have internet connection, # as tximeta will automatically ID the transcriptome and DL the GTF. library(tximeta) makeLinkedTxome( indexDir=file.path(dir, "gencode.v29_salmon_0.12.0"), source="myGENCODE", organism="Homo sapiens", release="29", genome="GRCh38", fasta="ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29/gencode.v29.transcripts.fa.gz", gtf=file.path(dir, "gencode.v29.annotation.gtf.gz"), # local version write=FALSE ) # 'myGENCODE' is used to avoid tximeta() attempting to # pull down the TxDb from AnnotationHub, alterantively # once can specify useHub=FALSE. ## ----------------------------------------------------------------------------- coldata <- coldata[coldata$condition %in% c("naive","IFNg"),] coldata$condition <- factor(coldata$condition, levels=c("naive","IFNg")) ## ----------------------------------------------------------------------------- library(tximeta) se <- tximeta(coldata) ## ----------------------------------------------------------------------------- assayNames(se) ## ----------------------------------------------------------------------------- head(rownames(se)) ## ----include=FALSE------------------------------------------------------------ # hidden chunk to trick tximeta into knowing this is GENCODE, # this is only needed because above we labelled the txome myGENCODE metadata(se)$txomeInfo$source <- "GENCODE" ## ----------------------------------------------------------------------------- y <- se y <- y[seqnames(y) == "chr4",] ## ----results="hide", message=FALSE-------------------------------------------- library(fishpond) y <- scaleInfReps(y) y <- labelKeep(y) y <- y[mcols(y)$keep,] set.seed(1) y <- swish(y, x="condition", pair="line") ## ----------------------------------------------------------------------------- y_fast <- swish(y, x="condition", pair="line", fast=1) table(original=mcols(y)$qvalue < .1, fast=mcols(y_fast)$qvalue < .1) ## ----------------------------------------------------------------------------- table(mcols(y)$qvalue < .05) ## ----------------------------------------------------------------------------- most.sig <- with(mcols(y), order(qvalue, -abs(log2FC))) mcols(y)[head(most.sig),c("log2FC","qvalue")] ## ----------------------------------------------------------------------------- hist(mcols(y)$pvalue, col="grey") ## ----------------------------------------------------------------------------- with(mcols(y), table(sig=qvalue < .05, sign.lfc=sign(log2FC)) ) sig <- mcols(y)$qvalue < .05 lo <- order(mcols(y)$log2FC * sig) hi <- order(-mcols(y)$log2FC * sig) ## ----------------------------------------------------------------------------- top_up <- mcols(y)[head(hi),] names(top_up) cols <- c("log10mean","log2FC","pvalue","qvalue") print(as.data.frame(top_up)[,cols], digits=3) ## ----------------------------------------------------------------------------- top_down <- mcols(y)[head(lo),] print(as.data.frame(top_down)[,cols], digits=3) ## ----------------------------------------------------------------------------- plotInfReps(y, idx=hi[100], x="condition", cov="line") ## ----------------------------------------------------------------------------- plotMASwish(y, alpha=.05) ## ----------------------------------------------------------------------------- library(org.Hs.eg.db) y <- addIds(y, "SYMBOL", gene=TRUE) ## ----------------------------------------------------------------------------- plotMASwish(y, alpha=.05, xlim=c(.5,5.5)) with( subset(mcols(y), qvalue < .05 & abs(log2FC) > 4), text(log10mean, log2FC, SYMBOL, col="blue", pos=4, cex=.7) ) ## ----------------------------------------------------------------------------- gse <- summarizeToGene(se) gy <- gse gy <- gy[seqnames(gy) == "chr4",] ## ----results="hide", message=FALSE-------------------------------------------- gy <- scaleInfReps(gy) gy <- labelKeep(gy) gy <- gy[mcols(gy)$keep,] set.seed(1) gy <- swish(gy, x="condition", pair="line") ## ----------------------------------------------------------------------------- table(mcols(gy)$qvalue < .05) ## ----------------------------------------------------------------------------- hist(mcols(y)$pvalue, col="grey") ## ----------------------------------------------------------------------------- with(mcols(gy), table(sig=qvalue < .05, sign.lfc=sign(log2FC)) ) sig <- mcols(gy)$qvalue < .05 glo <- order(mcols(gy)$log2FC * sig) ghi <- order(-mcols(gy)$log2FC * sig) ## ----------------------------------------------------------------------------- gtop_up <- mcols(gy)[head(ghi),] print(as.data.frame(gtop_up)[,cols], digits=3) gtop_down <- mcols(gy)[head(glo),] print(as.data.frame(gtop_down)[,cols], digits=3) ## ----------------------------------------------------------------------------- plotInfReps(gy, idx=ghi[100], x="condition", cov="line") ## ----------------------------------------------------------------------------- plotMASwish(gy, alpha=.05) ## ----------------------------------------------------------------------------- library(org.Hs.eg.db) gy <- addIds(gy, "SYMBOL", gene=TRUE) ## ----------------------------------------------------------------------------- plotMASwish(gy, alpha=.05, xlim=c(.5,5.5)) with( subset(mcols(gy), qvalue < .05 & abs(log2FC) > 3), text(log10mean, log2FC, SYMBOL, col="blue", pos=4, cex=.7) ) ## ----------------------------------------------------------------------------- # run on the transcript-level dataset iso <- isoformProportions(y) iso <- swish(iso, x="condition", pair="line") ## ----------------------------------------------------------------------------- coldata <- read.csv(file.path(dir, "coldata.csv")) coldata <- coldata[,c(1,2,3,5)] names(coldata) <- c("names","id","line","condition") coldata$files <- file.path(dir, "quants", coldata$names, "quant.sf.gz") se <- tximeta(coldata) ## ----------------------------------------------------------------------------- se$ifng <- factor(ifelse( grepl("IFNg",se$condition), "treated","control")) se$salmonella <- factor(ifelse( grepl("SL1344",se$condition), "infected","control")) with(colData(se), table(ifng, salmonella)) ## ----------------------------------------------------------------------------- y2 <- se y2 <- y2[seqnames(y2) == "chr1",] ## ----------------------------------------------------------------------------- y2$pair <- factor(y2$line) levels(y2$pair) <- LETTERS[1:6] # simplify names y2$pair <- as.character(y2$pair) y2$pair[y2$ifng == "control"] y2$pair[y2$ifng == "treated"] y2$pair[y2$ifng == "treated"] <- rep(LETTERS[7:12], each=2) y2$pair <- factor(y2$pair) head(table(y2$pair, y2$salmonella)) ## ----results="hide", message=FALSE-------------------------------------------- y2 <- scaleInfReps(y2) y2 <- labelKeep(y2) y2 <- y2[mcols(y2)$keep,] set.seed(1) y2 <- swish(y2, x="salmonella", cov="ifng", pair="pair", interaction=TRUE) ## ----------------------------------------------------------------------------- hist(mcols(y2)$pvalue, col="grey") ## ----------------------------------------------------------------------------- plotMASwish(y2, alpha=.05) ## ----------------------------------------------------------------------------- idx <- with(mcols(y2), which(qvalue < .05 & log2FC > 5)) plotInfReps(y2, idx[1], x="ifng", cov="salmonella") plotInfReps(y2, idx[2], x="ifng", cov="salmonella") ## ----eval=FALSE--------------------------------------------------------------- # vignette(topic="allelic", package="fishpond") ## ----eval=FALSE--------------------------------------------------------------- # dir <- system.file("extdata", package="tximportData") # files <- file.path(dir,"alevin/neurons_900_v014/alevin/quants_mat.gz") # neurons <- tximeta(files, type="alevin", # skipMeta=TRUE, # just for vignette # dropInfReps=TRUE, # alevinArgs=list(filterBarcodes=TRUE)) ## ----eval=FALSE--------------------------------------------------------------- # library(SingleCellExperiment) # sce <- as(neurons, "SingleCellExperiment") # sce$cluster <- factor(paste0("cl",sample(1:6,ncol(sce),TRUE))) # par(mfrow=c(2,1), mar=c(2,4,2,1)) # plotInfReps(sce, "ENSMUSG00000072235.6", x="cluster", # legend=TRUE) # plotInfReps(sce, "ENSMUSG00000072235.6", x="cluster", # reorder=FALSE) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # par(mfrow=c(2,1), mar=c(2,4,2,1)) # plotInfReps(sce[,1:50], "ENSMUSG00000072235.6", x="cluster") # plotInfReps(sce[,1:150], "ENSMUSG00000072235.6", x="cluster") ## ----eval=FALSE--------------------------------------------------------------- # y <- labelKeep(y, minCount=3, minN=10) # y <- y[mcols(y)$keep,] # subset genes ## ----eval=FALSE--------------------------------------------------------------- # assays(y) <- lapply(assays(y), as.matrix) # make dense matrices # y <- scaleInfReps(y, lengthCorrect=FALSE, sfFun=sfFun) # y <- swish(y, x="condition") ## ----eval=FALSE--------------------------------------------------------------- # y <- makeInfReps(y, 20) ## ----eval=FALSE--------------------------------------------------------------- # library(SingleCellExperiment) # y <- as(y, "SingleCellExperiment") # # then, after filtering genes and cells... # # compute and store sizeFactors calculated over all genes # y <- scran::computeSumFactors(y) # # split swish objects into 8 RDS files: # splitSwish(y, nsplits=8, prefix="swish", snakefile="Snakefile") # # now, run snakemake from command line # # after finished, results back into R: # y <- addStatsFromCSV(y, "summary.csv") ## ----------------------------------------------------------------------------- set.seed(1) y <- makeSimSwishData() ## ----------------------------------------------------------------------------- y <- scaleInfReps(y, saveMeanScaled=TRUE) tail(assayNames(y),4) # 'meanScaled' assay y <- labelKeep(y) y <- y[mcols(y)$keep,] ## ----------------------------------------------------------------------------- norm_cts_for_batch <- assay(y, "meanScaled") # use batch factor estimation method of your choosing # ... ## ----------------------------------------------------------------------------- w1 <- runif(ncol(y)) # here simulated w1, use real instead w2 <- runif(ncol(y)) # here simulated w2, use real instead W <- data.frame(w1, w2) infRepIdx <- grep("infRep",assayNames(y),value=TRUE) nreps <- length(infRepIdx) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages(library(limma)) mm <- model.matrix(~condition, colData(y)) pc <- .1 for (k in seq_len(nreps)) { logInfRep <- log(assay(y, infRepIdx[k]) + pc) logInfRep <- limma::removeBatchEffect( logInfRep, covariates=W, design=mm) assay(y, infRepIdx[k]) <- exp(logInfRep) } ## ----------------------------------------------------------------------------- y <- swish(y, x="condition") ## ----------------------------------------------------------------------------- gres <- mcols(gy)[mcols(gy)$keep,] min(gres$qvalue, na.rm=TRUE) # min nominal FDR is not 0 with(gres, plot(stat, -log10(qvalue))) with(gres, plot(log2FC, -log10(qvalue))) abline(v=0, col="red") with(gres, plot(log2FC, -log10(qvalue), xlim=c(-1.5,1.5), ylim=c(0,1.5))) abline(v=0, col="red") ## ----------------------------------------------------------------------------- y3 <- se y3 <- y3[seqnames(y3) == "chr4",] y3 <- y3[,y3$condition %in% c("naive","IFNg")] y3 <- labelKeep(y3) y3 <- y3[mcols(y3)$keep,] y3 <- computeInfRV(y3) mcols(y3)$meanCts <- rowMeans(assays(y3)[["counts"]]) with(mcols(y3), plot(meanCts, meanInfRV, log="xy")) hist(log10(mcols(y3)$meanInfRV), col="grey50", border="white", breaks=20, xlab="mean InfRV", main="Txp-level inferential uncertainty") ## ----echo=FALSE--------------------------------------------------------------- n <- 8 condition <- rep(1:2,length=2*n) group <- rep(1:2,each=n) pair <- rep(c(1:n),each=2) cols <- c("dodgerblue","goldenrod4") plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5), type="n", xaxt="n", yaxt="n", xlab="samples", ylab="permutation") abline(v=8.5, lty=2) axis(2, 0:3, c("orig",1:3), las=2) text(1:(2*n), rep(0,2*n), pair, col=cols[condition], cex=2) set.seed(1) for (i in 1:3) { perms <- rep(2*sample(n,n),each=2) - rep(1:0,length=2*n) text(1:(2*n), rep(i,2*n), pair[perms], col=cols[condition[perms]], cex=2) } ## ----echo=FALSE--------------------------------------------------------------- n <- 8 condition <- rep(c(1:2,1:2),each=n/2) group <- rep(1:2,each=n) id <- LETTERS[1:(2*n)] cols <- c("dodgerblue","goldenrod4") plot(1:(2*n), rep(0,2*n), ylim=c(-.5,3.5), type="n", xaxt="n", yaxt="n", xlab="samples", ylab="permutation") abline(v=8.5, lty=2) axis(2, 0:3, c("orig",1:3), las=2) text(1:(2*n), rep(0,2*n), id, col=cols[condition], cex=2) set.seed(3) for (i in 1:3) { id.perms <- character(2*n) grp1 <- id[group==1] grp2 <- id[group==2] id.perms[c(1:4,9:12)] <- sample(id[condition==1],n) idx1 <- id.perms[c(1:4,9:12)] %in% grp1 id.perms[c(5:8,13:16)][idx1] <- sample(id[condition==2 & group==1],sum(idx1)) idx2 <- id.perms[c(1:4,9:12)] %in% grp2 id.perms[c(5:8,13:16)][idx2] <- sample(id[condition==2 & group==2],sum(idx2)) text(1:(2*n), rep(i,2*n), id.perms, col=cols[condition], cex=2) } arrows(3,1.5,1.3,1.15,length=.1) arrows(3,1.5,4.7,1.15,length=.1) ## ----------------------------------------------------------------------------- sessionInfo()