## ----echo=FALSE, include=FALSE------------------------------------------------ knitr::opts_chunk$set(tidy = FALSE, cache = TRUE, dev = "png", message = FALSE, error = FALSE, warning = TRUE) ## ----warning=FALSE------------------------------------------------------------ library(GenomicFeatures) download.file( "https://ftp.ensembl.org/pub/release-62/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP5.25.62.gtf.gz", destfile="Drosophila_melanogaster.BDGP5.25.62.gtf.gz") txdb = makeTxDbFromGFF("Drosophila_melanogaster.BDGP5.25.62.gtf.gz") file.remove("Drosophila_melanogaster.BDGP5.25.62.gtf.gz") flattenedAnnotation = exonicParts( txdb, linked.to.single.gene.only=TRUE ) names(flattenedAnnotation) = sprintf("%s:E%0.3d", flattenedAnnotation$gene_id, flattenedAnnotation$exonic_part) ## ----------------------------------------------------------------------------- library(pasillaBamSubset) library(Rsamtools) library(GenomicAlignments) bamFiles = c( untreated1_chr4(), untreated3_chr4() ) bamFiles = BamFileList( bamFiles ) seqlevelsStyle(flattenedAnnotation) = "UCSC" se = summarizeOverlaps( flattenedAnnotation, BamFileList(bamFiles), singleEnd=FALSE, fragments=TRUE, ignore.strand=TRUE ) ## ----buildExonCountSet, eval=FALSE-------------------------------------------- # library(DEXSeq) # colData(se)$condition = factor(c("untreated1", "untreated3")) # colData(se)$libType = factor(c("single-end", "paired-end")) # dxd = DEXSeqDataSetFromSE( se, design= ~ sample + exon + condition:exon ) ## ----startVignette------------------------------------------------------------ data(pasillaDEXSeqDataSet, package="pasilla") ## ----------------------------------------------------------------------------- colData(dxd) ## ----------------------------------------------------------------------------- head( counts(dxd), 5 ) ## ----------------------------------------------------------------------------- split( seq_len(ncol(dxd)), colData(dxd)$exon ) ## ----------------------------------------------------------------------------- head( featureCounts(dxd), 5 ) ## ----------------------------------------------------------------------------- head( rowRanges(dxd), 3 ) ## ----------------------------------------------------------------------------- sampleAnnotation( dxd ) ## ----------------------------------------------------------------------------- dxd = estimateSizeFactors( dxd ) ## ----------------------------------------------------------------------------- dxd = estimateDispersions( dxd ) ## ----fitdiagnostics, fig.small=TRUE, fig.cap="Fit Diagnostics. The initial per-exon dispersion estimates (shown by black points), the fitted mean-dispersion values function (red line), and the shrinked values in blue"---- plotDispEsts( dxd ) ## ----testForDEU1-------------------------------------------------------------- dxd = testForDEU( dxd ) ## ----estFC-------------------------------------------------------------------- dxd = estimateExonFoldChanges( dxd, fitExpToVar="condition") ## ----results1----------------------------------------------------------------- dxr1 = DEXSeqResults( dxd ) dxr1 ## ----results2----------------------------------------------------------------- mcols(dxr1)$description ## ----tallyExons--------------------------------------------------------------- table ( dxr1$padj < 0.1 ) ## ----tallyGenes--------------------------------------------------------------- table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any ) ) ## ----MvsA, fig.small=TRUE, fig.cap="MA plot. Mean expression versus $log{2}$ fold change plot. Significant hits at an $FDR=0.1$ are coloured in red."---- plotMA( dxr1, cex=0.8 ) ## ----design------------------------------------------------------------------- sampleAnnotation(dxd) ## ----formulas2---------------------------------------------------------------- formulaFullModel = ~ sample + exon + type:exon + condition:exon formulaReducedModel = ~ sample + exon + type:exon ## ----estDisps_again----------------------------------------------------------- dxd = estimateDispersions( dxd, formula = formulaFullModel ) ## ----test_again--------------------------------------------------------------- dxd = testForDEU( dxd, reducedModel = formulaReducedModel, fullModel = formulaFullModel ) ## ----res_again---------------------------------------------------------------- dxr2 = DEXSeqResults( dxd ) ## ----table2------------------------------------------------------------------- table( dxr2$padj < 0.1 ) ## ----table3------------------------------------------------------------------- table( before = dxr1$padj < 0.1, now = dxr2$padj < 0.1 ) ## ----plot1, fig.height=8, fig.width=12, fig.cap="Fitted expression. The plot represents the expression estimates from a call to `testForDEU()`. Shown in red is the exon that showed significant differential exon usage."---- plotDEXSeq( dxr2, "FBgn0010909", legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----checkClaim--------------------------------------------------------------- wh = (dxr2$groupID=="FBgn0010909") stopifnot(sum(dxr2$padj[wh] < formals(plotDEXSeq)$FDR)==1) ## ----plot2, fig.height=8, fig.width=12, fig.cap="Transcripts. As in the previous plots, but including the annotated transcript models"---- plotDEXSeq( dxr2, "FBgn0010909", displayTranscripts=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----plot3, fig.height=8, fig.width=12, fig.cap="Normalized counts. As in the previous plots, with normalized count values of each exon in each of the samples."---- plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, norCounts=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----plot4, fig.height=8, fig.width=12, fig.cap="Fitted splicing. As in the previous plots, but after subtraction of overall changes in gene expression."---- plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, splicing=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----DEXSeqHTML, cache=TRUE, eval=FALSE--------------------------------------- # DEXSeqHTML( dxr2, FDR=0.1, color=c("#FF000080", "#0000FF80") ) ## ----para1,cache=TRUE,results='hide', eval=FALSE------------------------------ # BPPARAM = MultiCoreParam(4) # dxd = estimateSizeFactors( dxd ) # dxd = estimateDispersions( dxd, BPPARAM=BPPARAM) # dxd = testForDEU( dxd, BPPARAM=BPPARAM) # dxd = estimateExonFoldChanges(dxd, BPPARAM=BPPARAM) ## ----alldeu------------------------------------------------------------------- dxr = DEXSeq(dxd) class(dxr) ## ----pergeneqval-------------------------------------------------------------- numbOfGenes = sum( perGeneQValue(dxr) < 0.1) numbOfGenes ## ----systemFile--------------------------------------------------------------- pythonScriptsDir = system.file( "python_scripts", package="DEXSeq" ) list.files(pythonScriptsDir) system.file( "python_scripts", package="DEXSeq", mustWork=TRUE ) ## ----loadDEXSeq--------------------------------------------------------------- inDir = system.file("extdata", package="pasilla") countFiles = list.files(inDir, pattern="fb.txt$", full.names=TRUE) basename(countFiles) flattenedFile = list.files(inDir, pattern="gff$", full.names=TRUE) basename(flattenedFile) ## ----sampleTable-------------------------------------------------------------- sampleTable = data.frame( row.names = c( "treated1", "treated2", "treated3", "untreated1", "untreated2", "untreated3", "untreated4" ), condition = c("knockdown", "knockdown", "knockdown", "control", "control", "control", "control" ), libType = c( "single-end", "paired-end", "paired-end", "single-end", "single-end", "paired-end", "paired-end" ) ) ## ----check-------------------------------------------------------------------- sampleTable ## ----message=FALSE------------------------------------------------------------ library( "DEXSeq" ) dxd = DEXSeqDataSetFromHTSeq( countFiles, sampleData=sampleTable, design= ~ sample + exon + condition:exon, flattenedfile=flattenedFile ) ## ----acc---------------------------------------------------------------------- head( geneIDs(dxd) ) head( exonIDs(dxd) ) ## ----grmethods---------------------------------------------------------------- interestingRegion = GRanges( "chr2L", IRanges(start=3872658, end=3875302) ) subsetByOverlaps( x=dxr, ranges=interestingRegion ) findOverlaps( query=dxr, subject=interestingRegion ) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()