## ----echo=FALSE---------------------------------------------------------------
# Silence messages and warnings for every chunk when this is knitted.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

## -----------------------------------------------------------------------------
library(ngsReports)

## -----------------------------------------------------------------------------
# Collect the FastQC zip archives found in the package's extdata folder and
# load them into a single FastqcDataList.
fileDir <- system.file("extdata", package = "ngsReports")
files <- list.files(
    path = fileDir,
    pattern = "fastqc.zip$",
    full.names = TRUE
)
fdl <- FastqcDataList(files)

## -----------------------------------------------------------------------------
# Pull the "Summary" module out of the first report only.
getModule(fdl[[1]], "Summary")

## ----results='hide'-----------------------------------------------------------
reads <- readTotals(fdl)

## -----------------------------------------------------------------------------
library(dplyr)
library(pander)
# Keep only rows whose Filename contains "R1", then render them as a table.
r1Reads <- dplyr::filter(reads, grepl("R1", Filename))
pander(
    r1Reads,
    big.mark = ",",
    caption = "Read totals from R1 libraries",
    justify = "lr"
)

## ----plotSummary, fig.cap="Default summary of FastQC flags.", fig.wide = TRUE----
plotSummary(fdl)

## -----------------------------------------------------------------------------
plotReadTotals(fdl)

## -----------------------------------------------------------------------------
# Same plot, with the legend anchored by its top-right corner at (1, 1) and
# given a black outline.
plotReadTotals(fdl) +
    theme(
        legend.position = c(1, 1),
        legend.justification = c(1, 1),
        legend.background = element_rect(colour = "black")
    )

## ----fig.cap = "Example showing the Per_base_sequence_quality plot for a single FastqcData object."----
plotBaseQuals(fdl[[1]])

## ----fig.cap="Example showing the Mean Per Base Sequence Qualities for a set of FastQC reports."----
plotBaseQuals(fdl)

## -----------------------------------------------------------------------------
# First four reports only, requested as boxplots.
plotBaseQuals(fdl[1:4], plotType = "boxplot")

## ----fig.cap = "Example plot showing Per_sequence_quality_scores for an individual file."----
plotSeqQuals(fdl[[1]])

## ----fig.cap = "Example heatmaps showing Per_sequence_quality_scores for a set of files."----
plotSeqQuals(fdl)

## -----------------------------------------------------------------------------
# Logical mask selecting the elements of fdl whose name contains "R2".
r2 <- grepl("R2", names(fdl))
plotSeqQuals(fdl[r2], plotType = "line")

## ----fig.cap="Individual Per_base_sequence_content plot"----------------------
plotSeqContent(fdl[[1]])

## ----fig.cap="Combined Per_base_sequence_content plot"------------------------
plotSeqContent(fdl)

## -----------------------------------------------------------------------------
# First two reports as line plots; `nc = 1` is passed through unchanged
# (presumably the number of facet columns -- confirm against plotSeqContent).
plotSeqContent(fdl[1:2], plotType = "line", nc = 1)

## ----fig.cap = "Adapter Content plot for a single FastQC report"--------------
plotAdapterContent(fdl[[1]])

## ----fig.cap = "Heatmap showing Total Adapter Content by position across a set of FastQC reports"----
plotAdapterContent(fdl)

## ----fig.cap = "Example Sequence Duplication Levels plot for an individual file."----
plotDupLevels(fdl[[1]])

## ----fig.cap = "Sequence Duplication Levels for multiple files"---------------
plotDupLevels(fdl)

## -----------------------------------------------------------------------------
# Query the gcTheoretical object for its "Genome" entries.
gcAvail(gcTheoretical, "Genome")

## ----fig.cap = "Example GC Content plot using the Hsapiens Transcriptome for the theoretical distribution."----
plotGcContent(
    fdl[[1]],
    species = "Hsapiens",
    gcType = "Transcriptome"
)

## ----fig.cap = "Example GC content showing the difference between observed and theoretical GC content across multiple files."----
plotGcContent(fdl)

## ----fig.cap = "Example GC content plot represented as a line plot instead of a heatmap."----
plotGcContent(fdl, plotType = "line", gcType = "Transcriptome")

## ----message=FALSE, warning=FALSE, eval=FALSE---------------------------------
# Not evaluated in the vignette (eval=FALSE), so it stays commented out here.
# faFile <- system.file(
#     "extdata", "Athaliana.TAIR10.tRNA.fasta",
#     package = "ngsReports")
# plotGcContent(fdl, Fastafile = faFile, n = 1000)

## ----fig.wide = TRUE----------------------------------------------------------
plotOverrep(fdl[[1]])

## -----------------------------------------------------------------------------
plotOverrep(fdl)

## ----eval = FALSE-------------------------------------------------------------
# Not evaluated in the vignette (eval = FALSE).
# overRep2Fasta(fdl, n = 10)

## -----------------------------------------------------------------------------
# Trimmomatic example log; no `type` is given to importNgsLogs() here.
fl <- "Sample1.trimmomaticPE.txt"
trimmomaticLogs <- system.file("extdata", fl, package = "ngsReports")
df <- importNgsLogs(trimmomaticLogs)

## -----------------------------------------------------------------------------
# Tabulate Filename, every column whose name contains "Surviving", and Dropped.
trimmomaticCols <- dplyr::select(
    df, "Filename", contains("Surviving"), "Dropped"
)
pander(
    trimmomaticCols,
    split.tables = Inf,
    style = "rmarkdown",
    big.mark = ",",
    caption = "Select columns as an example of output from trimmomatic."
)

## -----------------------------------------------------------------------------
# Two bowtie example logs imported together with an explicit type.
fls <- c("bowtiePE.txt", "bowtieSE.txt")
bowtieLogs <- system.file("extdata", fls, package = "ngsReports")
df <- importNgsLogs(bowtieLogs, type = "bowtie")

## -----------------------------------------------------------------------------
# Tabulate Filename plus every column whose name starts with "Reads".
bowtieCols <- dplyr::select(df, "Filename", starts_with("Reads"))
pander(
    bowtieCols,
    split.tables = Inf,
    style = "rmarkdown",
    big.mark = ",",
    caption = "Select columns as an example of output from bowtie."
)

## -----------------------------------------------------------------------------
starLog <- system.file("extdata", "log.final.out", package = "ngsReports")
df <- importNgsLogs(starLog, type = "star")

## ----echo=FALSE---------------------------------------------------------------
# Tabulate Filename plus every column whose name contains "Unique".
starCols <- dplyr::select(df, "Filename", contains("Unique"))
pander(
    starCols,
    split.tables = Inf,
    style = "rmarkdown",
    big.mark = ",",
    caption = "Select columns as output from STAR"
)

## -----------------------------------------------------------------------------
flagstatLog <- system.file("extdata", "flagstat.txt", package = "ngsReports")
df <- importNgsLogs(flagstatLog, type = "flagstat")

## ----echo=FALSE---------------------------------------------------------------
# The imported flagstat table is rendered in full, with no column selection.
pander(
    df,
    split.tables = Inf,
    style = "rmarkdown",
    big.mark = ",",
    caption = "Flagstat output for a single file"
)

## -----------------------------------------------------------------------------
# Locate files matching "Dedup_metrics.txt" in extdata, import the "metrics"
# component and show its structure.
sysDir <- system.file("extdata", package = "ngsReports")
fl <- list.files(
    path = sysDir,
    pattern = "Dedup_metrics.txt",
    full.names = TRUE
)
dupMetrics <- importNgsLogs(
    fl,
    type = "duplicationMetrics",
    which = "metrics"
)
str(dupMetrics)

## ----fig.cap = "Example Bowtie logs for PE and SE sequencing"-----------------
plotAlignmentSummary(bowtieLogs, type = "bowtie")

## ----fig.cap = "Example STAR aligner logs"------------------------------------
plotAlignmentSummary(starLog, type = "star")

## ----fig.cap = "Example plot after running BUSCO v3 on the Drosophila melanogaster reference genome"----
buscoLog <- system.file(
    "extdata", "short_summary_Dmelanogaster_Busco.txt",
    package = "ngsReports"
)
plotAssemblyStats(buscoLog, type = "busco")

## ----fig.cap = "Example plot after running quast on two shortread assemblies"----
# `fls` is reused here for the two quast reports.
fls <- c("quast1.tsv", "quast2.tsv")
quastLog <- system.file("extdata", fls, package = "ngsReports")
plotAssemblyStats(quastLog, type = "quast")

## ----fig.cap = "Example parallel coordinate plot after running quast on two shortread assemblies"----
plotAssemblyStats(quastLog, type = "quast", plotType = "paracoord")

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()