# Chunk-by-chunk walkthrough of the qckitfastq quality-control functions.
# The `## ----<label>----` lines are knitr chunk markers; each chunk computes
# one metric on the example FASTQ file, shows the head of the result as a
# styled table and, where a plotting helper is called, draws the plot.
# NOTE(review): this looks like knitr::purl() output of the package vignette;
# if so, mirror any change here in the .Rmd source.

## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
library(kableExtra)

## ----comparison_table, include=FALSE,eval=FALSE-------------------------------
# qckitfastq <- c("yes","yes","yes+","yes")
# seqTools <- c("no","yes","yes","yes")
# ShortRead <- c("no","no","no","yes")
# FASTQC <- c("yes","yes*","yes*","yes*")
# metrics <- data.frame(qckitfastq,seqTools,ShortRead,FASTQC)
# rownames(metrics) <- c("Read Length Distribution",
#     "Per Base Read Quality", "Nucleotide Read Content", "GC Content")
# kable(metrics)
# # need to do per read sequence quality
# # + indicates that the program...
# # Here, '*' indicates that the program truncates the file or computes on only the first x samples

## ----loading_file-------------------------------------------------------------
library(qckitfastq)

# Example FASTQ file looked up inside the installed qckitfastq package.
infile <- system.file(
  "extdata", "10^5_reads_test.fq.gz",
  package = "qckitfastq"
)
# Result of seqTools::fastqq(); reused below by read_length(),
# read_base_content() and read_content().
fseq <- seqTools::fastqq(infile)

## ----read_length--------------------------------------------------------------
read_len <- read_length(fseq)
kable(head(read_len)) %>%
  kable_styling()
plot_read_length(read_len)

## ----per_base_sequence_quality------------------------------------------------
bs <- per_base_quality(infile)
kable(head(bs)) %>%
  kable_styling()
plot_per_base_quality(bs)

## ----per_read_quality---------------------------------------------------------
prq <- per_read_quality(infile)
kable(head(prq)) %>%
  kable_styling()
plot_per_read_quality(prq)

## ----gc_content---------------------------------------------------------------
gc_df <- GC_content(infile)
kable(head(gc_df)) %>%
  kable_styling()
plot_GC_content(gc_df)

## ----nucleotide_read_content--------------------------------------------------
# Content for the single base "A" first, then the result of read_content().
scA <- read_base_content(fseq, content = "A")
kable(head(scA)) %>%
  kable_styling()
rc <- read_content(fseq)
kable(head(rc)) %>%
  kable_styling()
plot_read_content(rc)

## ----kmer_count---------------------------------------------------------------
km <- kmer_count(infile, k = 6)
kable(head(km)) %>%
  kable_styling()

## ----overrep_reads------------------------------------------------------------
# The result deliberately keeps the name of the function that produced it.
# The call still resolves because R skips non-function bindings when looking
# up a name in call position, but from here on `overrep_reads` used as a value
# refers to this result.
overrep_reads <- overrep_reads(infile)
knitr::kable(head(overrep_reads, n = 5)) %>%
  kable_styling()
plot_overrep_reads(overrep_reads)

## ----overrep_kmer-------------------------------------------------------------
# NOTE(review): the second positional argument (7) is presumably the k-mer
# length -- confirm against the overrep_kmer() signature.
overkm <- overrep_kmer(infile, 7)
knitr::kable(head(overkm, n = 10)) %>%
  kable_styling()
plot_overrep_kmer(overkm)

## ----adapter_content----------------------------------------------------------
# This chunk only runs when the OS type is not "windows".
# NOTE(review): presumably adapter_content() is unsupported there -- confirm.
#
# The table and the plot are kept as separate top-level expressions on purpose:
# only the visible value of a top-level expression is auto-printed, so a table
# placed in the middle of a single `if { ... }` body is silently dropped.
adapter_supported <- .Platform$OS.type != "windows"
if (adapter_supported) {
  infile2 <- system.file("extdata", "test.fq.gz", package = "qckitfastq")
  ac_sorted <- adapter_content(infile2)
}
if (adapter_supported) {
  kable(head(ac_sorted)) %>%
    kable_styling()
}
if (adapter_supported) {
  plot_adapter_content(ac_sorted)
}

## ----eval=FALSE, include=FALSE------------------------------------------------
# ### Benchmarking
#
# #To demonstrate the utility of our functions on large datasets...
# #(need to benchmark against ShortRead)
# #library(seqTools)
# #library(ShortRead)
# #library(rbenchmark)
# #sampler <- FastqSampler('E-MTAB-1147/fastq/ERR127302_1.fastq.gz', 20000)