## ----setup, echo=FALSE--------------------------------------------------- library(LearnBioconductor) stopifnot(BiocInstaller::biocVersion() == "3.0") ## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() knitr::opts_chunk$set(tidy=FALSE) ## ----BSgenome-require, message=FALSE------------------------------------- suppressPackageStartupMessages({ library(BSgenome.Hsapiens.UCSC.hg19) }) chr14_range = GRanges("chr14", IRanges(1, seqlengths(Hsapiens)["chr14"])) chr14_dna <- getSeq(Hsapiens, chr14_range) letterFrequency(chr14_dna, "GC", as.prob=TRUE) ## ----ranges, message=FALSE----------------------------------------------- suppressPackageStartupMessages({ library(GenomicRanges) }) gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+") shift(gr, 1) # 1-based coordinates! range(gr) # intra-range reduce(gr) # inter-range coverage(gr) setdiff(range(gr), gr) # 'introns' ## ----bam-require--------------------------------------------------------- suppressPackageStartupMessages({ library(GenomicRanges) library(GenomicAlignments) library(Rsamtools) }) ## our 'region of interest' roi <- GRanges("chr14", IRanges(19653773, width=1)) ## sample data suppressPackageStartupMessages({ library('RNAseqData.HNRNPC.bam.chr14') }) bf <- BamFile(RNAseqData.HNRNPC.bam.chr14_BAMFILES[[1]], asMates=TRUE) ## alignments, junctions, overlapping our roi paln <- readGAlignmentsList(bf) j <- summarizeJunctions(paln, with.revmap=TRUE) j_overlap <- j[j %over% roi] ## supporting reads paln[j_overlap$revmap[[1]]] ## ----vcf, message=FALSE-------------------------------------------------- ## input variants suppressPackageStartupMessages({ library(VariantAnnotation) }) fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation") vcf <- readVcf(fl, "hg19") seqlevels(vcf) <- "chr22" ## known gene model suppressPackageStartupMessages({ library(TxDb.Hsapiens.UCSC.hg19.knownGene) }) coding <- locateVariants(rowData(vcf), TxDb.Hsapiens.UCSC.hg19.knownGene, CodingVariants()) head(coding) ## ----require------------------------------------------------------------- suppressPackageStartupMessages({ library(GenomicRanges) }) ## ----help, eval=FALSE---------------------------------------------------- # help(package="GenomicRanges") # vignette(package="GenomicRanges") # vignette(package="GenomicRanges", "GenomicRangesHOWTOs") # ?GRanges ## ----ShortRead, messages=FALSE------------------------------------------- ## 1. attach ShortRead and BiocParallel suppressPackageStartupMessages({ library(ShortRead) library(BiocParallel) }) ## 2. create a vector of file paths fls <- dir("~/fastq", pattern="*fastq", full=TRUE) ## ----fakestats, eval=FALSE----------------------------------------------- # ## 3. collect statistics # stats0 <- qa(fls) ## ----realstats, echo=FALSE, results="hide"------------------------------- data(stats0) ## ----browseStats--------------------------------------------------------- ## 4. generate and browse the report if (interactive()) browseURL(report(stats0)) ## ----ShortRead-qa-all---------------------------------------------------- data(qa_all) if (interactive()) browseURL(report(qa_all)) ## ----setup-view, message=FALSE, warning=FALSE---------------------------- ## 1.a 'Annotation' packages suppressPackageStartupMessages({ library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(org.Hs.eg.db) }) ## 1.b -- map 'seqlevels' as recorded in the TxDb file to those in the ## BAM file fl <- "~/igv/genomes/hg19_alias.tab" map <- with(read.delim(fl, header=FALSE, stringsAsFactors=FALSE), setNames(V1, V2)) seqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, force=TRUE) <- map ## 2. Symbol -> Entrez ID -> Gene coordinates sym2eg <- select(org.Hs.eg.db, "SPARCL1", "ENTREZID", "SYMBOL") exByGn <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene") sparcl1exons <- exByGn[[sym2eg$ENTREZID]] ## 3. Aligned reads suppressPackageStartupMessages({ library(GenomicAlignments) }) fl <- "~/bam/SRR1039508_sorted.bam" sparcl1gene <- range(sparcl1exons) param <- ScanBamParam(which=sparcl1gene) aln <- readGAlignmentPairs(fl, param=param) ## ----compatibleAlignments, warning=FALSE--------------------------------- ## 5.a. exons-by-transcript for our gene of interest txids <- select(TxDb.Hsapiens.UCSC.hg19.knownGene, sym2eg$ENTREZID, "TXID", "GENEID")$TXID exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")[txids] ## 5.b compatible alignments hits <- findCompatibleOverlaps(query=aln, subject=exByTx) good <- seq_along(aln) %in% queryHits(hits) table(good) ## ----coding-sequence, warning=FALSE-------------------------------------- ## reset seqlevels restoreSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene) ## a. cds coordinates, grouped by transcript txids <- select(TxDb.Hsapiens.UCSC.hg19.knownGene, sym2eg$ENTREZID, "TXID", "GENEID")$TXID cdsByTx <- cdsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")[txids] ## b. coding sequence from relevant reference genome suppressPackageStartupMessages({ library(BSgenome.Hsapiens.UCSC.hg19) }) dna <- extractTranscriptSeqs(BSgenome.Hsapiens.UCSC.hg19, cdsByTx) protein <- translate(dna) ## ----GenomicRanges-howtos, eval=FALSE------------------------------------ # browseVignettes("GenomicRanges")