## ----echo=FALSE, results="hide", warning=FALSE-------------------------------- suppressPackageStartupMessages({ library('annotation') }) ## ----echo=FALSE--------------------------------------------------------------- suppressPackageStartupMessages(library(annotation)) ## ----------------------------------------------------------------------------- hub <- AnnotationHub() ## ----------------------------------------------------------------------------- mcols(query(hub, "clinvar.vcf", "GRCh37"))[,"sourceurl", drop=FALSE] ## ----------------------------------------------------------------------------- fl <- query(hub, "clinvar.vcf", "GRCh37")[[1]] ## ----------------------------------------------------------------------------- vcf <- readVcf(fl, "hg19") dim(vcf) ## ----------------------------------------------------------------------------- txdb_hg19 <- TxDb.Hsapiens.UCSC.hg19.knownGene head(seqlevels(txdb_hg19)) seqlevels(vcf) seqlevels(vcf) <- paste0("chr", seqlevels(vcf)) ## ----------------------------------------------------------------------------- seqlevels(vcf, pruning.mode="coarse") <- c("chr3", "chr18") seqlevels(txdb_hg19) <- c("chr3", "chr18") ## ----------------------------------------------------------------------------- intersect(seqlevels(txdb_hg19), seqlevels(vcf)) ## ----------------------------------------------------------------------------- unique(genome(txdb_hg19)) unique(genome(vcf)) ## ----------------------------------------------------------------------------- gr_hg19 <- rowRanges(vcf) ## ----------------------------------------------------------------------------- txdb_mm10 <- keepStandardChromosomes(TxDb.Mmusculus.UCSC.mm10.ensGene) ## ----------------------------------------------------------------------------- head(seqlevels(txdb_mm10)) gr_mm10 <- GRanges("chr4", IRanges(c(4000000, 107889000), width=1000)) ## ----------------------------------------------------------------------------- unique(genome(txdb_mm10)) genome(gr_mm10) <- "mm10" ## ----------------------------------------------------------------------------- loc_hg19 <- locateVariants(gr_hg19, txdb_hg19, AllVariants()) table(loc_hg19$LOCATION) loc_mm10 <- locateVariants(gr_mm10, txdb_mm10, AllVariants()) table(loc_mm10$LOCATION) ## ----------------------------------------------------------------------------- cols <- c("UNIPROT", "PFAM") keys <- na.omit(unique(loc_hg19$GENEID)) head(select(org.Hs.eg.db, keys, cols, keytype="ENTREZID")) ## ----------------------------------------------------------------------------- keys <- unique(loc_mm10$GENEID) head(select(org.Mm.eg.db, keys, cols, keytype="ENSEMBL")) ## ----------------------------------------------------------------------------- hub <- AnnotationHub() hub_hg19 <- subset(hub, (hub$species == "Homo sapiens") & (hub$genome == "hg19")) length(hub_hg19) ## ----echo=FALSE--------------------------------------------------------------- ov_hg19 <- lapply(1:3, function(i) subsetByOverlaps(hub_hg19[[i]], gr_hg19)) ## ----------------------------------------------------------------------------- ov_hg19 <- lapply(1:3, function(i) subsetByOverlaps(hub_hg19[[i]], gr_hg19)) ## ----------------------------------------------------------------------------- names(ov_hg19) <- names(hub_hg19)[1:3] lapply(ov_hg19, head, n=3) ## ----------------------------------------------------------------------------- head(predictCoding(vcf, txdb_hg19, Hsapiens), 3) ## ----sess--------------------------------------------------------------------- sessionInfo()