Contents

1 Annotation resources

Packages

library(org.Hs.eg.db)
columns(org.Hs.eg.db)
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT" 
##  [5] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"    
##  [9] "EVIDENCEALL"  "GENENAME"     "GO"           "GOALL"       
## [13] "IPI"          "MAP"          "OMIM"         "ONTOLOGY"    
## [17] "ONTOLOGYALL"  "PATH"         "PFAM"         "PMID"        
## [21] "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [25] "UNIGENE"      "UNIPROT"
mapIds(org.Hs.eg.db, c("BRCA1", "BRCA2"), "ENSEMBL", keytype="SYMBOL")
## 'select()' returned 1:1 mapping between keys and columns
##             BRCA1             BRCA2 
## "ENSG00000012048" "ENSG00000139618"
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
exons(TxDb.Hsapiens.UCSC.hg38.knownGene)
## GRanges object with 581036 ranges and 1 metadata column:
##                    seqnames           ranges strand |   exon_id
##                       <Rle>        <IRanges>  <Rle> | <integer>
##        [1]             chr1   [29554, 30039]      + |         1
##        [2]             chr1   [30267, 30667]      + |         2
##        [3]             chr1   [30366, 30503]      + |         3
##        [4]             chr1   [30564, 30667]      + |         4
##        [5]             chr1   [30976, 31097]      + |         5
##        ...              ...              ...    ... .       ...
##   [581032] chrUn_KI270750v1 [148668, 148843]      + |    581032
##   [581033] chrUn_KI270752v1 [   144,    268]      + |    581033
##   [581034] chrUn_KI270752v1 [ 21813,  21944]      + |    581034
##   [581035] chrUn_KI270752v1 [  3497,   3623]      - |    581035
##   [581036] chrUn_KI270752v1 [  9943,  10067]      - |    581036
##   -------
##   seqinfo: 455 sequences (1 circular) from hg38 genome
exonsBy(TxDb.Hsapiens.UCSC.hg38.knownGene, "tx")
## GRangesList object of length 197782:
## $1 
## GRanges object with 3 ranges and 3 metadata columns:
##       seqnames         ranges strand |   exon_id   exon_name exon_rank
##          <Rle>      <IRanges>  <Rle> | <integer> <character> <integer>
##   [1]     chr1 [29554, 30039]      + |         1        <NA>         1
##   [2]     chr1 [30564, 30667]      + |         4        <NA>         2
##   [3]     chr1 [30976, 31097]      + |         5        <NA>         3
## 
## $2 
## GRanges object with 2 ranges and 3 metadata columns:
##       seqnames         ranges strand | exon_id exon_name exon_rank
##   [1]     chr1 [30267, 30667]      + |       2      <NA>         1
##   [2]     chr1 [30976, 31109]      + |       6      <NA>         2
## 
## $3 
## GRanges object with 1 range and 3 metadata columns:
##       seqnames         ranges strand | exon_id exon_name exon_rank
##   [1]     chr1 [30366, 30503]      + |       3      <NA>         1
## 
## ...
## <197779 more elements>
## -------
## seqinfo: 455 sequences (1 circular) from hg38 genome

Web resources

library(biomaRt)

## Discover and then select a mart
ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl")

## Gene symbols associated with GO-annotated MAP kinase
## activity (GO id GO:0004707)
getBM(attributes = c('entrezgene','hgnc_symbol'), 
      filters = 'go', 
      values = 'GO:0004707',
      mart = ensembl)
##    entrezgene hgnc_symbol
## 1        1432      MAPK14
## 2        5596       MAPK4
## 3      225689      MAPK15
## 4        5603      MAPK13
## 5        5601       MAPK9
## 6       51701         NLK
## 7        5594       MAPK1
## 8        5599       MAPK8
## 9        5602      MAPK10
## 10       6300      MAPK12
## 11       5597       MAPK6
## 12       5600      MAPK11
## 13       5598       MAPK7
## 14       5595       MAPK3

‘Hubs’

library(AnnotationHub)
AnnotationHub()
## updating metadata:
## retrieving 1 resource
## snapshotDate(): 2017-04-25
## AnnotationHub with 40134 records
## # snapshotDate(): 2017-04-25 
## # $dataprovider: BroadInstitute, Ensembl, UCSC, Haemcode, ftp://ftp.ncbi....
## # $species: Homo sapiens, Mus musculus, Bos taurus, Pan troglodytes, Dani...
## # $rdataclass: GRanges, BigWigFile, FaFile, TwoBitFile, ChainFile, OrgDb,...
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH2"]]' 
## 
##             title                                               
##   AH2     | Ailuropoda_melanoleuca.ailMel1.69.dna.toplevel.fa   
##   AH3     | Ailuropoda_melanoleuca.ailMel1.69.dna_rm.toplevel.fa
##   AH4     | Ailuropoda_melanoleuca.ailMel1.69.dna_sm.toplevel.fa
##   AH5     | Ailuropoda_melanoleuca.ailMel1.69.ncrna.fa          
##   AH6     | Ailuropoda_melanoleuca.ailMel1.69.pep.all.fa        
##   ...       ...                                                 
##   AH56649 | org.Thermoplasmatales_archaeon_BRNA1.eg.sqlite      
##   AH56650 | org.Ignicoccus_hospitalis_KIN4|I.eg.sqlite          
##   AH56651 | org.Desulfurococcus_amylolyticus_DSM_16532.eg.sqlite
##   AH56652 | org.Pandoravirus_dulcis.eg.sqlite                   
##   AH56653 | org.Methanocaldococcus_infernus_ME.eg.sqlite
query(AnnotationHub(), "grasp")   # see library(grasp2db)
## snapshotDate(): 2017-04-25
## AnnotationHub with 1 record
## # snapshotDate(): 2017-04-25 
## # names(): AH21414
## # $dataprovider: NHLBI
## # $species: Homo sapiens
## # $rdataclass: SQLiteConnection
## # $rdatadateadded: 2015-01-08
## # $title: Bioconductor distribution of grasp2 v. 2.0.0.0
## # $description: Build 2.0.0.0 of the grasp2 data base, with 2,082 GWAS st...
## # $taxonomyid: 9606
## # $genome: hg19
## # $sourcetype: GRASP
## # $sourceurl: https://s3.amazonaws.com/NHLBI_public/GRASP/GraspFullDatase...
## # $sourcesize: NA
## # $tags: c("SNP", "Annotation", "GRASP2") 
## # retrieve record with 'object[["AH21414"]]'
query(AnnotationHub(), c("release-88", "homo"))
## snapshotDate(): 2017-04-25
## AnnotationHub with 9 records
## # snapshotDate(): 2017-04-25 
## # $dataprovider: Ensembl
## # $species: Homo sapiens
## # $rdataclass: TwoBitFile, GRanges
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH53536"]]' 
## 
##             title                                           
##   AH53536 | Homo_sapiens.GRCh38.88.abinitio.gtf             
##   AH53537 | Homo_sapiens.GRCh38.88.chr.gtf                  
##   AH53538 | Homo_sapiens.GRCh38.88.chr_patch_hapl_scaff.gtf 
##   AH53539 | Homo_sapiens.GRCh38.88.gtf                      
##   AH54337 | Homo_sapiens.GRCh38.cdna.all.2bit               
##   AH54338 | Homo_sapiens.GRCh38.dna.primary_assembly.2bit   
##   AH54339 | Homo_sapiens.GRCh38.dna_rm.primary_assembly.2bit
##   AH54340 | Homo_sapiens.GRCh38.dna_sm.primary_assembly.2bit
##   AH54341 | Homo_sapiens.GRCh38.ncrna.2bit
library(ExperimentHub)
ExperimentHub()
## updating metadata:
## retrieving 1 resource
## snapshotDate(): 2016-10-01
## ExperimentHub with 201 records
## # snapshotDate(): 2016-10-01 
## # $dataprovider: Department of Psychology, Abdul Haq Campus, Federal Urdu...
## # $species: Homo Sapiens, Homo sapiens, Mus musculus
## # $rdataclass: ExpressionSet, CellMapperList, GAlignmentPairs, Summarized...
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["EH1"]]' 
## 
##           title                                                           
##   EH1   | RNA-Sequencing and clinical data for 7706 tumor samples from ...
##   EH164 | RNA-Sequencing and clinical data for 9246 tumor samples from ...
##   EH165 | RNA-Sequencing and clinical data for 741 normal samples from ...
##   EH166 | ERR188297                                                       
##   EH167 | ERR188088                                                       
##   ...     ...                                                             
##   EH359 | ZellerG_2014.marker_abundance.stool                             
##   EH360 | ZellerG_2014.marker_presence.stool                              
##   EH361 | ZellerG_2014.metaphlan_bugs_list.stool                          
##   EH362 | ZellerG_2014.pathabundance_relab.stool                          
##   EH363 | ZellerG_2014.pathcoverage.stool
query(ExperimentHub(), "TCGA")
## snapshotDate(): 2016-10-01
## ExperimentHub with 3 records
## # snapshotDate(): 2016-10-01 
## # $dataprovider: GEO
## # $species: Homo sapiens
## # $rdataclass: SummarizedExperiment, ExpressionSet
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["EH1"]]' 
## 
##           title                                                           
##   EH1   | RNA-Sequencing and clinical data for 7706 tumor samples from ...
##   EH164 | RNA-Sequencing and clinical data for 9246 tumor samples from ...
##   EH165 | RNA-Sequencing and clinical data for 741 normal samples from ...

2 End matter

2.1 Session Info

sessionInfo()
## R version 3.4.0 (2017-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 14.04.5 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=de_DE.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=de_DE.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=de_DE.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] SummarizedExperiment_1.6.3             
##  [2] DelayedArray_0.2.7                     
##  [3] matrixStats_0.52.2                     
##  [4] ExperimentHub_1.2.0                    
##  [5] AnnotationHub_2.8.1                    
##  [6] KEGGREST_1.16.0                        
##  [7] biomaRt_2.32.0                         
##  [8] TxDb.Hsapiens.UCSC.hg38.knownGene_3.4.0
##  [9] GenomicFeatures_1.28.2                 
## [10] GenomicRanges_1.28.3                   
## [11] GenomeInfoDb_1.12.1                    
## [12] org.Hs.eg.db_3.4.1                     
## [13] AnnotationDbi_1.38.1                   
## [14] IRanges_2.10.2                         
## [15] S4Vectors_0.14.3                       
## [16] Biobase_2.36.2                         
## [17] BiocGenerics_0.22.0                    
## [18] BiocStyle_2.4.0                        
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-35               htmltools_0.3.6              
##  [3] rtracklayer_1.36.3            yaml_2.1.14                  
##  [5] interactiveDisplayBase_1.14.0 XML_3.98-1.7                 
##  [7] DBI_0.6-1                     BiocParallel_1.10.1          
##  [9] GenomeInfoDbData_0.99.0       stringr_1.2.0                
## [11] zlibbioc_1.22.0               Biostrings_2.44.1            
## [13] codetools_0.2-15              memoise_1.1.0                
## [15] evaluate_0.10                 knitr_1.16                   
## [17] httpuv_1.3.3                  BiocInstaller_1.26.0         
## [19] curl_2.6                      Rcpp_0.12.11                 
## [21] xtable_1.8-2                  backports_1.1.0              
## [23] XVector_0.16.0                mime_0.5                     
## [25] Rsamtools_1.28.0              png_0.1-7                    
## [27] digest_0.6.12                 stringi_1.1.5                
## [29] bookdown_0.4                  shiny_1.0.3                  
## [31] rprojroot_1.2                 grid_3.4.0                   
## [33] tools_3.4.0                   bitops_1.0-6                 
## [35] magrittr_1.5                  RCurl_1.95-4.8               
## [37] RSQLite_1.1-2                 Matrix_1.2-10                
## [39] rmarkdown_1.5                 httr_1.2.1                   
## [41] R6_2.2.1                      GenomicAlignments_1.12.1     
## [43] compiler_3.4.0

2.2 Acknowledgements

Research reported in this tutorial was supported by the National Human Genome Research Institute and the National Cancer Institute of the National Institutes of Health under award numbers U41HG004059 and U24CA180996.

This project has received funding from the European Research Council (ERC) under the European Union’s Horizon 2020 research and innovation programme (grant agreement number 633974).