Authors: Koki Tsuyuzaki [aut, cre], Manabu Ishii [aut], Itoshi Nikaido [aut]
Last modified: 2023-10-17 10:35:21.778404
Compiled: Tue Oct 17 10:35:32 2023

1 Installation

To install this package, start R (>= 4.1.0) and enter:

if(!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("AHPubMedDbs")

2 Fetch PubMed tibble datasets from AnnotationHub

The AHPubMedDbs package provides the metadata for all PubMed datasets, which are preprocessed into tibble format and stored in AnnotationHub. First, we load/update the AnnotationHub resource.

library(AnnotationHub)
ah <- AnnotationHub()

Next, we list all PubMed entries in AnnotationHub.

query(ah, "PubMed")
## AnnotationHub with 126 records
## # snapshotDate(): 2023-10-05
## # $dataprovider: NCBI
## # $species: NA
## # $rdataclass: data.table, Tibble, SQLiteFile
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
## #   rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH91771"]]' 
## 
##              title                                   
##   AH91771  | SQLite for PubMed ID                    
##   AH91772  | SQLite for PubMed Abstract              
##   AH91773  | SQLite for PubMed Author Information    
##   AH91774  | SQLite for PMC                          
##   AH91775  | SQLite for MeSH (Descriptor)            
##   ...        ...                                     
##   AH114069 | Data.table for PubMed Author Information
##   AH114070 | Data.table for PMC                      
##   AH114071 | Data.table for MeSH (Descriptor)        
##   AH114072 | Data.table for MeSH (Qualifier)         
##   AH114073 | Data.table for MeSH (SCR)

We can inspect the metadata of these AnnotationHub records, which are stored in the Bioconductor S3 bucket, with mcols().

mcols(query(ah, "PubMed"))
## DataFrame with 126 rows and 15 columns
##                           title dataprovider     species taxonomyid      genome
##                     <character>  <character> <character>  <integer> <character>
## AH91771    SQLite for PubMed ID         NCBI          NA         NA          NA
## AH91772  SQLite for PubMed Ab..         NCBI          NA         NA          NA
## AH91773  SQLite for PubMed Au..         NCBI          NA         NA          NA
## AH91774          SQLite for PMC         NCBI          NA         NA          NA
## AH91775  SQLite for MeSH (Des..         NCBI          NA         NA          NA
## ...                         ...          ...         ...        ...         ...
## AH114069 Data.table for PubMe..         NCBI          NA         NA          NA
## AH114070     Data.table for PMC         NCBI          NA         NA          NA
## AH114071 Data.table for MeSH ..         NCBI          NA         NA          NA
## AH114072 Data.table for MeSH ..         NCBI          NA         NA          NA
## AH114073 Data.table for MeSH ..         NCBI          NA         NA          NA
##                     description coordinate_1_based             maintainer
##                     <character>          <integer>            <character>
## AH91771                    PMID                  1 Koki Tsuyuzaki <k.t...
## AH91772  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91773  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91774  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH91775  Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## ...                         ...                ...                    ...
## AH114069 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH114070 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH114071 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH114072 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
## AH114073 Correspondence table..                  1 Koki Tsuyuzaki <k.t...
##          rdatadateadded preparerclass                           tags
##             <character>   <character>                         <AsIs>
## AH91771      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91772      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91773      2021-04-19   AHPubMedDbs         NCBI,PubMed,SQLite,...
## AH91774      2021-04-19   AHPubMedDbs            NCBI,PMC,SQLite,...
## AH91775      2021-04-19   AHPubMedDbs       Descriptor,MeSH,NCBI,...
## ...                 ...           ...                            ...
## AH114069     2023-09-27   AHPubMedDbs     data.table,NCBI,PubMed,...
## AH114070     2023-09-27   AHPubMedDbs        data.table,NCBI,PMC,...
## AH114071     2023-09-27   AHPubMedDbs data.table,Descriptor,MeSH,...
## AH114072     2023-09-27   AHPubMedDbs       data.table,MeSH,NCBI,...
## AH114073     2023-09-27   AHPubMedDbs       data.table,MeSH,NCBI,...
##           rdataclass              rdatapath              sourceurl  sourcetype
##          <character>            <character>            <character> <character>
## AH91771   SQLiteFile AHPubMedDbs/v001/pub.. https://github.com/r..         XML
## AH91772   SQLiteFile AHPubMedDbs/v001/abs.. https://github.com/r..         XML
## AH91773   SQLiteFile AHPubMedDbs/v001/aut.. https://github.com/r..         XML
## AH91774   SQLiteFile AHPubMedDbs/v001/pmc.. https://github.com/r..         XML
## AH91775   SQLiteFile AHPubMedDbs/v001/des.. https://github.com/r..         XML
## ...              ...                    ...                    ...         ...
## AH114069  data.table AHPubMedDbs/v006/aut.. https://github.com/r..         XML
## AH114070  data.table AHPubMedDbs/v006/pmc.. https://github.com/r..         XML
## AH114071  data.table AHPubMedDbs/v006/des.. https://github.com/r..         XML
## AH114072  data.table AHPubMedDbs/v006/qua.. https://github.com/r..         XML
## AH114073  data.table AHPubMedDbs/v006/scr.. https://github.com/r..         XML

We can retrieve only the PubMedDb tibble files as follows. Uncomment the second line to fetch the first matching record with `[[`.

qr <- query(ah, c("PubMedDb"))
# pubmed_tibble <- qr[[1]]

Session information

## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.3 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.18-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] AnnotationHub_3.9.2 BiocFileCache_2.9.1 dbplyr_2.3.4       
## [4] BiocGenerics_0.47.0 BiocStyle_2.29.2   
## 
## loaded via a namespace (and not attached):
##  [1] KEGGREST_1.41.4               xfun_0.40                    
##  [3] bslib_0.5.1                   Biobase_2.61.0               
##  [5] bitops_1.0-7                  vctrs_0.6.4                  
##  [7] tools_4.3.1                   generics_0.1.3               
##  [9] stats4_4.3.1                  curl_5.1.0                   
## [11] tibble_3.2.1                  fansi_1.0.5                  
## [13] AnnotationDbi_1.63.2          RSQLite_2.3.1                
## [15] blob_1.2.4                    pkgconfig_2.0.3              
## [17] S4Vectors_0.39.3              GenomeInfoDbData_1.2.10      
## [19] lifecycle_1.0.3               compiler_4.3.1               
## [21] Biostrings_2.69.2             GenomeInfoDb_1.37.6          
## [23] httpuv_1.6.11                 htmltools_0.5.6.1            
## [25] sass_0.4.7                    RCurl_1.98-1.12              
## [27] yaml_2.3.7                    interactiveDisplayBase_1.39.0
## [29] pillar_1.9.0                  later_1.3.1                  
## [31] crayon_1.5.2                  jquerylib_0.1.4              
## [33] ellipsis_0.3.2                cachem_1.0.8                 
## [35] mime_0.12                     tidyselect_1.2.0             
## [37] digest_0.6.33                 purrr_1.0.2                  
## [39] dplyr_1.1.3                   bookdown_0.36                
## [41] BiocVersion_3.18.0            fastmap_1.1.1                
## [43] cli_3.6.1                     magrittr_2.0.3               
## [45] utf8_1.2.3                    withr_2.5.1                  
## [47] filelock_1.0.2                promises_1.2.1               
## [49] rappdirs_0.3.3                bit64_4.0.5                  
## [51] rmarkdown_2.25                XVector_0.41.1               
## [53] httr_1.4.7                    bit_4.0.5                    
## [55] png_0.1-8                     memoise_2.0.1                
## [57] shiny_1.7.5.1                 evaluate_0.22                
## [59] knitr_1.44                    IRanges_2.35.3               
## [61] rlang_1.1.1                   Rcpp_1.0.11                  
## [63] xtable_1.8-4                  glue_1.6.2                   
## [65] DBI_1.1.3                     BiocManager_1.30.22          
## [67] jsonlite_1.8.7                R6_2.5.1                     
## [69] zlibbioc_1.47.0