## ----echo=FALSE--------------------------------------------------------------- source(system.file('vignettes_inc.R', package='biodb')) ## ----------------------------------------------------------------------------- biodb::genNewExtPkg(path='biodbChebiEx', dbName='chebi.ex', connType='compound', dbTitle='ChEBI connector example', entryType='xml', remote=TRUE) ## ----------------------------------------------------------------------------- list.files('biodbChebiEx', all.files=TRUE, recursive=TRUE) ## ----eval=FALSE, highlight=FALSE, code=readLines('biodbChebiEx/inst/definitions.yml')---- # # biodb example definitions file for extensions packages, version 1.0.0 # # databases: # chebi.ex: # name: ChEBI connector example # description: Write here the description of this database. # compound.db: true # entry.content.type: xml # parsing.expr: # accession: substring-after(//dbns:return/dbns:accessionId,'ACCESSION:') # name: # - //dbns:name # - //dbns:synonyms/dbns:data # mass: //dbns:mass # monoisotopic.mass: //dbns:monoisotopicMass # smiles: //dbns:return/dbns:smiles # inchi: //dbns:return/dbns:inchi # inchikey: //dbns:return/dbns:inchiKey # formula: # - //dbns:Formulae/dbns:source[text()='MyDatabase']/../dbns:data # - (//dbns:Formulae/dbns:data)[1] # xml.ns: # dbns: https://my.database.org/webservices/v1 # xsd: http://www.w3.org/2001/XMLSchema # searchable.fields: # - name # - monoisotopic.mass # - molecular.mass # - average.mass # - nominal.mass # remote: true # # Length in seconds of the connection sliding window # scheduler.t: 1 # # Number of connections allowed inside the connection sliding window # scheduler.n: 3 # urls: # # Base URL of the database server, where to find entry pages # base.url: https://my.database.org/mydb/ # # Webservice URL to use to contact web services # ws.url: https://my.database.org/webservices/mydb/3.2/ # # Add any other URL you need for the development of your connector # # Inside your code, you can get each of these URLs with a call like the following one: # # .self$getPropValSlot('urls', 'ws.url') # # fields: # chebi.ex.id: # description: ChEBI connector example ID # case.insensitive: true # forbids.duplicates: true # type: id # card: many ## ----------------------------------------------------------------------------- defFile <- system.file("extdata", "chebi_ex.yml", package='biodb') ## ----eval=FALSE, highlight=FALSE, code=readLines(system.file("extdata", "chebi_ex.yml", package='biodb'))---- # databases: # # chebi.ex: # name: ChEBI example connector # description: An example connector for ChEBI. # compound.db: true # entry.content.encoding: UTF-8 # entry.content.type: xml # parsing.expr: # accession: substring-after(//chebi:return/chebi:chebiId,'CHEBI:') # formula: # - //chebi:Formulae/chebi:source[text()='ChEBI']/../chebi:data # - (//chebi:Formulae/chebi:data)[1] # inchi: //chebi:return/chebi:inchi # inchikey: //chebi:return/chebi:inchiKey # mass: //chebi:mass # monoisotopic.mass: //chebi:monoisotopicMass # name: # - //chebi:chebiAsciiName # smiles: //chebi:return/chebi:smiles # searchable.fields: # - name # - monoisotopic.mass # - molecular.mass # remote: true # scheduler.t: 1 # scheduler.n: 3 # urls: # base.url: https://www.ebi.ac.uk/chebi/ # ws.url: https://www.ebi.ac.uk/webservices/chebi/2.0/ # xml.ns: # chebi: https://www.ebi.ac.uk/webservices/chebi # xsd: http://www.w3.org/2001/XMLSchema # # fields: # # chebi.ex.id: # description: ChEBI ID # type: id # card: many # forbids.duplicates: true # case.insensitive: true ## ----eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExEntry.R')---- # #' ChEBI connector example entry class. # #' # #' Entry class for ChEBI connector example. # #' # #' @seealso # #' \code{\link{BiodbXmlEntry}}. # #' # #' @examples # #' # Create an instance with default settings: # #' mybiodb <- biodb::newInst() # #' # #' # Get a connector that inherits from ChebiExConn: # #' conn <- mybiodb$getFactory()$createConn('chebi.ex') # #' # #' # Get the first entry # #' e <- conn$getEntry(conn$getEntryIds(1L)) # #' # #' # Terminate instance. # #' mybiodb$terminate() # #' # #' @import biodb # #' @import R6 # #' @export # ChebiExEntry <- R6::R6Class("ChebiExEntry", # inherit= # biodb::BiodbXmlEntry # , # # public=list( # # initialize=function(...) { # super$initialize(...) # } # # ,doCheckContent=function(content) { # # # You can do some more checks of the content here. # # return(TRUE) # } # # ,doParseFieldsStep2=function(parsed.content) { # # # TODO Implement your custom parsing processing here. # } # )) ## ----echo=FALSE, results='asis'----------------------------------------------- make_vignette_ref('details') ## ----eval=FALSE, highlight=TRUE, code=readLines('biodbChebiEx/R/ChebiExConn.R')---- # #' ChEBI connector example connector class. # #' # #' Connector class for ChEBI connector example. # #' # #' @seealso \code{\link{BiodbConn}}. # #' # #' @examples # #' # Create an instance with default settings: # #' mybiodb <- biodb::newInst() # #' # #' # Get a connector: # #' conn <- mybiodb$getFactory()$createConn('chebi.ex') # #' # #' # Get the first entry # #' e <- conn$getEntry(conn$getEntryIds(1L)) # #' # #' # Terminate instance. # #' mybiodb$terminate() # #' # #' @import biodb # #' @import R6 # #' @export # ChebiExConn <- R6::R6Class("ChebiExConn", # inherit=biodb::BiodbConn, # # public=list( # # initialize=function(...) { # super$initialize(...) # } # # ,wsFind=function(name="", retfmt=c('plain', 'parsed', 'ids', 'request')) { # # This is the implementation of a fictive web service called "find" that # # search for entries by name. # # Use it as an example for implementing your own web services. # # retfmt <- match.arg(retfmt) # # # Build request # params <- list(name=name) # url <- BiodbUrl$new(url=c(self$getPropValSlot('urls', 'ws.url'), 'find'), # params=params) # request <- self$makeRequest(method='get', url=url) # # # Return request # if (retfmt == 'request') # return(request) # # # Send request # # This the line that should be run for sending the request and getting the # # results: # #results <- self$getBiodb()$getRequestScheduler()$sendRequest(request) # # Instead, for this example, we just generate the results of this fictive # # web service: # results <- paste('{"0001": {"name": "name1"},', # ' "0198": {"name": "name2"},', # ' "9834": {"name": "name3"}}') # # # Parse # if (retfmt != 'plain') { # # # Parse JSON # results <- jsonlite::fromJSON(results, simplifyDataFrame=FALSE) # # # Get IDs # if (retfmt == 'ids') # results <- names(results) # } # # return(results) # } # ), # # private=list( # # doGetNbEntries=function(count=FALSE) { # # # Replace the call below if you have a direct way (specific web service for # # a remote database, provided method or information for a local database) # # to count entries for your database. # return(callSuper(count=count)) # } # # # ,doGetEntryIds=function(max.results=NA_integer_) { # # Overrides super class' method. # # ids <- NA_character_ # # # TODO Implement retrieval of accession numbers. # # return(ids) # } # # ,doSearchForEntries=function(fields=NULL, max.results=NA_integer_) { # # Overrides super class' method. # # ids <- character() # # # TODO Implement search of entries by filtering on values of fields. # # return(ids) # } # # ,doGetEntryContentRequest=function(id, concatenate=TRUE) { # # # TODO Modify the code below to build the URLs to get the contents of the # # entries. # # Depending on the database, you may have to build one URL for each # # individual entry or may be able to write just one or a few URL for all # # entries to retrieve. # u <- c(self$getPropValSlot('urls', 'base.url'), 'entries', # paste(id, 'xml', sep='.')) # url <- BiodbUrl$new(url=u)$toString() # # return(url) # } # # ,doGetEntryPageUrl=function(id) { # # # TODO Modify this code to build the individual URLs to the entry web pages # fct <- function(x) { # u <- c(self$getPropValSlot('urls', 'base.url'), 'entries', x) # BiodbUrl$new(url=u)$toString() # } # # return(vapply(id, fct, FUN.VALUE='')) # } # # ,doGetEntryImageUrl=function(id) { # # # TODO Modify this code to build the individual URLs to the entry images # fct <- function(x) { # u <- c(self$getPropValSlot('urls', 'base.url'), 'images', x, # 'image.png') # BiodbUrl$new(url=u)$toString() # } # # return(vapply(id, fct, FUN.VALUE='')) # } # )) ## ----eval=FALSE--------------------------------------------------------------- # wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL', # max.results=10, # retfmt=c('plain', 'parsed', 'request', 'ids')) { # } ## ----echo=FALSE, results='hide'----------------------------------------------- connClass <- system.file("extdata", "ChebiExConn.R", package='biodb') entryClass <- system.file("extdata", "ChebiExEntry.R", package='biodb') source(connClass) source(entryClass) ## ----code=readLines(connClass)------------------------------------------------ ChebiExConn <- R6::R6Class("ChebiExConn", inherit=biodb::BiodbConn, public=list( initialize=function(...) { super$initialize(...) }, wsGetLiteEntity=function(search=NULL, search.category='ALL', stars='ALL', max.results=10, retfmt=c('plain', 'parsed', 'request', 'ids')) { # Check parameters chk::chk_string(search) chk::chk_in(search.category, self$getSearchCategories()) chk::chk_number(max.results) chk::chk_gte(max.results, 0) chk::chk_in(stars, self$getStarsCategories()) retfmt <- match.arg(retfmt) # Build request params <- c(search=search, searchCategory=search.category, maximumResults=max.results, starsCategory=stars) url <- c(self$getPropValSlot('urls', 'ws.url'), 'test/getLiteEntity') request <- self$makeRequest(method='get', url=BiodbUrl$new(url=url, params=params), encoding='UTF-8') if (retfmt == 'request') return(request) # Send request results <- self$getBiodb()$getRequestScheduler()$sendRequest(request) # Parse if (retfmt != 'plain') { # Parse XML results <- XML::xmlInternalTreeParse(results, asText=TRUE) if (retfmt == 'ids') { ns <- self$getPropertyValue('xml.ns') results <- XML::xpathSApply(results, "//chebi:chebiId", XML::xmlValue, namespaces=ns) results <- sub('CHEBI:', '', results) if (length(grep("^[0-9]+$", results)) != length(results)) self$error("Impossible to parse XML to get entry IDs.") } } return(results) } ), private=list( doSearchForEntries=function(fields=NULL, max.results=0) { ids <- character() if ( ! is.null(fields)) { # Search by name if ('name' %in% names(fields)) ids <- self$wsGetLiteEntity(search=fields$name, search.category="ALL NAMES", max.results=0, retfmt='ids') } # Cut if (max.results > 0 && max.results < length(ids)) ids <- ids[seq_len(max.results)] return(ids) }, doGetEntryContentRequest=function(id, concatenate=TRUE) { url <- c(self$getPropValSlot('urls', 'ws.url'), 'test', 'getCompleteEntity') urls <- vapply(id, function(x) BiodbUrl$new(url=url, params=list(chebiId=x))$toString(), FUN.VALUE='') return(urls) }, doGetEntryIds=function(max.results=NA_integer_) { return(NULL) }, doGetEntryPageUrl=function(id) { # Overrides super class' method url <- c(self$getPropValSlot('urls', 'base.url'), 'searchId.do') fct <- function(x) { BiodbUrl$new(url=url, params=list(chebiId=x))$toString() } urls <- vapply(id, fct, FUN.VALUE='') return(urls) }, doGetEntryImageUrl=function(id) { # Overrides super class' method url <- c(self$getPropValSlot('urls', 'base.url'), 'displayImage.do') fct <- function(x) { BiodbUrl$new(url=url, params=list(defaultImage='true', imageIndex=0, chebiId=x, dimensions=400))$toString() } urls <- vapply(id, fct, FUN.VALUE='') return(urls) } )) ## ----code=readLines(entryClass)----------------------------------------------- ChebiExEntry <- R6::R6Class("ChebiExEntry", inherit=BiodbXmlEntry, public=list( initialize=function(...) { super$initialize(...) } ), private=list( doCheck=function(content) { # You can do some more checks of the content here. return(TRUE) } ,doParseFieldsStep2=function(parsed.content) { # TODO Implement your custom parsing processing here. } )) ## ----------------------------------------------------------------------------- mybiodb <- biodb::newInst() ## ----------------------------------------------------------------------------- mybiodb$loadDefinitions(defFile) ## ----------------------------------------------------------------------------- conn <- mybiodb$getFactory()$createConn('chebi.ex') ## ----------------------------------------------------------------------------- entry <- conn$getEntry('17001') entry$getFieldsAsDataframe() ## ----Closing of the biodb instance-------------------------------------------- mybiodb$terminate() ## ----------------------------------------------------------------------------- MyEntryClass <- R6::R6Class("MyEntryClass", inherit=biodb::BiodbCsvEntry, public=list( initialize=function() { super$initialize(sep=';', na.strings=c('', 'NA')) } )) ## ----eval=FALSE--------------------------------------------------------------- # doParseContent=function(content) { # # # Get lines of content # lines <- strsplit(content, "\r?\n")[[1]] # # return(lines) # }, # # doParseFieldsStep1=function(parsed.content) { # # # Get parsing expressions # parsing.expr <- .self$getParent()$getPropertyValue('parsing.expr') # # .self$.assertNotNull(parsed.content) # .self$.assertNotNa(parsed.content) # .self$.assertNotNull(parsing.expr) # .self$.assertNotNa(parsing.expr) # .self$.assertNotNull(names(parsing.expr)) # # # Loop on all parsing expressions # for (field in names(parsing.expr)) { # # # Match whole content # g <- stringr::str_match(parsed.content, parsing.expr[[field]]) # # # Get positive results # results <- g[ ! is.na(g[, 1]), , drop=FALSE] # # # Any match ? # if (nrow(results) > 0) # .self$setFieldValue(field, results[, 2]) # } # } ## ----eval=FALSE--------------------------------------------------------------- # doParseFieldsStep2=function(parsed.content) { # # # Remove fields with empty string # for (f in .self$getFieldNames()) { # v <- .self$getFieldValue(f) # if (is.character(v) && ! is.na(v) && v == '') # .self$removeField(f) # } # # # Correct InChIKey # if (.self$hasField('INCHIKEY')) { # v <- sub('^InChIKey=', '', .self$getFieldValue('INCHIKEY'), perl=TRUE) # .self$setFieldValue('INCHIKEY', v) # } # # # Synonyms # synonyms <- XML::xpathSApply(parsed.content, "//synonym", XML::xmlValue) # if (length(synonyms) > 0) # .self$appendFieldValue('name', synonyms) # } ## ----------------------------------------------------------------------------- sessionInfo()