From edeb0ac3b3b5445a9bc33ed62378c39836930d7b Mon Sep 17 00:00:00 2001 From: Aariq Date: Sun, 7 Jun 2020 15:07:03 -0400 Subject: [PATCH 01/43] added from arguments to function with match.arg() --- R/pubchem.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/pubchem.R b/R/pubchem.R index 7a854dd4..7146065d 100644 --- a/R/pubchem.R +++ b/R/pubchem.R @@ -413,11 +413,12 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) { #' pc_synonyms(5564, from = "cid") #' pc_synonyms(c("Aspirin", "Triclosan"), choices = 10) #' } -pc_synonyms <- function(query, from = "name", choices = NULL, verbose = TRUE, +pc_synonyms <- function(query, from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), choices = NULL, verbose = TRUE, arg = NULL, interactive = 0, ...) { # from can be cid | name | smiles | inchi | sdf | inchikey | formula # query <- c("Aspirin") # from = "name" + from <- match.arg(from) if (!missing("interactive")) stop("'interactive' is deprecated. Use 'choices' instead.") foo <- function(query, from, verbose, ...) { From 7a7d7cb603b1f4e53a522ea292eb49602ff38a23 Mon Sep 17 00:00:00 2001 From: Aariq Date: Mon, 8 Jun 2020 20:28:44 -0400 Subject: [PATCH 02/43] changed "type" argument to "from" for consistency --- R/alanwood.R | 31 ++++++++++++++++++------------- R/chemid.R | 25 +++++++++++++++---------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/R/alanwood.R b/R/alanwood.R index 48cf21a4..d0fa33ea 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -6,14 +6,14 @@ #' @importFrom stats rgamma #' #' @param query character; search string -#' @param type character; type of input ('cas' or 'commonname') +#' @param from character; type of input ('cas' or 'commonname') #' @param verbose logical; print message during processing to console? #' @param force_build logical; force building a new index? See #' \code{\link{build_aw_idx}} for more details. #' @return A list of eight entries: common-name, status, preferred IUPAC Name, #' IUPAC Name, cas, formula, activity, subactivity, inchikey, inchi and source #' url. -#' @note for type = 'cas' only the first matched link is returned. +#' @note for from = 'cas' only the first matched link is returned. #' Please respect Copyright, Terms and Conditions #' \url{http://www.alanwood.net/pesticides/legal.html}! #' @references Eduard Szöcs, Tamás Stirling, Eric R. Scott, Andreas Scharmüller, @@ -24,30 +24,35 @@ #' @export #' @examples #' \dontrun{ -#' aw_query('Fluazinam', type = 'commonname') -#' out <- aw_query(c('Fluazinam', 'Diclofop'), type = 'com') +#' aw_query('Fluazinam', from = 'commonname') +#' out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'com') #' out #' # extract subactivity from object #' sapply(out, function(y) y$subactivity[1]) #' #' # use CAS-numbers -#' aw_query("79622-59-6", type = 'cas') +#' aw_query("79622-59-6", from = 'cas') #' } #' @seealso \code{\link{build_aw_idx}} -aw_query <- function(query, type = c("commonname", "cas"), verbose = TRUE, - force_build = FALSE) { - aw_idx <- build_aw_idx(verbose = FALSE, force_build) - foo <- function(query, type = c("commonname", "cas"), verbose) { + +aw_query <- function(query, from = c("commonname", "cas"), verbose = TRUE, + force_build = FALSE, type) { + if(!missing(type)) { + warning('"type" is deprecated. Please use "from" instead. ') + from <- type + } + aw_idx <- build_aw_idx(verbose, force_build) + foo <- function(query, from = c("commonname", "cas"), verbose) { on.exit(suppressWarnings(closeAllConnections())) - type <- match.arg(type) + from <- match.arg(from) # search links in indexes - if (type == "commonname") { + if (from == "commonname") { links <- aw_idx$links[aw_idx$source == "cn"] names <- aw_idx$linknames[aw_idx$source == "cn"] cname <- query } - if (type == "cas") { + if (from == "cas") { names <- aw_idx$names[aw_idx$source == "rn"] # select only first link links <- aw_idx$links[aw_idx$source == "rn"] @@ -131,7 +136,7 @@ aw_query <- function(query, type = c("commonname", "cas"), verbose = TRUE, source_url = source_url) return(out) } - out <- lapply(query, function(x) foo(x, type = type, verbose = verbose)) + out <- lapply(query, function(x) foo(x, from = from, verbose = verbose)) out <- setNames(out, query) class(out) <- c("aw_query", "list") return(out) diff --git a/R/chemid.R b/R/chemid.R index 0168471c..ad51f6ce 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -9,7 +9,7 @@ #' @importFrom utils URLencode URLdecode #' #' @param query character; query string -#' @param type character; type of query string. \code{"rn"} for registry number +#' @param from character; type of query string. \code{"rn"} for registry number #' or \code{"name"} for common name or \code{"inchikey"} for inchikey as input. #' @param match character; How should multiple hits be handeled? \code{"first"} #' returns only the first match, \code{"best"} the best matching (by name) ID, @@ -27,17 +27,17 @@ #' \dontrun{ #' # might fail if API is not available #' # query common name -#' y1 <- ci_query(c('Formaldehyde', 'Triclosan'), type = 'name') +#' y1 <- ci_query(c('Formaldehyde', 'Triclosan'), from = 'name') #' names(y1) #' str(y1[['Triclosan']]) # lots of information inside #' y1[['Triclosan']]$inchikey #' #' # Query by CAS -#' y2 <- ci_query('50-00-0', type = 'rn', match = 'first') +#' y2 <- ci_query('50-00-0', from = 'rn', match = 'first') #' y2[['50-00-0']]$inchikey #' #' # query by inchikey -#' y3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', type = 'inchikey') +#' y3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', from = 'inchikey') #' y3[[1]]$name #' #' # extract lop-P @@ -47,12 +47,17 @@ #' y$physprop$Value[y$physprop$`Physical Property` == 'log P (octanol-water)'] #' }) #' } -ci_query <- function(query, type = c('name', 'rn', 'inchikey'), +ci_query <- function(query, from = c('name', 'rn', 'inchikey'), match = c('best', 'first', 'ask', 'na'), - verbose = TRUE){ - type <- match.arg(type) + verbose = TRUE, type){ + if(!missing(type)) { + warning('"type" is deprecated. Please use "from" instead. ') + from <- type + } + + from <- match.arg(from) match <- match.arg(match) - foo <- function(query, type, match, verbose){ + foo <- function(query, from, match, verbose){ on.exit(suppressWarnings(closeAllConnections())) if (is.na(query)) { message('query is NA! Returning NA.\n') @@ -60,7 +65,7 @@ ci_query <- function(query, type = c('name', 'rn', 'inchikey'), } query <- URLencode(query) baseurl <- switch( - type, + from, rn = 'https://chem.nlm.nih.gov/chemidplus/rn/startswith/', name = "https://chem.nlm.nih.gov/chemidplus/name/startswith/", inchikey = "https://chem.nlm.nih.gov/chemidplus/inchikey/startswith/") @@ -217,7 +222,7 @@ ci_query <- function(query, type = c('name', 'rn', 'inchikey'), class(out) <- 'chemid' return(out) } - out <- lapply(query, foo, type = type, match = match, verbose = verbose) + out <- lapply(query, foo, from = from, match = match, verbose = verbose) out <- setNames(out, query) class(out) <- c('ci_query', 'list') return(out) From 8a853c493b87782f9c1d5f6c40f14e8639fdbd6d Mon Sep 17 00:00:00 2001 From: Aariq Date: Mon, 8 Jun 2020 21:35:55 -0400 Subject: [PATCH 03/43] added "query" and "from" arguments --- R/cts.R | 21 +++++++++++++-------- R/flavornet.R | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/R/cts.R b/R/cts.R index 553edce3..7ba2f4d8 100644 --- a/R/cts.R +++ b/R/cts.R @@ -4,7 +4,8 @@ #' @import jsonlite #' @importFrom stats rgamma #' @importFrom stats setNames -#' @param inchikey character; InChIkey. +#' @param query character; InChIkey. +#' @param from character; currently only accepts "inchikey". #' @param verbose logical; should a verbose output be printed on the console? #' @return a list of lists (for each supplied inchikey): #' a list of 7. inchikey, inchicode, molweight, exactmass, formula, synonyms and externalIds @@ -29,14 +30,18 @@ #' # extract molecular weight #' sapply(out2, function(y) y$molweight) #' } -cts_compinfo <- function(inchikey, verbose = TRUE){ - # inchikey <- 'XEFQLINVKFYRCS-UHFFFAOYSA-N' - foo <- function(inchikey, verbose) { - if (!is.inchikey(inchikey)) { +cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){ + if (!missing(inchikey)) { + warning('"inchikey" is deprecated. Please use "query" instead.') + query <- inchikey + } + match.arg(from) + foo <- function(query, verbose) { + if (!is.inchikey(query)) { stop('Input is not a valid inchikey!') } baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/compound" - qurl <- paste0(baseurl, '/', inchikey) + qurl <- paste0(baseurl, '/', query) if (verbose) message(qurl) Sys.sleep( rgamma(1, shape = 15, scale = 1/10)) @@ -47,8 +52,8 @@ cts_compinfo <- function(inchikey, verbose = TRUE){ } return(out) } - out <- lapply(inchikey, foo, verbose = verbose) - out <- setNames(out, inchikey) + out <- lapply(query, foo, verbose = verbose) + out <- setNames(out, query) class(out) <- c('cts_compinfo','list') return(out) } diff --git a/R/flavornet.R b/R/flavornet.R index 24f037c2..ccdcb280 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -6,7 +6,8 @@ #' @import xml2 #' @importFrom stats rgamma #' -#' @param CAS character; CAS number to search by. See \code{\link{is.cas}} for correct formatting +#' @param query character; CAS number to search by. See \code{\link{is.cas}} for correct formatting +#' @param from character; currently only CAS numbers are accepted. #' @param verbose logical; should a verbose output be printed on the console? #' @param ... not currently used #' @@ -24,11 +25,16 @@ #' } #' @export -fn_percept <- function(CAS, verbose = TRUE, ...) +fn_percept <- function(query, from = "cas", verbose = TRUE, CAS, ...) { - foo <- function (CAS, verbose){ + if (!missing(CAS)) { + warning('"CAS" is now deprecated. Please use "query" instead. ') + query <- CAS + } + match.arg(from) + foo <- function (query, verbose){ on.exit(suppressWarnings(closeAllConnections())) - qurl <- paste0("http://www.flavornet.org/info/",CAS,".html") + qurl <- paste0("http://www.flavornet.org/info/",query,".html") if (verbose) message(qurl) Sys.sleep(rgamma(1, shape = 10, scale = 1/10)) @@ -42,7 +48,7 @@ fn_percept <- function(CAS, verbose = TRUE, ...) percept <- gsub(pattern, "", doc.text) return(percept) } - percepts <- sapply(CAS, foo, verbose = verbose) - percepts <- setNames(percepts, CAS) + percepts <- sapply(query, foo, verbose = verbose) + percepts <- setNames(percepts, query) return(percepts) } \ No newline at end of file From 94a4994ff1604f158cea02772f575ba7f477ec11 Mon Sep 17 00:00:00 2001 From: Aariq Date: Mon, 8 Jun 2020 22:14:12 -0400 Subject: [PATCH 04/43] Made possible "from" arguments more consistent --- R/alanwood.R | 21 ++++++++++++++------- R/chebi.R | 44 ++++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/R/alanwood.R b/R/alanwood.R index d0fa33ea..7715abf6 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -6,7 +6,7 @@ #' @importFrom stats rgamma #' #' @param query character; search string -#' @param from character; type of input ('cas' or 'commonname') +#' @param from character; type of input ('cas' or 'name') #' @param verbose logical; print message during processing to console? #' @param force_build logical; force building a new index? See #' \code{\link{build_aw_idx}} for more details. @@ -24,7 +24,7 @@ #' @export #' @examples #' \dontrun{ -#' aw_query('Fluazinam', from = 'commonname') +#' aw_query('Fluazinam', from = 'name') #' out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'com') #' out #' # extract subactivity from object @@ -35,18 +35,25 @@ #' } #' @seealso \code{\link{build_aw_idx}} -aw_query <- function(query, from = c("commonname", "cas"), verbose = TRUE, +aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, force_build = FALSE, type) { - if(!missing(type)) { + if (!missing(type)) { warning('"type" is deprecated. Please use "from" instead. ') from <- type } + + if ("commonname" %in% from) { + warning('To search by compound name use "name" instead of "commonname"') + from <- "name" + } + from <- match.arg(from) aw_idx <- build_aw_idx(verbose, force_build) - foo <- function(query, from = c("commonname", "cas"), verbose) { + + foo <- function(query, from, verbose) { on.exit(suppressWarnings(closeAllConnections())) - from <- match.arg(from) + # search links in indexes - if (from == "commonname") { + if (from == "name") { links <- aw_idx$links[aw_idx$source == "cn"] names <- aw_idx$linknames[aw_idx$source == "cn"] cname <- query diff --git a/R/chebi.R b/R/chebi.R index 13a8abb8..7395f3a4 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -9,10 +9,8 @@ #' @importFrom stats setNames #' #' @param query character; search term. -#' @param from character; type of input, can be one of 'ALL', 'CHEBI ID', -#' 'CHEBI NAME', 'DEFINITION', 'ALL NAMES', 'IUPAC NAME', 'CITATIONS', -#' 'REGISTRY NUMBERS', 'MANUAL XREFS', 'AUTOMATIC XREFS', 'FORMULA', 'MASS', -#' 'MONOISOTOPIC MASS', 'CHARGE', 'INCHI/INCHI KEY', 'SMILES', 'SPECIES'. +#' @param from character; type of input. \code{"all"} searches all types and +#' \code{"name"} searches all names. #' @param match character; How should multiple hits be handled?, #' \code{"all"} all matches are returned, #' \code{"best"} the best matching (by the ChEBI searchscore) is returned, @@ -20,13 +18,12 @@ #' \code{"na"} returns NA if multiple hits are found. #' @param max_res integer; maximum number of results to be retrieved from the #' web service -#' @param stars character; type of input can be one of 'ALL', 'TWO ONLY', -#' 'THREE ONLY'. +#' @param stars character; #' @param verbose logical; should a verbose output be printed on the console? -#' @param ... optional arguments +#' @param ... currently unused #' @return returns a list of data.frames containing a chebiid, a chebiasciiname, -#' a searchscore and stars if matches were found. -#' If not, data.frame(NA) is returned +#' a searchscore and stars if matches were found. If not, data.frame(NA) is +#' returned #' #' @references Hastings J, Owen G, Dekker A, Ennis M, Kale N, Muthukrishnan V, #' Turner S, Swainston N, Mendes P, Steinbeck C. (2016). ChEBI in 2016: @@ -67,24 +64,31 @@ #' #' } get_chebiid <- function(query, - from = 'ALL', + from = c('all', 'chebi id', 'chebi name', 'definition', 'name', + 'iupac name', 'citations', 'registry numbers', 'manual xrefs', + 'automatic xrefs', 'formula', 'mass', 'monoisotopic mass', + 'charge', 'inchi', 'inchikey', 'smiles', 'species'), match = c("all", "best", "ask", "na"), max_res = 200, - stars = 'ALL', + stars = c('all', 'two only', 'three only'), verbose = TRUE, ...) { match <- match.arg(match) - foo <- function(query, match, from, max_res, stars, verbose, ...) { + from <- casefold(match.arg(from), upper = TRUE) + if (from == "NAME") { + from <- "ALL NAMES" + } + if (from == "inchi" | from == "inchikey") { + from <- "INCHI/INCHI KEY" + } + + stars <- casefold(match.arg(stars), upper = TRUE) + + foo <- function(query, from, match, max_res, stars, verbose, ...) { if (is.na(query)) return(data.frame(chebiid = NA_character_, query = NA_character_, stringsAsFactors = FALSE)) - from_all <- c('ALL', 'CHEBI ID', 'CHEBI NAME', 'DEFINITION', 'ALL NAMES', - 'IUPAC NAME', 'CITATIONS', 'REGISTRY NUMBERS', 'MANUAL XREFS', - 'AUTOMATIC XREFS', 'FORMULA', 'MASS', 'MONOISOTOPIC MASS', - 'CHARGE', 'INCHI/INCHI KEY', 'SMILES', 'SPECIES') - from <- match.arg(from, from_all) - stars_all <- c('ALL', 'TWO ONLY', 'THREE ONLY') - stars <- match.arg(stars, stars_all) + # query url <- 'http://www.ebi.ac.uk:80/webservices/chebi/2.0/webservice' headers <- c(Accept = 'text/xml', @@ -153,8 +157,8 @@ get_chebiid <- function(query, } out <- lapply(query, foo, - match = match, from = from, + match = match, max_res = max_res, stars = stars, verbose = verbose) From eaffafc0250ad90896f9da9dda337c5695102aa0 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 13:49:36 -0400 Subject: [PATCH 05/43] added "cas" as a possible value for "from" and fixed #255 issue with _R_CHECK_LENGTH_1_CONDITION_ failing on CRAN --- R/chemid.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/chemid.R b/R/chemid.R index ad51f6ce..278a89e2 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -10,8 +10,12 @@ #' #' @param query character; query string #' @param from character; type of query string. \code{"rn"} for registry number -#' or \code{"name"} for common name or \code{"inchikey"} for inchikey as input. -#' @param match character; How should multiple hits be handeled? \code{"first"} +#' (see +#' \href{https://chem.nlm.nih.gov/chemidplus/jsp/chemidheavy/help.jsp#LiteSearchDataFields}{documentation} +#' for more details), \code{"name"} for common name, or \code{"inchikey"} for +#' inchikey as input. \code{"cas"} is a synonym for \code{"rn"} and provided +#' for consistency across functions. +#' @param match character; How should multiple hits be handled? \code{"first"} #' returns only the first match, \code{"best"} the best matching (by name) ID, #' \code{"ask"} enters an interactive mode and the user is asked for input, #' \code{"na"} returns NA if multiple hits are found. @@ -47,7 +51,7 @@ #' y$physprop$Value[y$physprop$`Physical Property` == 'log P (octanol-water)'] #' }) #' } -ci_query <- function(query, from = c('name', 'rn', 'inchikey'), +ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), match = c('best', 'first', 'ask', 'na'), verbose = TRUE, type){ if(!missing(type)) { @@ -82,7 +86,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey'), tit <- xml_text(xml_find_all(ttt, "//head/title")) no <- xml_text(xml_find_all(ttt, "//h3")) - if (length(no) != 0 && no == 'The following query produced no records:') { + if (length(no) != 0 && 'The following query produced no records:' %in% no) { message('Not found! Returning NA.\n') return(NA) } From af8a545ae376e521b51f071fbac83d9506a79554 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 13:58:35 -0400 Subject: [PATCH 06/43] update tests and news --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ tests/testthat/test-alanwood.R | 4 ++-- tests/testthat/test-chebi.R | 2 +- tests/testthat/test-chemid.R | 17 ++++++++--------- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 09912a81..8de8ac61 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,7 @@ Description: Chemical information from around the web. This package interacts Flavornet, NIST Chemistry WebBook, OPSIN, PAN Pesticide Database, PubChem, SRS, Wikidata. Type: Package -Version: 1.0.0 +Version: 1.0.0.9000 Date: 2020-05-27 License: MIT + file LICENSE URL: https://docs.ropensci.org/webchem, https://github.com/ropensci/webchem diff --git a/NEWS.md b/NEWS.md index 7537e6cd..97029504 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,13 @@ * Download images of substances from ChemSpider with cs_img() +## Minor Improvements + +* The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions +* `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions +* Possible values for `"from"` have been made more consistent across functions + + # webchem 1.0.0 ## NEW FEATURES diff --git a/tests/testthat/test-alanwood.R b/tests/testthat/test-alanwood.R index 6a746de8..b8a87d5d 100644 --- a/tests/testthat/test-alanwood.R +++ b/tests/testthat/test-alanwood.R @@ -4,7 +4,7 @@ test_that("examples in the article are unchanged", { skip_if_not(up, "Alanwood service is down") data("lc50", package = "webchem") - aw_data <- aw_query(lc50$cas[1:3], type = "cas") + aw_data <- aw_query(lc50$cas[1:3], from = "cas") igroup <- sapply(aw_data, function(y) y$subactivity[1]) expect_is(igroup, "character") @@ -19,7 +19,7 @@ test_that("alanwood, commonname", { skip_if_not(up, "Alanwood service is down") comps <- c("Fluazinam", "S-Metolachlor", "balloon", NA) - o1 <- aw_query(comps, type = "commonname") + o1 <- aw_query(comps, from = "name") expect_type(o1, "list") expect_equal(length(o1), 4) diff --git a/tests/testthat/test-chebi.R b/tests/testthat/test-chebi.R index b4a16ece..f5856c43 100644 --- a/tests/testthat/test-chebi.R +++ b/tests/testthat/test-chebi.R @@ -28,7 +28,7 @@ test_that("examples in the article are unchanged", { test_that("chebi returns correct results", { skip_on_cran() skip_if_not(up, "CHEBI service is down") - a <- get_chebiid("Glyphosate", from = "ALL") + a <- get_chebiid("Glyphosate", from = "all") b <- get_chebiid(c("triclosan", "glyphosate", "balloon", NA)) A <- chebi_comp_entity("CHEBI:27744") B <- chebi_comp_entity("27732") diff --git a/tests/testthat/test-chemid.R b/tests/testthat/test-chemid.R index cee6142f..fb06ce70 100644 --- a/tests/testthat/test-chemid.R +++ b/tests/testthat/test-chemid.R @@ -2,14 +2,13 @@ up <- ping_service("ci") test_that("chemid returns correct results", { skip_on_cran() skip_if_not(up, "CHEMID service is down") - skip("failing tests below") - o2 <- ci_query('50-00-0', type = 'rn') - o3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', type = 'inchikey') + o2 <- ci_query('50-00-0', from = 'rn') + o3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', from = 'inchikey') expect_type(o2, 'list') expect_type(o3, 'list') - o1 <- ci_query(c('xxxxx', NA, 'Aspirin', 'Triclosan'), type = 'name', match = 'best') + o1 <- ci_query(c('xxxxx', NA, 'Aspirin', 'Triclosan'), from = 'name', match = 'best') expect_is(o1, 'list') expect_true(length(o1) == 4) @@ -20,16 +19,16 @@ test_that("chemid returns correct results", { expect_length(o1[[3]], 9) expect_s3_class(o1[[3]]$physprop, "data.frame") - b1 <- ci_query('Tetracyclin', type = 'name') + b1 <- ci_query('Tetracyclin', from = 'name') expect_equal(b1[[1]]$name[1], "Tetracycline") - b2 <- ci_query('Edetic acid', type = 'name', match = 'best') + b2 <- ci_query('Edetic acid', from = 'name', match = 'best') expect_equal(b2[[1]]$name[1], "Edetic acid") expect_equal(attr(b2[[1]],'distance'), 0) # test multiple matches - m1 <- ci_query('Tetracyclin', type = 'name', match = 'first') - m2 <- ci_query('Tetracyclin', type = 'name', match = 'best') - m3 <- ci_query('Tetracyclin', type = 'name', match = 'na') + m1 <- ci_query('Tetracyclin', from = 'name', match = 'first') + m2 <- b1 #best is default + m3 <- ci_query('Tetracyclin', from = 'name', match = 'na') expect_type(m1, 'list') expect_type(m2, 'list') From 1f8e6748ed46a5a21817ee6b210eb7de7da4527c Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 14:29:00 -0400 Subject: [PATCH 07/43] added case-insensitive arg matching --- R/cts.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R/cts.R b/R/cts.R index 7ba2f4d8..87baafa3 100644 --- a/R/cts.R +++ b/R/cts.R @@ -71,7 +71,7 @@ cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){ #' \code{'PubChem CID'}, \code{'ChemSpider'}, \code{'CAS'}. #' @param to character; type to convert to. #' @param first deprecated. Use choices = 1 instead. -#' @param choices to return only the first result, use 'choices = 1'. To choose a result from an interative menu, provide a number of choices to choose from or "all". +#' @param choices to return only the first result, use 'choices = 1'. To choose a result from an interactive menu, provide a number of choices to choose from or "all". #' @param verbose logical; should a verbose output be printed on the console? #' @param ... currently not used. #' @return a list of character vectors or if \code{choices} is used, then a single named vector. @@ -101,6 +101,17 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose stop('Cannot handle multiple input or output types. Please provide only one argument for `from` and `to`.') } + from <- match.arg(tolower(from), c(tolower(cts_from()), "name")) + to <- match.arg(tolower(to), c(tolower(cts_to()), "name")) + + if (from == "name") { + from <- "chemical name" + } + + if (to == "name") { + to <- "chemical name" + } + foo <- function(query, from, to , first, verbose){ if (is.na(query)) return(NA) baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/convert" @@ -119,14 +130,11 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose return(NA) } out <- out$result[[1]] - # if (first) - # out <- out[1] out <- chooser(out, choices) return(out) } out <- lapply(query, foo, from = from, to = to, first = first, verbose = verbose) out <- setNames(out, query) - # if (first) if(!is.null(choices)) out <- unlist(out) return(out) From 33852b5bd695264e8aca72d3b432c1da440818b5 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 15:50:37 -0400 Subject: [PATCH 08/43] made arguments case-insenstivie --- R/chebi.R | 9 ++++++--- R/cts.R | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/chebi.R b/R/chebi.R index 7395f3a4..dd0964fb 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -68,13 +68,13 @@ get_chebiid <- function(query, 'iupac name', 'citations', 'registry numbers', 'manual xrefs', 'automatic xrefs', 'formula', 'mass', 'monoisotopic mass', 'charge', 'inchi', 'inchikey', 'smiles', 'species'), - match = c("all", "best", "ask", "na"), + match = c("all", "best", "first", "ask", "na"), max_res = 200, stars = c('all', 'two only', 'three only'), verbose = TRUE, ...) { match <- match.arg(match) - from <- casefold(match.arg(from), upper = TRUE) + from <- toupper(match.arg(from)) if (from == "NAME") { from <- "ALL NAMES" } @@ -82,7 +82,7 @@ get_chebiid <- function(query, from <- "INCHI/INCHI KEY" } - stars <- casefold(match.arg(stars), upper = TRUE) + stars <- toupper(match.arg(stars)) foo <- function(query, from, match, max_res, stars, verbose, ...) { if (is.na(query)) return(data.frame(chebiid = NA_character_, @@ -146,6 +146,9 @@ get_chebiid <- function(query, query = query, stringsAsFactors = FALSE)) } + if (match == "first") { + return(out[1, ]) + } } else { out <- data.frame(chebiid = NA_character_, query = query, diff --git a/R/cts.R b/R/cts.R index 87baafa3..60e54a9c 100644 --- a/R/cts.R +++ b/R/cts.R @@ -101,8 +101,8 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose stop('Cannot handle multiple input or output types. Please provide only one argument for `from` and `to`.') } - from <- match.arg(tolower(from), c(tolower(cts_from()), "name")) - to <- match.arg(tolower(to), c(tolower(cts_to()), "name")) + from <- match.arg(tolower(from), c(cts_from(), "name")) + to <- match.arg(tolower(to), c(cts_to(), "name")) if (from == "name") { from <- "chemical name" @@ -160,7 +160,7 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose #' cts_from() #' } cts_from <- function(verbose = TRUE){ - fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/fromValues') + tolower(fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/fromValues')) } @@ -183,5 +183,5 @@ cts_from <- function(verbose = TRUE){ #' cts_from() #' } cts_to <- function(verbose = TRUE){ - fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/toValues') + tolower(fromJSON('http://cts.fiehnlab.ucdavis.edu/service/conversion/toValues')) } From eba73cfdb654d045cfb016f0ff8cfe811527cf6c Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 15:50:54 -0400 Subject: [PATCH 09/43] added autotranslate function draft --- NAMESPACE | 1 + R/utils.R | 35 +++++++++++++++++++++++++++++++++++ man/autotranslate.Rd | 30 ++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 man/autotranslate.Rd diff --git a/NAMESPACE b/NAMESPACE index 3a3d534d..755f76d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ S3method(smiles,pan_query) S3method(smiles,pc_prop) S3method(smiles,wd_ident) export(as.cas) +export(autotranslate) export(aw_query) export(build_aw_idx) export(cas) diff --git a/R/utils.R b/R/utils.R index 43e5d075..ed5b571a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -505,3 +505,38 @@ matcher <- } } } + + + +#' Auto-translate identifiers and search databases +#' +#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +#' +#' @param query character; the search term +#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" +#' @param .f character; the (quoted) name of a webchem function +#' @param .verbose logical; print a message when translating query? +#' @param ... other arguments passed to the function specified with \code{.f} +#' +#' @return returns results from \code{.f} +#' @export +#' +#' @examples +#' \dontrun{ +#' autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") +#' } +autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { + f <- rlang::as_function(.f) + pos_froms <- eval(rlang::fn_fmls(f)$from) + if (from %in% pos_froms) { + f(query = query, from = from, ...) + } else { + pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken + new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] + if(.verbose){ + message(glue::glue("{.f} doesn't accept {from}. Attempting to translate to {new_from} with CTS and re-running query ")) + } + new_query <- cts_convert(query, from = from, to = new_from, choices = 1) + f(query = new_query, from = new_from, ...) + } +} \ No newline at end of file diff --git a/man/autotranslate.Rd b/man/autotranslate.Rd new file mode 100644 index 00000000..dac92ea0 --- /dev/null +++ b/man/autotranslate.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{autotranslate} +\alias{autotranslate} +\title{Auto-translate identifiers and search databases} +\usage{ +autotranslate(query, from, .f, .verbose = TRUE, ...) +} +\arguments{ +\item{query}{character; the search term} + +\item{from}{character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey"} + +\item{.f}{character; the (quoted) name of a webchem function} + +\item{.verbose}{logical; print a message when translating query?} + +\item{...}{other arguments passed to the function specified with \code{.f}} +} +\value{ +returns results from \code{.f} +} +\description{ +Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +} +\examples{ +\dontrun{ +autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") +} +} From ed9703639660c7e2da9adbe6bdb926cf1c6d6224 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 15:51:14 -0400 Subject: [PATCH 10/43] re-built documentation --- man/aw_query.Rd | 15 ++++++++------- man/ci_query.Rd | 21 +++++++++++++-------- man/cts_compinfo.Rd | 6 ++++-- man/cts_convert.Rd | 2 +- man/fn_percept.Rd | 6 ++++-- man/get_chebiid.Rd | 23 +++++++++++------------ man/pc_synonyms.Rd | 2 +- 7 files changed, 42 insertions(+), 33 deletions(-) diff --git a/man/aw_query.Rd b/man/aw_query.Rd index e2541003..ca4b3c82 100644 --- a/man/aw_query.Rd +++ b/man/aw_query.Rd @@ -6,15 +6,16 @@ \usage{ aw_query( query, - type = c("commonname", "cas"), + from = c("name", "cas"), verbose = TRUE, - force_build = FALSE + force_build = FALSE, + type ) } \arguments{ \item{query}{character; search string} -\item{type}{character; type of input ('cas' or 'commonname')} +\item{from}{character; type of input ('cas' or 'name')} \item{verbose}{logical; print message during processing to console?} @@ -31,20 +32,20 @@ Query Alan Woods Compendium of Pesticide Common Names \url{http://www.alanwood.net/pesticides} } \note{ -for type = 'cas' only the first matched link is returned. +for from = 'cas' only the first matched link is returned. Please respect Copyright, Terms and Conditions \url{http://www.alanwood.net/pesticides/legal.html}! } \examples{ \dontrun{ -aw_query('Fluazinam', type = 'commonname') -out <- aw_query(c('Fluazinam', 'Diclofop'), type = 'com') +aw_query('Fluazinam', from = 'name') +out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'com') out # extract subactivity from object sapply(out, function(y) y$subactivity[1]) # use CAS-numbers -aw_query("79622-59-6", type = 'cas') +aw_query("79622-59-6", from = 'cas') } } \references{ diff --git a/man/ci_query.Rd b/man/ci_query.Rd index 6bf5dfc4..8d9206bf 100644 --- a/man/ci_query.Rd +++ b/man/ci_query.Rd @@ -6,18 +6,23 @@ \usage{ ci_query( query, - type = c("name", "rn", "inchikey"), + from = c("name", "rn", "inchikey", "cas"), match = c("best", "first", "ask", "na"), - verbose = TRUE + verbose = TRUE, + type ) } \arguments{ \item{query}{character; query string} -\item{type}{character; type of query string. \code{"rn"} for registry number -or \code{"name"} for common name or \code{"inchikey"} for inchikey as input.} +\item{from}{character; type of query string. \code{"rn"} for registry number +(see +\href{https://chem.nlm.nih.gov/chemidplus/jsp/chemidheavy/help.jsp#LiteSearchDataFields}{documentation} +for more details), \code{"name"} for common name, or \code{"inchikey"} for +inchikey as input. \code{"cas"} is a synonym for \code{"rn"} and provided +for consistency across functions.} -\item{match}{character; How should multiple hits be handeled? \code{"first"} +\item{match}{character; How should multiple hits be handled? \code{"first"} returns only the first match, \code{"best"} the best matching (by name) ID, \code{"ask"} enters an interactive mode and the user is asked for input, \code{"na"} returns NA if multiple hits are found.} @@ -41,17 +46,17 @@ Medicine, \url{https://www.nlm.nih.gov/databases/download.html}. \dontrun{ # might fail if API is not available # query common name -y1 <- ci_query(c('Formaldehyde', 'Triclosan'), type = 'name') +y1 <- ci_query(c('Formaldehyde', 'Triclosan'), from = 'name') names(y1) str(y1[['Triclosan']]) # lots of information inside y1[['Triclosan']]$inchikey # Query by CAS -y2 <- ci_query('50-00-0', type = 'rn', match = 'first') +y2 <- ci_query('50-00-0', from = 'rn', match = 'first') y2[['50-00-0']]$inchikey # query by inchikey -y3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', type = 'inchikey') +y3 <- ci_query('WSFSSNUMVMOOMR-UHFFFAOYSA-N', from = 'inchikey') y3[[1]]$name # extract lop-P diff --git a/man/cts_compinfo.Rd b/man/cts_compinfo.Rd index 5dcfcd04..f32320b9 100644 --- a/man/cts_compinfo.Rd +++ b/man/cts_compinfo.Rd @@ -4,10 +4,12 @@ \alias{cts_compinfo} \title{Get record details from Chemical Translation Service (CTS)} \usage{ -cts_compinfo(inchikey, verbose = TRUE) +cts_compinfo(query, from = "inchikey", verbose = TRUE, inchikey) } \arguments{ -\item{inchikey}{character; InChIkey.} +\item{query}{character; InChIkey.} + +\item{from}{character; currently only accepts "inchikey".} \item{verbose}{logical; should a verbose output be printed on the console?} } diff --git a/man/cts_convert.Rd b/man/cts_convert.Rd index eb24d96c..a9dfdb40 100644 --- a/man/cts_convert.Rd +++ b/man/cts_convert.Rd @@ -24,7 +24,7 @@ cts_convert( \item{first}{deprecated. Use choices = 1 instead.} -\item{choices}{to return only the first result, use 'choices = 1'. To choose a result from an interative menu, provide a number of choices to choose from or "all".} +\item{choices}{to return only the first result, use 'choices = 1'. To choose a result from an interactive menu, provide a number of choices to choose from or "all".} \item{verbose}{logical; should a verbose output be printed on the console?} diff --git a/man/fn_percept.Rd b/man/fn_percept.Rd index 3f8878d3..4dae9bb3 100644 --- a/man/fn_percept.Rd +++ b/man/fn_percept.Rd @@ -4,10 +4,12 @@ \alias{fn_percept} \title{Retrieve flavor percepts from www.flavornet.org} \usage{ -fn_percept(CAS, verbose = TRUE, ...) +fn_percept(query, from = "cas", verbose = TRUE, CAS, ...) } \arguments{ -\item{CAS}{character; CAS number to search by. See \code{\link{is.cas}} for correct formatting} +\item{query}{character; CAS number to search by. See \code{\link{is.cas}} for correct formatting} + +\item{from}{character; currently only CAS numbers are accepted.} \item{verbose}{logical; should a verbose output be printed on the console?} diff --git a/man/get_chebiid.Rd b/man/get_chebiid.Rd index 5df0a862..1057b9a9 100644 --- a/man/get_chebiid.Rd +++ b/man/get_chebiid.Rd @@ -6,10 +6,12 @@ \usage{ get_chebiid( query, - from = "ALL", - match = c("all", "best", "ask", "na"), + from = c("all", "chebi id", "chebi name", "definition", "name", "iupac name", + "citations", "registry numbers", "manual xrefs", "automatic xrefs", "formula", + "mass", "monoisotopic mass", "charge", "inchi", "inchikey", "smiles", "species"), + match = c("all", "best", "first", "ask", "na"), max_res = 200, - stars = "ALL", + stars = c("all", "two only", "three only"), verbose = TRUE, ... ) @@ -17,10 +19,8 @@ get_chebiid( \arguments{ \item{query}{character; search term.} -\item{from}{character; type of input, can be one of 'ALL', 'CHEBI ID', -'CHEBI NAME', 'DEFINITION', 'ALL NAMES', 'IUPAC NAME', 'CITATIONS', -'REGISTRY NUMBERS', 'MANUAL XREFS', 'AUTOMATIC XREFS', 'FORMULA', 'MASS', -'MONOISOTOPIC MASS', 'CHARGE', 'INCHI/INCHI KEY', 'SMILES', 'SPECIES'.} +\item{from}{character; type of input. \code{"all"} searches all types and +\code{"name"} searches all names.} \item{match}{character; How should multiple hits be handled?, \code{"all"} all matches are returned, @@ -31,17 +31,16 @@ get_chebiid( \item{max_res}{integer; maximum number of results to be retrieved from the web service} -\item{stars}{character; type of input can be one of 'ALL', 'TWO ONLY', -'THREE ONLY'.} +\item{stars}{character;} \item{verbose}{logical; should a verbose output be printed on the console?} -\item{...}{optional arguments} +\item{...}{currently unused} } \value{ returns a list of data.frames containing a chebiid, a chebiasciiname, -a searchscore and stars if matches were found. -If not, data.frame(NA) is returned + a searchscore and stars if matches were found. If not, data.frame(NA) is + returned } \description{ Returns a data.frame with a ChEBI entity ID (chebiid), diff --git a/man/pc_synonyms.Rd b/man/pc_synonyms.Rd index d1623cb5..7a872e08 100644 --- a/man/pc_synonyms.Rd +++ b/man/pc_synonyms.Rd @@ -6,7 +6,7 @@ \usage{ pc_synonyms( query, - from = "name", + from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), choices = NULL, verbose = TRUE, arg = NULL, From c67d074f169f8722184aa215cc50a1e0f0cb714e Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 18:49:11 -0400 Subject: [PATCH 11/43] added dots to absorb unused arguments --- R/alanwood.R | 3 ++- R/flavornet.R | 2 +- R/pan.R | 3 ++- R/srs.R | 4 ++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/alanwood.R b/R/alanwood.R index 7715abf6..5e7d54db 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -10,6 +10,7 @@ #' @param verbose logical; print message during processing to console? #' @param force_build logical; force building a new index? See #' \code{\link{build_aw_idx}} for more details. +#' @param ... currently unused. #' @return A list of eight entries: common-name, status, preferred IUPAC Name, #' IUPAC Name, cas, formula, activity, subactivity, inchikey, inchi and source #' url. @@ -36,7 +37,7 @@ #' @seealso \code{\link{build_aw_idx}} aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, - force_build = FALSE, type) { + force_build = FALSE, type, ...) { if (!missing(type)) { warning('"type" is deprecated. Please use "from" instead. ') from <- type diff --git a/R/flavornet.R b/R/flavornet.R index ccdcb280..fbafd3d8 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -9,7 +9,7 @@ #' @param query character; CAS number to search by. See \code{\link{is.cas}} for correct formatting #' @param from character; currently only CAS numbers are accepted. #' @param verbose logical; should a verbose output be printed on the console? -#' @param ... not currently used +#' @param ... currently unused #' #' @return A named character vector containing flavor percepts or NA's in the case of CAS numbers that are not found #' diff --git a/R/pan.R b/R/pan.R index 2d70b88d..95701f31 100644 --- a/R/pan.R +++ b/R/pan.R @@ -67,8 +67,9 @@ #' # extract Hydrolysis Half-life (Avg, Days) #' sapply(out, function(y) y$`Hydrolysis Half-life (Avg, Days)`) #' } -pan_query <- function(query, match = c('best', 'all', 'first'), verbose = TRUE, ...){ +pan_query <- function(query, from = c("name", "cas"), match = c('best', 'all', 'first'), verbose = TRUE, ...){ match <- match.arg(match) + match.arg(from) #not actually needed for this function to work foo <- function(query, match, verbose) { on.exit(suppressWarnings(closeAllConnections())) if (is.na(query)) { diff --git a/R/srs.R b/R/srs.R index 63ace3c3..e64605ed 100644 --- a/R/srs.R +++ b/R/srs.R @@ -5,7 +5,7 @@ #'@param query character; query ID. #'@param from character; type of query ID, e.g. \code{'itn'} , \code{'cas'}, #' \code{'epaid'}, \code{'tsn'}, \code{'name'}. -#' +#'@param ... not currently used. #'@return a list of lists (for each supplied query): a list of 22. subsKey, #' internalTrackingNumber, systematicName, epaIdentificationNumber, #' currentCasNumber, currentTaxonomicSerialNumber, epaName, substanceType, @@ -26,7 +26,7 @@ #' } srs_query <- function(query, - from = c("itn", "cas", "epaid", "tsn", "name")) { + from = c("itn", "cas", "epaid", "tsn", "name"), ...) { entity_url <- "https://cdxnodengn.epa.gov/cdx-srs-rest/" rst <- lapply(query, function(x) { From c5e15a87fb40372dbcc9f229b19e1ba83e5d037f Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 18:49:50 -0400 Subject: [PATCH 12/43] uniformity of outputs --- R/chebi.R | 25 +++++++++++-------------- R/chemspider.R | 8 +++++--- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/R/chebi.R b/R/chebi.R index dd0964fb..c13766d4 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -85,9 +85,8 @@ get_chebiid <- function(query, stars <- toupper(match.arg(stars)) foo <- function(query, from, match, max_res, stars, verbose, ...) { - if (is.na(query)) return(data.frame(chebiid = NA_character_, - query = NA_character_, - stringsAsFactors = FALSE)) + if (is.na(query)) return(tibble(query = NA_character_, + chebiid = NA_character_)) # query url <- 'http://www.ebi.ac.uk:80/webservices/chebi/2.0/webservice' @@ -122,9 +121,8 @@ get_chebiid <- function(query, out <- setNames(out, tolower(names(out))) if (nrow(out) == 0) { message('No result found. \n') - return(data.frame(chebiid = NA_character_, - query = query, - stringsAsFactors = FALSE)) + return(tibble(query = query, + chebiid = NA_character_)) } if (nrow(out) > 0) out$query <- query if (nrow(out) == 1) return(out) @@ -142,17 +140,16 @@ get_chebiid <- function(query, return(out[out$chebiid == matched, ]) } if (match == 'na') { - return(data.frame(chebiid = NA_character_, - query = query, - stringsAsFactors = FALSE)) + return(tibble(query = query, + chebiid = NA_character_)) } if (match == "first") { return(out[1, ]) } } else { - out <- data.frame(chebiid = NA_character_, - query = query, - stringsAsFactors = FALSE) + out <- tibble(query = query, + chebiid = NA_character_, + ) message('Returning NA (', http_status(res)$message, '). \n') return(out) @@ -166,8 +163,8 @@ get_chebiid <- function(query, stars = stars, verbose = verbose) out <- setNames(out, query) - out <- as_tibble(bind_rows(out)) - return(out) + out <- bind_rows(out) + return(dplyr::select(out, query, chebiid, everything())) } diff --git a/R/chemspider.R b/R/chemspider.R index 24726f9d..f10f36a6 100644 --- a/R/chemspider.R +++ b/R/chemspider.R @@ -191,7 +191,7 @@ get_csid <- function(query, match <- match.arg(match) foo <- function(x, from, match, verbose, apikey, ...) { - if (is.na(x)) return(NA) + if (is.na(x)) return(as.integer(NA)) res <- switch(from, name = cs_name_csid(x, apikey = apikey, control = cs_control(...)), @@ -200,8 +200,10 @@ get_csid <- function(query, inchi = cs_inchi_csid(x, apikey = apikey), inchikey = cs_inchikey_csid(x, apikey = apikey), smiles = cs_smiles_csid(x, apikey = apikey)) - res <- matcher(res, query = x, match = match, verbose = verbose) - if (length(res) == 0) res <- NA + if(length(res) > 1) { + res <- matcher(res, query = x, match = match, verbose = verbose) + } + if (length(res) == 0) res <- as.integer(NA) return(res) } out <- From b51ab917271e04bdc7b4deb3800338c618a9e3fb Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 19:01:58 -0400 Subject: [PATCH 13/43] added check_coverage() function --- NAMESPACE | 1 + R/utils.R | 40 ++++++++++++++++++++++++++++++++++++++++ man/check_coverage.Rd | 30 ++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 man/check_coverage.Rd diff --git a/NAMESPACE b/NAMESPACE index 755f76d2..e6c28d0e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,6 +32,7 @@ export(aw_query) export(build_aw_idx) export(cas) export(chebi_comp_entity) +export(check_coverage) export(ci_query) export(cid_compinfo) export(cir) diff --git a/R/utils.R b/R/utils.R index ed5b571a..6d5c7a90 100644 --- a/R/utils.R +++ b/R/utils.R @@ -539,4 +539,44 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { new_query <- cts_convert(query, from = from, to = new_from, choices = 1) f(query = new_query, from = new_from, ...) } +} + + +#' Check data source coverage of compounds +#' +#' Checks if entries are found in (most) data sources included in webchem +#' +#' @param query character; the search term +#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" +#' @param sources character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked. +#' +#' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query +#' @export +#' +#' @examples +#' \dontrun{ +#' check_coverage("hexane", from = "name") +#' } +check_coverage <- function(query, from, + sources = c("etox", "pc", "chebi", "cs", + "aw", "fn", "pan", "srs")) { + sources <- match.arg(sources, several.ok = TRUE) + sources <- sapply(sources, switch, + "etox" = "get_etoxid", + "pc" = "get_cid", + "chebi" = "get_chebiid", + "cs" = "get_csid", + "aw" = "aw_query", + "fn" = "fn_percept", + "pan" = "pan_query", + "srs" = "srs_query") + + out <- map(sources, ~{ + x <- autotranslate(query, from = "name", .f = .x, match = "first") + if (inherits(x, "data.frame")) { + x <- x[[ncol(x)]] + } + !is.na(x) + }) %>% set_names(names(sources)) + return(bind_cols(query = query, out)) } \ No newline at end of file diff --git a/man/check_coverage.Rd b/man/check_coverage.Rd new file mode 100644 index 00000000..1080d6f7 --- /dev/null +++ b/man/check_coverage.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{check_coverage} +\alias{check_coverage} +\title{Check data source coverage of compounds} +\usage{ +check_coverage( + query, + from, + sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs") +) +} +\arguments{ +\item{query}{character; the search term} + +\item{from}{character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey"} + +\item{sources}{character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked.} +} +\value{ +a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query +} +\description{ +Checks if entries are found in (most) data sources included in webchem +} +\examples{ +\dontrun{ +check_coverage("hexane", from = "name") +} +} From aa1f01446d6cee68cdc3759ea9483eef756772c7 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 9 Jun 2020 19:02:10 -0400 Subject: [PATCH 14/43] updated docs --- man/aw_query.Rd | 5 ++++- man/fn_percept.Rd | 2 +- man/pan_query.Rd | 8 +++++++- man/srs_query.Rd | 4 +++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/man/aw_query.Rd b/man/aw_query.Rd index ca4b3c82..724d5e73 100644 --- a/man/aw_query.Rd +++ b/man/aw_query.Rd @@ -9,7 +9,8 @@ aw_query( from = c("name", "cas"), verbose = TRUE, force_build = FALSE, - type + type, + ... ) } \arguments{ @@ -21,6 +22,8 @@ aw_query( \item{force_build}{logical; force building a new index? See \code{\link{build_aw_idx}} for more details.} + +\item{...}{currently unused.} } \value{ A list of eight entries: common-name, status, preferred IUPAC Name, diff --git a/man/fn_percept.Rd b/man/fn_percept.Rd index 4dae9bb3..6ff851fe 100644 --- a/man/fn_percept.Rd +++ b/man/fn_percept.Rd @@ -13,7 +13,7 @@ fn_percept(query, from = "cas", verbose = TRUE, CAS, ...) \item{verbose}{logical; should a verbose output be printed on the console?} -\item{...}{not currently used} +\item{...}{currently unused} } \value{ A named character vector containing flavor percepts or NA's in the case of CAS numbers that are not found diff --git a/man/pan_query.Rd b/man/pan_query.Rd index 5746a5ea..ce2b56a5 100644 --- a/man/pan_query.Rd +++ b/man/pan_query.Rd @@ -4,7 +4,13 @@ \alias{pan_query} \title{Query the PAN Pesticide database} \usage{ -pan_query(query, match = c("best", "all", "first"), verbose = TRUE, ...) +pan_query( + query, + from = c("name", "cas"), + match = c("best", "all", "first"), + verbose = TRUE, + ... +) } \arguments{ \item{query}{character; searchterm, e.g. chemical name or CAS.} diff --git a/man/srs_query.Rd b/man/srs_query.Rd index 3189eab0..8708b326 100644 --- a/man/srs_query.Rd +++ b/man/srs_query.Rd @@ -4,13 +4,15 @@ \alias{srs_query} \title{Get record details from U.S. EPA Substance Registry Servives (SRS)} \usage{ -srs_query(query, from = c("itn", "cas", "epaid", "tsn", "name")) +srs_query(query, from = c("itn", "cas", "epaid", "tsn", "name"), ...) } \arguments{ \item{query}{character; query ID.} \item{from}{character; type of query ID, e.g. \code{'itn'} , \code{'cas'}, \code{'epaid'}, \code{'tsn'}, \code{'name'}.} + +\item{...}{not currently used.} } \value{ a list of lists (for each supplied query): a list of 22. subsKey, From b4db84b5bff44dc5344e5d7f65d979eb63a228a8 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 11:01:58 -0400 Subject: [PATCH 15/43] added plot output (for fun and testing). need to eventually change to base R plot I think. --- R/utils.R | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/R/utils.R b/R/utils.R index 6d5c7a90..4ecf558b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -559,7 +559,8 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { #' } check_coverage <- function(query, from, sources = c("etox", "pc", "chebi", "cs", - "aw", "fn", "pan", "srs")) { + "aw", "fn", "pan", "srs"), + plot = FALSE) { sources <- match.arg(sources, several.ok = TRUE) sources <- sapply(sources, switch, "etox" = "get_etoxid", @@ -571,12 +572,37 @@ check_coverage <- function(query, from, "pan" = "pan_query", "srs" = "srs_query") - out <- map(sources, ~{ - x <- autotranslate(query, from = "name", .f = .x, match = "first") + foo <- function(.f, query, from) { + x <- autotranslate(query = query, from = from, .f = .f, match = "first") if (inherits(x, "data.frame")) { x <- x[[ncol(x)]] } - !is.na(x) - }) %>% set_names(names(sources)) - return(bind_cols(query = query, out)) -} \ No newline at end of file + return(!is.na(x)) + } + + out <- lapply(sources, foo, query = query, from = from) + out <- setNames(out, names(sources)) + out <- bind_cols(query = query, out) + if (plot) { + df <- out %>% + pivot_longer(-query, names_to = "source", values_to = "covered") %>% + group_by(source) %>% + mutate(num = sum(covered)) %>% + ungroup() %>% + arrange(desc(num), source) %>% + mutate(source = fct_inorder(source)) + + p <- + ggplot(df, aes(x = source, y = query, fill = covered)) + + geom_tile(color = "grey30") + + coord_fixed(expand = 0) + + scale_fill_manual(values = c("TRUE" = "#3BC03B", "FALSE" = "grey80")) + + scale_x_discrete(position = "top") + + theme(legend.position = "none", + axis.title = element_blank(), + axis.ticks = element_blank()) + + print(p) + } + return(out) +} From 1864fa35757915ccf9b1fea8dc0c4ce33839d270 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 11:26:08 -0400 Subject: [PATCH 16/43] added additional translators --- R/utils.R | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/R/utils.R b/R/utils.R index 4ecf558b..c01a8e52 100644 --- a/R/utils.R +++ b/R/utils.R @@ -531,12 +531,36 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { if (from %in% pos_froms) { f(query = query, from = from, ...) } else { - pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken - new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] - if(.verbose){ - message(glue::glue("{.f} doesn't accept {from}. Attempting to translate to {new_from} with CTS and re-running query ")) + + #see if OPSIN can be used first because it is most reliable + opsin_output <- c("inchi", "stdinchi", "stdinchikey", "smiles") + if (from == "name" & any(pos_froms %in% opsin_output)) { + new_from <- pos_froms[which(pos_froms %in% opsin_output)[1]] + if(.verbose){ + message( + glue::glue("{.f} doesn't accept {from}. + Attempting to translate to {new_from} with OPSIN. ")) + } + new_query <- opsin_query(query)[[as_name(new_from)]] + + } else if (from == "inchi" & any(pos_froms %in% c("inchikey", "smiles"))){ + new_from <- pos_froms[which(pos_froms %in% c("inchikey", "smiles"))] + if(.verbose){ + message( + glue::glue("{.f} doesn't accept {from}. + Attempting to translate to {new_from} with ChemSpider. ")) + } + new_query <- cs_convert_multiple(query, from = from, to = new_from) + } else { #otherwise use CTS + pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken + new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] + if(.verbose){ + message( + glue::glue("{.f} doesn't accept {from}. + Attempting to translate to {new_from} with CTS. ")) + } + new_query <- cts_convert(query, from = from, to = new_from, choices = 1) } - new_query <- cts_convert(query, from = from, to = new_from, choices = 1) f(query = new_query, from = new_from, ...) } } From 859f9b78d0159589737b68e70a432e98d68e48e6 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 11:27:12 -0400 Subject: [PATCH 17/43] removed additional translators because they are never used, I think. --- R/utils.R | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/R/utils.R b/R/utils.R index c01a8e52..b987ee23 100644 --- a/R/utils.R +++ b/R/utils.R @@ -531,41 +531,20 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { if (from %in% pos_froms) { f(query = query, from = from, ...) } else { - - #see if OPSIN can be used first because it is most reliable - opsin_output <- c("inchi", "stdinchi", "stdinchikey", "smiles") - if (from == "name" & any(pos_froms %in% opsin_output)) { - new_from <- pos_froms[which(pos_froms %in% opsin_output)[1]] - if(.verbose){ - message( - glue::glue("{.f} doesn't accept {from}. - Attempting to translate to {new_from} with OPSIN. ")) - } - new_query <- opsin_query(query)[[as_name(new_from)]] - - } else if (from == "inchi" & any(pos_froms %in% c("inchikey", "smiles"))){ - new_from <- pos_froms[which(pos_froms %in% c("inchikey", "smiles"))] - if(.verbose){ - message( - glue::glue("{.f} doesn't accept {from}. - Attempting to translate to {new_from} with ChemSpider. ")) - } - new_query <- cs_convert_multiple(query, from = from, to = new_from) - } else { #otherwise use CTS - pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken - new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] - if(.verbose){ - message( - glue::glue("{.f} doesn't accept {from}. + pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken + new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] + if(.verbose){ + message( + glue::glue("{.f} doesn't accept {from}. Attempting to translate to {new_from} with CTS. ")) - } - new_query <- cts_convert(query, from = from, to = new_from, choices = 1) } + new_query <- cts_convert(query, from = from, to = new_from, choices = 1) f(query = new_query, from = new_from, ...) } } + #' Check data source coverage of compounds #' #' Checks if entries are found in (most) data sources included in webchem From 924af451208de712a00216f64cc550b7537a9561 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 12:06:26 -0400 Subject: [PATCH 18/43] moved to separate r file --- R/integration.R | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ R/utils.R | 104 --------------------------------------------- 2 files changed, 109 insertions(+), 104 deletions(-) create mode 100644 R/integration.R diff --git a/R/integration.R b/R/integration.R new file mode 100644 index 00000000..ca0ee1b0 --- /dev/null +++ b/R/integration.R @@ -0,0 +1,109 @@ + +#' Auto-translate identifiers and search databases +#' +#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +#' +#' @param query character; the search term +#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" +#' @param .f character; the (quoted) name of a webchem function +#' @param .verbose logical; print a message when translating query? +#' @param ... other arguments passed to the function specified with \code{.f} +#' +#' @return returns results from \code{.f} +#' @export +#' +#' @examples +#' \dontrun{ +#' autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") +#' } +autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { + f <- rlang::as_function(.f) + pos_froms <- eval(rlang::fn_fmls(f)$from) + if (from %in% pos_froms) { + f(query = query, from = from, ...) + } else { + pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken + new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] + if(.verbose){ + message( + glue::glue("{.f} doesn't accept {from}. + Attempting to translate to {new_from} with CTS. ")) + } + new_query <- cts_convert(query, from = from, to = new_from, choices = 1) + #would like to try a again if cts fails the first time (as it often does). + f(query = new_query, from = new_from, ...) + } +} + + + +#' Check data source coverage of compounds +#' +#' Checks if entries are found in (most) data sources included in webchem +#' +#' @param query character; the search term +#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" +#' @param sources character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked. +#' +#' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query +#' @export +#' +#' @examples +#' \dontrun{ +#' check_coverage("hexane", from = "name") +#' } +check_coverage <- function(query, from, + sources = c("etox", "pc", "chebi", "cs", + "aw", "fn", "pan", "srs"), + plot = FALSE) { + sources <- match.arg(sources, several.ok = TRUE) + sources <- sapply(sources, switch, + "etox" = "get_etoxid", + "pc" = "get_cid", + "chebi" = "get_chebiid", + "cs" = "get_csid", + "aw" = "aw_query", + "fn" = "fn_percept", + "pan" = "pan_query", + "srs" = "srs_query") + + foo <- function(.f, query, from) { + # if a function errors (e.g. API is down) then return NA + x <- try(autotranslate(query = query, from = from, .f = .f, match = "first")) + if (inherits(x, "try-error")) { + return(NA) + } + if (inherits(x, "data.frame")) { + x <- x[[ncol(x)]] + } + return(!is.na(x)) + } + + out <- lapply(sources, foo, query = query, from = from) + out <- setNames(out, names(sources)) + out <- bind_cols(query = query, out) + + if (plot) { + df <- out %>% + pivot_longer(-query, names_to = "source", values_to = "covered") %>% + group_by(source) %>% + mutate(num = sum(covered)) %>% + ungroup() %>% + arrange(desc(num), source) %>% + mutate(source = fct_inorder(source)) + + p <- + ggplot(df, aes(x = source, y = query, fill = covered)) + + geom_tile(color = "grey30") + + coord_fixed(expand = 0) + + scale_fill_manual("Covered:", + values = c("TRUE" = "#3BC03B", "FALSE" = "#C7010B"), + na.value = "grey70") + + scale_x_discrete(position = "top") + + theme(axis.title = element_blank(), + axis.ticks = element_blank()) + + print(p) + } + return(out) +} \ No newline at end of file diff --git a/R/utils.R b/R/utils.R index b987ee23..43e5d075 100644 --- a/R/utils.R +++ b/R/utils.R @@ -505,107 +505,3 @@ matcher <- } } } - - - -#' Auto-translate identifiers and search databases -#' -#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. -#' -#' @param query character; the search term -#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" -#' @param .f character; the (quoted) name of a webchem function -#' @param .verbose logical; print a message when translating query? -#' @param ... other arguments passed to the function specified with \code{.f} -#' -#' @return returns results from \code{.f} -#' @export -#' -#' @examples -#' \dontrun{ -#' autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") -#' } -autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { - f <- rlang::as_function(.f) - pos_froms <- eval(rlang::fn_fmls(f)$from) - if (from %in% pos_froms) { - f(query = query, from = from, ...) - } else { - pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken - new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] - if(.verbose){ - message( - glue::glue("{.f} doesn't accept {from}. - Attempting to translate to {new_from} with CTS. ")) - } - new_query <- cts_convert(query, from = from, to = new_from, choices = 1) - f(query = new_query, from = new_from, ...) - } -} - - - -#' Check data source coverage of compounds -#' -#' Checks if entries are found in (most) data sources included in webchem -#' -#' @param query character; the search term -#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" -#' @param sources character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked. -#' -#' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query -#' @export -#' -#' @examples -#' \dontrun{ -#' check_coverage("hexane", from = "name") -#' } -check_coverage <- function(query, from, - sources = c("etox", "pc", "chebi", "cs", - "aw", "fn", "pan", "srs"), - plot = FALSE) { - sources <- match.arg(sources, several.ok = TRUE) - sources <- sapply(sources, switch, - "etox" = "get_etoxid", - "pc" = "get_cid", - "chebi" = "get_chebiid", - "cs" = "get_csid", - "aw" = "aw_query", - "fn" = "fn_percept", - "pan" = "pan_query", - "srs" = "srs_query") - - foo <- function(.f, query, from) { - x <- autotranslate(query = query, from = from, .f = .f, match = "first") - if (inherits(x, "data.frame")) { - x <- x[[ncol(x)]] - } - return(!is.na(x)) - } - - out <- lapply(sources, foo, query = query, from = from) - out <- setNames(out, names(sources)) - out <- bind_cols(query = query, out) - if (plot) { - df <- out %>% - pivot_longer(-query, names_to = "source", values_to = "covered") %>% - group_by(source) %>% - mutate(num = sum(covered)) %>% - ungroup() %>% - arrange(desc(num), source) %>% - mutate(source = fct_inorder(source)) - - p <- - ggplot(df, aes(x = source, y = query, fill = covered)) + - geom_tile(color = "grey30") + - coord_fixed(expand = 0) + - scale_fill_manual(values = c("TRUE" = "#3BC03B", "FALSE" = "grey80")) + - scale_x_discrete(position = "top") + - theme(legend.position = "none", - axis.title = element_blank(), - axis.ticks = element_blank()) - - print(p) - } - return(out) -} From ace8acac82066087199170727c6b5ad4389097d1 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 12:06:35 -0400 Subject: [PATCH 19/43] added tests --- tests/testthat/test-integration.R | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/testthat/test-integration.R diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R new file mode 100644 index 00000000..536ade67 --- /dev/null +++ b/tests/testthat/test-integration.R @@ -0,0 +1,32 @@ +# These all might occasionally fail because cts_translate() is currently somewhat unreliable. + +fn_up <- ping_service("fn") +test_that("autotranslate works when no translation needed", { + skip_if_not(fn_up, "Flavornet down!") + CASs <- c("75-07-0", "64-17-5") + a <- autotranslate(query = CASs, from = "cas", .f = "fn_percept", .verbose = FALSE) + b <- fn_percept(CASs) + expect_equal(a, b) +}) + +etox_up <- ping_service("etox") +test_that("autotranslate translates", { + skip_if_not(etox_up, "ETOX down!") + x <- autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") + y <- get_etoxid("1071-83-6", from = "cas") + expect_equal(x, y) +}) + + +test_that("coverge function works", { + skip_if_not(fn_up) + skip_if_not(etox_up) + out <- check_coverage(c("triclosan", NA, "balloon"), + from = "name", + sources = c("etox", "fn")) + df <- tibble(query = c("triclosan", NA, "balloon"), + etox = c(TRUE, FALSE, FALSE), + fn = c(FALSE, FALSE, FALSE)) + expect_equivalent(out, df) +}) + From 18ee4f5435e107ffb8b56d89230c23e184923fa3 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 12:50:55 -0400 Subject: [PATCH 20/43] fixed another skipped test #255 --- NAMESPACE | 3 +++ R/extractors.R | 28 +++++++++++++++++++++++++++- tests/testthat/test-extractors.R | 7 +++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e6c28d0e..e8071f8b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(cas,aw_query) S3method(cas,chebi_comp_entity) +S3method(cas,ci_query) S3method(cas,cts_compinfo) S3method(cas,default) S3method(cas,etox_basic) @@ -10,6 +11,7 @@ S3method(cas,pan_query) S3method(cas,wd_ident) S3method(inchikey,aw_query) S3method(inchikey,chebi_comp_entity) +S3method(inchikey,ci_query) S3method(inchikey,cts_compinfo) S3method(inchikey,default) S3method(inchikey,etox_basic) @@ -19,6 +21,7 @@ S3method(inchikey,pc_prop) S3method(inchikey,wd_ident) S3method(smiles,aw_query) S3method(smiles,chebi_comp_entity) +S3method(smiles,ci_query) S3method(smiles,cts_compinfo) S3method(smiles,default) S3method(smiles,etox_basic) diff --git a/R/extractors.R b/R/extractors.R index 17ae8cc4..a27f2b19 100644 --- a/R/extractors.R +++ b/R/extractors.R @@ -61,6 +61,14 @@ cas.etox_basic <- function(x, ...) { }) } +#' @export +cas.ci_query <- function(x, ...) { + sapply(x, function(y) { + if (length(y) == 1 && is.na(y)) + return(NA) + unique(y$cas) + }) +} # InChIKey ---------------------------------------------------------------- #' @rdname extractors @@ -71,7 +79,7 @@ inchikey <- function(x, ...){ #' @export inchikey.default <- function(x, ...) { - stop(paste("No inchikey method for class", class(x))) + stop(paste(" No inchikey method for class", class(x))) } #' @export @@ -121,6 +129,15 @@ inchikey.cts_compinfo <- function(x, ...) { sapply(x, function(x) x$inchikey) } +#' @export +inchikey.ci_query <- function(x, ...) { + sapply(x, function(y) { + if (length(y) == 1 && is.na(y)) + return(NA) + unique(y$inchikey) + }) +} + # SMILES ------------------------------------------------------------------ #' @rdname extractors #' @export @@ -172,3 +189,12 @@ smiles.pc_prop <- function(x, ...) { smiles.wd_ident <- function(x, ...) { x$smiles } + +#' @export +smiles.ci_query <- function(x, ...) { + sapply(x, function(y) { + if (length(y) == 1 && is.na(y)) + return(NA) + unique(y$smiles) + }) +} diff --git a/tests/testthat/test-extractors.R b/tests/testthat/test-extractors.R index 89703800..abffbacc 100644 --- a/tests/testthat/test-extractors.R +++ b/tests/testthat/test-extractors.R @@ -13,12 +13,11 @@ test_that("extractors work with chemid", { skip_on_cran() skip_if_not(ping_service("ci"), "CHEMID service is down") - skip("ci_query isn't working right now") - out_ci_query <- ci_query(c('Aspirin', 'Triclosan'), type = 'name') + out_ci_query <- ci_query(c('Aspirin', 'Triclosan'), from = 'name') expect_equivalent(cas(out_ci_query), c("50-78-2", "3380-34-5")) expect_equivalent(inchikey(out_ci_query), c("BSYNRYMUTXBXSQ-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-N")) - expect_equivalent(smiles(out_ci_query), c("CC(=O)", "c1(Oc2c(cc(Cl)")) + expect_equivalent(smiles(out_ci_query), c("CC(=O)", "Oc1cc(Cl)")) }) test_that("extractors work with opsin", { @@ -36,7 +35,7 @@ test_that("extractors work with Alanwood", { skip_on_cran() skip_if_not(ping_service("aw"), "Alanwood database not reachable") - out_aw_query <- aw_query(c('Fluazinam', 'Diclofop'), type = 'com') + out_aw_query <- aw_query(c('Fluazinam', 'Diclofop'), from = 'name') expect_equivalent(cas(out_aw_query), c("79622-59-6", "40843-25-2")) expect_equivalent(inchikey(out_aw_query), c("UZCGKGPEKUCDTF-UHFFFAOYSA-N", "OOLBCHYXZDXLDS-UHFFFAOYSA-N")) From 91471b217832503b2efbc3387e755e2f051d8c00 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 13:33:18 -0400 Subject: [PATCH 21/43] finishing touches --- DESCRIPTION | 3 ++- NAMESPACE | 3 +++ R/integration.R | 12 ++++++++---- man/autotranslate.Rd | 4 ++-- man/check_coverage.Rd | 7 +++++-- tests/testthat/test-chebi.R | 2 +- 6 files changed, 21 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8de8ac61..b549153f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,7 @@ Suggests: covr, robotstxt, knitr, - rmarkdown + rmarkdown, + ggplot2 RoxygenNote: 7.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index e8071f8b..1a265fe2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -84,6 +84,7 @@ export(srs_query) export(wd_ident) import(RCurl) import(dplyr) +import(ggplot2) import(httr) import(jsonlite) import(rvest) @@ -111,6 +112,8 @@ importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_df) importFrom(purrr,map_dfr) +importFrom(rlang,as_function) +importFrom(rlang,fn_fmls) importFrom(rvest,html_table) importFrom(stats,rexp) importFrom(stats,rgamma) diff --git a/R/integration.R b/R/integration.R index ca0ee1b0..9cf6ae3e 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1,7 +1,7 @@ #' Auto-translate identifiers and search databases #' -#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accept the type of query you've supplied, this will try to automatically translate it using CTS and run the query. #' #' @param query character; the search term #' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" @@ -10,6 +10,7 @@ #' @param ... other arguments passed to the function specified with \code{.f} #' #' @return returns results from \code{.f} +#' @importFrom rlang as_function fn_fmls #' @export #' #' @examples @@ -26,8 +27,8 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] if(.verbose){ message( - glue::glue("{.f} doesn't accept {from}. - Attempting to translate to {new_from} with CTS. ")) + paste0(.f, " doesn't accept ", from, ".\n", "Attempting to translte to ", new_from, " with CTS. ") + ) } new_query <- cts_convert(query, from = from, to = new_from, choices = 1) #would like to try a again if cts fails the first time (as it often does). @@ -44,10 +45,12 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { #' @param query character; the search term #' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" #' @param sources character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked. +#' @param plot logical; plot a graphical representation of results. #' #' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query #' @export -#' +#' @import dplyr +#' @import ggplot2 #' @examples #' \dontrun{ #' check_coverage("hexane", from = "name") @@ -84,6 +87,7 @@ check_coverage <- function(query, from, out <- bind_cols(query = query, out) if (plot) { + requireNamespace("ggplot2", quietly = TRUE) df <- out %>% pivot_longer(-query, names_to = "source", values_to = "covered") %>% group_by(source) %>% diff --git a/man/autotranslate.Rd b/man/autotranslate.Rd index dac92ea0..b62b25f6 100644 --- a/man/autotranslate.Rd +++ b/man/autotranslate.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/integration.R \name{autotranslate} \alias{autotranslate} \title{Auto-translate identifiers and search databases} @@ -21,7 +21,7 @@ autotranslate(query, from, .f, .verbose = TRUE, ...) returns results from \code{.f} } \description{ -Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accpet the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accept the type of query you've supplied, this will try to automatically translate it using CTS and run the query. } \examples{ \dontrun{ diff --git a/man/check_coverage.Rd b/man/check_coverage.Rd index 1080d6f7..271ae49f 100644 --- a/man/check_coverage.Rd +++ b/man/check_coverage.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/integration.R \name{check_coverage} \alias{check_coverage} \title{Check data source coverage of compounds} @@ -7,7 +7,8 @@ check_coverage( query, from, - sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs") + sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs"), + plot = FALSE ) } \arguments{ @@ -16,6 +17,8 @@ check_coverage( \item{from}{character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey"} \item{sources}{character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked.} + +\item{plot}{logical; plot a graphical representation of results.} } \value{ a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query diff --git a/tests/testthat/test-chebi.R b/tests/testthat/test-chebi.R index f5856c43..92a8afd3 100644 --- a/tests/testthat/test-chebi.R +++ b/tests/testthat/test-chebi.R @@ -38,7 +38,7 @@ test_that("chebi returns correct results", { expect_is(A, "list") expect_is(B, "list") - expect_equal(names(a)[1], "chebiid") + expect_equal(names(a)[2], "chebiid") expect_length(names(a), 5) expect_length(names(b), 5) expect_equal(A$`CHEBI:27744`$regnumbers$data[1], "1071-83-6") From 18ab9b9e9903465b4630022e42be6046fca5c2d7 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 14:58:05 -0400 Subject: [PATCH 22/43] added Suggests to make plotting work. --- DESCRIPTION | 7 +++++-- NAMESPACE | 1 - NEWS.md | 5 +++++ R/integration.R | 8 +++++--- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b549153f..c4fb9629 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,8 @@ Imports: purrr, data.tree, tibble, - base64enc + base64enc, + rlang Suggests: testthat, rcdk, @@ -47,6 +48,8 @@ Suggests: robotstxt, knitr, rmarkdown, - ggplot2 + ggplot2, + tidyr, + forcats RoxygenNote: 7.1.0 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 1a265fe2..34b72b89 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -84,7 +84,6 @@ export(srs_query) export(wd_ident) import(RCurl) import(dplyr) -import(ggplot2) import(httr) import(jsonlite) import(rvest) diff --git a/NEWS.md b/NEWS.md index 97029504..5edb21c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,11 @@ * Download images of substances from ChemSpider with cs_img() +## New Features + +* `autotranslate()` is a wrapper that accepts any type of query and any webchem function with a `from` argument and will use CTS to translate the query if needed. +* `check_coverage()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. + ## Minor Improvements * The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions diff --git a/R/integration.R b/R/integration.R index 9cf6ae3e..7416fe22 100644 --- a/R/integration.R +++ b/R/integration.R @@ -50,7 +50,6 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { #' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query #' @export #' @import dplyr -#' @import ggplot2 #' @examples #' \dontrun{ #' check_coverage("hexane", from = "name") @@ -88,13 +87,16 @@ check_coverage <- function(query, from, if (plot) { requireNamespace("ggplot2", quietly = TRUE) + requireNamespace("tidyr", quietly = TRUE) + requireNamespace("forcats", quietly = TRUE) + df <- out %>% - pivot_longer(-query, names_to = "source", values_to = "covered") %>% + tidyr::pivot_longer(-query, names_to = "source", values_to = "covered") %>% group_by(source) %>% mutate(num = sum(covered)) %>% ungroup() %>% arrange(desc(num), source) %>% - mutate(source = fct_inorder(source)) + mutate(source = forcats::fct_inorder(source)) p <- ggplot(df, aes(x = source, y = query, fill = covered)) + From 52b0913c2e132e0030549ea8b3192c00ee96347e Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 15:11:53 -0400 Subject: [PATCH 23/43] I don't know why these tests are failing. They work when run manually in a fresh R session. --- tests/testthat/test-integration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 536ade67..118b09c8 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -4,7 +4,7 @@ fn_up <- ping_service("fn") test_that("autotranslate works when no translation needed", { skip_if_not(fn_up, "Flavornet down!") CASs <- c("75-07-0", "64-17-5") - a <- autotranslate(query = CASs, from = "cas", .f = "fn_percept", .verbose = FALSE) + a <- autotranslate(query = CASs, from = "cas", .f = "fn_percept", .verbose = TRUE) b <- fn_percept(CASs) expect_equal(a, b) }) From e91096571318b9bc590f65efe3265771a1040cb7 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 17:46:15 -0400 Subject: [PATCH 24/43] fixed a bug in chooser() utility I didn't know existed --- R/integration.R | 10 ++++------ R/utils.R | 14 +++++++------- tests/testthat/test-integration.R | 5 ++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/R/integration.R b/R/integration.R index 7416fe22..6c3a5431 100644 --- a/R/integration.R +++ b/R/integration.R @@ -20,20 +20,18 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { f <- rlang::as_function(.f) pos_froms <- eval(rlang::fn_fmls(f)$from) - if (from %in% pos_froms) { - f(query = query, from = from, ...) - } else { + if (!from %in% pos_froms) { pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] if(.verbose){ message( - paste0(.f, " doesn't accept ", from, ".\n", "Attempting to translte to ", new_from, " with CTS. ") + paste0(.f, " doesn't accept ", from, ".\n", "Attempting to translate to ", new_from, " with CTS. ") ) } new_query <- cts_convert(query, from = from, to = new_from, choices = 1) #would like to try a again if cts fails the first time (as it often does). - f(query = new_query, from = new_from, ...) } + f(query = new_query, from = new_from, match = "best", ...) } @@ -83,7 +81,7 @@ check_coverage <- function(query, from, out <- lapply(sources, foo, query = query, from = from) out <- setNames(out, names(sources)) - out <- bind_cols(query = query, out) + out <- dplyr::bind_cols(query = query, out) if (plot) { requireNamespace("ggplot2", quietly = TRUE) diff --git a/R/utils.R b/R/utils.R index 43e5d075..bda8509d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -408,8 +408,11 @@ as.cas <- function(x){ #' chooser(test, "all") #' chooser(test, 3) chooser <- function(x, choices){ - if(interactive() & !is.null(choices)){ + if(choices == 1) { + out <- x[1] + } #only in an interactive R session when number of choices is specified + if(interactive()) { if(is.numeric(choices) & choices > length(x)) { choices = "all" warning('Number of choices excedes length of x, using all choices instead', @@ -419,19 +422,16 @@ chooser <- function(x, choices){ pick <- menu(x, graphics = FALSE, 'Select one:') out <- x[pick] } - if(choices == 1) { - out <- x[1] - } + if(is.numeric(choices) & choices > 1){ pick <- menu(head(x, choices), graphics = FALSE, 'Select one:') out <- x[pick] - } - } else { + } else { out <- x + } } return(out) } - #' matcher utility #' #' @param x a vector diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 118b09c8..7e006e68 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -12,8 +12,8 @@ test_that("autotranslate works when no translation needed", { etox_up <- ping_service("etox") test_that("autotranslate translates", { skip_if_not(etox_up, "ETOX down!") - x <- autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") - y <- get_etoxid("1071-83-6", from = "cas") + x <- autotranslate(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") + y <- get_etoxid(query = "1071-83-6", from = "cas") expect_equal(x, y) }) @@ -29,4 +29,3 @@ test_that("coverge function works", { fn = c(FALSE, FALSE, FALSE)) expect_equivalent(out, df) }) - From b3f7a7ccb2f552bb6e52824ec5fec6455c261cdc Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 19:06:36 -0400 Subject: [PATCH 25/43] added deprecated arguments to documentation --- R/alanwood.R | 1 + R/chemid.R | 1 + R/cts.R | 1 + R/flavornet.R | 1 + R/integration.R | 5 ++++- R/pan.R | 1 + man/aw_query.Rd | 2 ++ man/ci_query.Rd | 2 ++ man/cts_compinfo.Rd | 2 ++ man/fn_percept.Rd | 2 ++ man/pan_query.Rd | 2 ++ 11 files changed, 19 insertions(+), 1 deletion(-) diff --git a/R/alanwood.R b/R/alanwood.R index 5e7d54db..c04ca3cc 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -11,6 +11,7 @@ #' @param force_build logical; force building a new index? See #' \code{\link{build_aw_idx}} for more details. #' @param ... currently unused. +#' @param type deprecated #' @return A list of eight entries: common-name, status, preferred IUPAC Name, #' IUPAC Name, cas, formula, activity, subactivity, inchikey, inchi and source #' url. diff --git a/R/chemid.R b/R/chemid.R index 278a89e2..9f3ca3e7 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -20,6 +20,7 @@ #' \code{"ask"} enters an interactive mode and the user is asked for input, #' \code{"na"} returns NA if multiple hits are found. #' @param verbose logical; should a verbose output be printed on the console? +#' @param type deprecated #' @return A list of 8 entries: name (vector), synonyms (vector), cas (vector), #' inchi (vector), inchikey (vector), smiles(vector), toxicity (data.frame), #' physprop (data.frame) and source_url. diff --git a/R/cts.R b/R/cts.R index 60e54a9c..97e9723e 100644 --- a/R/cts.R +++ b/R/cts.R @@ -7,6 +7,7 @@ #' @param query character; InChIkey. #' @param from character; currently only accepts "inchikey". #' @param verbose logical; should a verbose output be printed on the console? +#' @param inchikey deprecated #' @return a list of lists (for each supplied inchikey): #' a list of 7. inchikey, inchicode, molweight, exactmass, formula, synonyms and externalIds #' @author Eduard Szöcs, \email{eduardszoecs@@gmail.com} diff --git a/R/flavornet.R b/R/flavornet.R index fbafd3d8..035f05f5 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -9,6 +9,7 @@ #' @param query character; CAS number to search by. See \code{\link{is.cas}} for correct formatting #' @param from character; currently only CAS numbers are accepted. #' @param verbose logical; should a verbose output be printed on the console? +#' @param CAS deprecated #' @param ... currently unused #' #' @return A named character vector containing flavor percepts or NA's in the case of CAS numbers that are not found diff --git a/R/integration.R b/R/integration.R index 6c3a5431..378cc135 100644 --- a/R/integration.R +++ b/R/integration.R @@ -20,6 +20,7 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { f <- rlang::as_function(.f) pos_froms <- eval(rlang::fn_fmls(f)$from) + if (!from %in% pos_froms) { pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] @@ -30,8 +31,10 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { } new_query <- cts_convert(query, from = from, to = new_from, choices = 1) #would like to try a again if cts fails the first time (as it often does). + from <- new_from + query <- new_query } - f(query = new_query, from = new_from, match = "best", ...) + f(query = query, from = from, ...) } diff --git a/R/pan.R b/R/pan.R index 95701f31..d47cb824 100644 --- a/R/pan.R +++ b/R/pan.R @@ -6,6 +6,7 @@ #' @importFrom rvest html_table #' @importFrom stats rgamma #' @param query character; searchterm, e.g. chemical name or CAS. +#' @param from character; one of "name" or "cas". #' @param match character; \code{match="all"} returns all matches, #' \code{match="first"} the first one and \code{match="best"} (recommended) the hit with the lowest #' Levenshtein distance between query and matching synonym. diff --git a/man/aw_query.Rd b/man/aw_query.Rd index 724d5e73..c11ba9d5 100644 --- a/man/aw_query.Rd +++ b/man/aw_query.Rd @@ -23,6 +23,8 @@ aw_query( \item{force_build}{logical; force building a new index? See \code{\link{build_aw_idx}} for more details.} +\item{type}{deprecated} + \item{...}{currently unused.} } \value{ diff --git a/man/ci_query.Rd b/man/ci_query.Rd index 8d9206bf..2e5b4afd 100644 --- a/man/ci_query.Rd +++ b/man/ci_query.Rd @@ -28,6 +28,8 @@ returns only the first match, \code{"best"} the best matching (by name) ID, \code{"na"} returns NA if multiple hits are found.} \item{verbose}{logical; should a verbose output be printed on the console?} + +\item{type}{deprecated} } \value{ A list of 8 entries: name (vector), synonyms (vector), cas (vector), diff --git a/man/cts_compinfo.Rd b/man/cts_compinfo.Rd index f32320b9..cf6ffb06 100644 --- a/man/cts_compinfo.Rd +++ b/man/cts_compinfo.Rd @@ -12,6 +12,8 @@ cts_compinfo(query, from = "inchikey", verbose = TRUE, inchikey) \item{from}{character; currently only accepts "inchikey".} \item{verbose}{logical; should a verbose output be printed on the console?} + +\item{inchikey}{deprecated} } \value{ a list of lists (for each supplied inchikey): diff --git a/man/fn_percept.Rd b/man/fn_percept.Rd index 6ff851fe..423cec11 100644 --- a/man/fn_percept.Rd +++ b/man/fn_percept.Rd @@ -13,6 +13,8 @@ fn_percept(query, from = "cas", verbose = TRUE, CAS, ...) \item{verbose}{logical; should a verbose output be printed on the console?} +\item{CAS}{deprecated} + \item{...}{currently unused} } \value{ diff --git a/man/pan_query.Rd b/man/pan_query.Rd index ce2b56a5..e40d5282 100644 --- a/man/pan_query.Rd +++ b/man/pan_query.Rd @@ -15,6 +15,8 @@ pan_query( \arguments{ \item{query}{character; searchterm, e.g. chemical name or CAS.} +\item{from}{character; one of "name" or "cas".} + \item{match}{character; \code{match="all"} returns all matches, \code{match="first"} the first one and \code{match="best"} (recommended) the hit with the lowest Levenshtein distance between query and matching synonym.} From fd220c8a1c42dd734c71599a3e587ccd35e255ce Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 11 Jun 2020 19:06:56 -0400 Subject: [PATCH 26/43] REALLY fixed bug in chooser() --- R/utils.R | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/R/utils.R b/R/utils.R index bda8509d..b2e4b9c5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -408,27 +408,26 @@ as.cas <- function(x){ #' chooser(test, "all") #' chooser(test, 3) chooser <- function(x, choices){ - if(choices == 1) { - out <- x[1] - } - #only in an interactive R session when number of choices is specified - if(interactive()) { + if (is.null(choices)) { + out <- x + } else if (choices == 1) { + out <- x[1] + } else if (interactive()) { if(is.numeric(choices) & choices > length(x)) { - choices = "all" + choices <- "all" warning('Number of choices excedes length of x, using all choices instead', immediate. = TRUE) } - if(choices == "all") { #then give all of x as possible choices + if (choices == "all") { #then give all of x as possible choices pick <- menu(x, graphics = FALSE, 'Select one:') out <- x[pick] } - - if(is.numeric(choices) & choices > 1){ + if (is.numeric(choices) & choices > 1){ pick <- menu(head(x, choices), graphics = FALSE, 'Select one:') out <- x[pick] - } else { - out <- x } + } else { + stop('Can only use "choices" in interactive mode.') } return(out) } From bd97009e854155711785dce9295a329c48a2f7e1 Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 12 Jun 2020 10:11:27 -0400 Subject: [PATCH 27/43] switched NA to NA_character_ --- R/chemid.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/R/chemid.R b/R/chemid.R index 9f3ca3e7..0fc9a15b 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -66,7 +66,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), on.exit(suppressWarnings(closeAllConnections())) if (is.na(query)) { message('query is NA! Returning NA.\n') - return(NA) + return(NA_character_) } query <- URLencode(query) baseurl <- switch( @@ -82,14 +82,14 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), ttt <- try(read_html(qurl), silent = TRUE) if (inherits(ttt, 'try-error')) { message('Not found! Returning NA.\n') - return(NA) + return(NA_character_) } tit <- xml_text(xml_find_all(ttt, "//head/title")) no <- xml_text(xml_find_all(ttt, "//h3")) if (length(no) != 0 && 'The following query produced no records:' %in% no) { message('Not found! Returning NA.\n') - return(NA) + return(NA_character_) } # handle multiple inputs @@ -124,7 +124,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), if (match == 'na') { if (verbose) message("Returning NA. \n") - return(NA) + return(NA_character_) } if (match == 'ask') { @@ -133,7 +133,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), message("\nEnter rownumber of compounds (other inputs will return 'NA'):\n") # prompt take <- as.numeric(scan(n = 1, quiet = TRUE)) if (length(take) == 0) { - return(NA) + return(NA_character_) } if (take %in% seq_len(nrow(tochoose))) { hit_cas <- hit_cas[take] @@ -146,7 +146,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), if (is.na(hit_cas)) { if (verbose) message('CAS not found! Returning NA.\n') - return(NA) + return(NA_character_) } # retry with CAS-API @@ -163,25 +163,25 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))){ - name <- NA + name <- NA_character_ }else{ name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li"))){ - synonyms <- NA + synonyms <- NA_character_ }else{ synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li"))){ - cas <- NA + cas <- NA_character_ } else { cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))){ - inchi <- NA + inchi <- NA_character_ } else { inchi <- gsub('\\n|\\t', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] @@ -189,7 +189,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]"))){ - inchikey <- NA + inchikey <- NA_character_ } else { inchikey <- gsub('\\n|\\t|\\r', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) @@ -197,7 +197,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]"))){ - smiles <- NA + smiles <- NA_character_ } else { smiles <- gsub('\\n|\\t|\\r', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) @@ -205,13 +205,13 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))){ - toxicity <- NA + toxicity <- NA_character_ } else { toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] } if(is.na(xml_find_first(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))){ - physprop <- NA + physprop <- NA_character_ } else { physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) From b8f928949e243e712f5bf293c5e37942b189266e Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 12 Jun 2020 10:34:47 -0400 Subject: [PATCH 28/43] Revert "switched NA to NA_character_" This reverts commit 04a13990e125ab66780df788892bab6cbbfcac71. --- R/chemid.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/R/chemid.R b/R/chemid.R index 0fc9a15b..9f3ca3e7 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -66,7 +66,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), on.exit(suppressWarnings(closeAllConnections())) if (is.na(query)) { message('query is NA! Returning NA.\n') - return(NA_character_) + return(NA) } query <- URLencode(query) baseurl <- switch( @@ -82,14 +82,14 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), ttt <- try(read_html(qurl), silent = TRUE) if (inherits(ttt, 'try-error')) { message('Not found! Returning NA.\n') - return(NA_character_) + return(NA) } tit <- xml_text(xml_find_all(ttt, "//head/title")) no <- xml_text(xml_find_all(ttt, "//h3")) if (length(no) != 0 && 'The following query produced no records:' %in% no) { message('Not found! Returning NA.\n') - return(NA_character_) + return(NA) } # handle multiple inputs @@ -124,7 +124,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), if (match == 'na') { if (verbose) message("Returning NA. \n") - return(NA_character_) + return(NA) } if (match == 'ask') { @@ -133,7 +133,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), message("\nEnter rownumber of compounds (other inputs will return 'NA'):\n") # prompt take <- as.numeric(scan(n = 1, quiet = TRUE)) if (length(take) == 0) { - return(NA_character_) + return(NA) } if (take %in% seq_len(nrow(tochoose))) { hit_cas <- hit_cas[take] @@ -146,7 +146,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), if (is.na(hit_cas)) { if (verbose) message('CAS not found! Returning NA.\n') - return(NA_character_) + return(NA) } # retry with CAS-API @@ -163,25 +163,25 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li"))){ - name <- NA_character_ + name <- NA }else{ name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li"))){ - synonyms <- NA_character_ + synonyms <- NA }else{ synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li"))){ - cas <- NA_character_ + cas <- NA } else { cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) } if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))){ - inchi <- NA_character_ + inchi <- NA } else { inchi <- gsub('\\n|\\t', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] @@ -189,7 +189,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]"))){ - inchikey <- NA_character_ + inchikey <- NA } else { inchikey <- gsub('\\n|\\t|\\r', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) @@ -197,7 +197,7 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]"))){ - smiles <- NA_character_ + smiles <- NA } else { smiles <- gsub('\\n|\\t|\\r', '', xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) @@ -205,13 +205,13 @@ ci_query <- function(query, from = c('name', 'rn', 'inchikey', 'cas'), } if(is.na(xml_find_first(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))){ - toxicity <- NA_character_ + toxicity <- NA } else { toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] } if(is.na(xml_find_first(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))){ - physprop <- NA_character_ + physprop <- NA } else { physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) From 6e24d7311a4a88a8ac3830cc9a0729feea232e56 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 16 Jun 2020 08:59:28 -0400 Subject: [PATCH 29/43] fixed merge --- NEWS.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5edb21c0..07d82cc1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,11 +1,8 @@ # webchem 1.0.0.9001 -## NEW FEATURES - -* Download images of substances from ChemSpider with cs_img() - ## New Features +* Download images of substances from ChemSpider with `cs_img()` * `autotranslate()` is a wrapper that accepts any type of query and any webchem function with a `from` argument and will use CTS to translate the query if needed. * `check_coverage()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. @@ -15,7 +12,6 @@ * `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions * Possible values for `"from"` have been made more consistent across functions - # webchem 1.0.0 ## NEW FEATURES From 14ccbd2fcf2588907eb58cde351e85603dd7193d Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 19 Jun 2020 11:34:05 -0400 Subject: [PATCH 30/43] changed function name from check_coverage to has_entry --- NAMESPACE | 2 +- NEWS.md | 2 +- R/integration.R | 4 ++-- man/{check_coverage.Rd => has_entry.Rd} | 8 ++++---- tests/testthat/test-integration.R | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) rename man/{check_coverage.Rd => has_entry.Rd} (90%) diff --git a/NAMESPACE b/NAMESPACE index 34b72b89..aa00da53 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ export(aw_query) export(build_aw_idx) export(cas) export(chebi_comp_entity) -export(check_coverage) export(ci_query) export(cid_compinfo) export(cir) @@ -61,6 +60,7 @@ export(get_cid) export(get_csid) export(get_etoxid) export(get_wdid) +export(has_entry) export(inchikey) export(is.cas) export(is.inchikey) diff --git a/NEWS.md b/NEWS.md index 07d82cc1..10589bb6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ * Download images of substances from ChemSpider with `cs_img()` * `autotranslate()` is a wrapper that accepts any type of query and any webchem function with a `from` argument and will use CTS to translate the query if needed. -* `check_coverage()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. +* `has_entry()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. ## Minor Improvements diff --git a/R/integration.R b/R/integration.R index 378cc135..6780d6ae 100644 --- a/R/integration.R +++ b/R/integration.R @@ -53,9 +53,9 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { #' @import dplyr #' @examples #' \dontrun{ -#' check_coverage("hexane", from = "name") +#' has_entry("hexane", from = "name") #' } -check_coverage <- function(query, from, +has_entry <- function(query, from, sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs"), plot = FALSE) { diff --git a/man/check_coverage.Rd b/man/has_entry.Rd similarity index 90% rename from man/check_coverage.Rd rename to man/has_entry.Rd index 271ae49f..4133d37a 100644 --- a/man/check_coverage.Rd +++ b/man/has_entry.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/integration.R -\name{check_coverage} -\alias{check_coverage} +\name{has_entry} +\alias{has_entry} \title{Check data source coverage of compounds} \usage{ -check_coverage( +has_entry( query, from, sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs"), @@ -28,6 +28,6 @@ Checks if entries are found in (most) data sources included in webchem } \examples{ \dontrun{ -check_coverage("hexane", from = "name") +has_entry("hexane", from = "name") } } diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 7e006e68..b70fbf0e 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -21,7 +21,7 @@ test_that("autotranslate translates", { test_that("coverge function works", { skip_if_not(fn_up) skip_if_not(etox_up) - out <- check_coverage(c("triclosan", NA, "balloon"), + out <- has_entry(c("triclosan", NA, "balloon"), from = "name", sources = c("etox", "fn")) df <- tibble(query = c("triclosan", NA, "balloon"), From 398a504f48ef53e7585fc7158ef7155f5e2a0361 Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 19 Jun 2020 13:46:45 -0400 Subject: [PATCH 31/43] pass NAs through --- R/flavornet.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/flavornet.R b/R/flavornet.R index 035f05f5..debf0b63 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -32,6 +32,7 @@ fn_percept <- function(query, from = "cas", verbose = TRUE, CAS, ...) warning('"CAS" is now deprecated. Please use "query" instead. ') query <- CAS } + if (is.na(query)) return(NA) match.arg(from) foo <- function (query, verbose){ on.exit(suppressWarnings(closeAllConnections())) From 86efab911b2c647248bd811241fa85e2ae84494d Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 19 Jun 2020 13:47:31 -0400 Subject: [PATCH 32/43] switched to base R plotting --- DESCRIPTION | 4 +-- R/integration.R | 50 ++++++++++++++++--------------- tests/testthat/test-integration.R | 3 +- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c4fb9629..80ae2409 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,8 +48,6 @@ Suggests: robotstxt, knitr, rmarkdown, - ggplot2, - tidyr, - forcats + plot.matrix RoxygenNote: 7.1.0 VignetteBuilder: knitr diff --git a/R/integration.R b/R/integration.R index 6780d6ae..752ced28 100644 --- a/R/integration.R +++ b/R/integration.R @@ -87,30 +87,32 @@ has_entry <- function(query, from, out <- dplyr::bind_cols(query = query, out) if (plot) { - requireNamespace("ggplot2", quietly = TRUE) - requireNamespace("tidyr", quietly = TRUE) - requireNamespace("forcats", quietly = TRUE) - - df <- out %>% - tidyr::pivot_longer(-query, names_to = "source", values_to = "covered") %>% - group_by(source) %>% - mutate(num = sum(covered)) %>% - ungroup() %>% - arrange(desc(num), source) %>% - mutate(source = forcats::fct_inorder(source)) - - p <- - ggplot(df, aes(x = source, y = query, fill = covered)) + - geom_tile(color = "grey30") + - coord_fixed(expand = 0) + - scale_fill_manual("Covered:", - values = c("TRUE" = "#3BC03B", "FALSE" = "#C7010B"), - na.value = "grey70") + - scale_x_discrete(position = "top") + - theme(axis.title = element_blank(), - axis.ticks = element_blank()) - - print(p) + if (!requireNamespace("plot.matrix", quietly = TRUE)) { + warning("The plot.matrix package is required for plotting results") + } else { + out <- filter(out, !is.na(query)) + colorder <- select(out, -query) %>% + colSums(., na.rm = TRUE) %>% + sort(decreasing = TRUE) %>% + names() + pmat <- out %>% + select(all_of(colorder)) %>% + as.matrix() + opar <- par(no.readonly = TRUE) + par(mar=c(5.1, 7.1, 4.1, 4.1)) # adapt margins + plot.matrix:::plot.matrix( + pmat, + col = c("#C7010B", "#3BC03B"), + breaks = c(FALSE, TRUE), + na.col = "grey70", + axis.col = list(side = 3), + axis.row = list(las = 2, labels = out$query), + xlab = NA, + ylab = NA, + main = NA + ) + par(opar) + } } return(out) } \ No newline at end of file diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index b70fbf0e..846d2444 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -18,7 +18,7 @@ test_that("autotranslate translates", { }) -test_that("coverge function works", { +test_that("has_entry() function works", { skip_if_not(fn_up) skip_if_not(etox_up) out <- has_entry(c("triclosan", NA, "balloon"), @@ -29,3 +29,4 @@ test_that("coverge function works", { fn = c(FALSE, FALSE, FALSE)) expect_equivalent(out, df) }) + From a41f2997874850ef88b256c097e66ad4daeb1747 Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 19 Jun 2020 17:39:25 -0400 Subject: [PATCH 33/43] small fixes --- R/flavornet.R | 2 +- R/integration.R | 8 ++++---- tests/testthat/test-chemspider.R | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/flavornet.R b/R/flavornet.R index debf0b63..d0f4523b 100644 --- a/R/flavornet.R +++ b/R/flavornet.R @@ -32,9 +32,9 @@ fn_percept <- function(query, from = "cas", verbose = TRUE, CAS, ...) warning('"CAS" is now deprecated. Please use "query" instead. ') query <- CAS } - if (is.na(query)) return(NA) match.arg(from) foo <- function (query, verbose){ + if (is.na(query)) return(NA) on.exit(suppressWarnings(closeAllConnections())) qurl <- paste0("http://www.flavornet.org/info/",query,".html") if (verbose) diff --git a/R/integration.R b/R/integration.R index 752ced28..ccee9164 100644 --- a/R/integration.R +++ b/R/integration.R @@ -98,9 +98,9 @@ has_entry <- function(query, from, pmat <- out %>% select(all_of(colorder)) %>% as.matrix() - opar <- par(no.readonly = TRUE) - par(mar=c(5.1, 7.1, 4.1, 4.1)) # adapt margins - plot.matrix:::plot.matrix( + opar <- graphics::par(no.readonly = TRUE) + graphics::par(mar=c(5.1, 7.1, 4.1, 4.1)) # adapt margins + plot( pmat, col = c("#C7010B", "#3BC03B"), breaks = c(FALSE, TRUE), @@ -111,7 +111,7 @@ has_entry <- function(query, from, ylab = NA, main = NA ) - par(opar) + graphics::par(opar) } } return(out) diff --git a/tests/testthat/test-chemspider.R b/tests/testthat/test-chemspider.R index d03bbc8a..2960c3ad 100644 --- a/tests/testthat/test-chemspider.R +++ b/tests/testthat/test-chemspider.R @@ -288,7 +288,7 @@ test_that("cs_img()", { imgs <- cs_img(c(682, 5363, "balloon", NA), dir = tempdir()) expect_true(file.exists(paste0(tempdir(), "/","682.png"))) - expect_true(file.exists(paste0(tempdir(), "/","5383.png"))) + expect_true(file.exists(paste0(tempdir(), "/","5363.png"))) }) # test_that("cs_extcompinfo()", { From d0defe75a2d40a0d2a53705a6510c55cf124b7e1 Mon Sep 17 00:00:00 2001 From: Aariq Date: Mon, 22 Jun 2020 19:21:50 -0400 Subject: [PATCH 34/43] updated `pc_synonyms` `cts_convert`, and `cir_query` to use `match` instead of `choices` #263 --- NEWS.md | 1 + R/cir.R | 51 ++++++++++++++++++----------------- R/cts.R | 27 ++++++++++++------- R/pubchem.R | 33 +++++++++++++---------- R/utils.R | 2 +- tests/testthat/test-cir.R | 8 +++--- tests/testthat/test-cts.R | 13 +-------- tests/testthat/test-pubchem.R | 4 +-- 8 files changed, 72 insertions(+), 67 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10589bb6..3d05798e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ * The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions * `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions * Possible values for `"from"` have been made more consistent across functions +* `pc_synonyms()` and `cir_query()` have been changed to use the `match` argument instead of `choices` for consistency with other functions # webchem 1.0.0 diff --git a/R/cir.R b/R/cir.R index 80b009e8..434fa38d 100644 --- a/R/cir.R +++ b/R/cir.R @@ -12,13 +12,14 @@ #' @param resolver character; what resolver should be used? If NULL (default) #' the identifier type is detected and the different resolvers are used in turn. #' See details for possible resolvers. -#' @param first deprecated, use choices = 1 to return only the first result -#' @param choices if \code{choices = 1}, returns only the first result. To get a -#' number of results to choose from in an interactive menu, provide the number -#' of choices you want or "all" to choose from all synonyms. +#' @param match character; How should multiple hits be handled? \code{"all"} +#' returns all matches, \code{"first"} returns only the first result, +#' \code{"ask"} enters an interactive mode and the user is asked for input, +#' \code{"na"} returns \code{NA} if multiple hits are found. +#' @param choices deprecated. Use the \code{match} argument instead. #' @param verbose logical; should a verbose output be printed on the console? #' @param ... currently not used. -#' @return A list of character vectors. If first = TRUE a vector. +#' @return A list of character vectors. #' @details #' CIR can resolve can be of the following \code{identifier}: Chemical Names, #' IUPAC names, @@ -61,14 +62,14 @@ #' \item \code{'protonable_group_count'} (Number of protonable groups). #' } #' -#' CIR first tries to determine the indetifier type submitted and then +#' CIR first tries to determine the identifier type submitted and then #' uses 'resolvers' to look up the data. #' If no \code{resolver} is supplied, CIR tries different resolvers in #' turn till a hit is found. #' E.g. for names CIR tries first to look up in OPSIN and if this fails #' the local name index of CIR. #' However, it can be also specified which resolvers to use -#' (if you know e.g. know your indentifier type) +#' (if you know e.g. know your identifier type) #' Possible \code{resolvers} are: #' \itemize{ #' \item \code{'name_by_cir'} (Lookup in name index of CIR), @@ -100,24 +101,29 @@ #' @examples #' \donttest{ #' # might fail if API is not available -#' cir_query('Triclosan', 'cas') -#' cir_query("3380-34-5", 'cas', first = TRUE) -#' cir_query("3380-34-5", 'cas', resolver = 'cas_number') -#' cir_query("3380-34-5", 'smiles') -#' cir_query('Triclosan', 'mw') +#' cir_query("Triclosan", "cas") +#' cir_query("3380-34-5", "cas", match = "first") +#' cir_query("3380-34-5", "cas", resolver = "cas_number") +#' cir_query("3380-34-5", "smiles") +#' cir_query("Triclosan", "mw") #' #' # multiple inputs -#' comp <- c('Triclosan', 'Aspirin') -#' cir_query(comp, 'cas', first = TRUE) +#' comp <- c("Triclosan", "Aspirin") +#' cir_query(comp, "cas", match = "first") #' #'} #' @export -cir_query <- function(identifier, representation = 'smiles', resolver = NULL, - first = FALSE, choices = NULL, verbose = TRUE, ...){ - if (first == TRUE) { - message("`first` is deprecated. Using `choices = 1` instead.") - choices = 1 +cir_query <- function(identifier, representation = "smiles", + resolver = NULL, + first = FALSE, + match = c("all", "first", "ask", "na"), + verbose = TRUE, + choices = NULL, + ...){ + if (!missing("choices")) { + stop("`choices` is deprecated. Use `match` instead.") } + match <- match.arg(match) foo <- function(identifier, representation, resolver, first, verbose) { if (is.na(identifier)) { return(NA) @@ -144,9 +150,7 @@ cir_query <- function(identifier, representation = 'smiles', resolver = NULL, message('No representation found... Returning NA.') return(NA) } - # if (first) - # out <- out[1] - out <- chooser(out, choices) + out <- matcher(out, query = identifier, match = match, verbose = verbose) # convert to numeric if (representation %in% c('mw', 'monoisotopic_mass', 'h_bond_donor_count', 'h_bond_acceptor_count', 'h_bond_center_count', @@ -162,8 +166,5 @@ cir_query <- function(identifier, representation = 'smiles', resolver = NULL, out <- lapply(identifier, foo, representation = representation, resolver = resolver, first = first, verbose = verbose) out <- setNames(out, identifier) - # if (first) - if(!is.null(choices)) - out <- unlist(out) return(out) } diff --git a/R/cts.R b/R/cts.R index 97e9723e..93f8979b 100644 --- a/R/cts.R +++ b/R/cts.R @@ -71,8 +71,11 @@ cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){ #' @param from character; type of query ID, e.g. \code{'Chemical Name'} , \code{'InChIKey'}, #' \code{'PubChem CID'}, \code{'ChemSpider'}, \code{'CAS'}. #' @param to character; type to convert to. -#' @param first deprecated. Use choices = 1 instead. -#' @param choices to return only the first result, use 'choices = 1'. To choose a result from an interactive menu, provide a number of choices to choose from or "all". +#' @param match character; How should multiple hits be handled? \code{"all"} +#' returns all matches, \code{"first"} returns only the first result, +#' \code{"ask"} enters an interactive mode and the user is asked for input, +#' \code{"na"} returns \code{NA} if multiple hits are found. +#' @param choices deprecated. Use the \code{match} argument instead. #' @param verbose logical; should a verbose output be printed on the console? #' @param ... currently not used. #' @return a list of character vectors or if \code{choices} is used, then a single named vector. @@ -95,15 +98,22 @@ cts_compinfo <- function(query, from = "inchikey", verbose = TRUE, inchikey){ #' comp <- c("triclosan", "hexane") #' cts_convert(comp, "Chemical Name", "cas") #' } -cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose = TRUE, ...){ - if(!missing("first")) - stop('"first" is deprecated. Use "choices = 1" instead.') +cts_convert <- function(query, + from, + to, + match = c("all", "first", "ask", "na"), + verbose = TRUE, + choices = NULL, + ...){ + if(!missing("choices")) + stop('"choices" is deprecated. Use "match" instead.') if (length(from) > 1 | length(to) > 1) { stop('Cannot handle multiple input or output types. Please provide only one argument for `from` and `to`.') } from <- match.arg(tolower(from), c(cts_from(), "name")) to <- match.arg(tolower(to), c(cts_to(), "name")) + match <- match.arg(match) if (from == "name") { from <- "chemical name" @@ -113,7 +123,7 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose to <- "chemical name" } - foo <- function(query, from, to , first, verbose){ + foo <- function(query, from, to, first, verbose){ if (is.na(query)) return(NA) baseurl <- "http://cts.fiehnlab.ucdavis.edu/service/convert" qurl <- paste0(baseurl, '/', from, '/', to, '/', query) @@ -131,13 +141,12 @@ cts_convert <- function(query, from, to, first = FALSE, choices = NULL, verbose return(NA) } out <- out$result[[1]] - out <- chooser(out, choices) + out <- matcher(out, match = match, query = query, verbose = verbose) return(out) } out <- lapply(query, foo, from = from, to = to, first = first, verbose = verbose) out <- setNames(out, query) - if(!is.null(choices)) - out <- unlist(out) + return(out) } diff --git a/R/pubchem.R b/R/pubchem.R index 7146065d..cf471020 100644 --- a/R/pubchem.R +++ b/R/pubchem.R @@ -375,15 +375,16 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) { #' @param query character; search term. #' @param from character; type of input, can be one of "name" (default), "cid", #' "sid", "aid", "smiles", "inchi", "inchikey" -#' @param interactive deprecated. Use the \code{choices} argument instead -#' @param choices to get only the first synonym, use \code{choices = 1}, to get -#' a number of synonyms to choose from in an interactive menu, provide the -#' number of choices you want or "all" to choose from all synonyms. +#' @param match character; How should multiple hits be handled? \code{"all"} +#' returns all matches, \code{"first"} returns only the first result, +#' \code{"ask"} enters an interactive mode and the user is asked for input, +#' \code{"na"} returns \code{NA} if multiple hits are found. +#' @param choices deprecated. Use the \code{match} argument instead. #' @param verbose logical; should a verbose output be printed on the console? -#' @param arg character; optinal arguments like "name_type=word" to match +#' @param arg character; optional arguments like "name_type=word" to match #' individual words. -#' @param ... optional arguments -#' @return a list of character vectors (one per query). If \code{choices} is used, a single named vector is returned instead. +#' @param ... currently unused +#' @return a named list. #' #' @references Wang, Y., J. Xiao, T. O. Suzek, et al. 2009 PubChem: A Public #' Information System for @@ -411,16 +412,20 @@ pc_prop <- function(cid, properties = NULL, verbose = TRUE, ...) { #' pc_synonyms("Aspirin") #' pc_synonyms(c("Aspirin", "Triclosan")) #' pc_synonyms(5564, from = "cid") -#' pc_synonyms(c("Aspirin", "Triclosan"), choices = 10) +#' pc_synonyms(c("Aspirin", "Triclosan"), match = "ask") #' } -pc_synonyms <- function(query, from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), choices = NULL, verbose = TRUE, - arg = NULL, interactive = 0, ...) { +pc_synonyms <- function(query, + from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), + match = c("all", "first", "ask", "na"), + verbose = TRUE, + arg = NULL, choices = NULL, ...) { # from can be cid | name | smiles | inchi | sdf | inchikey | formula # query <- c("Aspirin") # from = "name" from <- match.arg(from) - if (!missing("interactive")) - stop("'interactive' is deprecated. Use 'choices' instead.") + match <- match.arg(match) + if (!missing("choices")) + stop("'choices' is deprecated. Use 'match' instead.") foo <- function(query, from, verbose, ...) { if (is.na(query)) return(NA) prolog <- "https://pubchem.ncbi.nlm.nih.gov/rest/pug" @@ -444,10 +449,10 @@ pc_synonyms <- function(query, from = c("name", "cid", "sid", "aid", "smiles", " warning(cont$Fault$Details, ". Returning NA.") return(NA) } - out <- unlist(cont) + out <- unlist(cont)[-1] #first result is always an ID number names(out) <- NULL - out <- chooser(out, choices) + out <- matcher(out, query = query, match = match, verbose = verbose) } out <- lapply(query, foo, from = from, verbose = verbose) diff --git a/R/utils.R b/R/utils.R index b2e4b9c5..87568f0b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -436,7 +436,7 @@ chooser <- function(x, choices){ #' @param x a vector #' @param query what the query was, only used if match = "best" #' @param result what the result of the query was, only used if match = "best -#' @param match haracter; How should multiple hits be handeled? "all" returns +#' @param match character; How should multiple hits be handled? "all" returns #' all matched IDs, "first" only the first match, "best" the best matching (by #' name) ID, "ask" is a interactive mode and the user is asked for input, "na" #' @param verbose print messages? diff --git a/tests/testthat/test-cir.R b/tests/testthat/test-cir.R index b5aa92b8..9b871c2a 100644 --- a/tests/testthat/test-cir.R +++ b/tests/testthat/test-cir.R @@ -8,12 +8,12 @@ test_that("cir_query()", { expect_equal(cir_query("3380-34-5", 'stdinchikey', resolver = 'cas_number', verbose = FALSE)[[1]], "InChIKey=XEFQLINVKFYRCS-UHFFFAOYSA-N") expect_true(length(cir_query('Triclosan', 'cas', verbose = FALSE)[[1]]) > 1) - expect_message(cir_query("acetic acid", "mw", first = TRUE)) - expect_length(cir_query('Triclosan', 'cas', choices = 1, verbose = FALSE)[[1]], 1) + expect_message(cir_query("acetic acid", "mw", match = "first")) + expect_length(cir_query('Triclosan', 'cas', match = "first", verbose = FALSE)[[1]], 1) expect_length(cir_query(c('Triclosan', 'Aspirin'), 'cas', verbose = FALSE), 2) - skip("I have no clue why this one fails on R CMD check. It works when run in the console!") - expect_equivalent(cir_query('acetic acid', 'mw', choices = 1), c(`acetic acid` = 60.0524)) + # skip("I have no clue why this one fails on R CMD check. It works when run in the console!") + expect_equivalent(cir_query('acetic acid', 'mw', match = "first"), c(`acetic acid` = 60.0524)) }) diff --git a/tests/testthat/test-cts.R b/tests/testthat/test-cts.R index 802ac2a2..2f513ec0 100644 --- a/tests/testthat/test-cts.R +++ b/tests/testthat/test-cts.R @@ -25,25 +25,14 @@ test_that("cts_convert()", { expect_error(cts_convert(comp, c('Chemical Name', 'CAS'), 'CAS')) expect_error(cts_convert('Triclosan', 'CAS')) expect_true(is.na(suppressWarnings(cts_convert('xxxx', 'Chemical Name', 'inchikey'))[[1]])) - o1 <- cts_convert(comp, 'Chemical Name', 'inchikey', choices = 1, verbose = FALSE) + o1 <- cts_convert(comp, 'Chemical Name', 'inchikey', match = "first", verbose = FALSE) expect_length(o1, 2) expect_equal(o1[[1]], 'XEFQLINVKFYRCS-UHFFFAOYSA-N') - # cts_convert('acetic acid', 'Chemical Name', 'CAS', choices = 1) expect_equivalent(cts_convert(NA, from = "Chemical Name", to = "inchikey"), NA) }) -# # integration tests -# test_that("cts_compinfo(cir_query())", { -# chk_cts() -# chk_cir() -# inchikey <- cir_query('Triclosan', representation = 'stdinchikey', verbose = FALSE) -# inchikey <- gsub('InChIKey=', '', inchikey) -# expect_equal(round(cts_compinfo(inchikey, verbose = FALSE)[[1]][["molweight"]], 3), 289.542) -# }) - - test_that("fromto", { skip_on_cran() skip_if_not(up, "CTS service down") diff --git a/tests/testthat/test-pubchem.R b/tests/testthat/test-pubchem.R index 9a5b39f2..6030a66f 100644 --- a/tests/testthat/test-pubchem.R +++ b/tests/testthat/test-pubchem.R @@ -87,10 +87,10 @@ test_that("pc_synonyms", { skip_on_cran() skip_if_not(up, "PubChem service is down") expect_equivalent(pc_synonyms(NA), NA) - expect_equal(pc_synonyms("Triclosan")[[1]][1], "5564") + expect_equal(pc_synonyms("Acetyl Salicylic Acid")[[1]][1], "aspirin") expect_equal(length(pc_synonyms(c("Triclosan", "Aspirin"))), 2) expect_equal(pc_synonyms("BPGDAMSIGCZZLK-UHFFFAOYSA-N", - from = "inchikey")[[1]][1], "12345") + from = "inchikey")[[1]][1], "Methylene diacetate") expect_true(is.na(suppressWarnings(pc_synonyms("xxxx"))[[1]])) }) From 4deeec3bbe0558f2068b71c1abae05d2e5f0018b Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 23 Jun 2020 09:31:32 -0400 Subject: [PATCH 35/43] changed get_etoxid to use matcher() internally. Added warning for match = "best" and from != "name". Match result output is now just the chemical name. --- NEWS.md | 3 ++- R/etox.R | 68 ++++++++++++++------------------------------------------ 2 files changed, 19 insertions(+), 52 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3d05798e..d102bbf2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,7 +11,8 @@ * The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions * `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions * Possible values for `"from"` have been made more consistent across functions -* `pc_synonyms()` and `cir_query()` have been changed to use the `match` argument instead of `choices` for consistency with other functions +* `pc_synonyms()`, `cts_convert()`, `cir_query()` have been changed to use the `match` argument instead of `choices` for consistency with other functions +* `get_etoxid()` output changed slightly so that the matched chemical name string no longer includes the etoxid in parentheses. # webchem 1.0.0 diff --git a/R/etox.R b/R/etox.R index 60d7614d..fc5bd887 100644 --- a/R/etox.R +++ b/R/etox.R @@ -59,6 +59,9 @@ get_etoxid <- function(query, # checks from <- match.arg(from) match <- match.arg(match) + if (from != "name" & match == "best") { + warning("match = 'best' only makes sense when querying chemical names. ") + } foo <- function(query, from, match, verbose) { on.exit(suppressWarnings(closeAllConnections())) @@ -93,61 +96,24 @@ get_etoxid <- function(query, if (length(subs) == 0) { if (verbose) message("Substance not found! Returning NA.") - id <- NA - matched_sub <- NA - d <- NA - } - if (length(subs) > 0) { + hit <- tibble("query" = query, + "match" = NA, + "etoxid" = NA) + return(hit) + } else { links <- xml_attr(xml_find_all( tt, "//*/table[@class = 'listForm resultList']//a"), "href")[-1] - } - if (length(subs) == 1) { + + subs_names <- gsub(" \\(.*\\)", "", subs) id <- gsub("^.*\\?id=(.*)", "\\1", links) - d <- ifelse(match == "best", 0, as.character(0)) - matched_sub <- subs[1] - } - # multiple hits - if (length(subs) > 1) { - if (verbose) - message("More then one Link found. \n") - if (match == "na") { - if (verbose) - message("Returning NA. \n") - id <- NA - matched_sub <- NA - } - if (match == "all") { - if (verbose) - message("Returning all matches. \n") - id <- gsub("^.*\\?id=(.*)", "\\1", links) - matched_sub <- subs[sapply(id, function(x) grep(x, subs)[1])] - } - if (match == "first") { - if (verbose) - message("Returning first match. \n") - id <- gsub("^.*\\?id=(.*)", "\\1", links[1]) - matched_sub <- subs[grep(id[1], subs)[1]] - } - if (match == "best") { - if (verbose) - message("Returning best match. \n") - msubs <- gsub(" \\(.*\\)", "", subs) - dd <- adist(query, msubs) / nchar(msubs) - id <- gsub("^.*\\?id=(.*)", "\\1", links[which.min(dd)]) - matched_sub <- subs[which.min(dd)] - } - if (match == "ask") { - matched_sub <- chooser(subs, "all") - id <- gsub("^.*\\?id=(.*)", "\\1", links[which(subs == matched_sub)]) - } + + out <- matcher(id, query = query, result = subs_names, match = match) + + hit <- tibble("query" = query, + "match" = names(out), + "etoxid" = out) + return(hit) } - # return object - hit <- tibble::tibble( - "query" = query, - "match" = matched_sub, - "etoxid" = id - ) - return(hit) } out <- lapply(query, foo, from = from, match = match, verbose = verbose) out <- dplyr::bind_rows(out) From f6aa7723d929ead3bfd100623cbd69b7345e66c2 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 23 Jun 2020 09:41:14 -0400 Subject: [PATCH 36/43] changed default match = . updated tests --- R/etox.R | 2 +- tests/testthat/test-etox.R | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/etox.R b/R/etox.R index fc5bd887..799166b3 100644 --- a/R/etox.R +++ b/R/etox.R @@ -45,7 +45,7 @@ #' } get_etoxid <- function(query, from = c("name", "cas", "ec", "gsbl", "rtecs"), - match = c("best", "all", "first", "ask", "na"), + match = c("all", "best", "first", "ask", "na"), verbose = TRUE) { clean_char <- function(x) { # rm \n \t diff --git a/tests/testthat/test-etox.R b/tests/testthat/test-etox.R index bc2e672c..6a40a76c 100644 --- a/tests/testthat/test-etox.R +++ b/tests/testthat/test-etox.R @@ -26,8 +26,8 @@ test_that("examples in the article are unchanged", { c("8668", "8494", NA, "8397", "7240", "7331")) expect_equal( ids$match, - c("2,4-Xylenol ( 8668 )", "4-Chlor-2-methylphenol ( 8494 )", NA, - "Atrazin ( 8397 )", "Benzol ( 7240 )", "Desethylatrazin ( 7331 )")) + c("2,4-Xylenol", "4-Chlor-2-methylphenol", NA, + "Atrazin", "Benzol", "Desethylatrazin")) expect_equal(ids$query, c("2,4-Dimethylphenol", "4-Chlor-2-methylphenol", "4-para-nonylphenol", "Atrazin", "Benzol", "Desethylatrazin")) @@ -51,7 +51,7 @@ test_that("get_etoxid returns correct results", { o2 <- suppressWarnings(get_etoxid(comps, match = "all")) o3 <- get_etoxid("Triclosan", match = "first") o4 <- get_etoxid("Triclosan", match = "na") - o5 <- get_etoxid("1071-83-6", from = 'cas', match = 'best') + o5 <- get_etoxid("1071-83-6", from = 'cas', match = 'first') o6 <- get_etoxid("133483", from = "gsbl") o7 <- get_etoxid("203-157-5", from = "ec") do2 <- get_etoxid("Thiamethoxam") @@ -65,8 +65,8 @@ test_that("get_etoxid returns correct results", { expect_s3_class(o7, "data.frame") expect_s3_class(do2, "data.frame") - expect_equal(o1$etoxid, c("20179", "9051")) - expect_equal(o2$etoxid, c("89236", "20179", "9051")) + expect_equivalent(o1$etoxid, c("20179", "9051")) + expect_equivalent(o2$etoxid, c("89236", "20179", "9051")) }) test_that("examples from webchem article run", { @@ -78,16 +78,16 @@ test_that("examples from webchem article run", { ids <- get_etoxid(head(unique(jagst$substance),6), match = "best") expect_s3_class(ids, "data.frame") - expect_equal(ids$etoxid, c("8668","8494",NA,"8397","7240","7331")) - expect_equal(ids$match, c( - "2,4-Xylenol ( 8668 )", - "4-Chlor-2-methylphenol ( 8494 )", + expect_equivalent(ids$etoxid, c("8668","8494",NA,"8397","7240","7331")) + expect_equivalent(ids$match, c( + "2,4-Xylenol", + "4-Chlor-2-methylphenol", NA, - "Atrazin ( 8397 )", - "Benzol ( 7240 )", - "Desethylatrazin ( 7331 )" + "Atrazin", + "Benzol", + "Desethylatrazin" )) - expect_equal(ids$query, c( + expect_equivalent(ids$query, c( "2,4-Dimethylphenol", "4-Chlor-2-methylphenol", "4-para-nonylphenol", From b83aa1a8e072446a5896c06054711d7f0b62f240 Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 23 Jun 2020 10:32:26 -0400 Subject: [PATCH 37/43] fix tests --- R/integration.R | 2 +- tests/testthat/test-etox.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/integration.R b/R/integration.R index ccee9164..96b0ece7 100644 --- a/R/integration.R +++ b/R/integration.R @@ -29,7 +29,7 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { paste0(.f, " doesn't accept ", from, ".\n", "Attempting to translate to ", new_from, " with CTS. ") ) } - new_query <- cts_convert(query, from = from, to = new_from, choices = 1) + new_query <- cts_convert(query, from = from, to = new_from, match = "first") #would like to try a again if cts fails the first time (as it often does). from <- new_from query <- new_query diff --git a/tests/testthat/test-etox.R b/tests/testthat/test-etox.R index 6a40a76c..4cd1f300 100644 --- a/tests/testthat/test-etox.R +++ b/tests/testthat/test-etox.R @@ -22,7 +22,7 @@ test_that("examples in the article are unchanged", { expect_is(ids, "data.frame") expect_equal(names(ids), c("query", "match", "etoxid")) - expect_equal(ids$etoxid, + expect_equivalent(ids$etoxid, c("8668", "8494", NA, "8397", "7240", "7331")) expect_equal( ids$match, From 7c3a889641c3129ef1c8d068a89dacbacb757f79 Mon Sep 17 00:00:00 2001 From: Aariq Date: Sat, 27 Jun 2020 14:00:02 -0400 Subject: [PATCH 38/43] addressing review of PR --- DESCRIPTION | 2 +- NEWS.md | 4 ++-- R/alanwood.R | 4 ++-- R/chebi.R | 44 +++++++++++++++++++++------------- R/chemspider.R | 4 ++-- R/cts.R | 17 +++++++++++-- man/aw_query.Rd | 2 +- man/cir_query.Rd | 34 +++++++++++++------------- man/cts_convert.Rd | 13 ++++++---- man/get_chebiid.Rd | 25 +++++++++++-------- man/get_etoxid.Rd | 2 +- man/pc_synonyms.Rd | 21 ++++++++-------- tests/testthat/test-alanwood.R | 2 +- 13 files changed, 104 insertions(+), 70 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 80ae2409..d50131c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,5 +49,5 @@ Suggests: knitr, rmarkdown, plot.matrix -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index d102bbf2..efe04b81 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,12 +1,12 @@ # webchem 1.0.0.9001 -## New Features +## NEW FEATURES * Download images of substances from ChemSpider with `cs_img()` * `autotranslate()` is a wrapper that accepts any type of query and any webchem function with a `from` argument and will use CTS to translate the query if needed. * `has_entry()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. -## Minor Improvements +## MINOR IMPROVEMENTS * The `"type"` argument in `ci_query()` and `aw_query()` has been changed to `"from"` for consistency with other functions * `fn_percept()` and `cts_compinfo()` now have `"query"` and `"from"` arguments for consistency with other functions diff --git a/R/alanwood.R b/R/alanwood.R index c04ca3cc..aa943078 100644 --- a/R/alanwood.R +++ b/R/alanwood.R @@ -27,7 +27,7 @@ #' @examples #' \dontrun{ #' aw_query('Fluazinam', from = 'name') -#' out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'com') +#' out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'name') #' out #' # extract subactivity from object #' sapply(out, function(y) y$subactivity[1]) @@ -40,7 +40,7 @@ aw_query <- function(query, from = c("name", "cas"), verbose = TRUE, force_build = FALSE, type, ...) { if (!missing(type)) { - warning('"type" is deprecated. Please use "from" instead. ') + message('"type" is deprecated. Please use "from" instead. ') from <- type } diff --git a/R/chebi.R b/R/chebi.R index c13766d4..e1936589 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -10,15 +10,20 @@ #' #' @param query character; search term. #' @param from character; type of input. \code{"all"} searches all types and -#' \code{"name"} searches all names. -#' @param match character; How should multiple hits be handled?, -#' \code{"all"} all matches are returned, -#' \code{"best"} the best matching (by the ChEBI searchscore) is returned, -#' \code{"ask"} enters an interactive mode and the user is asked for input, -#' \code{"na"} returns NA if multiple hits are found. +#' \code{"name"} searches all names. Other options include \code{'chebi id'}, +#' \code{'chebi name'}, \code{'definition'}, \code{'iupac name'}, +#' \code{'citations'}, \code{'registry numbers'}, \code{'manual xrefs'}, +#' \code{'automatic xrefs'}, \code{'formula'}, \code{'mass'}, +#' \code{'monoisotopic mass'},\code{'charge'}, \code{'inchi'}, +#' \code{'inchikey'}, \code{'smiles'}, and \code{'species'} +#' @param match character; How should multiple hits be handled?, \code{"all"} +#' all matches are returned, \code{"best"} the best matching (by the ChEBI +#' searchscore) is returned, \code{"ask"} enters an interactive mode and the +#' user is asked for input, \code{"na"} returns NA if multiple hits are found. #' @param max_res integer; maximum number of results to be retrieved from the -#' web service -#' @param stars character; +#' web service +#' @param stars character; "three only" restricts results to those manualy +#' annotated by the ChEBI team. #' @param verbose logical; should a verbose output be printed on the console? #' @param ... currently unused #' @return returns a list of data.frames containing a chebiid, a chebiasciiname, @@ -47,9 +52,9 @@ #' ChEBI: a database and ontology for chemical entities of biological #' interest. Nucleic Acids Res. 36, D344–D350. #' @references Eduard Szöcs, Tamás Stirling, Eric R. Scott, Andreas Scharmüller, -#' Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical -#' Information from the Web. Journal of Statistical Software, 93(13). -#' . +#' Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical +#' Information from the Web. Journal of Statistical Software, 93(13). +#' . #' @author Andreas Scharmüller, \email{andschar@@protonmail.com} #' @export #' @examples @@ -118,7 +123,7 @@ get_chebiid <- function(query, cont <- try(content(res, type = 'text/xml', encoding = 'utf-8'), silent = TRUE) out <- l2df(as_list(xml_children(xml_find_first(cont, '//d1:return')))) - out <- setNames(out, tolower(names(out))) + out <- as_tibble(setNames(out, tolower(names(out)))) if (nrow(out) == 0) { message('No result found. \n') return(tibble(query = query, @@ -136,10 +141,17 @@ get_chebiid <- function(query, return(out[which.max(out$searchscore), ]) } if (match == "ask") { - matched <- chooser(out$chebiid, 'all') + matched <- + matcher( + out$chebiid, + query = query, + result = out$chebiasciiname, + match = "ask", + verbose = verbose + ) return(out[out$chebiid == matched, ]) } - if (match == 'na') { + if (match == "na") { return(tibble(query = query, chebiid = NA_character_)) } @@ -148,10 +160,8 @@ get_chebiid <- function(query, } } else { out <- tibble(query = query, - chebiid = NA_character_, - ) + chebiid = NA_character_) message('Returning NA (', http_status(res)$message, '). \n') - return(out) } } diff --git a/R/chemspider.R b/R/chemspider.R index f10f36a6..3bbde536 100644 --- a/R/chemspider.R +++ b/R/chemspider.R @@ -191,7 +191,7 @@ get_csid <- function(query, match <- match.arg(match) foo <- function(x, from, match, verbose, apikey, ...) { - if (is.na(x)) return(as.integer(NA)) + if (is.na(x)) return(NA_integer_) res <- switch(from, name = cs_name_csid(x, apikey = apikey, control = cs_control(...)), @@ -203,7 +203,7 @@ get_csid <- function(query, if(length(res) > 1) { res <- matcher(res, query = x, match = match, verbose = verbose) } - if (length(res) == 0) res <- as.integer(NA) + if (length(res) == 0) res <- NA_integer_ return(res) } out <- diff --git a/R/cts.R b/R/cts.R index 93f8979b..c9a78536 100644 --- a/R/cts.R +++ b/R/cts.R @@ -105,8 +105,21 @@ cts_convert <- function(query, verbose = TRUE, choices = NULL, ...){ - if(!missing("choices")) - stop('"choices" is deprecated. Use "match" instead.') + if(!missing("choices")) { + if (is.null(choices)) { + message('"choices" is deprecated. Using match = "all" instead.') + match <- "all" + } else if(choices == 1) { + message('"choices" is deprecated. Using match= "first" instead.') + match <- "first" + } else if ((is.numeric(choices) & choices > 1) | choices == "all") { + message('"choices" is deprecated. Using match = "ask" instead.') + match <- "ask" + } else { + message('"choices" is deprecated. Using match = "all" instead.') + match <- "all" + } + } if (length(from) > 1 | length(to) > 1) { stop('Cannot handle multiple input or output types. Please provide only one argument for `from` and `to`.') } diff --git a/man/aw_query.Rd b/man/aw_query.Rd index c11ba9d5..3cdc9cb2 100644 --- a/man/aw_query.Rd +++ b/man/aw_query.Rd @@ -44,7 +44,7 @@ Please respect Copyright, Terms and Conditions \examples{ \dontrun{ aw_query('Fluazinam', from = 'name') -out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'com') +out <- aw_query(c('Fluazinam', 'Diclofop'), from = 'name') out # extract subactivity from object sapply(out, function(y) y$subactivity[1]) diff --git a/man/cir_query.Rd b/man/cir_query.Rd index ceec64d1..e4d99ee1 100644 --- a/man/cir_query.Rd +++ b/man/cir_query.Rd @@ -9,8 +9,9 @@ cir_query( representation = "smiles", resolver = NULL, first = FALSE, - choices = NULL, + match = c("all", "first", "ask", "na"), verbose = TRUE, + choices = NULL, ... ) } @@ -24,18 +25,19 @@ be returned. See details for possible representations.} the identifier type is detected and the different resolvers are used in turn. See details for possible resolvers.} -\item{first}{deprecated, use choices = 1 to return only the first result} - -\item{choices}{if \code{choices = 1}, returns only the first result. To get a -number of results to choose from in an interactive menu, provide the number -of choices you want or "all" to choose from all synonyms.} +\item{match}{character; How should multiple hits be handled? \code{"all"} +returns all matches, \code{"first"} returns only the first result, +\code{"ask"} enters an interactive mode and the user is asked for input, +\code{"na"} returns \code{NA} if multiple hits are found.} \item{verbose}{logical; should a verbose output be printed on the console?} +\item{choices}{deprecated. Use the \code{match} argument instead.} + \item{...}{currently not used.} } \value{ -A list of character vectors. If first = TRUE a vector. +A list of character vectors. } \description{ A interface to the Chemical Identifier Resolver (CIR). @@ -83,14 +85,14 @@ CIR can resolve can be of the following \code{identifier}: Chemical Names, \item \code{'protonable_group_count'} (Number of protonable groups). } - CIR first tries to determine the indetifier type submitted and then + CIR first tries to determine the identifier type submitted and then uses 'resolvers' to look up the data. If no \code{resolver} is supplied, CIR tries different resolvers in turn till a hit is found. E.g. for names CIR tries first to look up in OPSIN and if this fails the local name index of CIR. However, it can be also specified which resolvers to use - (if you know e.g. know your indentifier type) + (if you know e.g. know your identifier type) Possible \code{resolvers} are: \itemize{ \item \code{'name_by_cir'} (Lookup in name index of CIR), @@ -113,15 +115,15 @@ You can only make 1 request per second (this is a hard-coded feature). \examples{ \donttest{ # might fail if API is not available -cir_query('Triclosan', 'cas') -cir_query("3380-34-5", 'cas', first = TRUE) -cir_query("3380-34-5", 'cas', resolver = 'cas_number') -cir_query("3380-34-5", 'smiles') -cir_query('Triclosan', 'mw') +cir_query("Triclosan", "cas") +cir_query("3380-34-5", "cas", match = "first") +cir_query("3380-34-5", "cas", resolver = "cas_number") +cir_query("3380-34-5", "smiles") +cir_query("Triclosan", "mw") # multiple inputs -comp <- c('Triclosan', 'Aspirin') -cir_query(comp, 'cas', first = TRUE) +comp <- c("Triclosan", "Aspirin") +cir_query(comp, "cas", match = "first") } } diff --git a/man/cts_convert.Rd b/man/cts_convert.Rd index a9dfdb40..30351ab7 100644 --- a/man/cts_convert.Rd +++ b/man/cts_convert.Rd @@ -8,9 +8,9 @@ cts_convert( query, from, to, - first = FALSE, - choices = NULL, + match = c("all", "first", "ask", "na"), verbose = TRUE, + choices = NULL, ... ) } @@ -22,12 +22,15 @@ cts_convert( \item{to}{character; type to convert to.} -\item{first}{deprecated. Use choices = 1 instead.} - -\item{choices}{to return only the first result, use 'choices = 1'. To choose a result from an interactive menu, provide a number of choices to choose from or "all".} +\item{match}{character; How should multiple hits be handled? \code{"all"} +returns all matches, \code{"first"} returns only the first result, +\code{"ask"} enters an interactive mode and the user is asked for input, +\code{"na"} returns \code{NA} if multiple hits are found.} \item{verbose}{logical; should a verbose output be printed on the console?} +\item{choices}{deprecated. Use the \code{match} argument instead.} + \item{...}{currently not used.} } \value{ diff --git a/man/get_chebiid.Rd b/man/get_chebiid.Rd index 1057b9a9..80b717be 100644 --- a/man/get_chebiid.Rd +++ b/man/get_chebiid.Rd @@ -20,18 +20,23 @@ get_chebiid( \item{query}{character; search term.} \item{from}{character; type of input. \code{"all"} searches all types and -\code{"name"} searches all names.} +\code{"name"} searches all names. Other options include \code{'chebi id'}, +\code{'chebi name'}, \code{'definition'}, \code{'iupac name'}, +\code{'citations'}, \code{'registry numbers'}, \code{'manual xrefs'}, +\code{'automatic xrefs'}, \code{'formula'}, \code{'mass'}, +\code{'monoisotopic mass'},\code{'charge'}, \code{'inchi'}, +\code{'inchikey'}, \code{'smiles'}, and \code{'species'}} -\item{match}{character; How should multiple hits be handled?, -\code{"all"} all matches are returned, -\code{"best"} the best matching (by the ChEBI searchscore) is returned, -\code{"ask"} enters an interactive mode and the user is asked for input, -\code{"na"} returns NA if multiple hits are found.} +\item{match}{character; How should multiple hits be handled?, \code{"all"} +all matches are returned, \code{"best"} the best matching (by the ChEBI +searchscore) is returned, \code{"ask"} enters an interactive mode and the +user is asked for input, \code{"na"} returns NA if multiple hits are found.} \item{max_res}{integer; maximum number of results to be retrieved from the web service} -\item{stars}{character;} +\item{stars}{character; "three only" restricts results to those manualy +annotated by the ChEBI team.} \item{verbose}{logical; should a verbose output be printed on the console?} @@ -84,9 +89,9 @@ Hastings J, Owen G, Dekker A, Ennis M, Kale N, Muthukrishnan V, interest. Nucleic Acids Res. 36, D344–D350. Eduard Szöcs, Tamás Stirling, Eric R. Scott, Andreas Scharmüller, -Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical -Information from the Web. Journal of Statistical Software, 93(13). -. + Ralf B. Schäfer (2020). webchem: An R Package to Retrieve Chemical + Information from the Web. Journal of Statistical Software, 93(13). + . } \author{ Andreas Scharmüller, \email{andschar@protonmail.com} diff --git a/man/get_etoxid.Rd b/man/get_etoxid.Rd index 91c9c7bf..654809c3 100644 --- a/man/get_etoxid.Rd +++ b/man/get_etoxid.Rd @@ -7,7 +7,7 @@ get_etoxid( query, from = c("name", "cas", "ec", "gsbl", "rtecs"), - match = c("best", "all", "first", "ask", "na"), + match = c("all", "best", "first", "ask", "na"), verbose = TRUE ) } diff --git a/man/pc_synonyms.Rd b/man/pc_synonyms.Rd index 7a872e08..f863f87d 100644 --- a/man/pc_synonyms.Rd +++ b/man/pc_synonyms.Rd @@ -7,10 +7,10 @@ pc_synonyms( query, from = c("name", "cid", "sid", "aid", "smiles", "inchi", "inchikey"), - choices = NULL, + match = c("all", "first", "ask", "na"), verbose = TRUE, arg = NULL, - interactive = 0, + choices = NULL, ... ) } @@ -20,21 +20,22 @@ pc_synonyms( \item{from}{character; type of input, can be one of "name" (default), "cid", "sid", "aid", "smiles", "inchi", "inchikey"} -\item{choices}{to get only the first synonym, use \code{choices = 1}, to get -a number of synonyms to choose from in an interactive menu, provide the -number of choices you want or "all" to choose from all synonyms.} +\item{match}{character; How should multiple hits be handled? \code{"all"} +returns all matches, \code{"first"} returns only the first result, +\code{"ask"} enters an interactive mode and the user is asked for input, +\code{"na"} returns \code{NA} if multiple hits are found.} \item{verbose}{logical; should a verbose output be printed on the console?} -\item{arg}{character; optinal arguments like "name_type=word" to match +\item{arg}{character; optional arguments like "name_type=word" to match individual words.} -\item{interactive}{deprecated. Use the \code{choices} argument instead} +\item{choices}{deprecated. Use the \code{match} argument instead.} -\item{...}{optional arguments} +\item{...}{currently unused} } \value{ -a list of character vectors (one per query). If \code{choices} is used, a single named vector is returned instead. +a named list. } \description{ Search synonyms using PUG-REST, @@ -54,7 +55,7 @@ usage policies of the indicidual data sources pc_synonyms("Aspirin") pc_synonyms(c("Aspirin", "Triclosan")) pc_synonyms(5564, from = "cid") -pc_synonyms(c("Aspirin", "Triclosan"), choices = 10) +pc_synonyms(c("Aspirin", "Triclosan"), match = "ask") } } \references{ diff --git a/tests/testthat/test-alanwood.R b/tests/testthat/test-alanwood.R index b8a87d5d..27a92ff6 100644 --- a/tests/testthat/test-alanwood.R +++ b/tests/testthat/test-alanwood.R @@ -14,7 +14,7 @@ test_that("examples in the article are unchanged", { "phenyl organothiophosphate insecticides")) }) -test_that("alanwood, commonname", { +test_that("alanwood, name", { skip_on_cran() skip_if_not(up, "Alanwood service is down") From e29b47618a053129c68bf77fc1e65386330f9398 Mon Sep 17 00:00:00 2001 From: Aariq Date: Sat, 27 Jun 2020 14:44:27 -0400 Subject: [PATCH 39/43] add skips for integration function tests. --- tests/testthat/test-integration.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 846d2444..afc6ae96 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -1,8 +1,13 @@ # These all might occasionally fail because cts_translate() is currently somewhat unreliable. fn_up <- ping_service("fn") +up <- ping_service("cts") + test_that("autotranslate works when no translation needed", { + skip_on_cran() skip_if_not(fn_up, "Flavornet down!") + skip_if_not(up, "CTS service down") + CASs <- c("75-07-0", "64-17-5") a <- autotranslate(query = CASs, from = "cas", .f = "fn_percept", .verbose = TRUE) b <- fn_percept(CASs) @@ -11,7 +16,10 @@ test_that("autotranslate works when no translation needed", { etox_up <- ping_service("etox") test_that("autotranslate translates", { + skip_on_cran() skip_if_not(etox_up, "ETOX down!") + skip_if_not(up, "CTS service down") + x <- autotranslate(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") y <- get_etoxid(query = "1071-83-6", from = "cas") expect_equal(x, y) @@ -19,8 +27,11 @@ test_that("autotranslate translates", { test_that("has_entry() function works", { + skip_on_cran() skip_if_not(fn_up) skip_if_not(etox_up) + skip_if_not(up, "CTS service down") + out <- has_entry(c("triclosan", NA, "balloon"), from = "name", sources = c("etox", "fn")) From 76c32a8dab0e085241ba745e7b2deac287932038 Mon Sep 17 00:00:00 2001 From: Aariq Date: Thu, 2 Jul 2020 11:16:15 -0400 Subject: [PATCH 40/43] change pan example, update a test. --- R/pan.R | 6 +++--- tests/testthat/test-pubchem.R | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/pan.R b/R/pan.R index d47cb824..1ddf8e5a 100644 --- a/R/pan.R +++ b/R/pan.R @@ -62,11 +62,11 @@ #' # return only best hit #' pan_query('2,4-dichlorophenol', match = 'best')[[1]][c(1, 2, 5, 74)] #' -#' out <- pan_query(c('Triclosan', 'Aspirin'), 'best') +#' out <- pan_query(c('Glyphosate', 'Rotenone'), from = "name", match = 'best') #' out #' -#' # extract Hydrolysis Half-life (Avg, Days) -#' sapply(out, function(y) y$`Hydrolysis Half-life (Avg, Days)`) +#' # extract Acute Toxicity Summary +#' sapply(out, function(y) y$`Acute Toxicity Summary`) #' } pan_query <- function(query, from = c("name", "cas"), match = c('best', 'all', 'first'), verbose = TRUE, ...){ match <- match.arg(match) diff --git a/tests/testthat/test-pubchem.R b/tests/testthat/test-pubchem.R index 6030a66f..ba83bb83 100644 --- a/tests/testthat/test-pubchem.R +++ b/tests/testthat/test-pubchem.R @@ -158,7 +158,7 @@ test_that("pc_sect()", { c <- pc_sect(780286, "modify date", "assay") expect_s3_class(c, c("tbl_df", "tbl", "data.frame")) expect_equal(names(c), c("AID", "Name", "Result", "SourceName", "SourceID")) - expect_equal(c$Result, c("2014-05-03", "2018-09-28")) + expect_equal(c$Result, c("2014-05-03", "2018-09-28", "2020-06-30")) d <- pc_sect("1ZHY_A", "Sequence", "protein") expect_s3_class(d, c("tbl_df", "tbl", "data.frame")) From 99b528469e7d745121a2ee600c4645b3a2058ba9 Mon Sep 17 00:00:00 2001 From: Aariq Date: Fri, 3 Jul 2020 17:36:46 -0400 Subject: [PATCH 41/43] lintr suggestions --- R/integration.R | 42 +++++++++++++++++++++++-------- tests/testthat/test-integration.R | 17 +++++++++---- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/R/integration.R b/R/integration.R index 96b0ece7..c8e2d93d 100644 --- a/R/integration.R +++ b/R/integration.R @@ -1,14 +1,20 @@ #' Auto-translate identifiers and search databases #' -#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accept the type of query you've supplied, this will try to automatically translate it using CTS and run the query. +#' Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with +#' any webchem function that has \code{query} and \code{from} arguments. If the +#' function doesn't accept the type of query you've supplied, this will try to +#' automatically translate it using CTS and run the query. #' #' @param query character; the search term -#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" +#' @param from character; the format or type of query. Commonly accepted values +#' are "name", "cas", "inchi", and "inchikey" #' @param .f character; the (quoted) name of a webchem function #' @param .verbose logical; print a message when translating query? #' @param ... other arguments passed to the function specified with \code{.f} -#' +#' @note During the translation step, only the first hit from CTS is used. +#' Therefore, using this function to translate on the fly is not foolproof and +#' care should be taken to verify the results. #' @return returns results from \code{.f} #' @importFrom rlang as_function fn_fmls #' @export @@ -24,9 +30,15 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { if (!from %in% pos_froms) { pos_froms <- pos_froms[pos_froms != "name"] #cts name conversion broken new_from <- pos_froms[which(pos_froms %in% cts_to())[1]] - if(.verbose){ + if (.verbose) { message( - paste0(.f, " doesn't accept ", from, ".\n", "Attempting to translate to ", new_from, " with CTS. ") + paste0(.f, + " doesn't accept ", + from, + ".\n", + "Attempting to translate to ", + new_from, + " with CTS. ") ) } new_query <- cts_convert(query, from = from, to = new_from, match = "first") @@ -44,11 +56,15 @@ autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { #' Checks if entries are found in (most) data sources included in webchem #' #' @param query character; the search term -#' @param from character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey" -#' @param sources character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked. +#' @param from character; the format or type of query. Commonly accepted values +#' are "name", "cas", "inchi", and "inchikey" +#' @param sources character; which data sources to check. Data sources are +#' identified by the prefix associated with webchem functions that query those +#' databases. If not specified, all data sources listed will be checked. #' @param plot logical; plot a graphical representation of results. #' -#' @return a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query +#' @return a tibble of logical values where \code{TRUE} indicates that a data +#' source contains a record for the query #' @export #' @import dplyr #' @examples @@ -72,7 +88,13 @@ has_entry <- function(query, from, foo <- function(.f, query, from) { # if a function errors (e.g. API is down) then return NA - x <- try(autotranslate(query = query, from = from, .f = .f, match = "first")) + x <- + try(autotranslate( + query = query, + from = from, + .f = .f, + match = "first" + )) if (inherits(x, "try-error")) { return(NA) } @@ -99,7 +121,7 @@ has_entry <- function(query, from, select(all_of(colorder)) %>% as.matrix() opar <- graphics::par(no.readonly = TRUE) - graphics::par(mar=c(5.1, 7.1, 4.1, 4.1)) # adapt margins + graphics::par(mar = c(5.1, 7.1, 4.1, 4.1)) # adapt margins plot( pmat, col = c("#C7010B", "#3BC03B"), diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index afc6ae96..fa22e676 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -1,4 +1,5 @@ -# These all might occasionally fail because cts_translate() is currently somewhat unreliable. +# These all might occasionally fail because cts_translate() is currently +# somewhat unreliable. fn_up <- ping_service("fn") up <- ping_service("cts") @@ -9,7 +10,13 @@ test_that("autotranslate works when no translation needed", { skip_if_not(up, "CTS service down") CASs <- c("75-07-0", "64-17-5") - a <- autotranslate(query = CASs, from = "cas", .f = "fn_percept", .verbose = TRUE) + a <- + autotranslate( + query = CASs, + from = "cas", + .f = "fn_percept", + .verbose = TRUE + ) b <- fn_percept(CASs) expect_equal(a, b) }) @@ -20,7 +27,8 @@ test_that("autotranslate translates", { skip_if_not(etox_up, "ETOX down!") skip_if_not(up, "CTS service down") - x <- autotranslate(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") + x <- + autotranslate(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") y <- get_etoxid(query = "1071-83-6", from = "cas") expect_equal(x, y) }) @@ -39,5 +47,4 @@ test_that("has_entry() function works", { etox = c(TRUE, FALSE, FALSE), fn = c(FALSE, FALSE, FALSE)) expect_equivalent(out, df) -}) - +}) \ No newline at end of file From fdf6547b0a9bc4970f0fd515768dc2a877c9f00e Mon Sep 17 00:00:00 2001 From: Aariq Date: Tue, 7 Jul 2020 19:04:02 -0400 Subject: [PATCH 42/43] change autotranslate to with_cts and make unexported. Re-run documentation. --- NAMESPACE | 1 - NEWS.md | 1 - R/integration.R | 7 +++--- man/autotranslate.Rd | 30 ------------------------ man/has_entry.Rd | 10 +++++--- man/pan_query.Rd | 6 ++--- man/with_cts.Rd | 39 +++++++++++++++++++++++++++++++ tests/testthat/test-integration.R | 8 +++---- 8 files changed, 56 insertions(+), 46 deletions(-) delete mode 100644 man/autotranslate.Rd create mode 100644 man/with_cts.Rd diff --git a/NAMESPACE b/NAMESPACE index aa00da53..670417ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,7 +30,6 @@ S3method(smiles,pan_query) S3method(smiles,pc_prop) S3method(smiles,wd_ident) export(as.cas) -export(autotranslate) export(aw_query) export(build_aw_idx) export(cas) diff --git a/NEWS.md b/NEWS.md index efe04b81..a3de8097 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,6 @@ ## NEW FEATURES * Download images of substances from ChemSpider with `cs_img()` -* `autotranslate()` is a wrapper that accepts any type of query and any webchem function with a `from` argument and will use CTS to translate the query if needed. * `has_entry()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. ## MINOR IMPROVEMENTS diff --git a/R/integration.R b/R/integration.R index c8e2d93d..5e752697 100644 --- a/R/integration.R +++ b/R/integration.R @@ -17,13 +17,12 @@ #' care should be taken to verify the results. #' @return returns results from \code{.f} #' @importFrom rlang as_function fn_fmls -#' @export #' #' @examples #' \dontrun{ -#' autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") +#' with_cts("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") #' } -autotranslate <- function(query, from, .f, .verbose = TRUE, ...) { +with_cts <- function(query, from, .f, .verbose = TRUE, ...) { f <- rlang::as_function(.f) pos_froms <- eval(rlang::fn_fmls(f)$from) @@ -89,7 +88,7 @@ has_entry <- function(query, from, foo <- function(.f, query, from) { # if a function errors (e.g. API is down) then return NA x <- - try(autotranslate( + try(with_cts( query = query, from = from, .f = .f, diff --git a/man/autotranslate.Rd b/man/autotranslate.Rd deleted file mode 100644 index b62b25f6..00000000 --- a/man/autotranslate.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integration.R -\name{autotranslate} -\alias{autotranslate} -\title{Auto-translate identifiers and search databases} -\usage{ -autotranslate(query, from, .f, .verbose = TRUE, ...) -} -\arguments{ -\item{query}{character; the search term} - -\item{from}{character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey"} - -\item{.f}{character; the (quoted) name of a webchem function} - -\item{.verbose}{logical; print a message when translating query?} - -\item{...}{other arguments passed to the function specified with \code{.f}} -} -\value{ -returns results from \code{.f} -} -\description{ -Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with any webchem function that has \code{query} and \code{from} arguments. If the function doesn't accept the type of query you've supplied, this will try to automatically translate it using CTS and run the query. -} -\examples{ -\dontrun{ -autotranslate("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") -} -} diff --git a/man/has_entry.Rd b/man/has_entry.Rd index 4133d37a..b06667b5 100644 --- a/man/has_entry.Rd +++ b/man/has_entry.Rd @@ -14,14 +14,18 @@ has_entry( \arguments{ \item{query}{character; the search term} -\item{from}{character; the format or type of query. Commonly accepted values are "name", "cas", "inchi", and "inchikey"} +\item{from}{character; the format or type of query. Commonly accepted values +are "name", "cas", "inchi", and "inchikey"} -\item{sources}{character; which data sources to check. Data sources are identified by the prefix associated with webchem functions that query those databases. If not specified, all data sources listed will be checked.} +\item{sources}{character; which data sources to check. Data sources are +identified by the prefix associated with webchem functions that query those +databases. If not specified, all data sources listed will be checked.} \item{plot}{logical; plot a graphical representation of results.} } \value{ -a tibble of logical values where \code{TRUE} indicates that a data source contains a record for the query +a tibble of logical values where \code{TRUE} indicates that a data + source contains a record for the query } \description{ Checks if entries are found in (most) data sources included in webchem diff --git a/man/pan_query.Rd b/man/pan_query.Rd index e40d5282..214c6ddc 100644 --- a/man/pan_query.Rd +++ b/man/pan_query.Rd @@ -77,11 +77,11 @@ Retrieve information from the PAN database (\url{http://www.pesticideinfo.org/}) # return only best hit pan_query('2,4-dichlorophenol', match = 'best')[[1]][c(1, 2, 5, 74)] - out <- pan_query(c('Triclosan', 'Aspirin'), 'best') + out <- pan_query(c('Glyphosate', 'Rotenone'), from = "name", match = 'best') out - # extract Hydrolysis Half-life (Avg, Days) - sapply(out, function(y) y$`Hydrolysis Half-life (Avg, Days)`) + # extract Acute Toxicity Summary + sapply(out, function(y) y$`Acute Toxicity Summary`) } } \author{ diff --git a/man/with_cts.Rd b/man/with_cts.Rd new file mode 100644 index 00000000..d20a6de6 --- /dev/null +++ b/man/with_cts.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integration.R +\name{with_cts} +\alias{with_cts} +\title{Auto-translate identifiers and search databases} +\usage{ +with_cts(query, from, .f, .verbose = TRUE, ...) +} +\arguments{ +\item{query}{character; the search term} + +\item{from}{character; the format or type of query. Commonly accepted values +are "name", "cas", "inchi", and "inchikey"} + +\item{.f}{character; the (quoted) name of a webchem function} + +\item{.verbose}{logical; print a message when translating query?} + +\item{...}{other arguments passed to the function specified with \code{.f}} +} +\value{ +returns results from \code{.f} +} +\description{ +Supply a query of any type (e.g. SMILES, CAS, name, InChI, etc.) along with +any webchem function that has \code{query} and \code{from} arguments. If the +function doesn't accept the type of query you've supplied, this will try to +automatically translate it using CTS and run the query. +} +\note{ +During the translation step, only the first hit from CTS is used. + Therefore, using this function to translate on the fly is not foolproof and + care should be taken to verify the results. +} +\examples{ +\dontrun{ +with_cts("XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") +} +} diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index fa22e676..04aaa506 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -4,14 +4,14 @@ fn_up <- ping_service("fn") up <- ping_service("cts") -test_that("autotranslate works when no translation needed", { +test_that("with_cts() works when no translation needed", { skip_on_cran() skip_if_not(fn_up, "Flavornet down!") skip_if_not(up, "CTS service down") CASs <- c("75-07-0", "64-17-5") a <- - autotranslate( + with_cts( query = CASs, from = "cas", .f = "fn_percept", @@ -22,13 +22,13 @@ test_that("autotranslate works when no translation needed", { }) etox_up <- ping_service("etox") -test_that("autotranslate translates", { +test_that("with_cts() translates", { skip_on_cran() skip_if_not(etox_up, "ETOX down!") skip_if_not(up, "CTS service down") x <- - autotranslate(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") + with_cts(query = "XDDAORKBJWWYJS-UHFFFAOYSA-N", from = "inchikey", .f = "get_etoxid") y <- get_etoxid(query = "1071-83-6", from = "cas") expect_equal(x, y) }) From 19ba9d15b4c16c2a8fc5ce78438ef87ac4b7ccdc Mon Sep 17 00:00:00 2001 From: Aariq Date: Wed, 8 Jul 2020 12:32:17 -0400 Subject: [PATCH 43/43] changed function name from has_entry to find_db. Other minor changes to appease check() --- NAMESPACE | 2 +- NEWS.md | 2 +- R/chebi.R | 18 +++++++++--------- R/cir.R | 1 - R/integration.R | 4 ++-- man/cir_query.Rd | 1 - man/{has_entry.Rd => find_db.Rd} | 8 ++++---- tests/testthat/test-integration.R | 4 ++-- 8 files changed, 19 insertions(+), 21 deletions(-) rename man/{has_entry.Rd => find_db.Rd} (92%) diff --git a/NAMESPACE b/NAMESPACE index 670417ab..1b566218 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,13 +53,13 @@ export(cts_to) export(etox_basic) export(etox_targets) export(etox_tests) +export(find_db) export(fn_percept) export(get_chebiid) export(get_cid) export(get_csid) export(get_etoxid) export(get_wdid) -export(has_entry) export(inchikey) export(is.cas) export(is.inchikey) diff --git a/NEWS.md b/NEWS.md index a3de8097..d9bff25e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ ## NEW FEATURES * Download images of substances from ChemSpider with `cs_img()` -* `has_entry()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. +* `find_db()` checks if a query gets a hit in most databases integrated in webchem. Useful for deciding which of several databases to focus on given a set of chemicals. ## MINOR IMPROVEMENTS diff --git a/R/chebi.R b/R/chebi.R index e1936589..bca3f3ac 100644 --- a/R/chebi.R +++ b/R/chebi.R @@ -90,8 +90,8 @@ get_chebiid <- function(query, stars <- toupper(match.arg(stars)) foo <- function(query, from, match, max_res, stars, verbose, ...) { - if (is.na(query)) return(tibble(query = NA_character_, - chebiid = NA_character_)) + if (is.na(query)) return(tibble("query" = NA_character_, + "chebiid" = NA_character_)) # query url <- 'http://www.ebi.ac.uk:80/webservices/chebi/2.0/webservice' @@ -126,8 +126,8 @@ get_chebiid <- function(query, out <- as_tibble(setNames(out, tolower(names(out)))) if (nrow(out) == 0) { message('No result found. \n') - return(tibble(query = query, - chebiid = NA_character_)) + return(tibble("query" = query, + "chebiid" = NA_character_)) } if (nrow(out) > 0) out$query <- query if (nrow(out) == 1) return(out) @@ -152,15 +152,15 @@ get_chebiid <- function(query, return(out[out$chebiid == matched, ]) } if (match == "na") { - return(tibble(query = query, - chebiid = NA_character_)) + return(tibble("query" = query, + "chebiid" = NA_character_)) } if (match == "first") { return(out[1, ]) } } else { - out <- tibble(query = query, - chebiid = NA_character_) + out <- tibble("query" = query, + "chebiid" = NA_character_) message('Returning NA (', http_status(res)$message, '). \n') return(out) } @@ -174,7 +174,7 @@ get_chebiid <- function(query, verbose = verbose) out <- setNames(out, query) out <- bind_rows(out) - return(dplyr::select(out, query, chebiid, everything())) + return(dplyr::select(out, "query", "chebiid", everything())) } diff --git a/R/cir.R b/R/cir.R index 434fa38d..f3bb02aa 100644 --- a/R/cir.R +++ b/R/cir.R @@ -115,7 +115,6 @@ #' @export cir_query <- function(identifier, representation = "smiles", resolver = NULL, - first = FALSE, match = c("all", "first", "ask", "na"), verbose = TRUE, choices = NULL, diff --git a/R/integration.R b/R/integration.R index 5e752697..9b406cb5 100644 --- a/R/integration.R +++ b/R/integration.R @@ -68,9 +68,9 @@ with_cts <- function(query, from, .f, .verbose = TRUE, ...) { #' @import dplyr #' @examples #' \dontrun{ -#' has_entry("hexane", from = "name") +#' find_db("hexane", from = "name") #' } -has_entry <- function(query, from, +find_db <- function(query, from, sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs"), plot = FALSE) { diff --git a/man/cir_query.Rd b/man/cir_query.Rd index e4d99ee1..84ec244b 100644 --- a/man/cir_query.Rd +++ b/man/cir_query.Rd @@ -8,7 +8,6 @@ cir_query( identifier, representation = "smiles", resolver = NULL, - first = FALSE, match = c("all", "first", "ask", "na"), verbose = TRUE, choices = NULL, diff --git a/man/has_entry.Rd b/man/find_db.Rd similarity index 92% rename from man/has_entry.Rd rename to man/find_db.Rd index b06667b5..c154a83e 100644 --- a/man/has_entry.Rd +++ b/man/find_db.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/integration.R -\name{has_entry} -\alias{has_entry} +\name{find_db} +\alias{find_db} \title{Check data source coverage of compounds} \usage{ -has_entry( +find_db( query, from, sources = c("etox", "pc", "chebi", "cs", "aw", "fn", "pan", "srs"), @@ -32,6 +32,6 @@ Checks if entries are found in (most) data sources included in webchem } \examples{ \dontrun{ -has_entry("hexane", from = "name") +find_db("hexane", from = "name") } } diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 04aaa506..34f6b5b5 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -34,13 +34,13 @@ test_that("with_cts() translates", { }) -test_that("has_entry() function works", { +test_that("find_db() function works", { skip_on_cran() skip_if_not(fn_up) skip_if_not(etox_up) skip_if_not(up, "CTS service down") - out <- has_entry(c("triclosan", NA, "balloon"), + out <- find_db(c("triclosan", NA, "balloon"), from = "name", sources = c("etox", "fn")) df <- tibble(query = c("triclosan", NA, "balloon"),