From a4c01c1557274804f45efc81068ada7c0036ea6c Mon Sep 17 00:00:00 2001 From: Klangina Date: Thu, 24 Oct 2024 01:35:56 +0530 Subject: [PATCH 1/5] Issue-#109 a: - Created R/accnum.R - Added up2ncbi - Added documentation in roxygen --- R/accnum.R | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 R/accnum.R diff --git a/R/accnum.R b/R/accnum.R new file mode 100644 index 00000000..e3b7e1c1 --- /dev/null +++ b/R/accnum.R @@ -0,0 +1,35 @@ + + + + +#' Convert a UniProt ID to an NCBI Entrez Gene ID +#' +#' This function takes a single UniProt ID and returns the corresponding NCBI Entrez Gene ID. +#' It uses the `org.Hs.eg.db` package to perform the mapping. +#' +#' @author Klangina +#' @param uniprot_id A string representing a single UniProt ID. +#' @return A string representing the corresponding NCBI Entrez Gene ID. Returns `NA` if no mapping is found. +#' @examples +#' \dontrun{ +#' uniprot_id <- "P04217" +#' entrez_id <- up2ncbi(uniprot_id) +#' print(entrez_id) +#' } +#' @importFrom AnnotationDbi select +#' @import org.Hs.eg.db +#' @export +up2ncbi <- function(uniprot_id) { + # Use the select function to map the UniProt ID to an Entrez Gene ID + result <- AnnotationDbi::select(org.Hs.eg.db, + keys = uniprot_id, + columns = "ENTREZID", + keytype = "UNIPROT") + + # Check if the result is not empty and return the first Entrez ID + if (nrow(result) > 0 && !is.na(result$ENTREZID[1])) { + return(as.character(result$ENTREZID[1])) + } else { + return(NA) # Return NA if no mapping is found + } +} \ No newline at end of file From 9bc2ded969a6e32403e2322f914d2164c39e3e93 Mon Sep 17 00:00:00 2001 From: Klangina Date: Thu, 24 Oct 2024 01:47:13 +0530 Subject: [PATCH 2/5] Issue-#109-b: - Added ncbi2up to R/accnum.R - Added documentation in roxygen --- R/accnum.R | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/R/accnum.R b/R/accnum.R index e3b7e1c1..2be24c70 100644 --- a/R/accnum.R +++ b/R/accnum.R @@ -32,4 
+32,37 @@ up2ncbi <- function(uniprot_id) { } else { return(NA) # Return NA if no mapping is found } +} + + + +#' Convert an NCBI Entrez Gene ID to a UniProt ID +#' +#' This function takes a single NCBI Entrez Gene ID and returns the corresponding UniProt ID. +#' It uses the `org.Hs.eg.db` package to perform the mapping. +#' +#' @param entrez_id A string representing a single NCBI Entrez Gene ID. +#' @return A string representing the corresponding UniProt ID. Returns `NA` if no mapping is found. +#' @examples +#' \dontrun{ +#' entrez_id <- "3586" +#' uniprot_id <- ncbi2up(entrez_id) +#' print(uniprot_id) +#' } +#' @importFrom AnnotationDbi select +#' @import org.Hs.eg.db +#' @export +ncbi2up <- function(entrez_id) { + # Use the select function to map the Entrez Gene ID to a UniProt ID + result <- AnnotationDbi::select(org.Hs.eg.db, + keys = entrez_id, + columns = "UNIPROT", + keytype = "ENTREZID") + + # Check if the result is not empty and return the first UniProt ID + if (nrow(result) > 0 && !is.na(result$UNIPROT[1])) { + return(as.character(result$UNIPROT[1])) + } else { + return(NA) # Return NA if no mapping is found + } } \ No newline at end of file From 089ef3561ec3e30e83002169457ff5fd05b8a70b Mon Sep 17 00:00:00 2001 From: Klangina Date: Mon, 28 Oct 2024 15:17:20 +0530 Subject: [PATCH 3/5] - Changes in function signature of ncbi2up. - Added better error handling and multiple outputs - Added examples [can be later converted into tests] --- R/accnum.R | 58 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/R/accnum.R b/R/accnum.R index 2be24c70..98c4c344 100644 --- a/R/accnum.R +++ b/R/accnum.R @@ -35,34 +35,48 @@ up2ncbi <- function(uniprot_id) { } - -#' Convert an NCBI Entrez Gene ID to a UniProt ID +#' Convert NCBI RefSeq Accessions to UniProt IDs #' -#' This function takes a single NCBI Entrez Gene ID and returns the corresponding UniProt ID. 
-#' It uses the `org.Hs.eg.db` package to perform the mapping. +#' This function takes one or more NCBI RefSeq accession numbers and returns the corresponding UniProt IDs. +#' It uses the org.Hs.eg.db package to perform the mapping. #' -#' @param entrez_id A string representing a single NCBI Entrez Gene ID. -#' @return A string representing the corresponding UniProt ID. Returns `NA` if no mapping is found. +#' @param ncbi_accessions A character vector of NCBI RefSeq accession numbers. +#' @return A data frame with columns 'REFSEQ' and 'UNIPROT', mapping RefSeq accessions to UniProt IDs. +#' Returns NA for UniProt if no mapping is found. #' @examples #' \dontrun{ -#' entrez_id <- "3586" -#' uniprot_id <- ncbi2up(entrez_id) -#' print(uniprot_id) +#' ncbi_accessions <- c("NP_000005.2", "NP_000007.1") +#' uniprot_ids <- ncbi2up(ncbi_accessions) +#' print(uniprot_ids) #' } #' @importFrom AnnotationDbi select #' @import org.Hs.eg.db #' @export -ncbi2up <- function(entrez_id) { - # Use the select function to map the Entrez Gene ID to a UniProt ID - result <- AnnotationDbi::select(org.Hs.eg.db, - keys = entrez_id, - columns = "UNIPROT", - keytype = "ENTREZID") - - # Check if the result is not empty and return the first UniProt ID - if (nrow(result) > 0 && !is.na(result$UNIPROT[1])) { - return(as.character(result$UNIPROT[1])) - } else { - return(NA) # Return NA if no mapping is found +ncbi2up <- function(ncbi_accessions) { + # Check if input is provided + if (length(ncbi_accessions) == 0) { + stop("No NCBI accessions provided.") } -} \ No newline at end of file + + # Strip version numbers from accessions + stripped_accessions <- gsub("\\.[0-9]+$", "", ncbi_accessions) + + # Perform the mapping + tryCatch({ + mapping <- AnnotationDbi::select( + org.Hs.eg.db, + keys = stripped_accessions, + columns = "UNIPROT", + keytype = "REFSEQ" + ) + + # Check if any mappings were found + if (nrow(mapping) == 0) { + warning("No UniProt IDs found for the given NCBI accessions.") + } + + 
return(mapping) + }, error = function(e) { + stop(paste("Error in mapping:", e$message)) + }) +} From 24e5f67c5fdb62e52dcc4f74a991a936c128fc1b Mon Sep 17 00:00:00 2001 From: Klangina Date: Mon, 28 Oct 2024 15:28:08 +0530 Subject: [PATCH 4/5] - Changes in function signature of up2ncbi. - Added better error handling and multiple outputs - Added examples [can be later converted into tests] --- R/accnum.R | 58 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/R/accnum.R b/R/accnum.R index 98c4c344..36c6f6d2 100644 --- a/R/accnum.R +++ b/R/accnum.R @@ -1,40 +1,50 @@ - - -#' Convert a UniProt ID to an NCBI Entrez Gene ID +#' Convert UniProt IDs to NCBI RefSeq Accessions +#' +#' This function takes one or more UniProt IDs and returns the corresponding NCBI RefSeq accessions. +#' It uses the org.Hs.eg.db package to perform the mapping. #' -#' This function takes a single UniProt ID and returns the corresponding NCBI Entrez Gene ID. -#' It uses the `org.Hs.eg.db` package to perform the mapping. -#' -#' @author Klangina -#' @param uniprot_id A string representing a single UniProt ID. -#' @return A string representing the corresponding NCBI Entrez Gene ID. Returns `NA` if no mapping is found. +#' @param uniprot_ids A character vector of UniProt IDs. +#' @return A data frame with columns 'UNIPROT' and 'REFSEQ', mapping UniProt IDs to RefSeq accessions. +#' Returns an empty data frame if no mappings are found. 
#' @examples #' \dontrun{ -#' uniprot_id <- "P04217" -#' entrez_id <- up2ncbi(uniprot_id) -#' print(entrez_id) +#' uniprot_ids <- c("P04217", "P01023") +#' refseq_accessions <- up2ncbi(uniprot_ids) +#' print(refseq_accessions) #' } #' @importFrom AnnotationDbi select #' @import org.Hs.eg.db #' @export -up2ncbi <- function(uniprot_id) { - # Use the select function to map the UniProt ID to an Entrez Gene ID - result <- AnnotationDbi::select(org.Hs.eg.db, - keys = uniprot_id, - columns = "ENTREZID", - keytype = "UNIPROT") - - # Check if the result is not empty and return the first Entrez ID - if (nrow(result) > 0 && !is.na(result$ENTREZID[1])) { - return(as.character(result$ENTREZID[1])) - } else { - return(NA) # Return NA if no mapping is found +up2ncbi <- function(uniprot_ids) { + # Check if input is provided + if (length(uniprot_ids) == 0) { + stop("No UniProt IDs provided.") } + + # Perform the mapping + tryCatch({ + mapping <- AnnotationDbi::select( + org.Hs.eg.db, + keys = uniprot_ids, + columns = "REFSEQ", + keytype = "UNIPROT" + ) + + # Check if any mappings were found + if (nrow(mapping) == 0) { + warning("No NCBI accessions found for the given UniProt IDs.") + } + + return(mapping) + }, error = function(e) { + stop(paste("Error in mapping:", e$message)) + }) } + #' Convert NCBI RefSeq Accessions to UniProt IDs #' #' This function takes one or more NCBI RefSeq accession numbers and returns the corresponding UniProt IDs. From 99247d0edbaca627cd9ab7ae3282b6bc1cd5ff59 Mon Sep 17 00:00:00 2001 From: Klangina Date: Mon, 28 Oct 2024 17:30:13 +0530 Subject: [PATCH 5/5] Added ncbi2ipg with similar functionality to the previous two functions.
--- R/accnum.R | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/R/accnum.R b/R/accnum.R index 36c6f6d2..71da5b6a 100644 --- a/R/accnum.R +++ b/R/accnum.R @@ -90,3 +90,55 @@ ncbi2up <- function(ncbi_accessions) { stop(paste("Error in mapping:", e$message)) }) } + + + +#' Convert NCBI Protein Accessions to IPG (Identical Protein Group) IDs +#' +#' This function takes one or more NCBI protein accession numbers and returns the corresponding +#' IPG (Identical Protein Group) IDs. It uses the NCBI E-utilities API via the rentrez package +#' to perform the mapping. +#' +#' @param ncbi_ids A character vector of NCBI protein accession numbers. +#' @return A data frame with columns 'NCBI' and 'IPG', mapping NCBI protein accessions to IPG IDs. +#' Returns an empty data frame if no mappings are found. +#' @examples +#' \dontrun{ +#' ncbi_ids <- c("NP_000005.2", "NP_000007.1") +#' ipg_mappings <- ncbi2ipg(ncbi_ids) +#' print(ipg_mappings) +#' } +#' @importFrom rentrez entrez_search +#' @export +ncbi2ipg <- function(ncbi_ids) { + # Check if input is provided + if (length(ncbi_ids) == 0) { + stop("No NCBI IDs provided.") + } + + # Perform the mapping + tryCatch({ + # Search the IPG database for each NCBI ID + results <- lapply(ncbi_ids, function(id) { + search <- entrez_search(db = "ipg", term = paste0(id, "[PACC]")) + if (search$count > 0) { + data.frame(NCBI = id, IPG = search$ids, stringsAsFactors = FALSE) + } else { + NULL + } + }) + + # Combine results into a single data frame + mapping <- do.call(rbind, results) + + # Check if any mappings were found + if (is.null(mapping) || nrow(mapping) == 0) { + warning("No IPG mappings found for the given NCBI IDs.") + mapping <- data.frame(NCBI = character(0), IPG = character(0), stringsAsFactors = FALSE) + } + + return(mapping) + }, error = function(e) { + stop(paste("Error in mapping:", e$message)) + }) +} \ No newline at end of file