From a4c01c1557274804f45efc81068ada7c0036ea6c Mon Sep 17 00:00:00 2001
From: Klangina <xyzwhatever668@gmail.com>
Date: Thu, 24 Oct 2024 01:35:56 +0530
Subject: [PATCH 1/5] Issue-#109 a: - Created R/accnum.R - Added up2ncbi - 
 Added documentation in roxygen

---
 R/accnum.R | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 R/accnum.R

diff --git a/R/accnum.R b/R/accnum.R
new file mode 100644
index 00000000..e3b7e1c1
--- /dev/null
+++ b/R/accnum.R
@@ -0,0 +1,35 @@
+
+
+
+
+#' Convert a UniProt ID to an NCBI Entrez Gene ID
+#'
+#' This function takes a single UniProt ID and returns the corresponding NCBI Entrez Gene ID.
+#' It uses the `org.Hs.eg.db` package to perform the mapping.
+#' 
+#' @author Klangina
+#' @param uniprot_id A string representing a single UniProt ID.
+#' @return A string representing the corresponding NCBI Entrez Gene ID. Returns `NA` if no mapping is found.
+#' @examples
+#' \dontrun{
+#'   uniprot_id <- "P04217"
+#'   entrez_id <- up2ncbi(uniprot_id)
+#'   print(entrez_id)
+#' }
+#' @importFrom AnnotationDbi select
+#' @import org.Hs.eg.db
+#' @export
+up2ncbi <- function(uniprot_id) {
+  # Use the select function to map the UniProt ID to an Entrez Gene ID
+  result <- AnnotationDbi::select(org.Hs.eg.db,
+                                  keys = uniprot_id,
+                                  columns = "ENTREZID",
+                                  keytype = "UNIPROT")
+  
+  # Check if the result is not empty and return the first Entrez ID
+  if (nrow(result) > 0 && !is.na(result$ENTREZID[1])) {
+    return(as.character(result$ENTREZID[1]))
+  } else {
+    return(NA)  # Return NA if no mapping is found
+  }
+}
\ No newline at end of file

From 9bc2ded969a6e32403e2322f914d2164c39e3e93 Mon Sep 17 00:00:00 2001
From: Klangina <xyzwhatever668@gmail.com>
Date: Thu, 24 Oct 2024 01:47:13 +0530
Subject: [PATCH 2/5] Issue-#109-b: - Added ncbi2up to  R/accnum.R -  Added
 documentation in roxygen

---
 R/accnum.R | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/R/accnum.R b/R/accnum.R
index e3b7e1c1..2be24c70 100644
--- a/R/accnum.R
+++ b/R/accnum.R
@@ -32,4 +32,37 @@ up2ncbi <- function(uniprot_id) {
   } else {
     return(NA)  # Return NA if no mapping is found
   }
+}
+
+
+
+#' Convert an NCBI Entrez Gene ID to a UniProt ID
+#'
+#' This function takes a single NCBI Entrez Gene ID and returns the corresponding UniProt ID.
+#' It uses the `org.Hs.eg.db` package to perform the mapping.
+#'
+#' @param entrez_id A string representing a single NCBI Entrez Gene ID.
+#' @return A string representing the corresponding UniProt ID. Returns `NA` if no mapping is found.
+#' @examples
+#' \dontrun{
+#'   entrez_id <- "3586"
+#'   uniprot_id <- ncbi2up(entrez_id)
+#'   print(uniprot_id)
+#' }
+#' @importFrom AnnotationDbi select
+#' @import org.Hs.eg.db
+#' @export
+ncbi2up <- function(entrez_id) {
+  # Use the select function to map the Entrez Gene ID to a UniProt ID
+  result <- AnnotationDbi::select(org.Hs.eg.db,
+                                  keys = entrez_id,
+                                  columns = "UNIPROT",
+                                  keytype = "ENTREZID")
+  
+  # Check if the result is not empty and return the first UniProt ID
+  if (nrow(result) > 0 && !is.na(result$UNIPROT[1])) {
+    return(as.character(result$UNIPROT[1]))
+  } else {
+    return(NA)  # Return NA if no mapping is found
+  }
 }
\ No newline at end of file

From 089ef3561ec3e30e83002169457ff5fd05b8a70b Mon Sep 17 00:00:00 2001
From: Klangina <xyzwhatever668@gmail.com>
Date: Mon, 28 Oct 2024 15:17:20 +0530
Subject: [PATCH 3/5] - Changes in function signature of ncbi2up. - Added
 better error handling and multiple outputs - Added examples [can be later
 converted into tests]

---
 R/accnum.R | 58 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/R/accnum.R b/R/accnum.R
index 2be24c70..98c4c344 100644
--- a/R/accnum.R
+++ b/R/accnum.R
@@ -35,34 +35,48 @@ up2ncbi <- function(uniprot_id) {
 }
 
 
-
-#' Convert an NCBI Entrez Gene ID to a UniProt ID
+#' Convert NCBI RefSeq Accessions to UniProt IDs
 #'
-#' This function takes a single NCBI Entrez Gene ID and returns the corresponding UniProt ID.
-#' It uses the `org.Hs.eg.db` package to perform the mapping.
+#' This function takes one or more NCBI RefSeq accession numbers and returns the corresponding UniProt IDs.
+#' It uses the org.Hs.eg.db package to perform the mapping.
 #'
-#' @param entrez_id A string representing a single NCBI Entrez Gene ID.
-#' @return A string representing the corresponding UniProt ID. Returns `NA` if no mapping is found.
+#' @param ncbi_accessions A character vector of NCBI RefSeq accession numbers.
+#' @return A data frame with columns 'REFSEQ' and 'UNIPROT', mapping RefSeq accessions to UniProt IDs.
+#'         Returns NA for UniProt if no mapping is found.
 #' @examples
 #' \dontrun{
-#'   entrez_id <- "3586"
-#'   uniprot_id <- ncbi2up(entrez_id)
-#'   print(uniprot_id)
+#'   ncbi_accessions <- c("NP_000005.2", "NP_000007.1")
+#'   uniprot_ids <- ncbi2up(ncbi_accessions)
+#'   print(uniprot_ids)
 #' }
 #' @importFrom AnnotationDbi select
 #' @import org.Hs.eg.db
 #' @export
-ncbi2up <- function(entrez_id) {
-  # Use the select function to map the Entrez Gene ID to a UniProt ID
-  result <- AnnotationDbi::select(org.Hs.eg.db,
-                                  keys = entrez_id,
-                                  columns = "UNIPROT",
-                                  keytype = "ENTREZID")
-  
-  # Check if the result is not empty and return the first UniProt ID
-  if (nrow(result) > 0 && !is.na(result$UNIPROT[1])) {
-    return(as.character(result$UNIPROT[1]))
-  } else {
-    return(NA)  # Return NA if no mapping is found
+ncbi2up <- function(ncbi_accessions) {
+  # Reject zero-length input up front; this error is raised outside the tryCatch below
+  if (length(ncbi_accessions) == 0) {
+    stop("No NCBI accessions provided.")
   }
-}
\ No newline at end of file
+  
+  # Remove a trailing ".<digits>" version suffix, e.g. "NP_000005.2" becomes "NP_000005"
+  stripped_accessions <- gsub("\\.[0-9]+$", "", ncbi_accessions)
+  
+  # Look the unversioned accessions up as REFSEQ keys and request the UNIPROT column
+  tryCatch({
+    mapping <- AnnotationDbi::select(
+      org.Hs.eg.db,
+      keys = stripped_accessions,
+      columns = "UNIPROT",
+      keytype = "REFSEQ"
+    )
+    
+    # A zero-row result is returned as-is, with a warning so the caller can notice it
+    if (nrow(mapping) == 0) {
+      warning("No UniProt IDs found for the given NCBI accessions.")
+    }
+    
+    return(mapping)
+  }, error = function(e) {
+    stop(paste("Error in mapping:", e$message))
+  })
+}

From 24e5f67c5fdb62e52dcc4f74a991a936c128fc1b Mon Sep 17 00:00:00 2001
From: Klangina <xyzwhatever668@gmail.com>
Date: Mon, 28 Oct 2024 15:28:08 +0530
Subject: [PATCH 4/5] - Changes in function signature of up2ncbi.  - Added
 better error handling and multiple outputs  - Added examples [can be later
 converted into tests]

---
 R/accnum.R | 58 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/R/accnum.R b/R/accnum.R
index 98c4c344..36c6f6d2 100644
--- a/R/accnum.R
+++ b/R/accnum.R
@@ -1,40 +1,50 @@
 
 
-
-
-#' Convert a UniProt ID to an NCBI Entrez Gene ID
+#' Convert UniProt IDs to NCBI RefSeq Accessions
+#'
+#' This function takes one or more UniProt IDs and returns the corresponding NCBI RefSeq accessions.
+#' It uses the org.Hs.eg.db package to perform the mapping.
 #'
-#' This function takes a single UniProt ID and returns the corresponding NCBI Entrez Gene ID.
-#' It uses the `org.Hs.eg.db` package to perform the mapping.
-#' 
-#' @author Klangina
-#' @param uniprot_id A string representing a single UniProt ID.
-#' @return A string representing the corresponding NCBI Entrez Gene ID. Returns `NA` if no mapping is found.
+#' @param uniprot_ids A character vector of UniProt IDs.
+#' @return A data frame with columns 'UNIPROT' and 'REFSEQ', mapping UniProt IDs to RefSeq accessions.
+#'         Returns an empty data frame if no mappings are found.
 #' @examples
 #' \dontrun{
-#'   uniprot_id <- "P04217"
-#'   entrez_id <- up2ncbi(uniprot_id)
-#'   print(entrez_id)
+#'   uniprot_ids <- c("P04217", "P01023")
+#'   refseq_accessions <- up2ncbi(uniprot_ids)
+#'   print(refseq_accessions)
 #' }
 #' @importFrom AnnotationDbi select
 #' @import org.Hs.eg.db
 #' @export
-up2ncbi <- function(uniprot_id) {
-  # Use the select function to map the UniProt ID to an Entrez Gene ID
-  result <- AnnotationDbi::select(org.Hs.eg.db,
-                                  keys = uniprot_id,
-                                  columns = "ENTREZID",
-                                  keytype = "UNIPROT")
-  
-  # Check if the result is not empty and return the first Entrez ID
-  if (nrow(result) > 0 && !is.na(result$ENTREZID[1])) {
-    return(as.character(result$ENTREZID[1]))
-  } else {
-    return(NA)  # Return NA if no mapping is found
+up2ncbi <- function(uniprot_ids) {
+  # Reject zero-length input up front; this error is raised outside the tryCatch below
+  if (length(uniprot_ids) == 0) {
+    stop("No UniProt IDs provided.")
   }
+  
+  # Look the IDs up as UNIPROT keys in org.Hs.eg.db and request the REFSEQ column
+  tryCatch({
+    mapping <- AnnotationDbi::select(
+      org.Hs.eg.db,
+      keys = uniprot_ids,
+      columns = "REFSEQ",
+      keytype = "UNIPROT"
+    )
+    
+    # A zero-row result is returned as-is, with a warning so the caller can notice it
+    if (nrow(mapping) == 0) {
+      warning("No NCBI accessions found for the given UniProt IDs.")
+    }
+    
+    return(mapping)
+  }, error = function(e) {
+    stop(paste("Error in mapping:", e$message))
+  })
 }
 
 
+
 #' Convert NCBI RefSeq Accessions to UniProt IDs
 #'
 #' This function takes one or more NCBI RefSeq accession numbers and returns the corresponding UniProt IDs.

From 99247d0edbaca627cd9ab7ae3282b6bc1cd5ff59 Mon Sep 17 00:00:00 2001
From: Klangina <xyzwhatever668@gmail.com>
Date: Mon, 28 Oct 2024 17:30:13 +0530
Subject: [PATCH 5/5] Added ncbi2ipg with similar functionality to the previous
 two functions.

---
 R/accnum.R | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/R/accnum.R b/R/accnum.R
index 36c6f6d2..71da5b6a 100644
--- a/R/accnum.R
+++ b/R/accnum.R
@@ -90,3 +90,55 @@ ncbi2up <- function(ncbi_accessions) {
     stop(paste("Error in mapping:", e$message))
   })
 }
+
+
+
+#' Convert NCBI Protein Accessions to IPG (Identical Protein Group) IDs
+#'
+#' Runs one `entrez_search()` query (rentrez) against the "ipg" database per accession, using
+#' the search term `<accession>[PACC]`, and pairs each accession with the IDs the search returns.
+#' Accessions with no hits are left out; search errors are re-raised as "Error in mapping: ...".
+#'
+#' @param ncbi_ids A character vector of NCBI protein accession numbers; must not be empty.
+#' @return A data frame with columns 'NCBI' and 'IPG', one row per (accession, returned ID) pair.
+#'         If nothing is found, a warning is raised and a zero-row data frame is returned.
+#' @examples
+#' \dontrun{
+#'   ncbi_ids <- c("NP_000005.2", "NP_000007.1")
+#'   ipg_mappings <- ncbi2ipg(ncbi_ids)
+#'   print(ipg_mappings)
+#' }
+#' @importFrom rentrez entrez_search
+#' @export
+ncbi2ipg <- function(ncbi_ids) {
+  # Reject zero-length input up front; this error is raised outside the tryCatch below
+  if (length(ncbi_ids) == 0) {
+    stop("No NCBI IDs provided.")
+  }
+  
+  # Any error raised inside this block is re-thrown by the handler at the bottom
+  tryCatch({
+    # One search request per accession; accessions without hits yield NULL
+    results <- lapply(ncbi_ids, function(id) {
+      search <- entrez_search(db = "ipg", term = paste0(id, "[PACC]"))
+      if (search$count > 0) {
+        data.frame(NCBI = id, IPG = search$ids, stringsAsFactors = FALSE)
+      } else {
+        NULL
+      }
+    })
+    
+    # Stack the per-accession data frames; `mapping` may be NULL here, handled just below
+    mapping <- do.call(rbind, results)
+    
+    # No hits at all: warn and fall back to an empty data frame with the same two columns
+    if (is.null(mapping) || nrow(mapping) == 0) {
+      warning("No IPG mappings found for the given NCBI IDs.")
+      mapping <- data.frame(NCBI = character(0), IPG = character(0), stringsAsFactors = FALSE)
+    }
+    
+    return(mapping)
+  }, error = function(e) {
+    stop(paste("Error in mapping:", e$message))
+  })
+}
\ No newline at end of file