Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 109 a: #110

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions R/accnum.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@


#' Convert UniProt IDs to NCBI RefSeq Accessions
#'
#' This function takes one or more UniProt IDs and returns the corresponding NCBI RefSeq accessions.
#' It uses the org.Hs.eg.db package to perform the mapping.
#'
#' @param uniprot_ids A character vector of UniProt IDs.
#' @return A data frame with columns 'UNIPROT' and 'REFSEQ', mapping UniProt IDs to RefSeq accessions.
#' Returns an empty data frame if no mappings are found.
#' @examples
#' \dontrun{
#' uniprot_ids <- c("P04217", "P01023")
#' refseq_accessions <- up2ncbi(uniprot_ids)
#' print(refseq_accessions)
#' }
#' @importFrom AnnotationDbi select
#' @import org.Hs.eg.db
#' @export
up2ncbi <- function(uniprot_ids) {
# Check if input is provided
if (length(uniprot_ids) == 0) {
stop("No UniProt IDs provided.")
}

# Perform the mapping
tryCatch({
mapping <- AnnotationDbi::select(
org.Hs.eg.db,
keys = uniprot_ids,
columns = "REFSEQ",
keytype = "UNIPROT"
)

# Check if any mappings were found
if (nrow(mapping) == 0) {
warning("No NCBI accessions found for the given UniProt IDs.")
}

return(mapping)
}, error = function(e) {
stop(paste("Error in mapping:", e$message))
})
}



#' Convert NCBI RefSeq Accessions to UniProt IDs
#'
#' This function takes one or more NCBI RefSeq accession numbers and returns the corresponding UniProt IDs.
#' It uses the org.Hs.eg.db package to perform the mapping.
#'
#' @param ncbi_accessions A character vector of NCBI RefSeq accession numbers.
#' @return A data frame with columns 'REFSEQ' and 'UNIPROT', mapping RefSeq accessions to UniProt IDs.
#' Returns NA for UniProt if no mapping is found.
#' @examples
#' \dontrun{
#' ncbi_accessions <- c("NP_000005.2", "NP_000007.1")
#' uniprot_ids <- ncbi2up(ncbi_accessions)
#' print(uniprot_ids)
#' }
#' @importFrom AnnotationDbi select
#' @import org.Hs.eg.db
#' @export
ncbi2up <- function(ncbi_accessions) {
# Check if input is provided
if (length(ncbi_accessions) == 0) {
stop("No NCBI accessions provided.")
}

# Strip version numbers from accessions
stripped_accessions <- gsub("\\.[0-9]+$", "", ncbi_accessions)

# Perform the mapping
tryCatch({
mapping <- AnnotationDbi::select(
org.Hs.eg.db,
keys = stripped_accessions,
columns = "UNIPROT",
keytype = "REFSEQ"
)

# Check if any mappings were found
if (nrow(mapping) == 0) {
warning("No UniProt IDs found for the given NCBI accessions.")
}

return(mapping)
}, error = function(e) {
stop(paste("Error in mapping:", e$message))
})
}



#' Convert NCBI Protein Accessions to IPG (Identical Protein Group) IDs
#'
#' This function takes one or more NCBI protein accession numbers and returns the corresponding
#' IPG (Identical Protein Group) IDs. It uses the NCBI E-utilities API via the rentrez package
#' to perform the mapping.
#'
#' @param ncbi_ids A character vector of NCBI protein accession numbers.
#' @return A data frame with columns 'NCBI' and 'IPG', mapping NCBI protein accessions to IPG IDs.
#' Returns an empty data frame if no mappings are found.
#' @examples
#' \dontrun{
#' ncbi_ids <- c("NP_000005.2", "NP_000007.1")
#' ipg_mappings <- ncbi2ipg(ncbi_ids)
#' print(ipg_mappings)
#' }
#' @importFrom rentrez entrez_search
#' @export
ncbi2ipg <- function(ncbi_ids) {
# Check if input is provided
if (length(ncbi_ids) == 0) {
stop("No NCBI IDs provided.")
}

# Perform the mapping
tryCatch({
# Search the IPG database for each NCBI ID
results <- lapply(ncbi_ids, function(id) {
search <- entrez_search(db = "ipg", term = paste0(id, "[PACC]"))
if (search$count > 0) {
data.frame(NCBI = id, IPG = search$ids, stringsAsFactors = FALSE)
} else {
NULL
}
})

# Combine results into a single data frame
mapping <- do.call(rbind, results)

# Check if any mappings were found
if (is.null(mapping) || nrow(mapping) == 0) {
warning("No IPG mappings found for the given NCBI IDs.")
mapping <- data.frame(NCBI = character(0), IPG = character(0), stringsAsFactors = FALSE)
}

return(mapping)
}, error = function(e) {
stop(paste("Error in mapping:", e$message))
})
}