Skip to content

Commit

Permalink
Merge pull request #54 from bzhanglab/53-multi-list-ora-can-not-use-p…
Browse files Browse the repository at this point in the history
…re-defined-reference-list

fix(ora): add support for using pre-defined reference lists for multilist methods
  • Loading branch information
iblacksand authored Aug 1, 2024
2 parents beffc57 + 805553e commit e2bba4c
Show file tree
Hide file tree
Showing 22 changed files with 308 additions and 239 deletions.
1 change: 0 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
linters: linters_with_defaults(
line_length_linter = NULL,
commented_code_linter = NULL,
no_tab_linter = NULL,
object_name_linter = NULL
)

4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ Imports: methods, dplyr, doRNG, readr, parallel (>= 3.3.2),
igraph, whisker, apcluster, Rcpp, cluster, poolr
NeedsCompilation: yes
LinkingTo: Rcpp
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
SystemRequirements: Cargo (Rust's package manager), rustc v1.63.0+
Config/rextendr/version: 0.3.1
Config/rextendr/version: 0.3.1.9000
Config/WebGestaltR/MSRV: 1.63.0
Encoding: UTF-8
Suggests:
Expand Down
36 changes: 22 additions & 14 deletions R/WebGestaltRMultiOmics.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@
#' @param referenceTypes Vector of the ID types of the reference lists. The supported ID types
#' of WebGestaltR for the selected organism can be found by the function \code{listIdType}.
#' If the \code{organism} is \code{others}, users do not need to set this parameter.
#' @param referenceSets Users can directly select the reference sets from existing platforms in
#' WebGestaltR and do not need to provide the reference set through \code{referenceListFiles}.
#' All existing platforms supported in WebGestaltR can be found by the function
#' \code{listReferenceSets}. If \code{referenceListFiles} and \code{referenceLists} are
#' \code{NULL}, WebGestaltR will use the \code{referenceSets} as the reference analyte sets.
#' Otherwise, WebGestaltR will use the user supplied reference set for enrichment analysis.
#' Must be a vector with one entry per input analyte list (e.g. c('genome', 'genome', 'KEGG')).
#' @param minNum WebGestaltR will exclude the categories with the number of annotated genes
#' less than \code{minNum} for enrichment analysis. The default is \code{10}.
#' @param maxNum WebGestaltR will exclude the categories with the number of annotated genes
Expand Down Expand Up @@ -99,7 +106,7 @@
#' @param useWeightedSetCover Use weighted set cover for ORA. Defaults to \code{TRUE}.
#' @param useAffinityPropagation Use affinity propagation for ORA. Defaults to \code{FALSE}.
#' @param usekMedoid Use k-medoid for ORA. Defaults to \code{FALSE}.
#' @param isMetaAnalysis whether to perform meta-analysis. Defaults to \code{TRUE}.
#' @param isMetaAnalysis Whether to perform meta-analysis. Defaults to \code{TRUE}. \code{FALSE} is not currently implemented.
#' @param mergeMethod The method to merge the results from multiple omics (options: \code{mean}, \code{max}). Only used if \code{isMetaAnalysis = FALSE}. Defaults to \code{mean}.
#' @param normalizationMethod The method to normalize the results from multiple omics (options: \code{rank}, \code{median}, \code{mean}). Only used if \code{isMetaAnalysis = FALSE}.
#' @param kMedoid_k The number of clusters for k-medoid. Defaults to \code{25}.
Expand All @@ -112,7 +119,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL,
projectName = NULL, dagColor = "binary", saveRawGseaResult = FALSE, gseaPlotFormat = "png", nThreads = 1, cache = NULL,
hostName = "https://www.webgestalt.org/", useWeightedSetCover = TRUE, useAffinityPropagation = FALSE,
usekMedoid = FALSE, kMedoid_k = 25, isMetaAnalysis = TRUE, mergeMethod = "mean", normalizationMethod = "rank",
referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, listNames = NULL) {
referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, referenceSets = NULL, listNames = NULL) {
VALID_MERGE_METHODS <- c("mean", "max")
VALID_NORM_METHODS <- c("rank", "median", "mean")
VALID_ENRICH_METHODS <- c("ORA", "GSEA")
Expand All @@ -126,6 +133,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL,
referenceLists <- testNull(referenceLists)
referenceListFiles <- testNull(referenceListFiles)
referenceTypes <- testNull(referenceTypes)
referenceSets <- testNull(referenceSets)
}
organism <- testNull(organism)
enrichDatabase <- testNull(enrichDatabase)
Expand Down Expand Up @@ -193,7 +201,7 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL,
setCoverNum = setCoverNum, perNum = perNum, isOutput = isOutput, outputDirectory = outputDirectory, projectName = projectName,
dagColor = dagColor, nThreads = nThreads, cache = cache, hostName = hostName, useWeightedSetCover = useWeightedSetCover,
useAffinityPropagation = useAffinityPropagation, usekMedoid = usekMedoid, kMedoid_k = kMedoid_k, referenceLists = referenceLists,
referenceListFiles = referenceListFiles, referenceTypes = referenceTypes, listNames = listNames
referenceListFiles = referenceListFiles, referenceTypes = referenceTypes, referenceSets = referenceSets, listNames = listNames
)
## Meta-analysis
} else if (enrichMethod == "GSEA") {
Expand Down Expand Up @@ -266,18 +274,18 @@ WebGestaltRMultiOmics <- function(analyteLists = NULL, analyteListFiles = NULL,
all_sets <- list(geneSet = list(), geneSetDes = list(), geneSetDag = list(), geneSetNet = list(), standardId = list(), databases = list())
if (!is.null(enrichDatabase)) { # Need to get correct name for metabolite databases
if (length(unique(analyteTypes)) == 1) {
db <- get_gmt_file(hostName, analyteTypes[1], enrichDatabase, organism, cache)
res <- loadGeneSet(
organism = organism, enrichDatabase = db, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType,
enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = FALSE
)
elements <- names(res)
for (i in seq_along(analyteTypes)) {
for (j in seq_along(elements)) {
all_sets[[elements[j]]][[i]] <- res[[elements[j]]]
}
all_sets$databases[[i]] <- db
db <- get_gmt_file(hostName, analyteTypes[1], enrichDatabase, organism, cache)
res <- loadGeneSet(
organism = organism, enrichDatabase = db, enrichDatabaseFile = enrichDatabaseFile, enrichDatabaseType = enrichDatabaseType,
enrichDatabaseDescriptionFile = enrichDatabaseDescriptionFile, cache = cache, hostName = hostName, isMultiOmics = FALSE
)
elements <- names(res)
for (i in seq_along(analyteTypes)) {
for (j in seq_along(elements)) {
all_sets[[elements[j]]][[i]] <- res[[elements[j]]]
}
all_sets$databases[[i]] <- db
}
} else {
for (i in seq_along(analyteTypes)) {
db <- get_gmt_file(hostName, analyteTypes[i], enrichDatabase, organism, cache)
Expand Down
20 changes: 16 additions & 4 deletions R/WebGestaltRMultiomicsOra.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL
topThr = 10, reportNum = 100, setCoverNum = 10, perNum = 1000, gseaP = 1, isOutput = TRUE, outputDirectory = getwd(),
projectName = NULL, dagColor = "binary", nThreads = 1, cache = NULL, hostName = "https://www.webgestalt.org/",
useWeightedSetCover = TRUE, useAffinityPropagation = FALSE, usekMedoid = FALSE, kMedoid_k = 25,
referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, listNames = NULL) {
referenceLists = NULL, referenceListFiles = NULL, referenceTypes = NULL, referenceSets = NULL, listNames = NULL) {
projectDir <- file.path(outputDirectory, paste0("Project_", projectName))
cat("Performing multi-omics ORA\nLoading the functional categories...\n")
all_sets <- .load_meta_gmt(enrichDatabase, enrichDatabaseFile, enrichDatabaseDescriptionFile, enrichDatabaseType, analyteLists, analyteListFiles, analyteTypes, organism, cache, hostName)
Expand Down Expand Up @@ -53,7 +53,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL

cat("Loading the reference lists...\n")
reference_lists <- list()
if (is.null(referenceLists)) {
if (!is.null(referenceListFiles)) {
for (i in seq_along(referenceListFiles)) {
referenceGeneList <- loadReferenceGene(
organism = organism, referenceGeneFile = referenceListFiles[i],
Expand All @@ -65,7 +65,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL
)
reference_lists[[i]] <- referenceGeneList
}
} else {
} else if (!is.null(referenceLists)) {
for (i in seq_along(analyteLists)) {
referenceGeneList <- loadReferenceGene(
organism = organism, referenceGeneFile = NULL,
Expand All @@ -77,6 +77,18 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL
)
reference_lists[[i]] <- referenceGeneList
}
} else { # use pre-defined reference lists
for (i in seq_along(interest_lists)) {
referenceGeneList <- loadReferenceGene(
organism = organism, referenceGeneFile = NULL,
referenceGene = NULL, referenceGeneType = NULL,
referenceSet = referenceSets[[i]], collapseMethod = collapseMethod,
hostName = hostName, geneSet = all_sets[["geneSet"]][[i]],
interestGeneList = interest_lists[[i]],
cache = cache
)
reference_lists[[i]] <- referenceGeneList
}
}
cat("Running multi-omics ORA...\n")
oraRes <- multiOraEnrichment(interest_lists, reference_lists, all_sets[["geneSet"]],
Expand Down Expand Up @@ -283,7 +295,7 @@ WebGestaltRMultiOmicsOra <- function(analyteLists = NULL, analyteListFiles = NUL
interestGeneFile_list = analyteListFiles, interestGene_list = interest_lists,
interestGeneType_list = analyteTypes, collapseMethod = collapseMethod,
referenceGeneFile_list = referenceListFiles, referenceGene_list = referenceLists,
referenceGeneType_list = referenceTypes, referenceSet_list = referenceLists, minNum = minNum,
referenceGeneType_list = referenceTypes, referenceSet_list = referenceSets, minNum = minNum,
maxNum = maxNum, fdrMethod = fdrMethod, sigMethod = sigMethod, fdrThr = fdrThr,
topThr = topThr, reportNum = reportNum, dagColor = dagColor, listNames = listNames
)
Expand Down
2 changes: 1 addition & 1 deletion R/WebGestaltRNta.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ WebGestaltRNta <- function(organism = "hsapiens", network = "network_PPI_BIOGRID
} else {
response <- POST(geneSetUrl, body = list(
organism = organism, database = "geneontology_Biological_Process",
fileType = "des", ids = goTermList, version = "2024"
fileType = "des", ids = goTermList, version = WEBGESTALT_DATA_VERSION
), encode = "json")
goId2Term <- read_tsv(content(response), col_names = c("id", "name"), col_types = "cc")
}
Expand Down
4 changes: 2 additions & 2 deletions R/cacheFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ cacheUrl <- function(dataUrl, cache = NULL, query = NULL) {
# cat("Reading from server: ", dataUrl, "\n")
if (!is.null(query)) {
if (!("version" %in% names(query))) {
query[["version"]] <- "2024"
query[["version"]] <- WEBGESTALT_DATA_VERSION
}
response <- GET(dataUrl, query = query)
} else {
response <- GET(dataUrl, query = list(version = "2024"))
response <- GET(dataUrl, query = list(version = WEBGESTALT_DATA_VERSION))
}
if (response$status_code != 200) {
return(response)
Expand Down
1 change: 1 addition & 0 deletions R/constants.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
WEBGESTALT_DATA_VERSION <- "2024"
1 change: 0 additions & 1 deletion R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# This file was created with the following call:
# .Call("wrap__make_WebGestaltR_wrappers", use_symbols = TRUE, package_name = "WebGestaltR")

#' @docType package
#' @usage NULL
#' @useDynLib WebGestaltR, .registration = TRUE
NULL
Expand Down
Loading

0 comments on commit e2bba4c

Please sign in to comment.