Skip to content

Commit

Permalink
accept df
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed Nov 21, 2024
1 parent 6bef9e7 commit d8dc27d
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 65 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ importFrom(GenomicRanges,makeGRangesListFromDataFrame)
importFrom(Matrix,colSums)
importFrom(S4Vectors,metadata)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,as.data.frame)
importFrom(SummarizedExperiment,assays)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
Expand Down
42 changes: 33 additions & 9 deletions R/functions_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,33 @@ univariable_differential_tissue_composition_SE = function(
unnest(surv_test, keep_empty = TRUE)
}

.resolve_complete_confounders_of_non_interest_df <- function(df, ...){

combination_of_factors_of_NON_interest =
# Factors
df |>
as_tibble(rownames = ".sample") |>
select(...) |>
suppressWarnings() |>
colnames() |>

# Combinations
combn(2) |>
t() |>
as_tibble() |>
set_names(c("factor_1", "factor_2"))

for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){
df =
df |>
resolve_complete_confounders_of_non_interest_pair_df(
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1),
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2)
)
}

df
}

#' Resolve Complete Confounders of Non-Interest
#'
Expand Down Expand Up @@ -1475,13 +1502,13 @@ univariable_differential_tissue_composition_SE = function(
#' # se is a SummarizedExperiment object
#' resolve_complete_confounders_of_non_interest(se, .factor_1 = factor1, .factor_2 = factor2)
#' @noRd
resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1, .factor_2){
resolve_complete_confounders_of_non_interest_pair_df <- function(df, .factor_1, .factor_2){

.factor_1 <- enquo(.factor_1)
.factor_2 <- enquo(.factor_2)

cd =
colData(se) |>
df |>
as_tibble() |>
rowid_to_column() |>
distinct(rowid, !!.factor_1, !!.factor_2) |>
Expand Down Expand Up @@ -1516,15 +1543,12 @@ resolve_complete_confounders_of_non_interest_pair_SE <- function(se, .factor_1,
cd = cd |>
mutate(!!.factor_2 := if_else(n1 + n2 < 3, dummy_factor_2, !!.factor_2))
}

colData(se)[,c(quo_name(.factor_1), quo_name(.factor_2))] =
df[,c(quo_name(.factor_1), quo_name(.factor_2))] =
cd |>
unnest(se_data) |>
arrange(rowid) |>
select(!!.factor_1, !!.factor_2) |>
DataFrame()

se
select(!!.factor_1, !!.factor_2)

df
}

70 changes: 58 additions & 12 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -4914,24 +4914,70 @@ as_matrix <- function(tbl,

#' Resolve Complete Confounders of Non-Interest
#'
#' This generic function processes a SummarizedExperiment object to handle confounders
#' that are not of interest in the analysis. It dynamically handles combinations
#' of provided factors, adjusting the data by nesting and summarizing over these factors.
#' This function identifies and resolves complete confounders among specified factors of non-interest within a `SummarizedExperiment` object.
#' Complete confounders occur when the levels of one factor are entirely predictable based on the levels of another factor.
#' Such relationships can interfere with downstream analyses by introducing redundancy or collinearity.
#'
#' The function systematically examines pairs of specified factors and determines whether they are completely confounded.
#' If a pair of factors is found to be confounded, one of the factors is adjusted or removed to resolve the issue.
#' The adjusted `SummarizedExperiment` object is returned, preserving all assays and metadata except the resolved factors.
#'
#' @param se A SummarizedExperiment object that contains the data to be processed.
#' @param ... Arbitrary number of factor variables represented as symbols or quosures
#' to be considered for resolving confounders. These factors are processed
#' in combinations of two.
#' @param se A `SummarizedExperiment` object. This object contains assay data, row data (e.g., gene annotations), and column data (e.g., sample annotations).
#' @param ... Factors of non-interest (column names from `colData(se)`) to examine for complete confounders.
#'
#' @rdname resolve_complete_confounders_of_non_interest-methods
#' @details
#' Complete confounders of non-interest can create dependencies between variables that may bias statistical models or violate their assumptions.
#' This function systematically addresses this by:
#' 1. Identifying pairs of factors in the specified columns that are fully confounded.
#' 2. Resolving confounding by adjusting or removing one of the factors from the `colData` slot.
#'
#' @return A modified SummarizedExperiment object with confounders resolved.
#' The resolution strategy depends on the analysis context and can be modified in the helper function
#' `resolve_complete_confounders_of_non_interest_pair_SE()`. By default, the function removes one of the confounded factors.
#'
#' @return
#' A `SummarizedExperiment` object with resolved confounders. The object retains its structure, including assays and metadata,
#' but the column data (`colData`) is updated to reflect the resolved factors.
#'
#' @examples
#' # Not run:
#' # se is a SummarizedExperiment object
#' # resolve_complete_confounders_of_non_interest(se, factor1, factor2, factor3)
#' # Load necessary libraries
#' library(SummarizedExperiment)
#' library(dplyr)
#'
#' # Sample annotations
#' sample_annotations <- data.frame(
#' sample_id = paste0("Sample", seq(1, 9)),
#' factor_of_interest = c(rep("treated", 4), rep("untreated", 5)),
#' A = c("a1", "a2", "a1", "a2", "a1", "a2", "a1", "a2", "a3"),
#' B = c("b1", "b1", "b2", "b1", "b1", "b1", "b2", "b1", "b3"),
#' C = c("c1", "c1", "c1", "c1", "c1", "c1", "c1", "c1", "c3"),
#' stringsAsFactors = FALSE
#' )
#'
#' # Simulated assay data
#' assay_data <- matrix(rnorm(100 * 9), nrow = 100, ncol = 9)
#'
#' # Row data (e.g., gene annotations)
#' row_data <- data.frame(gene_id = paste0("Gene", seq_len(100)))
#'
#' # Create SummarizedExperiment object
#' se <- SummarizedExperiment(
#' assays = list(counts = assay_data),
#' rowData = row_data,
#' colData = DataFrame(sample_annotations)
#' )
#'
#' # Apply the function to resolve confounders
#' se_resolved <- resolve_complete_confounders_of_non_interest(se, A, B, C)
#'
#' # View the updated column data
#' colData(se_resolved)
#'
#' @seealso
#' \code{\link[SummarizedExperiment]{SummarizedExperiment}} for creating and handling `SummarizedExperiment` objects.
#'
#' @importFrom dplyr select
#' @importFrom rlang set_names
#' @importFrom tibble as_tibble
#' @export
setGeneric("resolve_complete_confounders_of_non_interest", function(se, ...) {
standardGeneric("resolve_complete_confounders_of_non_interest")
Expand Down
32 changes: 8 additions & 24 deletions R/methods_SE.R
Original file line number Diff line number Diff line change
Expand Up @@ -2816,33 +2816,17 @@ setMethod("describe_transcript", "RangedSummarizedExperiment", .describe_transcr
#' @importFrom dplyr select
#' @importFrom rlang set_names
#' @importFrom tibble as_tibble
#' @importFrom SummarizedExperiment as.data.frame
.resolve_complete_confounders_of_non_interest <- function(se, ...){

combination_of_factors_of_NON_interest =
# Factors
se[1,1, drop=FALSE] |>
colData() |>
as_tibble(rownames = ".sample") |>
select(...) |>
suppressWarnings() |>
colnames() |>

# Combinations
combn(2) |>
t() |>
as_tibble() |>
set_names(c("factor_1", "factor_2"))

for(i in combination_of_factors_of_NON_interest |> nrow() |> seq_len()){
se =
se |>
resolve_complete_confounders_of_non_interest_pair_SE(
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_1),
!!as.symbol(combination_of_factors_of_NON_interest[i,]$factor_2)
)
}

colData(se) =
colData(se) |>
as.data.frame() |>
.resolve_complete_confounders_of_non_interest_df(...) |>
DataFrame()

se

}

#' resolve_complete_confounders_of_non_interest
Expand Down
28 changes: 8 additions & 20 deletions man/resolve_complete_confounders_of_non_interest-methods.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 74 additions & 0 deletions man/resolve_complete_confounders_of_non_interest.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d8dc27d

Please sign in to comment.