Skip to content

Commit

Permalink
Merge pull request #42 from CostaLab/devel
Browse files Browse the repository at this point in the history
Added turorials and updated vignettes.
  • Loading branch information
grasshoffm authored Oct 7, 2024
2 parents b2d3bad + 04221be commit 73adc91
Show file tree
Hide file tree
Showing 284 changed files with 15,276 additions and 5,333 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: sigurd
Type: Package
Title: Single cell Genotyping Using RNA Data
Version: 0.3.0
Version: 0.3.1
Authors@R: c(
person(given = "Martin",
family = "Grasshoff",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(GetCellInfoPerVariant)
export(GetVariantInfo)
export(HeatmapVoi)
export(LoadingMAEGATK_typewise)
export(LoadingRawMatrix_typewise)
export(LoadingVCF_typewise)
export(LoadingVarTrix_typewise)
export(Merging_SE_list)
Expand Down
67 changes: 64 additions & 3 deletions R/ClonalDefinition.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
#'@param se SummarizedExperiment object.
#'@param variants_ls List of variants for clonal definition
#'@param grouping The meta data column used to split the cells into groups. Default = NULL
#'@param n_cores Number of cores you want to use. Numeric.
#'@param identities Vector of groups, like samples.
#'@param explicit Do you want to specify the variants forming a clone? Then the variant_ls needs to be a list of variants that each define a clone. One variant per group.
#'@param explicit_not Do you want to specify a set of variants that the cell may not have? One set per group.
#'@param explicit_min_vaf Minimum VAF for a cell to be considered positive.
#'@param verbose Should the function be verbose? Default = TRUE
#'@export
ClonalDefinition <- function(se, variants_ls, grouping = NULL, identities = NULL, n_cores = 1, verbose = TRUE){
ClonalDefinition <- function(se, variants_ls, grouping = NULL, identities = NULL, explicit = FALSE, explicit_not = NULL, explicit_min_vaf = 0.01, verbose = TRUE){
# Checking if the group variable is in the column data.
if(!is.null(grouping)){
# We check if the grouping variable is in the column data.
Expand Down Expand Up @@ -44,12 +46,71 @@ ClonalDefinition <- function(se, variants_ls, grouping = NULL, identities = NULL
}
# Checking if any of the variants is not in the SE object.
for(i in 1:length(variants_ls)){
check_variants <- all(variants_ls[[i]] %in% rownames(se))
check_variants <- all(unlist(variants_ls[[i]]) %in% rownames(se))
if(!check_variants){
stop(paste0("The set of variants number ", i, " has variants not in the object. Please check."))
}
}


if(explicit){
if(verbose) print("Explicit clone definition.")
# We prepare a new meta data column.
new_meta_data <- rep("OtherLineage", ncol(se))
names(new_meta_data) <- colnames(se)
for(i in 1:length(variants_ls)){
variants_ls_group <- variants_ls[[i]]

# If the grouping variables is not NULL, we get the relevant identity.
if(!is.null(grouping)){
identity_use <- identities[i]
cells_use <- SummarizedExperiment::colData(se)
cells_use <- cells_use[cells_use[,grouping] == identity_use, ]
cells_use <- rownames(cells_use)
se_use <- se[,cells_use]
} else{
se_use <- se
}

# We prepare the new meta data for the subset of the cells.
new_meta_data_subset <- rep("OtherLineage", ncol(se_use))
names(new_meta_data_subset) <- colnames(se_use)

# For each set of variants, we now assign a cell to be part of this clone.
for(j in 1:length(variants_ls_group)){
variants_use <- variants_ls_group[[j]]
# We check if a cell is mutated for the set of variants.
check_use <- as.matrix(SummarizedExperiment::assays(se_use)[["consensus"]][variants_use, , drop = FALSE])
if(explicit_min_vaf > 0){
frac_check <- as.matrix(SummarizedExperiment::assays(se_use)[["fraction"]][variants_use, , drop = FALSE])
check_use[frac_check < explicit_min_vaf] <- 1
}
check_use <- matrix(check_use %in% 2:3, ncol = ncol(check_use), nrow = nrow(check_use), dimnames = list(rownames(check_use), colnames(check_use)))
check_use <- colSums(check_use) == length(variants_use)
if(!is.null(explicit_not)){
variants_not <- explicit_not[[i]][[j]]
check_not <- as.matrix(SummarizedExperiment::assays(se_use)[["consensus"]][variants_not, , drop = FALSE])
if(explicit_min_vaf > 0){
frac_not <- as.matrix(SummarizedExperiment::assays(se_use)[["fraction"]][variants_not, , drop = FALSE])
check_not[frac_not < explicit_min_vaf] <- 1
}
check_not <- matrix(check_not %in% 2:3, ncol = ncol(check_not), nrow = nrow(check_not), dimnames = list(rownames(check_not), colnames(check_not)))
check_not <- colSums(check_not) > 0
check_not <- check_not[check_not]
check_use[names(check_not)] <- FALSE
}
check_use <- check_use[check_use]
# We check if a cell is mutated for any of the other variants.
new_meta_data_subset[names(check_use)] <- paste0("C", j)
# We add the new meta data to the combined meta data.
new_meta_data[names(new_meta_data_subset)] <- new_meta_data_subset
}
}
SummarizedExperiment::colData(se)$Clones <- new_meta_data
return(se)
}


# For each set of variants, we get all possible combinations.
combinations_ls <- list()
for(i in 1:length(variants_ls)){
Expand Down
100 changes: 100 additions & 0 deletions R/LoadingRawMatrix_typewise.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#'LoadingRawMatrix_typewise
#'@description
#'A function to load a dense matrix of genotyping information and reformat it. Each matrix should have a column with the cell barcodes, one with the reference reads and one with the alternative reads.
#'The type in the design matrix is used as the variant name.
#'@importFrom utils read.table
#'@importFrom SummarizedExperiment SummarizedExperiment
#'@importFrom Matrix colMeans rowMeans
#'@param samples_path Path to the input folder. Must include a barcodes file.
#'@param samples_file Path to the csv file with the samples to be loaded.
#'@param patient The patient you want to load.
#'@param patient_column The column that contains the patient information.
#'@param variant_use The variant of the respective matrices.
#'@param cells_include A vector of cell barcodes. Only these cells will be retained.
#'@param cells_exclude A vector of cell barcodes. These cells will be removed from the output.
#'@param cellbarcode_length The length of the cell barcode. This should be the length of the actual barcode plus two for the suffix (-1). Default = 18
#'@param verbose Should the function be verbose? Default = TRUE
#'@export
LoadingRawMatrix_typewise <- function(samples_file, patient, patient_column = "patient", variant_use = NULL, cells_include = NULL, cells_exclude = NULL, matrix_column_separator = ",", matrix_header = TRUE, cellbarcode_index = 1, ref_index = 2, alt_index = 3, cellbarcode_length = 18, verbose = TRUE){
if(is.null(variant_use)) stop ("You have to supply a variant.")
if(cellbarcode_index == 0) stop("Your cellbarcode_index cannot be 0.")
if(ref_index == 0) stop("Your ref_index cannot be 0.")
if(alt_index == 0) stop("Your alt_index cannot be 0.")

if(verbose) print("We load the design matrix.")
samples_file <- read.table(samples_file, sep = ",", header = TRUE)
if(!patient_column %in% colnames(samples_file)) stop("Your patient_column is not in the design matrix.")
if(!patient %in% samples_file$patient) stop("Your patient is not in the design matrix.")
samples_file <- subset(samples_file, patient == patient)
samples_file <- samples_file[grep("matrix", samples_file$source, ignore.case = TRUE),]
if(any(!variant_use %in% samples_file$type)){
stop("Your variant in not in the design matrix.")
}
samples_file <- samples_file[samples_file$type == variant_use, , drop = FALSE]
samples <- samples_file$sample
variant_use <- samples_file$type


if(verbose) print("We load the data.")
loaded_matrix_ls <- c()
cellbarcodes_total <- c()
for(i in 1:nrow(samples_file)){
loaded_matrix <- read.table(samples_file$input_path[i], sep = matrix_column_separator, header = matrix_header)
if(cellbarcode_index > ncol(loaded_matrix)) stop(paste0("Your cellbarcode_index is outside the matrix dimensions for the ", i , " sample."))
if(ref_index > ncol(loaded_matrix)) stop(paste0("Your ref_index is outside the matrix dimensions for the ", i , " sample."))
if(alt_index > ncol(loaded_matrix)) stop(paste0("Your alt_index is outside the matrix dimensions for the ", i , " sample."))
loaded_matrix[, cellbarcode_index] <- paste0(samples[i], "_", loaded_matrix[, cellbarcode_index])
if(!is.null(cells_include)){
if(verbose) paste0("We remove all cells not in the white list for sample ", i, ".")
# Check if any cells would be left.
check_leftover <- any(loaded_matrix[, cellbarcode_index] %in% cells_include)
if(!check_leftover) stop("No cells are in your supplied list for the ", i, " sample.")
loaded_matrix <- loaded_matrix[colnames(coverage_matrix_total) %in% cells_include, , drop = FALSE]
}
if(!is.null(cells_exclude)){
if(verbose) paste0("We remove all cells in the exclusion list for sample ", i, ".")
# Check if any cells would be left.
check_leftover <- all(loaded_matrix[, cellbarcode_index] %in% cells_exclude)
if(check_leftover) stop(paste0("All cells are in your exclusion list for sample ", i, "."))
loaded_matrix <- loaded_matrix[!loaded_matrix[, cellbarcode_index] %in% cells_exclude, , drop = FALSE]
}
cellbarcodes_total <- unique(c(cellbarcodes_total, loaded_matrix[, cellbarcode_index]))
loaded_matrix_ls[[paste0(samples[i], "_", samples_file$type[i])]] <- loaded_matrix
}


alts <- refs <- matrix(0, ncol = length(cellbarcodes_total), nrow = length(variant_use), dimnames = list(variant_use, cellbarcodes_total))
for(i in 1:length(loaded_matrix_ls)){
alts[i, loaded_matrix_ls[[i]][, cellbarcode_index]] <- loaded_matrix_ls[[i]][, alt_index]
refs[i, loaded_matrix_ls[[i]][, cellbarcode_index]] <- loaded_matrix_ls[[i]][, ref_index]
}
coverage <- alts + refs


ref_construction <- refs > 0
alt_construction <- alts > 0
alt_construction[alt_construction] <- 2
consensus <- ref_construction + alt_construction
fraction <- alts / coverage


coverage <- as(coverage, "CsparseMatrix")
alts <- as(alts, "CsparseMatrix")
refs <- as(refs, "CsparseMatrix")
consensus <- as(consensus, "CsparseMatrix")
fraction <- as(fraction, "CsparseMatrix")


if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.")
# We want an assay for the Consensus information and for the fraction.
# As meta data we add a data frame showing the cell id, the associated patient and the sample.
average_depth_per_cell <- Matrix::colMeans(coverage)
coverage_depth_per_variant <- Matrix::rowMeans(coverage)
meta_data <- data.frame(Cell = colnames(consensus), Patient = patient, Sample = substr(x = colnames(consensus), start = 1, stop = nchar(colnames(consensus))-(cellbarcode_length+1)), AverageCoverage = average_depth_per_cell)
rownames(meta_data) <- meta_data$Cell
meta_row <- data.frame(VariantName = rownames(consensus), Depth = coverage_depth_per_variant)
rownames(meta_row) <- meta_row$VariantName
se <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = alts, refs = refs), colData = meta_data, rowData = meta_row)
return(se)
}

2 changes: 1 addition & 1 deletion R/RowWiseSplit.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#'@param n_cores Number of cores to use.
#'@param remove_nocalls Do you want to remove NoCall cells?
#'@export
RowWiseSplit <- function(se, n_cores = 1, remove_nocalls = TRUE){
RowWiseSplit <- function(se, n_cores = 1, minimum_reads, remove_nocalls = TRUE){
consensus <- SummarizedExperiment::assays(se)$consensus
consensus_list <- parallel::mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores)
names(consensus_list) <- rownames(se)
Expand Down
3 changes: 0 additions & 3 deletions R/VariantWiseFisherTest.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment
if(verbose) print("We remove the NA P values.")
results_total <- results_total[!is.na(results_total$P),]

if(verbose) print("We remove the SNPs with a odds ratio lower than 1.")
results_total <- subset(results_total, OddsRatio > 1)

if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, "."))
results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment)
rownames(results_total) <- NULL
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu
```

# Current Features v0.3.0
# Current Features v0.3.1

- Loading data from VarTrix and MAEGATK.
- Transforming the data to be compatible for joint analysis.
Expand Down
Loading

0 comments on commit 73adc91

Please sign in to comment.