-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #42 from CostaLab/devel
Added turorials and updated vignettes.
- Loading branch information
Showing
284 changed files
with
15,276 additions
and
5,333 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#'LoadingRawMatrix_typewise | ||
#'@description | ||
#'A function to load a dense matrix of genotyping information and reformat it. Each matrix should have a column with the cell barcodes, one with the reference reads and one with the alternative reads. | ||
#'The type in the design matrix is used as the variant name. | ||
#'@importFrom utils read.table | ||
#'@importFrom SummarizedExperiment SummarizedExperiment | ||
#'@importFrom Matrix colMeans rowMeans | ||
#'@param samples_path Path to the input folder. Must include a barcodes file. | ||
#'@param samples_file Path to the csv file with the samples to be loaded. | ||
#'@param patient The patient you want to load. | ||
#'@param patient_column The column that contains the patient information. | ||
#'@param variant_use The variant of the respective matrices. | ||
#'@param cells_include A vector of cell barcodes. Only these cells will be retained. | ||
#'@param cells_exclude A vector of cell barcodes. These cells will be removed from the output. | ||
#'@param cellbarcode_length The length of the cell barcode. This should be the length of the actual barcode plus two for the suffix (-1). Default = 18 | ||
#'@param verbose Should the function be verbose? Default = TRUE | ||
#'@export | ||
LoadingRawMatrix_typewise <- function(samples_file, patient, patient_column = "patient", variant_use = NULL, cells_include = NULL, cells_exclude = NULL, matrix_column_separator = ",", matrix_header = TRUE, cellbarcode_index = 1, ref_index = 2, alt_index = 3, cellbarcode_length = 18, verbose = TRUE){ | ||
if(is.null(variant_use)) stop ("You have to supply a variant.") | ||
if(cellbarcode_index == 0) stop("Your cellbarcode_index cannot be 0.") | ||
if(ref_index == 0) stop("Your ref_index cannot be 0.") | ||
if(alt_index == 0) stop("Your alt_index cannot be 0.") | ||
|
||
if(verbose) print("We load the design matrix.") | ||
samples_file <- read.table(samples_file, sep = ",", header = TRUE) | ||
if(!patient_column %in% colnames(samples_file)) stop("Your patient_column is not in the design matrix.") | ||
if(!patient %in% samples_file$patient) stop("Your patient is not in the design matrix.") | ||
samples_file <- subset(samples_file, patient == patient) | ||
samples_file <- samples_file[grep("matrix", samples_file$source, ignore.case = TRUE),] | ||
if(any(!variant_use %in% samples_file$type)){ | ||
stop("Your variant in not in the design matrix.") | ||
} | ||
samples_file <- samples_file[samples_file$type == variant_use, , drop = FALSE] | ||
samples <- samples_file$sample | ||
variant_use <- samples_file$type | ||
|
||
|
||
if(verbose) print("We load the data.") | ||
loaded_matrix_ls <- c() | ||
cellbarcodes_total <- c() | ||
for(i in 1:nrow(samples_file)){ | ||
loaded_matrix <- read.table(samples_file$input_path[i], sep = matrix_column_separator, header = matrix_header) | ||
if(cellbarcode_index > ncol(loaded_matrix)) stop(paste0("Your cellbarcode_index is outside the matrix dimensions for the ", i , " sample.")) | ||
if(ref_index > ncol(loaded_matrix)) stop(paste0("Your ref_index is outside the matrix dimensions for the ", i , " sample.")) | ||
if(alt_index > ncol(loaded_matrix)) stop(paste0("Your alt_index is outside the matrix dimensions for the ", i , " sample.")) | ||
loaded_matrix[, cellbarcode_index] <- paste0(samples[i], "_", loaded_matrix[, cellbarcode_index]) | ||
if(!is.null(cells_include)){ | ||
if(verbose) paste0("We remove all cells not in the white list for sample ", i, ".") | ||
# Check if any cells would be left. | ||
check_leftover <- any(loaded_matrix[, cellbarcode_index] %in% cells_include) | ||
if(!check_leftover) stop("No cells are in your supplied list for the ", i, " sample.") | ||
loaded_matrix <- loaded_matrix[colnames(coverage_matrix_total) %in% cells_include, , drop = FALSE] | ||
} | ||
if(!is.null(cells_exclude)){ | ||
if(verbose) paste0("We remove all cells in the exclusion list for sample ", i, ".") | ||
# Check if any cells would be left. | ||
check_leftover <- all(loaded_matrix[, cellbarcode_index] %in% cells_exclude) | ||
if(check_leftover) stop(paste0("All cells are in your exclusion list for sample ", i, ".")) | ||
loaded_matrix <- loaded_matrix[!loaded_matrix[, cellbarcode_index] %in% cells_exclude, , drop = FALSE] | ||
} | ||
cellbarcodes_total <- unique(c(cellbarcodes_total, loaded_matrix[, cellbarcode_index])) | ||
loaded_matrix_ls[[paste0(samples[i], "_", samples_file$type[i])]] <- loaded_matrix | ||
} | ||
|
||
|
||
alts <- refs <- matrix(0, ncol = length(cellbarcodes_total), nrow = length(variant_use), dimnames = list(variant_use, cellbarcodes_total)) | ||
for(i in 1:length(loaded_matrix_ls)){ | ||
alts[i, loaded_matrix_ls[[i]][, cellbarcode_index]] <- loaded_matrix_ls[[i]][, alt_index] | ||
refs[i, loaded_matrix_ls[[i]][, cellbarcode_index]] <- loaded_matrix_ls[[i]][, ref_index] | ||
} | ||
coverage <- alts + refs | ||
|
||
|
||
ref_construction <- refs > 0 | ||
alt_construction <- alts > 0 | ||
alt_construction[alt_construction] <- 2 | ||
consensus <- ref_construction + alt_construction | ||
fraction <- alts / coverage | ||
|
||
|
||
coverage <- as(coverage, "CsparseMatrix") | ||
alts <- as(alts, "CsparseMatrix") | ||
refs <- as(refs, "CsparseMatrix") | ||
consensus <- as(consensus, "CsparseMatrix") | ||
fraction <- as(fraction, "CsparseMatrix") | ||
|
||
|
||
if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") | ||
# We want an assay for the Consensus information and for the fraction. | ||
# As meta data we add a data frame showing the cell id, the associated patient and the sample. | ||
average_depth_per_cell <- Matrix::colMeans(coverage) | ||
coverage_depth_per_variant <- Matrix::rowMeans(coverage) | ||
meta_data <- data.frame(Cell = colnames(consensus), Patient = patient, Sample = substr(x = colnames(consensus), start = 1, stop = nchar(colnames(consensus))-(cellbarcode_length+1)), AverageCoverage = average_depth_per_cell) | ||
rownames(meta_data) <- meta_data$Cell | ||
meta_row <- data.frame(VariantName = rownames(consensus), Depth = coverage_depth_per_variant) | ||
rownames(meta_row) <- meta_row$VariantName | ||
se <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = alts, refs = refs), colData = meta_data, rowData = meta_row) | ||
return(se) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.