Skip to content

Commit

Permalink
Merge pull request #29 from CostaLab/devel
Browse files Browse the repository at this point in the history
Fixes for the loading of VCF files.
  • Loading branch information
grasshoffm authored Mar 15, 2024
2 parents 3db0be2 + f61cb5b commit c16f8d6
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: sigurd
Type: Package
Title: Single cell Genotyping Using RNA Data
Version: 0.2.45
Version: 0.2.46
Authors@R: c(
person(given = "Martin",
family = "Grasshoff",
Expand Down
22 changes: 12 additions & 10 deletions R/LoadingVCF_typewise.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,32 @@
#'@param samples_file Path to the csv file with the samples to be loaded.
#'@param vcf_path Path to the VCF file with the variants.
#'@param patient The patient you want to load.
#'@param patient_column The column that contains the patient information. Use "merge" if all samples should be merged.
#'@param type_use The type of input. Only rows that have the specified type will be loaded.
#'@param min_reads The minimum number of reads required. Anything below this is treated as a NoCall. Default = NULL.
#'@param min_cells The minimum number of cells for a variant. Variants below this threshold are removed. Default = 2.
#'@param remove_N_alternative Remove all variants that have N as an alternative, see Description. Default = TRUE.
#'@param cellbarcode_length The length of the cell barcode. This should be the length of the actual barcode plus two for the suffix (-1). Default = 18.
#'@param verbose Should the function be verbose? Default = TRUE
#'@export
LoadingVCF_typewise <- function(samples_file, samples_path = NULL, vcf_path, patient, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, remove_N_alternative = TRUE, cellbarcode_length = 18, verbose = TRUE){
LoadingVCF_typewise <- function(samples_file, samples_path = NULL, vcf_path, patient, patient_column = "patient", type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, remove_N_alternative = TRUE, cellbarcode_length = 18, verbose = TRUE){
if(!is.null(samples_path)){
if(verbose) print(paste0("Loading the data for sample ", patient, "."))
samples_file <- data.frame(patient = patient, sample = patient, input_path = samples_path)
samples <- samples_file$sample
} else{
if(verbose) print(paste0("Loading the data for patient ", patient, "."))
if(verbose) print("We read in the samples file.")
if(verbose) print("We read in the central input file.")
samples_file <- utils::read.csv(samples_file, stringsAsFactors = FALSE)
if(!patient_column %in% colnames(samples_file) & patient_column != "merge"){
stop(paste0("Error: the column ", patient_column, " is not in your central input file."))
}


if(verbose) print("We subset to the patient of interest.")
if(verbose) print("We subset to the relevant files.")
samples_file <- samples_file[grep("vcf", samples_file$source, ignore.case = TRUE),]
samples_file <- samples_file[samples_file$patient == patient,]
if(patient_column != "merge") samples_file <- samples_file[samples_file[,patient_column] == patient,]
samples_file <- samples_file[samples_file$type == type_use,]


if(verbose) print("We get the different samples.")
samples <- samples_file$sample
}
Expand Down Expand Up @@ -102,10 +104,10 @@ LoadingVCF_typewise <- function(samples_file, samples_path = NULL, vcf_path, pat
if(remove_N_alternative){
ref_matrix_total_n <- substr(rownames(ref_matrix_total), start = nchar(rownames(ref_matrix_total)), stop = nchar(rownames(ref_matrix_total)))
ref_matrix_total_n <- ref_matrix_total_n != "N"
ref_matrix_total <- ref_matrix_total[ref_matrix_total_n,]
reads_matrix_total <- reads_matrix_total[ref_matrix_total_n,]
coverage_matrix_total <- coverage_matrix_total[ref_matrix_total_n,]
consensus_matrix_total <- consensus_matrix_total[ref_matrix_total_n,]
ref_matrix_total <- ref_matrix_total[ref_matrix_total_n, , drop = FALSE]
reads_matrix_total <- reads_matrix_total[ref_matrix_total_n, , drop = FALSE]
coverage_matrix_total <- coverage_matrix_total[ref_matrix_total_n, , drop = FALSE]
consensus_matrix_total <- consensus_matrix_total[ref_matrix_total_n, , drop = FALSE]
rm(ref_matrix_total_n)
} else{
print("We keep all variants with an N as alternative allele. Please ensure that these variants are in your variant VCF file.")
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu
```

# Current Features v0.2.45
# Current Features v0.2.46

- Loading data from VarTrix and MAEGATK.
- Transforming the data to be compatible for joint analysis.
Expand Down
3 changes: 3 additions & 0 deletions man/LoadingVCF_typewise.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c16f8d6

Please sign in to comment.