Skip to content

Commit

Permalink
Merge pull request #243 from MoTrPAC/enhance/write-metabolomics
Browse files Browse the repository at this point in the history
Enhance/write metabolomics
  • Loading branch information
biodavidjm authored Sep 23, 2024
2 parents 37bab43 + 857bc1a commit db20073
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 36 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: MotrpacBicQC
Type: Package
Title: QC/QA functions for the MoTrPAC community
Version: 0.9.5
Date: 2024-05-22
Version: 0.9.51
Date: 2024-08-07
Author: MoTrPAC Bioinformatics Center
Maintainer: David Jimenez-Morales <davidjm@stanford.edu>
Description: R Package for the analysis of MoTrPAC datasets.
Expand Down
47 changes: 29 additions & 18 deletions R/metabolomics_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ check_metadata_samples <- function(df,
# filter only expected columns
df <- filter_required_columns(df = df,
type = "m_s",
verbose = TRUE)
verbose = verbose)

# Check every column
# sample_id: si
Expand Down Expand Up @@ -276,12 +276,17 @@ check_metadata_samples <- function(df,
}

if("acquisition_date" %in% colnames(df)){
if( any(grepl(":", df$acquisition_date)) ){
if(verbose) message(" + (i) Assuming `acquisition_date` is in `MM/DD/YYYY HH:MM:SS AM/PM` format. Validating:")
icdt <- validate_dates_times(df = df, column_name = "acquisition_date", verbose = verbose)
if(any(is.na(df$acquisition_date))){
if(verbose) message(" - (-) `acquisition_date` has NA values: FAIL")
ic <- ic + 1
}else{
icdate <- validate_yyyymmdd_dates(df = df, date_column = "acquisition_date", verbose = verbose)
ic <- ic + icdate
if( any(grepl(":", df$acquisition_date)) ){
if(verbose) message(" + (i) Assuming `acquisition_date` is in `MM/DD/YYYY HH:MM:SS AM/PM` format. Validating:")
icdt <- validate_dates_times(df = df, column_name = "acquisition_date", verbose = verbose)
}else{
icdate <- validate_yyyymmdd_dates(df = df, date_column = "acquisition_date", verbose = verbose)
ic <- ic + icdate
}
}
}else{
if(verbose) message(" - (-) `acquisition_date` column missed: FAIL")
Expand Down Expand Up @@ -1200,15 +1205,17 @@ load_metabolomics_batch <- function(input_results_folder,
assay <- validate_assay(input_results_folder)
tissue_code <- validate_tissue(input_results_folder)

total_issues <- validate_metabolomics(input_results_folder = input_results_folder,
cas = cas,
return_n_issues = TRUE,
full_report = FALSE,
f_proof = FALSE,
verbose = FALSE)
total_issues <-
validate_metabolomics(
input_results_folder = input_results_folder,
cas = cas,
return_n_issues = TRUE,
full_report = FALSE,
f_proof = FALSE,
verbose = FALSE)

if(total_issues > 0){
message("\n\tWARNING!!! Too many issues identified (", total_issues,"). This batch should not be processed until the issues are solved")
message("\tWARNING!!! Too many issues identified (", total_issues,"). This batch should not be processed until the issues are solved")
}

# Load Metabolomics----
Expand Down Expand Up @@ -1376,8 +1383,10 @@ combine_metabolomics_batch <- function(input_results_folder,
verbose = TRUE){

# Load all datasets
metab_dfs <- load_metabolomics_batch(input_results_folder = input_results_folder,
cas = cas)
metab_dfs <-
load_metabolomics_batch(
input_results_folder = input_results_folder,
cas = cas, verbose = verbose)

if(verbose) message("\n## MERGE")
if(verbose) message("\nAll metabolomics datasets + basic phenotypic information")
Expand Down Expand Up @@ -1677,9 +1686,11 @@ write_metabolomics_releases <- function(input_results_folder,
if(verbose) message("+ Writing out ", cas, " ", phase_details, " ", tissue_code, " ", assay, " files", appendLF = FALSE)

# Load all datasets----
metab_dfs <- load_metabolomics_batch(input_results_folder = input_results_folder,
cas = cas,
verbose = FALSE)
metab_dfs <-
load_metabolomics_batch(
input_results_folder = input_results_folder,
cas = cas,
verbose = FALSE)

# Create output folder-------
if (is.null(folder_root)){
Expand Down
36 changes: 23 additions & 13 deletions R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -310,27 +310,37 @@ filter_required_columns <- function(df,
colnames(df) <- tolower(colnames(df))
missing_cols <- setdiff(emeta_metabo_coln_named, colnames(df))
if (length(missing_cols) > 0) {
if(verbose) message(" - (-) `metadata_metabolite`: Expected COLUMN NAMES are missed: FAIL")
message(paste0("\t The following required columns are not present: `", paste(missing_cols, collapse = ", "), "`"))
if (verbose) message(" - (-) `metadata_metabolite`: Expected COLUMN NAMES are missed: FAIL")
message(paste0("\t The following required columns are not present: `",
paste(missing_cols, collapse = ", "), "`"))
} else {
if(verbose) message(" + (+) All required columns present")
if (verbose) message(" + (+) All required columns present")
df <- subset(df, select = emeta_metabo_coln_named)
}
return(df)

} else if (type == "m_s"){
} else if (type == "m_s") {
emeta_sample_coln <- c("sample_id", "sample_type", "sample_order", "raw_file", "extraction_date", "acquisition_date", "lc_column_id")
required_cols <- setdiff(emeta_sample_coln, c("extraction_date", "acquisition_date", "lc_column_id"))
missing_cols <- setdiff(emeta_sample_coln, colnames(df))

if (length(missing_cols) > 0) {
if(verbose) message(" - (-) `metadata_sample`: Expected COLUMN NAMES are missed: FAIL")
message(paste0("\t The following required columns are not present: `", paste(missing_cols, collapse = ", "), "`"))
missing_required_cols <- setdiff(required_cols, colnames(df))

if (length(missing_required_cols) > 0) {
if (verbose) message(" - (-) `metadata_sample`: Expected COLUMN NAMES are missed: FAIL")
message(paste0("\t The following required columns are not present: `",
paste(missing_required_cols, collapse = ", "), "`"))
} else {
if(verbose) message(" + (+) All required columns present")
if (length(missing_cols) > 0) {
message(" - (-) `metadata_sample`: recently required COLUMN NAMES are missed: Adding with NA values: FAIL")
for (col in c("extraction_date", "acquisition_date", "lc_column_id")) {
if (!(col %in% colnames(df))) {
df[[col]] <- NA
}
}
}
if (verbose) message(" + (+) All required columns present")
df <- subset(df, select = emeta_sample_coln)
}
return(df)

} else if (type == "v_m"){
emeta_sample_coln <- c("vial_label", "tmt_plex")
if( all(emeta_sample_coln %in% colnames(df)) ){
Expand All @@ -348,10 +358,10 @@ filter_required_columns <- function(df,
if(verbose) message(" + (+) All required columns present (tmt18 experiment)")
df <- subset(df, select = emeta_sample_coln)
}else{
if(verbose) message(" - (-) Expected COLUMN NAMES are missed: FAIL")
message(" - (-) Expected COLUMN NAMES are missed: FAIL")
}
}else{
if(verbose) message(" - (-) Expected COLUMN NAMES are missed: FAIL")
message(" - (-) Expected COLUMN NAMES are missed: FAIL")
}
return(df)
} else if (type == "olproteins"){
Expand Down
4 changes: 3 additions & 1 deletion R/validations.R
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,9 @@ validate_dates_times <- function(df, column_name, verbose = TRUE) {
#' validate_lc_column_id(df, column_name = "lc_column_id")
#'
#' @export
validate_lc_column_id <- function(df, column_name, verbose = TRUE) {
validate_lc_column_id <- function(df,
column_name,
verbose = TRUE) {

# issue counter
ic <- 0
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-metabolomics_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ test_that("check_metadata_metabolites returns the right number of issues", {
})

# Checks the issue count returned by check_metadata_samples() for the
# `metadata_sample_named` and `metadata_sample_unnamed` objects, called with
# cas = "umichigan", return_n_issues = TRUE and verbose = FALSE.
test_that("check_metadata_sample returns the right number of issues", {
# NOTE(review): this view is a rendered diff ("2 additions & 2 deletions" for
# this file), so the two expectations of 3 are presumably the removed lines
# and the two expectations of 2 the added ones -- confirm against the real
# test file before relying on either pair.
expect_equal(check_metadata_samples(df = metadata_sample_named, cas = "umichigan", return_n_issues = TRUE, verbose = FALSE), 3)
expect_equal(check_metadata_samples(df = metadata_sample_unnamed, cas = "umichigan", return_n_issues = TRUE, verbose = FALSE), 3)
expect_equal(check_metadata_samples(df = metadata_sample_named, cas = "umichigan", return_n_issues = TRUE, verbose = FALSE), 2)
expect_equal(check_metadata_samples(df = metadata_sample_unnamed, cas = "umichigan", return_n_issues = TRUE, verbose = FALSE), 2)
})

test_that("check_results returns the right number of issues", {
Expand Down

0 comments on commit db20073

Please sign in to comment.