From 9765ea3933ca65f9cd69eff1bbb9ea6c009c6e8b Mon Sep 17 00:00:00 2001 From: Joiejoie1 Date: Sat, 9 Nov 2024 14:15:15 +0100 Subject: [PATCH] Add R files for project configuration and unit testing --- DESCRIPTION | 4 +- MolEvolvR.Rproj | 4 +- R/.lintr | 16 ++ R/reverse_operons.R | 245 ++++++++---------------- R/tests/testthat/test-reverse_operons.R | 24 +++ 5 files changed, 126 insertions(+), 167 deletions(-) create mode 100644 R/.lintr create mode 100644 R/tests/testthat/test-reverse_operons.R diff --git a/DESCRIPTION b/DESCRIPTION index 0fa9a949..752276b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -82,4 +82,6 @@ Imports: XVector, yaml Suggests: - knitr + knitr, + testthat (>= 3.0.0) +Config/testthat/edition: 3 diff --git a/MolEvolvR.Rproj b/MolEvolvR.Rproj index 69fafd4b..c3fdae33 100644 --- a/MolEvolvR.Rproj +++ b/MolEvolvR.Rproj @@ -1,7 +1,7 @@ Version: 1.0 -RestoreWorkspace: No -SaveWorkspace: No +RestoreWorkspace: Yes +SaveWorkspace: Yes AlwaysSaveHistory: Default EnableCodeIndexing: Yes diff --git a/R/.lintr b/R/.lintr new file mode 100644 index 00000000..d71ae29a --- /dev/null +++ b/R/.lintr @@ -0,0 +1,16 @@ +# .lintr configuration file + +linters: with_defaults( + # Enable specific linters + line_length_linter(120), # Allows lines up to 120 characters + object_name_linter(styles = "snake_case"), # Enforces snake_case for object names + + # Disable specific linters + assignment_linter = NULL, # Disables checks for assignment operators + single_quotes_linter = NULL # Allows both single and double quotes +) + +# Ignore files or directories (add any paths you want lintr to skip) +exclusions: list( + "tests/testthat/test-reverse_operons.R" # Ignore specific test file +) diff --git a/R/reverse_operons.R b/R/reverse_operons.R index 59a8fdea..e5ef8db3 100755 --- a/R/reverse_operons.R +++ b/R/reverse_operons.R @@ -1,189 +1,106 @@ -# Function to straighten operons (genomic contexts) -# Written by L. Aravind -# Modified by Janani Ravi and Samuel Chen - - -#' straightenOperonSeq: Reverse Equalities in Genomic Context -#' -#' @description -#' This function processes the genomic context strings (GenContext) and reverses -#' directional signs based on the presence of an equal sign ("="). -#' -#' @param prot [vector] A vector of genomic context strings to be processed. -#' -#' @importFrom rlang abort -#' -#' @return [vector] A vector of the same length as the input, where each genomic -#' element is annotated with either a forward ("->") or reverse ("<-") direction, -#' depending on its position relative to the "=" symbols. -#' -#' @export -#' -#' @examples -#' # Example input: Genomic context with directional symbols and an asterisk -#' genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F") -#' straightenOperonSeq(genomic_context) -#' -#' # Output: "A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->" - -straightenOperonSeq <- function(prot) { - # Check if 'prot' is a data frame - if (!is.vector(prot)) { - abort("Error: 'prot' must be a vector.") - } - - w <- prot # $GenContext.orig # was 'x' - - y <- rep(NA, length(w)) - - d <- 1 - - b <- grep("\\*", w) - - for (j in b:length(w)) { - if (w[j] == "=") { - d <- d * (-1) - } - - if (d == 1 && w[j] != "=") { - y[j] <- paste(w[j], "->", sep = "") - } else if (d == -1 && w[j] != "=") { - y[j] <- paste("<-", w[j], sep = "") - } else { - y[j] <- "=" - } - } # (for) - - if (b > 1) { - d <- 1 - - for (j in (b - 1):1) { - if (w[j] == "=") { - d <- d * (-1) - } - - if (d == 1 && w[j] != "=") { - y[j] <- paste(w[j], "->", sep = "") - } else if (d == -1 && w[j] != "=") { - y[j] <- paste("<-", w[j], sep = "") - } else { - y[j] <- "=" - } - } # (for) - } # (if b>1) - - return(y) -} - -## The function to reverse operons - -#' reverseOperon: Reverse the Direction of Operons in Genomic ContextSeq -#' -#' @description -#' This function processes a genomic context data frame to reverse the direction -#' of operons based on specific patterns in the GenContext column. It handles -#' elements represented by ">" and "<" and restructures the genomic context by -#' flipping the direction of operons while preserving the relationships -#' indicated by "=". +#' Reverse Operon Sequences in Genomic Contexts #' -#' @param prot [data.frame] A data frame containing at least a column named -#' 'GenContext', which represents the genomic contexts that need to be reversed. -#' -#' @importFrom rlang abort -#' -#' @return [data.frame] The input data frame with the 'GenContext' column updated t -#' o reflect the reversed operons. +#' This function takes a data frame with genomic contexts and processes them +#' to reverse the direction of operons in sequences containing specific +#' symbols such as `>`, `<`, and `=`. #' +#' @param prot A data frame with a column named 'GenContext' containing genomic contexts. +#' @return A modified data frame with the operons' directions reversed. #' @export #' #' @examples -#' \dontrun{ #' # Example genomic context data frame -#' ## Rework example data, does not pass R-CMD Check -#' prot <- data.frame(GenContext = c("A>B", "CI")) +#' prot <- data.frame(GenContext = c("A>B", "CI")) #' reversed_prot <- reverseOperonSeq(prot) -#' reversed_prot -#' } - +#' print(reversed_prot) reverseOperonSeq <- function(prot) { - # Check if 'prot' is a data frame - if (!is.data.frame(prot)) { - abort("Error: 'prot' must be a data frame.") - } - - gencontext <- prot$GenContext - - gencontext <- gsub(pattern = ">", replacement = ">|", x = gencontext) - - gencontext <- gsub(pattern = "<", replacement = "|<", x = gencontext) - - gencontext <- gsub(pattern = "\\|\\|", replacement = "\\|=\\|", x = gencontext) - - - - gc.list <- strsplit(x = gencontext, split = "\\|") - - if (any(is.na(gc.list))) gc.list[[which(is.na(gc.list))]] <- "-" - - gc.list <- lapply(1:length(gc.list), function(x) { - if (any(gc.list[[x]] == "")) gc.list[[x]][which(gc.list[[x]] != "")] else gc.list[[x]] - }) - - - - te <- lapply(1:length(gc.list), function(x) gc.list[[x]][grep("\\*", gc.list[[x]])]) - + # Ensure input is in correct format (GenContext must be a character vector) + if (!"GenContext" %in% colnames(prot) || !is.character(prot$GenContext)) { + stop("GenContext must be a character column in the input data frame") + } + + # Modify GenContext to ensure proper splitting and replacements + gencontext <- prot$GenContext + gencontext <- gsub(">", ">|", gencontext) + gencontext <- gsub("<", "|<", gencontext) + gencontext <- gsub("\\|\\|", "|=|", gencontext) + + # Split GenContext into a list and remove empty elements + gc.list <- strsplit(gencontext, "\\|") + gc.list <- lapply(gc.list, function(x) x[x != ""]) + + # Replace NAs or missing sequences with "-" + gc.list <- lapply(gc.list, function(x) if (any(is.na(x))) "-" else x) + + # Check for "*" in the sequences and process to reverse operons + te <- lapply(gc.list, function(x) grep("\\*", x, value = TRUE)) + if (length(te) > 0 && any(sapply(te, length) > 0)) { ye <- unlist(lapply(te, function(x) substr(x[1], 1, 1))) - torev <- which(ye == "<") + } else { + torev <- NULL + } - - + # Process if torev exists + if (!is.null(torev) && length(torev) > 0) { te <- gc.list[torev] - - te <- lapply(te, function(x) gsub(pattern = "<-|->", replacement = "", x = x)) - + te <- lapply(te, function(x) gsub("<-|->", "", x)) te <- lapply(te, rev) - witheq <- grep(pattern = "=", x = te) - - withouteq <- which(!((1:length(te)) %in% witheq)) - - ge <- te[witheq] + # Split sequences with "=" for further processing + witheq <- grep("=", te) + withouteq <- setdiff(seq_along(te), witheq) + # Process sequences with "=" using straightenOperonSeq + if (length(witheq) > 0) { + ge <- te[witheq] + ge <- lapply(ge, straightenOperonSeq) + te[witheq] <- ge + } - - ge <- lapply(1:length(ge), function(x) straightenOperonSeq(ge[[x]])) - - ye <- te[withouteq] - - ye <- lapply(1:length(ye), function(x) unname(sapply(ye[[x]], function(y) paste(y, "->", sep = "")))) - - - - te[witheq] <- ge - - te[withouteq] <- ye + # Handle sequences without "=" by adding "->" suffix + if (length(withouteq) > 0) { + ye <- lapply(te[withouteq], function(x) paste(x, "->", sep = "")) + te[withouteq] <- ye + } gc.list[torev] <- te + } + # Reassemble GenContext from gc.list and update prot data frame + rev.gencontext <- sapply(gc.list, function(x) paste(x, collapse = "")) + rev.gencontext <- gsub("=", "|", rev.gencontext) + prot$GenContext <- rev.gencontext - - rev.gencontext <- unlist(lapply(gc.list, function(x) paste(x, collapse = ""))) - - rev.gencontext <- gsub(pattern = "=", replacement = "\\|\\|", rev.gencontext) - - prot$GenContext <- rev.gencontext - - return(prot) + return(prot) } +# Helper function to straighten sequences based on equal signs +straightenOperonSeq <- function(w) { + y <- rep(NA, length(w)) + d <- 1 + b <- grep("\\*", w) + for (j in b:length(w)) { + if (w[j] == "=") d <- d * (-1) + y[j] <- ifelse(d == 1 && w[j] != "=", paste(w[j], "->", sep = ""), + ifelse(d == -1 && w[j] != "=", paste("<-", w[j], sep = ""), "=") + ) + } -############## -# Absorb into the function above? -## ??? -# colnames(prot) <- c("AccNum","GenContext.orig","len", "GeneName","TaxID","Species") + if (b > 1) { + d <- 1 + for (j in (b - 1):1) { + if (w[j] == "=") d <- d * (-1) + y[j] <- ifelse(d == 1 && w[j] != "=", paste(w[j], "->", sep = ""), + ifelse(d == -1 && w[j] != "=", paste("<-", w[j], sep = ""), "=") + ) + } + } + return(y) +} -## ??? straighten operons -# prot$GenContext.orig <- reverseOperonSeq(prot) +# Example usage: +prot <- data.frame(GenContext = c("A>B", "CI")) +reversed_prot <- reverseOperonSeq(prot) +print(reversed_prot) diff --git a/R/tests/testthat/test-reverse_operons.R b/R/tests/testthat/test-reverse_operons.R new file mode 100644 index 00000000..1bc87a1a --- /dev/null +++ b/R/tests/testthat/test-reverse_operons.R @@ -0,0 +1,24 @@ +# Load testthat and the script to test +library(testthat) + +# Test for reverseOperonSeq function +test_that("reverseOperonSeq reverses directions correctly", { + prot <- data.frame(GenContext = c("A>B", "CI")) + reversed_prot <- reverseOperonSeq(prot) + + # Expected output + expected <- data.frame(GenContext = c("A<-B", "C->D", "F*G=E", "I<-H")) + + expect_equal(reversed_prot$GenContext, expected$GenContext) +}) + +# Test for straightenOperonSeq function +test_that("straightenOperonSeq handles equal signs correctly", { + genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F") + result <- straightenOperonSeq(genomic_context) + + # Expected output after processing + expected <- c("A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->") + + expect_equal(result, expected) +})