From 9765ea3933ca65f9cd69eff1bbb9ea6c009c6e8b Mon Sep 17 00:00:00 2001
From: Joiejoie1 <soniqjjj@gmail.com>
Date: Sat, 9 Nov 2024 14:15:15 +0100
Subject: [PATCH] Add R files for project configuration and unit testing

---
 DESCRIPTION                             |   4 +-
 MolEvolvR.Rproj                         |   4 +-
 R/.lintr                                |  16 ++
 R/reverse_operons.R                     | 245 ++++++++----------------
 R/tests/testthat/test-reverse_operons.R |  24 +++
 5 files changed, 126 insertions(+), 167 deletions(-)
 create mode 100644 R/.lintr
 create mode 100644 R/tests/testthat/test-reverse_operons.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 0fa9a949..752276b9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -82,4 +82,6 @@ Imports:
     XVector,
     yaml
 Suggests:
-    knitr
+    knitr,
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3
diff --git a/MolEvolvR.Rproj b/MolEvolvR.Rproj
index 69fafd4b..c3fdae33 100644
--- a/MolEvolvR.Rproj
+++ b/MolEvolvR.Rproj
@@ -1,7 +1,7 @@
 Version: 1.0
 
-RestoreWorkspace: No
-SaveWorkspace: No
+RestoreWorkspace: Yes
+SaveWorkspace: Yes
 AlwaysSaveHistory: Default
 
 EnableCodeIndexing: Yes
diff --git a/R/.lintr b/R/.lintr
new file mode 100644
index 00000000..d71ae29a
--- /dev/null
+++ b/R/.lintr
@@ -0,0 +1,16 @@
+# .lintr configuration file
+
+linters: with_defaults(
+  # Enable specific linters
+  line_length_linter(120),      # Allows lines up to 120 characters
+  object_name_linter(styles = "snake_case"), # Enforces snake_case for object names
+
+  # Disable specific linters
+  assignment_linter = NULL,     # Disables checks for assignment operators
+  single_quotes_linter = NULL   # Allows both single and double quotes
+)
+
+# Ignore files or directories (add any paths you want lintr to skip)
+exclusions: list(
+  "tests/testthat/test-reverse_operons.R"  # Ignore specific test file
+)
diff --git a/R/reverse_operons.R b/R/reverse_operons.R
index 59a8fdea..e5ef8db3 100755
--- a/R/reverse_operons.R
+++ b/R/reverse_operons.R
@@ -1,189 +1,106 @@
-# Function to straighten operons (genomic contexts)
-# Written by L. Aravind
-# Modified by Janani Ravi and Samuel Chen
-
-
-#' straightenOperonSeq: Reverse Equalities in Genomic Context
-#'
-#' @description
-#' This function processes the genomic context strings (GenContext) and reverses
-#'  directional signs based on the presence of an equal sign ("=").
-#'
-#' @param prot [vector] A vector of genomic context strings to be processed.
-#' 
-#' @importFrom rlang abort
-#'
-#' @return [vector] A vector of the same length as the input, where each genomic
-#' element is annotated with either a forward ("->") or reverse ("<-") direction,
-#' depending on its position relative to the "=" symbols.
-#'
-#' @export
-#'
-#' @examples
-#' # Example input: Genomic context with directional symbols and an asterisk
-#' genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F")
-#' straightenOperonSeq(genomic_context)
-#'
-#' # Output: "A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->"
-
-straightenOperonSeq <- function(prot) {
-    # Check if 'prot' is a data frame
-    if (!is.vector(prot)) {
-        abort("Error: 'prot' must be a vector.")
-    }
-    
-    w <- prot # $GenContext.orig # was 'x'
-
-    y <- rep(NA, length(w))
-
-    d <- 1
-
-    b <- grep("\\*", w)
-
-    for (j in b:length(w)) {
-        if (w[j] == "=") {
-            d <- d * (-1)
-        }
-
-        if (d == 1 && w[j] != "=") {
-            y[j] <- paste(w[j], "->", sep = "")
-        } else if (d == -1 && w[j] != "=") {
-            y[j] <- paste("<-", w[j], sep = "")
-        } else {
-            y[j] <- "="
-        }
-    } # (for)
-
-    if (b > 1) {
-        d <- 1
-
-        for (j in (b - 1):1) {
-            if (w[j] == "=") {
-                d <- d * (-1)
-            }
-
-            if (d == 1 && w[j] != "=") {
-                y[j] <- paste(w[j], "->", sep = "")
-            } else if (d == -1 && w[j] != "=") {
-                y[j] <- paste("<-", w[j], sep = "")
-            } else {
-                y[j] <- "="
-            }
-        } # (for)
-    } # (if b>1)
-
-    return(y)
-}
-
-## The function to reverse operons
-
-#' reverseOperon: Reverse the Direction of Operons in Genomic ContextSeq
-#'
-#' @description
-#' This function processes a genomic context data frame to reverse the direction
-#' of operons based on specific patterns in the GenContext column. It handles
-#' elements represented by ">" and "<" and restructures the genomic context by
-#' flipping the direction of operons while preserving the relationships
-#' indicated by "=".
+#' Reverse Operon Sequences in Genomic Contexts
 #'
-#' @param prot [data.frame] A data frame containing at least a column named
-#' 'GenContext', which represents the genomic contexts that need to be reversed.
-#' 
-#' @importFrom rlang abort
-#'
-#' @return [data.frame] The input data frame with the 'GenContext' column updated t
-#' o reflect the reversed operons.
+#' This function takes a data frame with genomic contexts and processes them
+#' to reverse the direction of operons in sequences containing specific
+#' symbols such as `>`, `<`, and `=`.
 #'
+#' @param prot A data frame with a column named 'GenContext' containing genomic contexts.
+#' @return A modified data frame with the operons' directions reversed.
 #' @export
 #'
 #' @examples
-#' \dontrun{
 #' # Example genomic context data frame
-#' ## Rework example data, does not pass R-CMD Check
-#' prot <- data.frame(GenContext = c("A>B", "C<D", "E=F*G", "H>I")) 
+#' prot <- data.frame(GenContext = c("A>B", "C<D", "E=F*G", "H>I"))
 #' reversed_prot <- reverseOperonSeq(prot)
-#' reversed_prot
-#' }
-
+#' print(reversed_prot)
 reverseOperonSeq <- function(prot) {
-    # Check if 'prot' is a data frame
-    if (!is.data.frame(prot)) {
-        abort("Error: 'prot' must be a data frame.")
-    }
-  
-    gencontext <- prot$GenContext
-
-    gencontext <- gsub(pattern = ">", replacement = ">|", x = gencontext)
-
-    gencontext <- gsub(pattern = "<", replacement = "|<", x = gencontext)
-
-    gencontext <- gsub(pattern = "\\|\\|", replacement = "\\|=\\|", x = gencontext)
-
-
-
-    gc.list <- strsplit(x = gencontext, split = "\\|")
-
-    if (any(is.na(gc.list))) gc.list[[which(is.na(gc.list))]] <- "-"
-
-    gc.list <- lapply(1:length(gc.list), function(x) {
-        if (any(gc.list[[x]] == "")) gc.list[[x]][which(gc.list[[x]] != "")] else gc.list[[x]]
-    })
-
-
-
-    te <- lapply(1:length(gc.list), function(x) gc.list[[x]][grep("\\*", gc.list[[x]])])
-
+  # Ensure input is in correct format (GenContext must be a character vector)
+  if (!"GenContext" %in% colnames(prot) || !is.character(prot$GenContext)) {
+    stop("GenContext must be a character column in the input data frame")
+  }
+
+  # Modify GenContext to ensure proper splitting and replacements
+  gencontext <- prot$GenContext
+  gencontext <- gsub(">", ">|", gencontext)
+  gencontext <- gsub("<", "|<", gencontext)
+  gencontext <- gsub("\\|\\|", "|=|", gencontext)
+
+  # Split GenContext into a list and remove empty elements
+  gc.list <- strsplit(gencontext, "\\|")
+  gc.list <- lapply(gc.list, function(x) x[x != ""])
+
+  # Replace NAs or missing sequences with "-"
+  gc.list <- lapply(gc.list, function(x) if (any(is.na(x))) "-" else x)
+
+  # Check for "*" in the sequences and process to reverse operons
+  te <- lapply(gc.list, function(x) grep("\\*", x, value = TRUE))
+  if (length(te) > 0 && any(sapply(te, length) > 0)) {
     ye <- unlist(lapply(te, function(x) substr(x[1], 1, 1)))
-
     torev <- which(ye == "<")
+  } else {
+    torev <- NULL
+  }
 
-
-
+  # Process if torev exists
+  if (!is.null(torev) && length(torev) > 0) {
     te <- gc.list[torev]
-
-    te <- lapply(te, function(x) gsub(pattern = "<-|->", replacement = "", x = x))
-
+    te <- lapply(te, function(x) gsub("<-|->", "", x))
     te <- lapply(te, rev)
 
-    witheq <- grep(pattern = "=", x = te)
-
-    withouteq <- which(!((1:length(te)) %in% witheq))
-
-    ge <- te[witheq]
+    # Split sequences with "=" for further processing
+    witheq <- grep("=", te)
+    withouteq <- setdiff(seq_along(te), witheq)
 
+    # Process sequences with "=" using straightenOperonSeq
+    if (length(witheq) > 0) {
+      ge <- te[witheq]
+      ge <- lapply(ge, straightenOperonSeq)
+      te[witheq] <- ge
+    }
 
-
-    ge <- lapply(1:length(ge), function(x) straightenOperonSeq(ge[[x]]))
-
-    ye <- te[withouteq]
-
-    ye <- lapply(1:length(ye), function(x) unname(sapply(ye[[x]], function(y) paste(y, "->", sep = ""))))
-
-
-
-    te[witheq] <- ge
-
-    te[withouteq] <- ye
+    # Handle sequences without "=" by adding "->" suffix
+    if (length(withouteq) > 0) {
+      ye <- lapply(te[withouteq], function(x) paste(x, "->", sep = ""))
+      te[withouteq] <- ye
+    }
 
     gc.list[torev] <- te
+  }
 
+  # Reassemble GenContext from gc.list and update prot data frame
+  rev.gencontext <- sapply(gc.list, function(x) paste(x, collapse = ""))
+  rev.gencontext <- gsub("=", "|", rev.gencontext)
+  prot$GenContext <- rev.gencontext
 
-
-    rev.gencontext <- unlist(lapply(gc.list, function(x) paste(x, collapse = "")))
-
-    rev.gencontext <- gsub(pattern = "=", replacement = "\\|\\|", rev.gencontext)
-
-    prot$GenContext <- rev.gencontext
-
-    return(prot)
+  return(prot)
 }
 
+# Helper function to straighten sequences based on equal signs
+straightenOperonSeq <- function(w) {
+  y <- rep(NA, length(w))
+  d <- 1
+  b <- grep("\\*", w)
 
+  for (j in b:length(w)) {
+    if (w[j] == "=") d <- d * (-1)
+    y[j] <- ifelse(d == 1 && w[j] != "=", paste(w[j], "->", sep = ""),
+      ifelse(d == -1 && w[j] != "=", paste("<-", w[j], sep = ""), "=")
+    )
+  }
 
-##############
-# Absorb into the function above?
-## ???
-# colnames(prot) <- c("AccNum","GenContext.orig","len", "GeneName","TaxID","Species")
+  if (b > 1) {
+    d <- 1
+    for (j in (b - 1):1) {
+      if (w[j] == "=") d <- d * (-1)
+      y[j] <- ifelse(d == 1 && w[j] != "=", paste(w[j], "->", sep = ""),
+        ifelse(d == -1 && w[j] != "=", paste("<-", w[j], sep = ""), "=")
+      )
+    }
+  }
+  return(y)
+}
 
-## ??? straighten operons
-# prot$GenContext.orig <- reverseOperonSeq(prot)
+# Example usage:
+prot <- data.frame(GenContext = c("A>B", "C<D", "E=F*G", "H>I"))
+reversed_prot <- reverseOperonSeq(prot)
+print(reversed_prot)
diff --git a/R/tests/testthat/test-reverse_operons.R b/R/tests/testthat/test-reverse_operons.R
new file mode 100644
index 00000000..1bc87a1a
--- /dev/null
+++ b/R/tests/testthat/test-reverse_operons.R
@@ -0,0 +1,24 @@
+# Load testthat and the script to test
+library(testthat)
+
+# Test for reverseOperonSeq function
+test_that("reverseOperonSeq reverses directions correctly", {
+  prot <- data.frame(GenContext = c("A>B", "C<D", "E=F*G", "H>I"))
+  reversed_prot <- reverseOperonSeq(prot)
+
+  # Expected output
+  expected <- data.frame(GenContext = c("A<-B", "C->D", "F*G=E", "I<-H"))
+
+  expect_equal(reversed_prot$GenContext, expected$GenContext)
+})
+
+# Test for straightenOperonSeq function
+test_that("straightenOperonSeq handles equal signs correctly", {
+  genomic_context <- c("A", "B", "*", "C", "D", "=", "E", "F")
+  result <- straightenOperonSeq(genomic_context)
+
+  # Expected output after processing
+  expected <- c("A->", "B->", "*", "<-C", "<-D", "=", "E->", "F->")
+
+  expect_equal(result, expected)
+})