nshen7 · nshen7 · Feb 12, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: vmrseq
 Type: Package
 Title: Probabilistic Modeling of Single-cell Methylation Heterogeneity
-Version: 0.99.2
+Version: 0.99.5
 Authors@R: person("Ning", "Shen", email = "ning.shen.wk@gmail.com",role = c("aut", "cre"))
 Description: High-throughput single-cell measurements of DNA methylation allows studying inter-cellular epigenetic heterogeneity, but this task faces the challenges of sparsity and noise. We present vmrseq, a statistical method that overcomes these challenges and identifies variably methylated regions accurately and robustly.
 License: MIT + file LICENSE
@@ -10,7 +10,7 @@ RoxygenNote: 7.2.3
 biocViews: Software, ImmunoOncology, DNAMethylation, Epigenetics, SingleCell,
   Sequencing, WholeGenome
 Depends: 
-    R (>= 4.4.0)
+    R (>= 4.5.0)
 Imports: 
     bumphunter,
     dplyr,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# vmrseq 0.99.5
+
+# vmrseq 0.99.4
+
+# vmrseq 0.99.3
+
 # vmrseq 0.99.0
 
 * 2024/12/03: This package is currently under review by Bioconductor.
diff --git a/R/helper_functions.R b/R/helper_functions.R
@@ -9,11 +9,11 @@
 #' @usage data(cell_1)
 #' @format A data frame with 10000 rows and 5 variables (no column names):
 #' \describe{
-#'   \item{V1}{Chromosome}
-#'   \item{V2}{Genomic coordinate}
-#'   \item{V3}{Strand information}
-#'   \item{V4}{Number of methylated reads}
-#'   \item{V5}{Number of reads in total}
+#'   \item{chr}{Chromosome}
+#'   \item{pos}{Genomic coordinate}
+#'   \item{strand}{Strand information}
+#'   \item{mc_count}{Number of methylated reads}
+#'   \item{total}{Number of reads in total}
 #' }
 #' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
 #' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)
@@ -36,11 +36,11 @@
 #' @usage data(cell_2)
 #' @format A data frame with 10000 rows and 5 variables (no column names):
 #' \describe{
-#'   \item{V1}{Chromosome}
-#'   \item{V2}{Genomic coordinate}
-#'   \item{V3}{Strand information}
-#'   \item{V4}{Number of methylated reads}
-#'   \item{V5}{Number of reads in total}
+#'   \item{chr}{Chromosome}
+#'   \item{pos}{Genomic coordinate}
+#'   \item{strand}{Strand information}
+#'   \item{mc_count}{Number of methylated reads}
+#'   \item{total}{Number of reads in total}
 #' }
 #' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
 #' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)
@@ -62,11 +62,11 @@
 #' @usage data(cell_3)
 #' @format A data frame with 10000 rows and 5 variables (no column names):
 #' \describe{
-#'   \item{V1}{Chromosome}
-#'   \item{V2}{Genomic coordinate}
-#'   \item{V3}{Strand information}
-#'   \item{V4}{Number of methylated reads}
-#'   \item{V5}{Number of reads in total}
+#'   \item{chr}{Chromosome}
+#'   \item{pos}{Genomic coordinate}
+#'   \item{strand}{Strand information}
+#'   \item{mc_count}{Number of methylated reads}
+#'   \item{total}{Number of reads in total}
 #' }
 #' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
 #' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)

diff --git a/R/poolData.R b/R/poolData.R
@@ -49,12 +49,12 @@
 #' 
 
 poolData <- function(cellFiles,
-                      sep,
-                      writeDir,
-                      chrNames,
-                      colData = NULL,
-                      sparseNAdrop = TRUE) {
-
+                     sep,
+                     writeDir,
+                     chrNames,
+                     colData = NULL,
+                     sparseNAdrop = TRUE) {
+  
   # TODO: making checks on input data format
   chrNames <- as.character(chrNames)
 

diff --git a/R/tpPlot.R b/R/tpPlot.R
@@ -18,7 +18,7 @@
 #' @export
 #'
 #' @examples
-#' tpPlot(tp0)
+#' tpPlot(vmrseq:::tp0)
 #' 
 tpPlot <- function(tp,
                     line_size = 0.2,

diff --git a/R/vmrseqFit.R b/R/vmrseqFit.R
@@ -4,7 +4,7 @@
 #' threshold on the variance of smoothed relative methylation levels and detect
 #' variably methylated regions (VMRs) by optimizing a hidden Markov model (HMM).
 #'
-#' @param gr \code{GRanges} object output by \code{vmrseq::vmrseq.smooth},
+#' @param gr \code{GRanges} object output by \code{vmrseq::vmrseqSmooth},
 #' containing genomic coordinates (chr, start, end) and summarized information
 #' (meth, total, var) of CpG sites in the input dataset.
 #' @param alpha positive scalar value between 0 and 1 that represents the

diff --git a/R/vmrseqOptimControl.R b/R/vmrseqOptimControl.R
@@ -29,6 +29,7 @@ vmrseqOptimControl <- function(
     eta = ifelse(backtrack, 0.05, 0.005),
     maxIter = 100
 ) {
+  if (any(inits <= 0 | inits >= 1)) stop('All values in inits has to between 0 and 1!')
   return(list(inits = inits, epsilon = epsilon,
               backtrack = backtrack,
               eta = eta, maxIter = maxIter))

diff --git a/R/vmrseqSmooth.R b/R/vmrseqSmooth.R
@@ -10,13 +10,13 @@
 #'
 #' @param SE \code{SummarizedExperiment} object with one (and only one) assay that
 #'  contains *binary* methylation status of CpG sites in individual cells. We
-#'  recommend using output by \code{vmrseq::data.pool} (i.e., an NA-dropped
+#'  recommend using output by \code{\link{poolData}} (i.e., an NA-dropped
 #'  HDF5-based SummarizedExperiment object) to prevent running out of memory.
 #' @param bpWindow positive integer that represents the width (in bp) of
 #'  smoothing window. Default value is 2000.
 #' @param sparseNAdrop logical value that represents whether the NA values are
 #'  droppped in the input \code{SE} object. \code{SE} objects output by
-#'  \code{vmrseq::data.pool} are NA dropped. See \code{?vmrseq::data.pool}
+#'  \code{\link{poolData}} are NA dropped. See \code{?vmrseq::poolData}
 #'  for details about NA-dropped representation.
 #' @param verbose logical value that indicates whether progress messages
 #'  should be printed to stdout. Defaults value is TRUE.
@@ -42,7 +42,7 @@
 #'  2. total: total (non-missing) cell count of the CpG
 #'  3. var: variance computed based on individual-cell smoothed relative methylation levels.
 #'
-#' @seealso \code{\link{data.pool}}, \code{\link{vmrseq.fit}}
+#' @seealso \code{\link{poolData}}, \code{\link{vmrseqFit}}
 #' @export
 #' 
 #' @examples

diff --git a/man/cell_1.Rd b/man/cell_1.Rd
diff --git a/man/cell_2.Rd b/man/cell_2.Rd
diff --git a/man/cell_3.Rd b/man/cell_3.Rd
diff --git a/man/tpPlot.Rd b/man/tpPlot.Rd
diff --git a/man/vmrseqFit.Rd b/man/vmrseqFit.Rd
diff --git a/man/vmrseqSmooth.Rd b/man/vmrseqSmooth.Rd
diff --git a/tests/testthat/test-vmrseqOptimControl.R b/tests/testthat/test-vmrseqOptimControl.R
@@ -0,0 +1,86 @@
+library(testthat)
+
+# Test 1: the default control object is a named list of the expected shape
+test_that("vmrseqOptimControl returns a list with the correct structure", {
+  ctrl <- vmrseqOptimControl()
+  field_names <- c("inits", "epsilon", "backtrack", "eta", "maxIter")
+
+  # Container type first, then the exact set and order of element names
+  expect_type(ctrl, "list")
+  expect_named(ctrl, field_names)
+
+  # Storage type of every element, accessed by name
+  expect_type(ctrl[["inits"]], "double")
+  expect_type(ctrl[["epsilon"]], "double")
+  expect_type(ctrl[["backtrack"]], "logical")
+  expect_type(ctrl[["eta"]], "double")
+  # maxIter is checked as a double, not an integer
+  expect_type(ctrl[["maxIter"]], "double")
+})
+
+# Test 2: a bare call yields the default value asserted below for each field
+test_that("vmrseqOptimControl returns correct default values", {
+  defaults <- vmrseqOptimControl()
+  expect_true(defaults[["backtrack"]])
+  # eta is derived from backtrack; 0.05 is the backtrack = TRUE branch
+  expect_equal(defaults[["eta"]], 0.05)
+  expect_equal(defaults[["inits"]], c(0.2, 0.5, 0.8))
+  expect_equal(defaults[["epsilon"]], 0.001)
+  expect_equal(defaults[["maxIter"]], 100)
+})
+
+# Test 3: explicitly supplied arguments are returned unchanged
+test_that("vmrseqOptimControl uses custom parameter values", {
+  # One non-default value per argument, keyed by argument name
+  custom <- list(
+    inits = c(0.1, 0.4, 0.9),
+    epsilon = 1e-5,
+    backtrack = FALSE,
+    eta = 0.01,
+    maxIter = 200
+  )
+
+  # Pass the whole set as named arguments in a single call
+  ctrl <- do.call(vmrseqOptimControl, custom)
+
+  # Every field must echo the value passed in; backtrack = FALSE is checked
+  # with expect_false rather than by equality
+  expect_equal(ctrl$inits, custom$inits)
+  expect_equal(ctrl$epsilon, custom$epsilon)
+  expect_false(ctrl$backtrack)
+  expect_equal(ctrl$eta, custom$eta)
+  expect_equal(ctrl$maxIter, custom$maxIter)
+})
+
+# Test 4: with eta left unset, its default follows the backtrack flag
+test_that("vmrseqOptimControl adjusts eta based on backtrack", {
+  eta_for <- function(flag) vmrseqOptimControl(backtrack = flag)[["eta"]]
+
+  # Backtracking on: eta defaults to 0.05
+  expect_equal(eta_for(TRUE), 0.05)
+
+  # Backtracking off: eta defaults to 0.005
+  expect_equal(eta_for(FALSE), 0.005)
+})
+
+# Test 5: inits must lie strictly inside (0, 1); boundary values are rejected
+test_that("vmrseqOptimControl throws an error for invalid inits values", {
+  # Each vector holds one offending element; both closed boundaries (exactly
+  # 0 and exactly 1) are covered because the guard rejects <= 0 and >= 1
+  invalid_inits <- list(
+    below_zero = c(-0.1, 0.5, 0.8),
+    above_one = c(0.2, 0.5, 1.1),
+    exactly_zero = c(0, 0.5, 0.8),
+    exactly_one = c(0.2, 0.5, 1)
+  )
+
+  for (bad in invalid_inits) {
+    # fixed = TRUE matches the message literally instead of as a regex
+    expect_error(vmrseqOptimControl(inits = bad),
+                 "All values in inits has to between 0 and 1!", fixed = TRUE)
+  }
+
+  # Valid inits should not throw an error
+  valid_inits <- c(0.1, 0.5, 0.9)
+  expect_silent(vmrseqOptimControl(inits = valid_inits))
+})