Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Devel #8

Merged
merged 8 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: vmrseq
Type: Package
Title: Probabilistic Modeling of Single-cell Methylation Heterogeneity
Version: 0.99.2
Version: 0.99.5
Authors@R: person("Ning", "Shen", email = "ning.shen.wk@gmail.com",role = c("aut", "cre"))
Description: High-throughput single-cell measurements of DNA methylation allows studying inter-cellular epigenetic heterogeneity, but this task faces the challenges of sparsity and noise. We present vmrseq, a statistical method that overcomes these challenges and identifies variably methylated regions accurately and robustly.
License: MIT + file LICENSE
Expand All @@ -10,7 +10,7 @@ RoxygenNote: 7.2.3
biocViews: Software, ImmunoOncology, DNAMethylation, Epigenetics, SingleCell,
Sequencing, WholeGenome
Depends:
R (>= 4.4.0)
R (>= 4.5.0)
Imports:
bumphunter,
dplyr,
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# vmrseq 0.99.5

# vmrseq 0.99.4

# vmrseq 0.99.3

# vmrseq 0.99.0

* 2024/12/03: This package is currently under review by Bioconductor.
30 changes: 15 additions & 15 deletions R/helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#' @usage data(cell_1)
#' @format A data frame with 10000 rows and 5 variables (no column names):
#' \describe{
#' \item{V1}{Chromosome}
#' \item{V2}{Genomic coordinate}
#' \item{V3}{Strand information}
#' \item{V4}{Number of methylated reads}
#' \item{V5}{Number of reads in total}
#' \item{chr}{Chromosome}
#' \item{mc_count}{Number of methylated reads}
#' \item{pos}{Genomic coordinate}
#' \item{strand}{Strand information}
#' \item{total}{Number of reads in total}
#' }
#' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
#' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)
Expand All @@ -36,11 +36,11 @@
#' @usage data(cell_2)
#' @format A data frame with 10000 rows and 5 variables (no column names):
#' \describe{
#' \item{V1}{Chromosome}
#' \item{V2}{Genomic coordinate}
#' \item{V3}{Strand information}
#' \item{V4}{Number of methylated reads}
#' \item{V5}{Number of reads in total}
#' \item{chr}{Chromosome}
#' \item{mc_count}{Number of methylated reads}
#' \item{pos}{Genomic coordinate}
#' \item{strand}{Strand information}
#' \item{total}{Number of reads in total}
#' }
#' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
#' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)
Expand All @@ -62,11 +62,11 @@
#' @usage data(cell_3)
#' @format A data frame with 10000 rows and 5 variables (no column names):
#' \describe{
#' \item{V1}{Chromosome}
#' \item{V2}{Genomic coordinate}
#' \item{V3}{Strand information}
#' \item{V4}{Number of methylated reads}
#' \item{V5}{Number of reads in total}
#' \item{chr}{Chromosome}
#' \item{mc_count}{Number of methylated reads}
#' \item{pos}{Genomic coordinate}
#' \item{strand}{Strand information}
#' \item{total}{Number of reads in total}
#' }
#' @references Luo, Chongyuan et al. \emph{Single-cell methylomes identify neuronal
#' subtypes and regulatory elements in mammalian cortex.}. Science (New York, N.Y.)
Expand Down
12 changes: 6 additions & 6 deletions R/poolData.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@
#'

poolData <- function(cellFiles,
sep,
writeDir,
chrNames,
colData = NULL,
sparseNAdrop = TRUE) {

sep,
writeDir,
chrNames,
colData = NULL,
sparseNAdrop = TRUE) {
# TODO: making checks on input data format
chrNames <- as.character(chrNames)

Expand Down
2 changes: 1 addition & 1 deletion R/tpPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @export
#'
#' @examples
#' tpPlot(tp0)
#' tpPlot(vmrseq:::tp0)
#'
tpPlot <- function(tp,
line_size = 0.2,
Expand Down
2 changes: 1 addition & 1 deletion R/vmrseqFit.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#' threshold on the variance of smoothed relative methylation levels and detect
#' variably methylated regions (VMRs) by optimizing a hidden Markov model (HMM).
#'
#' @param gr \code{GRanges} object output by \code{vmrseq::vmrseq.smooth},
#' @param gr \code{GRanges} object output by \code{vmrseq::vmrseqSmooth},
#' containing genomic coordinates (chr, start, end) and summarized information
#' (meth, total, var) of CpG sites in the input dataset.
#' @param alpha positive scalar value between 0 and 1 that represents the
Expand Down
1 change: 1 addition & 0 deletions R/vmrseqOptimControl.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ vmrseqOptimControl <- function(
eta = ifelse(backtrack, 0.05, 0.005),
maxIter = 100
) {
if (any(inits <= 0 | inits >= 1)) stop('All values in inits has to between 0 and 1!')
return(list(inits = inits, epsilon = epsilon,
backtrack = backtrack,
eta = eta, maxIter = maxIter))
Expand Down
6 changes: 3 additions & 3 deletions R/vmrseqSmooth.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
#'
#' @param SE \code{SummarizedExperiment} object with one (and only one) assay that
#' contains *binary* methylation status of CpG sites in individual cells. We
#' recommend using output by \code{vmrseq::data.pool} (i.e., an NA-dropped
#' recommend using output by \code{\link{poolData}} (i.e., an NA-dropped
#' HDF5-based SummarizedExperiment object) to prevent running out of memory.
#' @param bpWindow positive integer that represents the width (in bp) of
#' smoothing window. Default value is 2000.
#' @param sparseNAdrop logical value that represents whether the NA values are
#' dropped in the input \code{SE} object. \code{SE} objects output by
#' \code{vmrseq::data.pool} are NA dropped. See \code{?vmrseq::data.pool}
#' \code{\link{poolData}} are NA dropped. See \code{?vmrseq::poolData}
#' for details about NA-dropped representation.
#' @param verbose logical value that indicates whether progress messages
#' should be printed to stdout. Default value is TRUE.
Expand All @@ -42,7 +42,7 @@
#' 2. total: total (non-missing) cell count of the CpG
#' 3. var: variance computed based on individual-cell smoothed relative methylation levels.
#'
#' @seealso \code{\link{data.pool}}, \code{\link{vmrseq.fit}}
#' @seealso \code{\link{poolData}}, \code{\link{vmrseqFit}}
#' @export
#'
#' @examples
Expand Down
10 changes: 5 additions & 5 deletions man/cell_1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions man/cell_2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/cell_3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/tpPlot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/vmrseqFit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/vmrseqSmooth.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

86 changes: 86 additions & 0 deletions tests/testthat/test-vmrseqOptimControl.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
library(testthat)

# Test 1: the control object is a list carrying the five expected fields
test_that("vmrseqOptimControl returns a list with the correct structure", {
  ctrl <- vmrseqOptimControl()

  # Container type and field names
  expect_type(ctrl, "list")
  expect_named(ctrl, c("inits", "epsilon", "backtrack", "eta", "maxIter"))

  # Storage type of each individual field
  expect_type(ctrl[["inits"]], "double")
  expect_type(ctrl[["epsilon"]], "double")
  expect_type(ctrl[["backtrack"]], "logical")
  expect_type(ctrl[["eta"]], "double")
  expect_type(ctrl[["maxIter"]], "double")
})

# Test 2: calling with no arguments yields the expected default values
test_that("vmrseqOptimControl returns correct default values", {
  defaults <- vmrseqOptimControl()

  expect_equal(defaults[["inits"]], c(0.2, 0.5, 0.8))
  expect_equal(defaults[["epsilon"]], 1e-3)
  expect_true(defaults[["backtrack"]])
  # backtrack is TRUE by default, so eta is expected to be 0.05
  expect_equal(defaults[["eta"]], 0.05)
  expect_equal(defaults[["maxIter"]], 100)
})

# Test 3: every user-supplied argument is passed through to the result unchanged
test_that("vmrseqOptimControl uses custom parameter values", {
  custom <- list(
    inits = c(0.1, 0.4, 0.9),
    epsilon = 1e-5,
    backtrack = FALSE,
    eta = 0.01,
    maxIter = 200
  )

  # Call the constructor with all five arguments supplied by name
  ctrl <- do.call(vmrseqOptimControl, custom)

  expect_equal(ctrl[["inits"]], custom[["inits"]])
  expect_equal(ctrl[["epsilon"]], custom[["epsilon"]])
  expect_false(ctrl[["backtrack"]])
  expect_equal(ctrl[["eta"]], custom[["eta"]])
  expect_equal(ctrl[["maxIter"]], custom[["maxIter"]])
})

# Test 4: when eta is not supplied, its default follows the backtrack flag
test_that("vmrseqOptimControl adjusts eta based on backtrack", {
  # backtrack = TRUE -> eta defaults to 0.05
  eta_backtrack_on <- vmrseqOptimControl(backtrack = TRUE)[["eta"]]
  expect_equal(eta_backtrack_on, 0.05)

  # backtrack = FALSE -> eta defaults to 0.005
  eta_backtrack_off <- vmrseqOptimControl(backtrack = FALSE)[["eta"]]
  expect_equal(eta_backtrack_off, 0.005)
})

# Test 5: Check that inits values outside the open interval (0, 1) trigger an
# error, including both boundary values (exactly 0 and exactly 1)
test_that("vmrseqOptimControl throws an error for invalid inits values", {
  # The message is matched literally (fixed = TRUE) rather than as a regular
  # expression, so its punctuation can never be interpreted as a pattern.
  expected_msg <- "All values in inits has to between 0 and 1!"

  # Create invalid inits vectors
  invalid_inits_low <- c(-0.1, 0.5, 0.8)       # Contains a value < 0
  invalid_inits_high <- c(0.2, 0.5, 1.1)       # Contains a value > 1
  invalid_inits_edge_low <- c(0, 0.5, 0.8)     # Contains a value exactly at 0
  invalid_inits_edge_high <- c(0.2, 0.5, 1)    # Contains a value exactly at 1

  # Expect an error when inits values are outside the valid range (0, 1)
  expect_error(vmrseqOptimControl(inits = invalid_inits_low),
               expected_msg, fixed = TRUE)

  expect_error(vmrseqOptimControl(inits = invalid_inits_high),
               expected_msg, fixed = TRUE)

  # Both boundaries are excluded: the check rejects inits <= 0 and inits >= 1
  expect_error(vmrseqOptimControl(inits = invalid_inits_edge_low),
               expected_msg, fixed = TRUE)

  expect_error(vmrseqOptimControl(inits = invalid_inits_edge_high),
               expected_msg, fixed = TRUE)

  # Valid inits should not throw an error
  valid_inits <- c(0.1, 0.5, 0.9)
  expect_silent(vmrseqOptimControl(inits = valid_inits))
})
Loading