Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
DominikRafacz committed Oct 23, 2024
2 parents 9058aa1 + 10168a0 commit d5def3c
Show file tree
Hide file tree
Showing 19 changed files with 73 additions and 43 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ docs
^src/main\.cpp$
^src/CMakeLists\.txt$
^src/cmake-build-debug$
^CRAN-SUBMISSION$
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: tidysq
Type: Package
Title: Tidy Processing and Analysis of Biological Sequences
Version: 1.2.1
Date: 2022-01-29
Date: 2024-09-29
Authors@R: c(person("Dominik", "Rafacz",
email = "dominikrafacz@gmail.com",
comment = c(ORCID = "0000-0003-0925-1909"),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Fixed:
* fixed incorrect codons
* add explicit casting of alphabet during construction of sequence prototype (fixes issues with new implementation of `union()`)
* add skips for tests that use packages from "Suggests" which may not be available

# tidysq 1.2.0
## New features:
Expand Down
10 changes: 5 additions & 5 deletions R/export_sq.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Export sq objects into other formats
#'
#' @templateVar name_null_ok TRUE
#'
#'
#' @description Converts object of class \code{\link[=sq-class]{sq}} to a class
#' from another package. Currently supported packages are \pkg{ape},
#' \pkg{bioseq}, \pkg{Bioconductor} and \pkg{seqinr}. For exact list of
Expand All @@ -13,8 +13,8 @@
#' @template name
#' @template three-dots
#'
#' @return An object with the format specified in the parameter. To find
#' information about the detailed structure of this object, see documentation
#' @return An object with the format specified in the parameter. To find
#' information about the detailed structure of this object, see documentation
#' of these objects.
#'
#' @details
Expand Down Expand Up @@ -44,7 +44,7 @@
#' }
#' }
#'
#' @examples
#' @examplesIf all(c(require("Biostrings", quietly = TRUE), require("ape", quietly = TRUE), require("bioseq", quietly = TRUE), require("seqinr", quietly = TRUE)))
#' # DNA and amino acid sequences can be exported to most packages
#' sq_ami <- sq(c("MVVGL", "LAVPP"), alphabet = "ami_bsc")
#' export_sq(sq_ami, "ape::AAbin")
Expand Down Expand Up @@ -72,7 +72,7 @@
export_sq <- function(x, export_format, name = NULL, ...) {
# Validate the arguments shared by every method before dispatching:
# `export_format` must pass assert_string(), and `name` must be either NULL
# or a character vector whose length equals vec_size(x).
assert_string(export_format)
assert_character(name, len = vec_size(x), null.ok = TRUE)

# S3 generic: the actual conversion is performed by the dispatched method.
UseMethod("export_sq")
}

Expand Down
16 changes: 8 additions & 8 deletions R/find_motifs.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' Find given motifs
#'
#' @templateVar name_null_ok FALSE
#'
#'
#' @description Finds all given motifs in sequences and returns their positions.
#'
#'
#' @template x
#' @template name
#' @param motifs [\code{character}]\cr
Expand All @@ -12,20 +12,20 @@
#' @template dot-name
#' @template NA_letter
#' @template three-dots
#'
#'
#' @return A \code{\link[tibble]{tibble}} with the following columns:
#' \item{name}{name of the sequence in which a motif was found}
#' \item{sought}{sought motif}
#' \item{found}{found subsequence, may differ from sought if the motif
#' contained ambiguous letters}
#' \item{start}{position of first element of found motif}
#' \item{end}{position of last element of found motif}
#'
#'
#' @details
#' This function allows search of a given motif or motifs in the \code{sq}
#' object. It returns all motifs found with their start and end positions within
#' a sequence.
#'
#'
#' @template motif_details
#'
#' @examples
Expand Down Expand Up @@ -53,7 +53,7 @@
#'
#' # Finding multicharacter motifs:
#' find_motifs(sq_atp, sq_names, c("nsA", "mYmY$"))
#'
#'
#' # It can be a part of tidyverse pipeline:
#' library(dplyr)
#' fasta_file <- system.file(package = "tidysq", "examples/example_aa.fasta")
Expand All @@ -80,7 +80,7 @@ find_motifs.sq <- function(x, name, motifs, ...,
assert_character(motifs, any.missing = FALSE)
assert_string(NA_letter, min.chars = 1)
assert_alph_no_special_chars(alphabet(x))

ret <- CPP_find_motifs(x, name, motifs, NA_letter)
as_tibble(ret)
}
Expand All @@ -95,6 +95,6 @@ find_motifs.data.frame <- function(x, motifs, ...,
assert_string(.sq, min.chars = 1)
assert_string(.name, min.chars = 1)
assert_subset(c(.sq, .name), colnames(x))

find_motifs(x[[.sq]], x[[.name]], motifs, ..., NA_letter = NA_letter)
}
15 changes: 9 additions & 6 deletions R/import_sq.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#' Import sq objects from other objects
#'
#'
#' @description Creates \code{\link[=sq-class]{sq}} object from object of class
#' from another package. Currently supported packages are \pkg{ape},
#' \pkg{bioseq}, \pkg{Bioconductor} and \pkg{seqinr}. For exact list of
#' supported classes and resulting types, see details.
#'
#'
#' @param object [\code{any(1)}]\cr
#' An object of one of supported classes.
#' @template three-dots
#'
#'
#' @return A \code{\link[tibble]{tibble}} with \code{sq} column of
#' \code{\link[=sq-class]{sq}} type representing the same sequences as given
#' object; the object has a type corresponding to the input type; if given
#' sequences have names, output \code{\link[tibble]{tibble}} will also have
#' another column \code{name} with those names
#'
#'
#' @details
#' Currently supported classes are as follows:
#' \itemize{
Expand Down Expand Up @@ -59,22 +59,25 @@
#'
#' Providing object of class other than specified will result in an error.
#'
#' @examples
#' @examplesIf require("ape", quietly = TRUE)
#' # ape example
#' library(ape)
#' ape_dna <- as.DNAbin(list(one = c("C", "T", "C", "A"), two = c("T", "G", "A", "G", "G")))
#' import_sq(ape_dna)
#'
#' @examplesIf require("bioseq", quietly = TRUE)
#' # bioseq example
#' library(bioseq)
#' bioseq_rna <- new_rna(c(one = "ANBRY", two = "YUTUGGN"))
#' import_sq(bioseq_rna)
#'
#' @examplesIf require("Biostrings", quietly = TRUE)
#' # Biostrings example
#' library(Biostrings)
#' Biostrings_ami <- AAStringSet(c(one = "FEAPQLIWY", two = "EGITENAK"))
#' import_sq(Biostrings_ami)
#'
#' @examplesIf require("seqinr", quietly = TRUE)
#' # seqinr example
#' library(seqinr)
#' seqinr_dna <- as.SeqFastadna(c("C", "T", "C", "A"), name = "one")
Expand Down Expand Up @@ -246,6 +249,6 @@ import_sq.list <- function(object, separate = TRUE, ...) {
# Wraps `x` in a tibble: a single `sq` column when `name` is NULL, otherwise
# a `name` column followed by the `sq` column.
bind_into_sqibble <- function(x, name = NULL) {
if (is.null(name))
tibble(sq = x)
else
else
tibble(name = name, sq = x)
}
12 changes: 6 additions & 6 deletions R/write_fasta.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Save sq to fasta file
#'
#' @templateVar name_null_ok FALSE
#'
#'
#' @description Writes \code{\link[=sq-class]{sq}} objects with their names to
#' a fasta file.
#'
Expand All @@ -17,7 +17,7 @@
#' @template three-dots
#'
#' @return No value is returned.
#'
#'
#' @details
#' Whenever a name has more letters than \code{width} parameter, nothing
#' happens, as only sequences are split to fit within designated space.
Expand All @@ -30,7 +30,7 @@
#' c("bat", "cat", "rat", "elephant_swallowed_by_A_snake"),
#' tempfile())
#' }
#'
#'
#' # It can be a part of tidyverse pipeline:
#' library(dplyr)
#' fasta_file <- system.file(package = "tidysq", "examples/example_aa.fasta")
Expand Down Expand Up @@ -58,7 +58,7 @@ write_fasta.sq <- function(x, name, file,
assert_string(file)
assert_count(width, positive = TRUE)
assert_string(NA_letter, min.chars = 1)

CPP_write_fasta(x, name, file, width, NA_letter)
}

Expand All @@ -73,6 +73,6 @@ write_fasta.data.frame <- function(x, file,
assert_string(.sq, min.chars = 1)
assert_string(.name, min.chars = 1)
assert_subset(c(.sq, .name), colnames(x))

write_fasta(x[[.sq]], x[[.name]], file, width = width, NA_letter = NA_letter)
}
}
1 change: 1 addition & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@

* This is a resubmission.
* Fixed issues related to new implementations of set operations on R-devel
* Added skips to tests that require using packages from "Suggests"
7 changes: 4 additions & 3 deletions man/export_sq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 8 additions & 4 deletions man/import_sq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/setup-tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ test_sq_only <- function(.func, ..., .data.frame_ok = FALSE) {
test_message <- if (.data.frame_ok)
"the first argument must be of sq or data.frame class" else
"the first argument must be of sq class"

test_that(test_message, {
expect_s3_class(rlang::catch_cnd(.func(1:7, ...)),
"error_no_method")
Expand Down
24 changes: 15 additions & 9 deletions tests/testthat/test-casting.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,6 @@ str_with_na_1 <- c("!A!T!!TC!", "", "AT!C!G!!", "", "!!A!!T!")
str_with_na_2 <- c("?A?T??TC?", "", "AT?C?G??", "", "??A??T?")
str_with_na_3 <- c("<?>A<?>T<?><?>TC<?>", "", "AT<?>C<?>G<?><?>", "", "<?><?>A<?><?>T<?>")

# added because of Biostrings warning
suppressWarnings({
biostr_dna_bsc <- Biostrings::DNAStringSet(str_dna_bsc)
})

seqinr_ami_bsc <- lapply(str_ami_bsc, function(x)
seqinr::as.SeqFastaAA(seqinr::s2c(x)))

# CASTING TO SQ ----
test_that("character vector is casted to sq with as.sq()", {
expect_identical(as.sq(str_rna_bsc),
Expand All @@ -62,11 +54,25 @@ test_that("character vector is casted to sq with as.sq()", {
sq(str_atp))
})

test_that("non-character objects are passed to import_sq()", {
test_that("Biostrings objects are passed to import_sq()", {
  skip_if_not_installed("Biostrings")
  # DNAStringSet() emits a warning for this input; silence it so the test
  # output stays clean.
  biostr_dna_bsc <- suppressWarnings(Biostrings::DNAStringSet(str_dna_bsc))
  expect_identical(as.sq(biostr_dna_bsc), import_sq(biostr_dna_bsc))
})

test_that("seqinr objects are passed to import_sq()", {
  skip_if_not_installed("seqinr")
  # Convert each amino acid string into a seqinr SeqFastaAA object.
  to_seqinr_aa <- function(aa_string) {
    seqinr::as.SeqFastaAA(seqinr::s2c(aa_string))
  }
  seqinr_ami_bsc <- lapply(str_ami_bsc, to_seqinr_aa)
  expect_identical(as.sq(seqinr_ami_bsc), import_sq(seqinr_ami_bsc))
})

test_that("Non-importable objects in import_sq() throw an error", {
  # A plain function is not a character vector, so as.sq() is expected to fail.
  not_importable <- function(x, y) x + y
  expect_error(as.sq(not_importable))
})

Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-export_sq.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_if_not_installed("Biostrings")

# SETUP ----
sq_dna <- sq(c("TACTGGGCATG", "CAGGTCGGA", "TAGTAGTCCG", "", "ACGGT"),
alphabet = "dna_bsc")
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-find_motifs.R
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ test_that("'found' column can handle both special characters and ambiguous lette

# INDEX COLUMNS ----
test_that("'start' and 'end' columns have values between 1 and length(sequence)", {
skip_if_not_installed("purrr")
sqibble_1 <- find_motifs(sq_dna_bsc, names_5, "TAG")
sqibble_1[["found_length"]] <- get_sq_lengths(sqibble_1[["found"]])
purrr::pwalk(sqibble_1, function(names, sought, found, start, end, found_length) {
Expand Down Expand Up @@ -206,6 +207,7 @@ test_that("'start' and 'end' columns have values between 1 and length(sequence)"
})

test_that("index columns can be used to retrieve found subsequence from original sequence", {
skip_if_not_installed("purrr")
purrr::pwalk(find_motifs(sq_dna_bsc, names_5, "TAG"), function(names, sought, found, start, end) {
expect_identical(
bite(sq_dna_bsc[which(names == names_5)], start:end)[[1]],
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-pkg-Biostrings.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_if_not_installed("Biostrings")

# SETUP ----
str_dna <- c("TACTGGGCATG", "CAGGTCGGA", "TAGTAGTCCG", "", "ACGGT")
str_1_dna <- "TCYYCAHGGCHA"
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-pkg-ape.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_if_not_installed("ape")

# SETUP ----
str_dna <- c("TACTGGGCATG", "CAGGTCGGA", "TAGTAGTCCG", "", "ACGGT")
str_ami <- c("REGENERATED", "TECHNICAL", "FEAT")
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-pkg-bioseq.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_if_not_installed("bioseq")

# SETUP ----
str_dna <- c("TACTGGGCATG", "CAGGTCGGA", "TAGTAGTCCG", "", "ACGGT")
str_rna <- c("", "KBS-UVW-AWWWG", "YGHHH-", "-CRASH", "MND-KUUBV-MY-")
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-pkg-seqinr.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_if_not_installed("seqinr")

# SETUP ----
str_dna <- c("TACTGGGCATG", "CAGGTCGGA", "TAGTAGTCCG", "", "ACGGT")
str_ami <- c("REGENERATED", "TECHNICAL", "FEAT")
Expand Down
Loading

0 comments on commit d5def3c

Please sign in to comment.