Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
DominikRafacz committed Feb 26, 2021
2 parents e7c724c + 9e402b6 commit d705ace
Show file tree
Hide file tree
Showing 34 changed files with 1,230 additions and 358 deletions.
11 changes: 5 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: tidysq
Type: Package
Title: Tidy processing and analysis of biological sequences
Title: Tidy Processing and Analysis of Biological Sequences
Version: 1.0.0
Date: 2020-12-10
Date: 2021-02-25
Authors@R: c(person("Dominik", "Rafacz",
email = "dominikrafacz@gmail.com",
comment = c(ORCID = "0000-0003-0925-1909"),
Expand Down Expand Up @@ -51,8 +51,8 @@ Imports:
tibble (>= 2.1.3),
vctrs (>= 0.3.0)
Suggests:
AmyloGram (>= 1.0),
ape (>= 5.3),
bioseq (>= 0.1.2),
Biostrings (>= 2.52.0),
covr,
knitr,
Expand All @@ -63,8 +63,8 @@ Suggests:
testthat (>= 2.1.0),
withr (>= 2.2.0)
License: GPL (>= 2)
URL: https://github.com/michbur/tidysq
BugReports: https://github.com/michbur/tidysq/issues
URL: https://github.com/BioGenies/tidysq
BugReports: https://github.com/BioGenies/tidysq/issues
SystemRequirements: GNU make,
C++17
NeedsCompilation: no
Expand All @@ -74,5 +74,4 @@ Language: en-US
RoxygenNote: 7.1.1
LinkingTo: Rcpp,
testthat
RdMacros: lifecycle
VignetteBuilder: knitr
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ S3method(as.sq,character)
S3method(as.sq,default)
S3method(bite,default)
S3method(bite,sq)
S3method(collapse,default)
S3method(collapse,sq)
S3method(complement,default)
S3method(complement,sq_dna_bsc)
S3method(complement,sq_dna_ext)
Expand Down Expand Up @@ -50,11 +52,16 @@ S3method(import_sq,SeqFastaAA)
S3method(import_sq,SeqFastadna)
S3method(import_sq,XStringSetList)
S3method(import_sq,alignment)
S3method(import_sq,bioseq_aa)
S3method(import_sq,bioseq_dna)
S3method(import_sq,bioseq_rna)
S3method(import_sq,default)
S3method(import_sq,list)
S3method(obj_print_data,sq)
S3method(obj_print_footer,sq)
S3method(obj_print_header,sq)
S3method(paste,default)
S3method(paste,sq)
S3method(pillar_shaft,encsq)
S3method(pillar_shaft,sq)
S3method(remove_ambiguous,default)
Expand Down Expand Up @@ -181,6 +188,7 @@ export("sq_type<-")
export(alphabet)
export(as.sq)
export(bite)
export(collapse)
export(complement)
export(export_sq)
export(find_invalid_letters)
Expand All @@ -202,6 +210,7 @@ export(is.sq_rna_bsc)
export(is.sq_rna_ext)
export(is.sq_unt)
export(is_empty_sq)
export(paste)
export(random_sq)
export(read_fasta)
export(remove_ambiguous)
Expand Down
12 changes: 10 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ CPP_bite <- function(x, indices, NA_letter, on_warning) {
.Call(`_tidysq_CPP_bite`, x, indices, NA_letter, on_warning)
}

# Thin R-level wrapper: passes `x` and `NA_letter` unchanged to the native
# routine `_tidysq_CPP_collapse` through `.Call()` and returns its result.
# NOTE(review): R/RcppExports.R is normally regenerated by
# Rcpp::compileAttributes(), so this comment may be overwritten - confirm.
CPP_collapse <- function(x, NA_letter) {
.Call(`_tidysq_CPP_collapse`, x, NA_letter)
}

# Thin R-level wrapper: passes `x` and `NA_letter` unchanged to the native
# routine `_tidysq_CPP_complement` through `.Call()` and returns its result.
# NOTE(review): R/RcppExports.R is normally regenerated by
# Rcpp::compileAttributes(), so this comment may be overwritten - confirm.
CPP_complement <- function(x, NA_letter) {
.Call(`_tidysq_CPP_complement`, x, NA_letter)
}
Expand Down Expand Up @@ -53,6 +57,10 @@ CPP_pack_STRING <- function(proto, alphabet, NA_letter, ignore_case) {
.Call(`_tidysq_CPP_pack_STRING`, proto, alphabet, NA_letter, ignore_case)
}

# Thin R-level wrapper: passes `list_of_x` and `NA_letter` unchanged to the
# native routine `_tidysq_CPP_paste` through `.Call()` and returns its result.
# NOTE(review): R/RcppExports.R is normally regenerated by
# Rcpp::compileAttributes(), so this comment may be overwritten - confirm.
CPP_paste <- function(list_of_x, NA_letter) {
.Call(`_tidysq_CPP_paste`, list_of_x, NA_letter)
}

# Thin R-level wrapper: passes `n`, `len`, `alphabet` and `use_gap` unchanged
# to the native routine `_tidysq_CPP_random_sq` through `.Call()` and returns
# its result.
# NOTE(review): R/RcppExports.R is normally regenerated by
# Rcpp::compileAttributes(), so this comment may be overwritten - confirm.
CPP_random_sq <- function(n, len, alphabet, use_gap) {
.Call(`_tidysq_CPP_random_sq`, n, len, alphabet, use_gap)
}
Expand Down Expand Up @@ -81,8 +89,8 @@ CPP_substitute_letters <- function(x, encoding, NA_letter) {
.Call(`_tidysq_CPP_substitute_letters`, x, encoding, NA_letter)
}

CPP_translate <- function(x, table, NA_letter, interpret_as_stop) {
.Call(`_tidysq_CPP_translate`, x, table, NA_letter, interpret_as_stop)
CPP_translate <- function(x, table, NA_letter) {
.Call(`_tidysq_CPP_translate`, x, table, NA_letter)
}

CPP_typify <- function(x, dest_type, NA_letter) {
Expand Down
51 changes: 51 additions & 0 deletions R/collapse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#' Collapse multiple sequences into one
#'
#' @description Concatenates all sequences of a vector into one sequence while
#' keeping the sequence type intact.
#'
#' @template x
#' @template NA_letter
#' @template three-dots
#'
#' @return \code{\link[=sq-class]{sq}} object of the same type as input but with
#' exactly one sequence.
#'
#' @details
#' \code{collapse()} glues the sequences of the supplied \code{sq} object
#' together in the order in which they are stored. For instance, the three
#' sequences AGGCT, ATCCGT and GAACGT yield AGGCTATCCGTGAACGT. Neither the type
#' nor the alphabet of the input object is changed by this operation.
#'
#' @examples
#' # Creating objects to work on:
#' sq_ami <- sq(c("MIAANYTWIL","TIAALGNIIYRAIE", "NYERTGHLI", "MAYXXXIALN"),
#'              alphabet = "ami_ext")
#' sq_dna <- sq(c("ATGCAGGA", "GACCGAACGAN", ""), alphabet = "dna_ext")
#' sq_unt <- sq(c("ATGCAGGA?", "TGACGAGCTTA", "", "TIAALGNIIYRAIE"))
#'
#' # Collapsing sequences:
#' collapse(sq_ami)
#' collapse(sq_dna)
#' collapse(sq_unt)
#'
#' # Empty sq objects are collapsed as well (into empty string - ""):
#' sq_empty <- sq(character(), alphabet = "rna_bsc")
#' collapse(sq_empty)
#'
#' @family order_functions
#' @export
collapse <- function(x, ...) {
  # S3 generic: the class of `x` selects the method that does the work.
  UseMethod("collapse")
}

#' @export
collapse.default <- function(x, ...) {
  # Fallback for classes without a dedicated method. `call. = FALSE` keeps the
  # call itself out of the reported error.
  stop(
    "method 'collapse' isn't implemented for this type of object",
    call. = FALSE
  )
}

#' @rdname collapse
#' @export
collapse.sq <- function(x, ...,
                        NA_letter = getOption("tidysq_NA_letter")) {
  # Validate NA_letter (a string of at least one character) before it is
  # handed over to the compiled routine.
  assert_string(NA_letter, min.chars = 1)

  collapsed <- CPP_collapse(x, NA_letter)
  collapsed
}
26 changes: 22 additions & 4 deletions R/export_sq.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
#'
#' @description Converts object of class \code{\link[=sq-class]{sq}} to a class
#' from another package. Currently supported packages are \pkg{ape},
#' \pkg{Bioconductor} and \pkg{seqinr}. For exact list of supported classes and
#' resulting types, see details.
#' \pkg{bioseq}, \pkg{Bioconductor} and \pkg{seqinr}. For the exact list of
#' supported classes and resulting types, see details.
#'
#' @template x
#' @param export_format [\code{character(1)}]\cr
Expand All @@ -19,19 +19,22 @@
#' \item \strong{ami}:
#' \itemize{
#' \item \code{"ape::AAbin"}
#' \item \code{"bioseq::bioseq_aa"}
#' \item \code{"Biostrings::AAString"}
#' \item \code{"Biostrings::AAStringSet"}
#' \item \code{"seqinr::SeqFastaAA"}
#' }
#' \item \strong{dna}:
#' \itemize{
#' \item \code{"ape::DNAbin"}
#' \item \code{"bioseq::bioseq_dna"}
#' \item \code{"Biostrings::DNAString"}
#' \item \code{"Biostrings::DNAStringSet"}
#' \item \code{"seqinr::SeqFastadna"}
#' }
#' \item \strong{rna}:
#' \itemize{
#' \item \code{"bioseq::bioseq_rna"}
#' \item \code{"Biostrings::RNAString"}
#' \item \code{"Biostrings::RNAStringSet"}
#' }
Expand All @@ -41,19 +44,22 @@
#' # DNA and amino acid sequences can be exported to most packages
#' sq_ami <- sq(c("MVVGL", "LAVPP"), alphabet = "ami_bsc")
#' export_sq(sq_ami, "ape::AAbin")
#' export_sq(sq_ami, "bioseq::bioseq_aa")
#' export_sq(sq_ami, "Biostrings::AAStringSet", c("one", "two"))
#' export_sq(sq_ami, "seqinr::SeqFastaAA")
#'
#' sq_dna <- sq(c("TGATGAAGCGCA", "TTGATGGGAA"), alphabet = "dna_bsc")
#' export_sq(sq_dna, "ape::DNAbin", name = c("one", "two"))
#' export_sq(sq_dna, "bioseq::bioseq_dna")
#' export_sq(sq_dna, "Biostrings::DNAStringSet")
#' export_sq(sq_dna, "seqinr::SeqFastadna")
#'
#' # RNA sequences are limited to Biostrings
#' # RNA sequences are limited to Biostrings and bioseq
#' sq_rna <- sq(c("NUARYGCB", "", "DRKCNYBAU"), alphabet = "rna_ext")
#' export_sq(sq_rna, "bioseq::bioseq_rna")
#' export_sq(sq_rna, "Biostrings::RNAStringSet")
#'
#' # Biostrings accept single sequences as well
#' # Biostrings can export single sequences to simple strings as well
#' export_sq(sq_dna[1], "Biostrings::DNAString")
#'
#' @family output_functions
Expand All @@ -77,6 +83,10 @@ export_sq.sq_ami_bsc <- function(x, export_format, name = NULL, ...) {
assert_package_installed("ape")
ape::as.AAbin(setNames(lapply(unpack(x, "STRINGS"), `attributes<-`, NULL), name))
},
`bioseq::bioseq_aa` = {
assert_package_installed("bioseq")
bioseq::new_aa(setNames(as.character(x), name))
},
`Biostrings::AAString` = {
assert_package_installed("Biostrings")
if (vec_size(x) != 1)
Expand Down Expand Up @@ -113,6 +123,10 @@ export_sq.sq_dna_bsc <- function(x, export_format, name = NULL, ...) {
assert_package_installed("ape")
ape::as.DNAbin(setNames(lapply(unpack(x, "STRINGS"), `attributes<-`, NULL), name))
},
`bioseq::bioseq_dna` = {
assert_package_installed("bioseq")
bioseq::new_dna(setNames(as.character(x), name))
},
`Biostrings::DNAString` = {
assert_package_installed("Biostrings")
if (vec_size(x) != 1)
Expand Down Expand Up @@ -145,6 +159,10 @@ export_sq.sq_dna_ext <- export_sq.sq_dna_bsc
#' @export
export_sq.sq_rna_bsc <- function(x, export_format, name = NULL, ...) {
switch (export_format,
`bioseq::bioseq_rna` = {
assert_package_installed("bioseq")
bioseq::new_rna(setNames(as.character(x), name))
},
`Biostrings::RNAString` = {
assert_package_installed("Biostrings")
if (vec_size(x) != 1)
Expand Down
33 changes: 31 additions & 2 deletions R/import_sq.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#'
#' @description Creates \code{\link[=sq-class]{sq}} object from object of class
#' from another package. Currently supported packages are \pkg{ape},
#' \pkg{Bioconductor} and \pkg{seqinr}. For exact list of supported classes and
#' resulting types, see details.
#' \pkg{bioseq}, \pkg{Bioconductor} and \pkg{seqinr}. For the exact list of
#' supported classes and resulting types, see details.
#'
#' @param object [\code{any(1)}]\cr
#' An object of one of supported classes.
Expand All @@ -25,6 +25,12 @@
#' \item \code{alignment} - exact type is guessed within \code{\link{sq}}
#' function
#' }
#' \item \code{bioseq}:
#' \itemize{
#' \item \code{bioseq_aa} - imported as \strong{ami_ext}
#' \item \code{bioseq_dna} - imported as \strong{dna_ext}
#' \item \code{bioseq_rna} - imported as \strong{rna_ext}
#' }
#' \item \code{Biostrings}:
#' \itemize{
#' \item \code{AAString} - imported as \strong{ami_ext} with exactly one
Expand Down Expand Up @@ -59,6 +65,11 @@
#' ape_dna <- as.DNAbin(list(one = c("C", "T", "C", "A"), two = c("T", "G", "A", "G", "G")))
#' import_sq(ape_dna)
#'
#' # bioseq example
#' library(bioseq)
#' bioseq_rna <- new_rna(c(one = "ANBRY", two = "YUTUGGN"))
#' import_sq(bioseq_rna)
#'
#' # Biostrings example
#' library(Biostrings)
#' Biostrings_ami <- AAStringSet(c(one = "FEAPQLIWY", two = "EGITENAK"))
Expand Down Expand Up @@ -125,6 +136,24 @@ import_sq.alignment <- function(object, ...) {
bind_into_sqibble(sq(object[["seq"]], ...), object[["nam"]])
}

#' @export
import_sq.bioseq_aa <- function(object, ...) {
  # `bioseq_aa` is a class from package `bioseq`. Its elements are converted to
  # character and read with the "ami_ext" alphabet; the names of `object` are
  # handed to bind_into_sqibble() together with the resulting sq.
  sequences <- sq(as.character(object), alphabet = "ami_ext")
  bind_into_sqibble(sequences, names(object))
}

#' @export
import_sq.bioseq_dna <- function(object, ...) {
  # `bioseq_dna` is a class from package `bioseq`. Its elements are converted
  # to character and read with the "dna_ext" alphabet; the names of `object`
  # are handed to bind_into_sqibble() together with the resulting sq.
  sequences <- sq(as.character(object), alphabet = "dna_ext")
  bind_into_sqibble(sequences, names(object))
}

#' @export
import_sq.bioseq_rna <- function(object, ...) {
  # `bioseq_rna` is a class from package `bioseq`. Its elements are converted
  # to character and read with the "rna_ext" alphabet; the names of `object`
  # are handed to bind_into_sqibble() together with the resulting sq.
  sequences <- sq(as.character(object), alphabet = "rna_ext")
  bind_into_sqibble(sequences, names(object))
}

#' @export
import_sq.AAString <- function(object, ...) {
# From package `Biostrings`
Expand Down
52 changes: 52 additions & 0 deletions R/paste.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' @export
paste <- function(...) {
  # S3 generic placed in front of base::paste(); dispatch happens on the class
  # of the first argument passed through `...`.
  UseMethod("paste")
}

#' @export
paste.default <- function(...) {
# Default method: forwards every argument untouched to base R's paste(), so
# calls that do not hit a more specific method behave as base::paste() does.
base::paste(...)
}

#' Paste sequences in string-like fashion
#'
#' @description Joins multiple vectors of sequences into one vector.
#'
#' @param ... [\code{sq}]\cr
#'  Sequences to paste together.
#' @template NA_letter
#'
#' @return \code{\link[=sq-class]{sq}} object of common type of input objects.
#' Common type is determined in the same process as for
#' \code{\link[=sq-concatenate]{c.sq}()}.
#'
#' @details
#' \code{paste()} joins sequences in the same way as it does with strings.
#' All \code{sq} objects must have the same length, that is, contain the same
#' number of sequences. An exception is made for scalar (length 1) \code{sq}
#' objects, which are replicated instead.
#'
#' @examples
#' # Creating objects to work on:
#' sq_dna_1 <- sq(c("TTCAGGGCTAG", "CGATTGC", "CAGTTTA"),
#'                alphabet = "dna_bsc")
#' sq_dna_2 <- sq(c("ATCTTGAAG", "CATATGCGCTA", "ACGTGTCGA"),
#'                alphabet = "dna_bsc")
#' sq_unt_1 <- sq(c("ATGCAGGA?", "TGACGAGCTTA", "", "TIAALGNIIYRAIE"))
#' sq_unt_2 <- sq(c("OVNU!!OK!!J", "GOK!MI!N!BB!", "DPOFIN!!", "??!?"))
#'
#' # Pasting sequences:
#' paste(sq_dna_1, sq_dna_2)
#' paste(sq_unt_1, sq_unt_2)
#'
#' # Objects of different types can be pasted too, as long as they contain
#' # the same number of sequences:
#' paste(sq_dna_2, sq_unt_2[1:3], sq_dna_1)
#'
#' @family order_functions
#' @name paste
#' @export
paste.sq <- function(...,
                     NA_letter = getOption("tidysq_NA_letter")) {
  # Called only for validation: throws an error when there is no common size.
  # The size it returns is deliberately discarded.
  vec_size_common(...)
  assert_string(NA_letter, min.chars = 1)

  # Inputs are cast to their common type before being passed to the compiled
  # routine.
  # NOTE(review): length-1 inputs are not recycled here; presumably CPP_paste
  # replicates them as the documentation promises - confirm.
  CPP_paste(vec_cast_common(...), NA_letter)
}
Loading

0 comments on commit d705ace

Please sign in to comment.