Merge pull request #59 from TGuillerme/master

v0.4.1
TGuillerme · Nov 13, 2017 · 57ce1d8 · 57ce1d8
2 parents b4623bd + ddccfe2
commit 57ce1d8
Show file tree

Hide file tree

Showing 170 changed files with 6,990 additions and 2,996 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,6 +10,7 @@ doc/*.fls
 null.test/*
 #Other drafts
 drafts/*
+R/MarkOrig/*
 
 # Oldies
 inst/old/*

diff --git a/.travis.yml b/.travis.yml
@@ -2,11 +2,9 @@ language: r
 sudo: required
 warnings_are_errors: false
 
-# blacklist
 branches:
-  except:
-    - master
   only:
+#    - master  #(deactivated for revisions)
     - release
 
 # Code coverage

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: dispRity
 Title: Measuring disparity in R
 Author: Thomas Guillerme <guillert@tcd.ie>
 Maintainer: Thomas Guillerme <guillert@tcd.ie>
-Version: 0.4
-Date: 2017-08-21
+Version: 0.4.1
+Date: 2017-11-13
 Description: A modular package for measuring disparity from multidimensional matrices. Disparity can be calculated from any matrix defining a multidimensional space. The package provides a set of implemented metrics to measure properties of the space and allows users to provide and test their own metrics. The package also provides functions for looking at disparity in a serial way (e.g. disparity through time) or per groups as well as visualising the results. Finally, this package provides several basic statistical tests for disparity analysis.
 Depends:
     R (>= 3.0.0),
@@ -15,6 +15,7 @@ Imports:
   paleotree,
   phangorn,
   phyclust,
+  geoscale,
   snow
 License: GPL-3
 VignetteBuilder: knitr

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,7 @@ importFrom("stats", "bw.nrd0", "coef", "dist", "glm", "p.adjust", "quantile", "r
 importFrom("utils", "combn", "data", "capture.output")
 importFrom("phyclust", "gen.seq.HKY")
 importFrom("phangorn", "dist.hamming", "NJ", "RF.dist", "CI", "RI", "optim.parsimony", "parsimony")
+# importFrom("geoscale", "timescales") #TG: somehow doesn't loads
 # importFrom("Claddis", "MorphDistMatrix")
 # importFrom("geomorph", "gpagen")
 # importFrom("methods", "hasArg")
@@ -45,6 +46,7 @@ export(ellipse.volume)
 export(convhull.volume)
 export(convhull.surface)
 export(diagonal)
+export(ancestral.distance)
 
 ##disparity tests
 export(bhatt.coeff)
@@ -64,12 +66,15 @@ export(merge.subsamples)
 export(size.subsamples)
 
 ##Package utilities
+export(nodes.coordinates)
 export(tree.age)
 export(make.metric)
 export(space.maker)
+export(random.circle)
 export(pair.plot)
 export(slice.tree)
 export(clean.data)
+export(get.bin.ages)
 
 ##Morphological data analysis
 export(sim.morpho)

diff --git a/R/Claddis.ordination.R b/R/Claddis.ordination.R
@@ -16,7 +16,7 @@
 #' Claddis.ordination(Claddis::Michaux1989)
 #' }
 #'
-#' @seealso \code{\link[Claddis]{MorphDistMatrix}}, \code{\link[Claddis]{ReadMorphNexus}}, \code{\link[Claddis]{MakeMorphNexus}}, \code{\link[stats]{cmdscale}}, \code{\link{custom.subsamples}}, \code{\link{time.subsamples}}, \code{\link{boot.matrix}}, \code{\link{dispRity}}.
+#' @seealso \code{\link[Claddis]{MorphDistMatrix}}, \code{\link[Claddis]{ReadMorphNexus}}, \code{\link[Claddis]{MakeMorphMatrix}}, \code{\link[stats]{cmdscale}}, \code{\link{custom.subsamples}}, \code{\link{time.subsamples}}, \code{\link{boot.matrix}}, \code{\link{dispRity}}.
 #' 
 #' @author Thomas Guillerme
 #' @export
@@ -30,7 +30,7 @@ Claddis.ordination <- function(data, distance = "Gower", transform = "arcsine_sq
     check.class(data, "list", msg = error_msg)
     ## Must have at least one matrix
     if(!any(names(data) %in% "matrix")) {
-        stop(error_msg)
+        stop(error_msg, call. = FALSE)
     }
     ## Matrix must be a matrix
     check.class(data$matrix, "matrix", msg = error_msg)

diff --git a/R/boot.matrix.R b/R/boot.matrix.R
@@ -88,19 +88,23 @@ boot.matrix <- function(data, bootstraps = 100, rarefaction = FALSE, dimensions,
 
     } else {
         ## Must be correct format
-        check.length(data, 3, " must be either a matrix or an output from the time.subsamples or cust.subsamples functions.")
+        check.length(data, 3, " must be either a matrix or an output from the time.subsamples or custom.subsamples functions.")
 
         ## With the correct names
         data_names <- names(data)
-        if(data_names[[1]] != "matrix" | data_names[[2]] != "call" | data_names[[3]] != "subsamples") {
-            stop(paste(match_call$data, "must be either a matrix or an output from the time.subsamples or cust.subsamples functions."))
+        if(is.null(data_names)) {
+            stop(paste(match_call$data, "must be either a matrix or an output from the time.subsamples or custom.subsamples functions."))
+        } else {
+            if(data_names[[1]] != "matrix" | data_names[[2]] != "call" | data_names[[3]] != "subsamples") {
+                stop(paste(match_call$data, "must be either a matrix or an output from the time.subsamples or custom.subsamples functions."))
+            }
         }
 
         if(length(data$subsamples) > 1) {
             ## Check if any subsamples has at least three rows
             elements_check <- unlist(lapply(unlist(data$subsamples, recursive = FALSE), function(X) length(X) < 3))
             if(any(elements_check)) {
-                stop(paste("The following subsamples have less than 3 elements: ", paste(unlist(strsplit(names(elements_check)[which(elements_check)], split = ".elements")), collapse = ", ") , "." , sep = ""))
+                warning(paste("The following subsamples have less than 3 elements: ", paste(unlist(strsplit(names(elements_check)[which(elements_check)], split = ".elements")), collapse = ", ") , ".\nThis might effect the bootstrap/rarefaction output." , sep = ""))
             }
         }
     }
@@ -119,11 +123,10 @@ boot.matrix <- function(data, bootstraps = 100, rarefaction = FALSE, dimensions,
     ## Is it logical?
     if(class(rarefaction) != "logical") {
         ## Is it numeric?
-        check.class(rarefaction, "numeric", " must be either numeric or logical.")
+        check.class(rarefaction, c("numeric", "integer"), " must be either numeric or logical.")
         rare_out <- rarefaction
     } else {
         if(rarefaction) {
-            #rarefaction <- lapply(unlist(lapply(data$subsamples, lapply, nrow), recursive = FALSE), seq, to = 3)
             rarefaction <- seq(from = nrow(data$matrix), to = 3)
             rare_out <- "full"
         } else {

diff --git a/R/boot.matrix_fun.R b/R/boot.matrix_fun.R
@@ -18,10 +18,18 @@ boot.single <- function(elements, rarefaction) {
 ## Performs bootstrap on one subsamples and all rarefaction levels
 replicate.bootstraps.verbose <- function(rarefaction, bootstraps, subsamples, boot.type.fun, verbose) {
     message(".", appendLF = FALSE)
-    return(replicate(bootstraps, boot.type.fun(subsamples$elements, rarefaction)))
+    if(length(subsamples$elements) == 1) {
+        return(matrix(rep(subsamples$elements[[1]], bootstraps), nrow = 1))
+    } else {
+        return(replicate(bootstraps, boot.type.fun(subsamples$elements, rarefaction)))
+    }
 }
 replicate.bootstraps.silent <- function(rarefaction, bootstraps, subsamples, boot.type.fun) {
-    return(replicate(bootstraps, boot.type.fun(subsamples$elements, rarefaction)))
+    if(length(subsamples$elements) == 1) {
+        return(matrix(rep(subsamples$elements[[1]], bootstraps), nrow = 1))
+    } else {
+        return(replicate(bootstraps, boot.type.fun(subsamples$elements, rarefaction)))
+    }
 }
 
 ## Performs bootstrap on multiple subsamples and all rarefaction levels

diff --git a/R/char.diff.R b/R/char.diff.R
@@ -57,7 +57,7 @@ char.diff <- function (matrix)  {
         options(warn = 0)
 
         ## Calculating the character difference
-        output <- round( 1 - ( abs(output-0.5)/0.5 ), digits = 10)
+        output <- round( 1 - ( abs(output-0.5)/0.5 ), digit = 10)
 
         class(output) <- c("matrix", "char.diff")
 

diff --git a/R/check.morpho.R b/R/check.morpho.R
@@ -40,6 +40,7 @@
 
 #DEBUG
 # stop("DEBUG check.morpho")
+# source("sanitizing.R")
 # random.tree <- rcoal(10)
 # matrix <- sim.morpho(random.tree, characters = 50, model = "ER", rates = c(rgamma, 1, 1))
 # orig.tree = random.tree
@@ -66,9 +67,9 @@ check.morpho <- function(matrix, orig.tree, parsimony = "fitch", first.tree = c(
         parsimony.algorithm <- phangorn::optim.parsimony
         method <- parsimony
     } else {
-        stop("User functions not implemented yet for model argument.")
-        use.optim.parsimony <- FALSE
-        parsimony.algorithm <- phangorn::parsimony
+        stop("User functions not implemented yet for parsimony argument.")
+        # use.optim.parsimony <- FALSE
+        # parsimony.algorithm <- phangorn::parsimony
     }
 
     #first.tree
@@ -97,6 +98,9 @@ check.morpho <- function(matrix, orig.tree, parsimony = "fitch", first.tree = c(
     #verbose
     check.class(verbose, "logical")
 
+    #distance
+    check.class(distance, "function")
+
 
     #CHECKING THE MATRIX
 
@@ -122,12 +126,12 @@ check.morpho <- function(matrix, orig.tree, parsimony = "fitch", first.tree = c(
     }
 
     #Get the quick and dirty most parsimonious tree
-    if(use.optim.parsimony == TRUE) {
+    # if(use.optim.parsimony == TRUE) {
         verbose.pars <- utils::capture.output(MP_tree <- parsimony.algorithm(tree = first_tree, data = matrix_phyDat, method = method, ...))
         #verbose.pars <- utils::capture.output(MP_tree <- parsimony.algorithm(tree = first_tree, data = matrix_phyDat, method = method)) ; warning("DEBUG")
-    } else {
-        verbose.pars <- utils::capture.output(MP_tree <- parsimony.algorithm(tree = first_tree, data = matrix_phyDat))
-    }
+    # } else {
+    #     verbose.pars <- utils::capture.output(MP_tree <- parsimony.algorithm(tree = first_tree, data = matrix_phyDat))
+    # }
 
     if(verbose != FALSE) {
         cat("Most parsimonious tree search:\n")
@@ -141,11 +145,11 @@ check.morpho <- function(matrix, orig.tree, parsimony = "fitch", first.tree = c(
     consistency_index <- phangorn::CI(MP_tree, matrix_phyDat)
 
     #Get the retention Index
-    retention_index <- phangorn::RI(MP_tree, matrix_phyDat)    
-
+    retention_index <- phangorn::RI(MP_tree, matrix_phyDat)
+    
     if(!missing(orig.tree)) {
         #Get the distance between the trees
-        tree_distance <- phangorn::RF.dist(MP_tree, unroot(orig.tree))
+        tree_distance <- distance(MP_tree, unroot(orig.tree))
 
         #Get the data out vectors (with distance)
         data_out <- c(parsimony_score, consistency_index, retention_index, tree_distance)

diff --git a/R/custom.subsamples.R b/R/custom.subsamples.R
@@ -99,12 +99,32 @@ custom.subsamples <- function(data, group) {
     } else {
         ## Using a list
 
-        if(all(unique(unlist(lapply(group, class))) %in% c("numeric", "integer"))) {
+        ## Check for empty groups
+        empty_groups <- is.na(group) | unlist(lapply(group, is.null))
+        if(any(empty_groups)) {
+            ## Prepare a warning message
+            empty_groups_names <- ifelse(!is.null(names(empty_groups)), paste(names(which(empty_groups)), collapse = ", "), paste(which(empty_groups), collapse = ", "))
+            being <- ifelse(length(which(empty_groups)) == 1, "is", "are")
+            subsample <- ifelse(length(which(empty_groups)) == 1, "Subsample", "Subsamples")
+            ## Send a warning messages
+            warning(paste(subsample, empty_groups_names, being, "empty."))
+
+            ## Replace NULL groups by NAs
+            null_groups <- unlist(lapply(group, is.null))
+            if(any(null_groups)) {
+                group[which(null_groups)] <- NA
+            }
+        } 
+        ## Select the groups for sanitising
+        group_select <- which(empty_groups != TRUE)
+
+        ## Cleaning groups
+        if(all(unique(unlist(lapply(group[group_select], class))) %in% c("numeric", "integer"))) {
             ## The list must have the same columns as in the data
-            if(max(unlist(group)) > nrow(data)) stop("Row numbers in group don't match the row numbers in data.")
+            if(max(unlist(group[group_select])) > nrow(data)) stop("Row numbers in group don't match the row numbers in data.")
         } else {
-            if(unique(unlist(lapply(group, class))) == "character") {
-                if(!all( as.character(unlist(group)) %in% as.character(rownames(data)))) stop("Row names in data and group arguments don't match.")
+            if(all(unique(unlist(lapply(group[group_select], class))) == "character")) {
+                if(!all( as.character(unlist(group[group_select])) %in% as.character(rownames(data)))) stop("Row names in data and group arguments don't match.")
 
                 ## Convert the row names into row numbers
                 group <- lapply(group, convert.name.to.numbers, data)
@@ -113,8 +133,6 @@ custom.subsamples <- function(data, group) {
                 stop("group argument must be a list of row names or row numbers.")
             }
         }
-        ## Checking if the groups have a list three elements
-        if(any(unlist(lapply(group, length)) < 3 )) stop("There must be at least three elements for each subsample.")
 
         ## Checking if the groups have names
         if(is.null(names(group))) names(group) <- seq(1:length(group))

diff --git a/R/dispRity.R b/R/dispRity.R
@@ -136,11 +136,14 @@ dispRity <- function(data, metric, dimensions, ..., verbose = FALSE) { #parallel
     ## Dimensions
     if(!missing(dimensions)) {
         ## Else must be a single numeric value (proportional)
-        check.class(dimensions, "numeric", " must be logical or a proportional threshold value.")
-        check.length(dimensions, 1, " must be logical or a proportional threshold value.", errorif = FALSE)
+        silent <- check.class(dimensions, c("numeric", "integer"), " must be a number or proportion of dimensions to keep.")
+        check.length(dimensions, 1, " must be a number or proportion of dimensions to keep.", errorif = FALSE)
         if(dimensions < 0) stop("Number of dimensions to remove cannot be less than 0.")
         if(dimensions < 1) dimensions <- round(dimensions * ncol(data$matrix))
-        if(dimensions > ncol(data$matrix)) stop("Number of dimensions to remove cannot be more than the number of columns in the matrix.")
+        if(dimensions > ncol(data$matrix)) {
+            warning(paste0("Dimension number too high: set to ", ncol(data$matrix), "."))
+            dimensions <- ncol(data$matrix)
+        }
         data$call$dimensions <- dimensions
     }
 
@@ -167,13 +170,23 @@ dispRity <- function(data, metric, dimensions, ..., verbose = FALSE) { #parallel
     if(length(data$call$disparity$metrics) == 0) {
         ## Data call had no metric calculated yet
         matrix_decomposition <- TRUE
+
+        ## Remove empty subsamples or with only one data point
+        elements <- unlist(lapply(lapply(data$subsamples, lapply, length), `[[`, 1))
+        elements_keep <- which(elements > 1)
+        removed_elements <- ifelse(length(elements_keep) != length(elements), TRUE, FALSE)
+
         ## Lapply through the subsamples
-        lapply_loop <- data$subsamples
+        lapply_loop <- data$subsamples[elements_keep]
+
     } else {
         ## Data has already been decomposed
         matrix_decomposition <- FALSE
         ## Lapply through the disparity scores (serried)
         lapply_loop <- data$disparity
+
+        ## No removed elements
+        removed_elements <- FALSE
     }
 
     # if(!do_parallel) {
@@ -186,6 +199,21 @@ dispRity <- function(data, metric, dimensions, ..., verbose = FALSE) { #parallel
     #     stopCluster(cluster)
     # }
 
+    ## Adding the removed elements as NAs
+    if(removed_elements) {
+        ## Creating empty disparity subsamples
+        empty_disparity <- lapply(data$subsamples[which(elements <= 1)], lapply, function(x) ifelse(x, NA, NA))
+
+        ## Merging the two subsamples
+        disparity <- c(disparity, empty_disparity)
+        disparity <- disparity[match(names(data$subsamples), names(disparity))]
+
+        ## Prepare a warning message
+        empty_group_names <- paste(names(which(elements <= 1)), collapse = ", ")
+        subsample <- ifelse(length(which(elements <=1)) > 1, "subsamples", "subsample")
+        warning(paste("Disparity not calculated for", subsample, empty_group_names, "(not enough data)."))
+    }
+
     ## Update the disparity
     data$disparity <- disparity