Merge pull request #65 from TGuillerme/master

v0.5 - covered with tests
TGuillerme · Dec 20, 2017 · 80f3e97 · 80f3e97
2 parents 57ce1d8 + b21105e
commit 80f3e97
Show file tree

Hide file tree

Showing 243 changed files with 9,106 additions and 3,180 deletions.
diff --git a/.github/issue_template.bug.md b/.github/issue_template.bug.md
@@ -0,0 +1 @@
+<!-- Bug issue template -->
diff --git a/.travis.yml b/.travis.yml
@@ -4,7 +4,7 @@ warnings_are_errors: false
 
 branches:
   only:
-#    - master  #(deactivated for revisions)
+    - master
     - release
 
 # Code coverage

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: dispRity
 Title: Measuring disparity in R
 Author: Thomas Guillerme <guillert@tcd.ie>
 Maintainer: Thomas Guillerme <guillert@tcd.ie>
-Version: 0.4.1
-Date: 2017-11-13
+Version: 0.5
+Date: 2017-12-20
 Description: A modular package for measuring disparity from multidimensional matrices. Disparity can be calculated from any matrix defining a multidimensional space. The package provides a set of implemented metrics to measure properties of the space and allows users to provide and test their own metrics. The package also provides functions for looking at disparity in a serial way (e.g. disparity through time) or per groups as well as visualising the results. Finally, this package provides several basic statistical tests for disparity analysis.
 Depends:
     R (>= 3.0.0),
@@ -16,6 +16,8 @@ Imports:
   phangorn,
   phyclust,
   geoscale,
+  vegan,
+  geiger,
   snow
 License: GPL-3
 VignetteBuilder: knitr

diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,18 +3,17 @@ import(ape)
 importFrom("paleotree", "timeSliceTree")
 importFrom("snow", "makeCluster", "stopCluster", "parLapply")
 importFrom("geometry", "convhulln")
-# importFrom("hypervolume", "hypervolume", "estimate_bandwidth", "get_volume")
 importFrom("ade4", "randtest", "as.randtest")
 importFrom("grDevices", "colorRampPalette", "grey")
 importFrom("graphics", "axis", "boxplot", "hist", "image", "lines", "mtext", "par", "plot", "points", "polygon", "text", "legend")
-importFrom("stats", "bw.nrd0", "coef", "dist", "glm", "p.adjust", "quantile", "rnorm", "var", "median", "cmdscale")
+importFrom("stats", "bw.nrd0", "coef", "glm", "p.adjust", "quantile", "rnorm", "var", "median", "cmdscale", "as.dist", "density")
 importFrom("utils", "combn", "data", "capture.output")
 importFrom("phyclust", "gen.seq.HKY")
 importFrom("phangorn", "dist.hamming", "NJ", "RF.dist", "CI", "RI", "optim.parsimony", "parsimony")
-# importFrom("geoscale", "timescales") #TG: somehow doesn't loads
+importFrom("vegan", "adonis", "vegdist")
+importFrom("geiger", "dtt", "ratematrix", "sim.char")
 # importFrom("Claddis", "MorphDistMatrix")
 # importFrom("geomorph", "gpagen")
-# importFrom("methods", "hasArg")
 # importFrom("RCurl", "getURL", "url.exists")
 
 ##C libraries
@@ -25,8 +24,8 @@ export(Claddis.ordination)
 export(geomorph.ordination)
 
 ##Functions
-export(time.subsamples)
-export(custom.subsamples)
+export(time.subsets)
+export(custom.subsets)
 export(boot.matrix)
 export(dispRity)
 export(test.dispRity)
@@ -46,11 +45,17 @@ export(ellipse.volume)
 export(convhull.volume)
 export(convhull.surface)
 export(diagonal)
-export(ancestral.distance)
+export(ancestral.dist)
+export(radius)
+export(n.ball.volume)
+export(pairwise.dist)
+export(span.tree.length)
 
 ##disparity tests
 export(bhatt.coeff)
 export(null.test)
+export(adonis.dispRity)
+export(dtt.dispRity)
 #export(sequential.test)
 #export(model.test)
 
@@ -61,9 +66,10 @@ export(matrix.dispRity)
 export(extract.dispRity)
 export(scale.dispRity)
 export(sort.dispRity)
-export(get.subsamples)
-export(merge.subsamples)
-export(size.subsamples)
+export(get.subsets)
+export(merge.subsets)
+export(size.subsets)
+export(extinction.subsets)
 
 ##Package utilities
 export(nodes.coordinates)
@@ -75,6 +81,7 @@ export(pair.plot)
 export(slice.tree)
 export(clean.data)
 export(get.bin.ages)
+export(crown.stem)
 
 ##Morphological data analysis
 export(sim.morpho)
@@ -83,7 +90,6 @@ export(get.contrast.matrix)
 export(apply.NA)
 export(char.diff)
 export(plot.char.diff)
-S3method(plot, char.diff)
 
 ##S3 methods
 S3method(plot, dispRity)
@@ -92,3 +98,6 @@ S3method(print, dispRity)
 S3method(matrix, dispRity)
 S3method(scale, dispRity)
 S3method(sort, dispRity)
+S3method(plot, char.diff)
+S3method(adonis, dispRity)
+S3method(dtt, dispRity)
diff --git a/R/Claddis.ordination.R b/R/Claddis.ordination.R
@@ -16,7 +16,7 @@
 #' Claddis.ordination(Claddis::Michaux1989)
 #' }
 #'
-#' @seealso \code{\link[Claddis]{MorphDistMatrix}}, \code{\link[Claddis]{ReadMorphNexus}}, \code{\link[Claddis]{MakeMorphMatrix}}, \code{\link[stats]{cmdscale}}, \code{\link{custom.subsamples}}, \code{\link{time.subsamples}}, \code{\link{boot.matrix}}, \code{\link{dispRity}}.
+#' @seealso \code{\link[Claddis]{MorphDistMatrix}}, \code{\link[Claddis]{ReadMorphNexus}}, \code{\link[Claddis]{MakeMorphMatrix}}, \code{\link[stats]{cmdscale}}, \code{\link{custom.subsets}}, \code{\link{time.subsets}}, \code{\link{boot.matrix}}, \code{\link{dispRity}}.
 #' 
 #' @author Thomas Guillerme
 #' @export

diff --git a/R/adonis.dispRity.R b/R/adonis.dispRity.R
@@ -0,0 +1,238 @@
+#' @title adonis dispRity (from \code{vegan::adonis})
+#'
+#' @description Passing \code{dispRity} objects to the \code{\link[vegan]{adonis}} function from the \code{vegan} package.
+#'
+#' @param data A \code{dispRity} object with subsets
+#' @param formula The model formula (default is \code{matrix ~ group}, see details)
+#' @param method The distance method to be passed to \code{\link[vegan]{adonis}} and eventually to \code{\link[vegan]{vegdist}} (see details - default \code{method ="euclidean"})
+#' @param ... Any optional arguments to be passed to \code{\link[vegan]{adonis}}
+#' @param warn \code{logical}, whether to print internal warnings (\code{TRUE}; default - advised) or not (\code{FALSE}).
+#' 
+#' @details
+#' The first element of the formula (the response) must be called \code{matrix} and the predictors must be existing in the subsets of the \code{dispRity} object.
+#'
+#' If \code{data$matrix} is not a distance matrix, distance is calculated using the \code{\link[stats]{dist}} function. The type of distance can be passed via the standard \code{method} argument that will be recycled by \code{\link[vegan]{adonis}}.
+#' 
+#' If the \code{dispRity} data has custom subsets with a single group, the formula is set to \code{matrix ~ group}.
+#' 
+#' If the \code{dispRity} data has custom subsets with multiple group categories (separated by a dot, e.g. \code{c("group1.cat1", "group1.cat2", "group2.catA", "group2.catB")} being two groups with two categories each), the default formula is \code{matrix ~ first_group} but can be set to any combination (e.g. \code{matrix ~ first_group + second_group}).
+#' 
+#' If the \code{dispRity} data has time subsets, the predictor is automatically set to \code{time}.
+#' 
+#' @seealso
+#' \code{\link[vegan]{adonis}}, \code{\link{test.dispRity}}, \code{\link{custom.subsets}}, \code{\link{time.subsets}}.
+#' 
+#' @examples
+#' ## Adonis with one groups 
+#' 
+#' ## Generating a random character matrix
+#' character_matrix <- sim.morpho(rtree(20), 50, rates = c(rnorm, 1, 0))
+#' ## Calculating the distance matrix
+#' distance_matrix <- as.matrix(dist(character_matrix))
+#' ## Creating two groups
+#' random_groups <- list("group1" = 1:10, "group2" = 11:20)
+#' 
+#' ## Generating a dispRity object
+#' random_disparity <- custom.subsets(distance_matrix, random_groups)
+#' ## Running a default NPMANOVA
+#' adonis.dispRity(random_disparity)
+#' 
+#' 
+#' ## Adonis with multiple groups
+#' 
+#' ## Creating a random matrix
+#' random_matrix <- matrix(data = rnorm(90), nrow = 10, 
+#'                      dimnames = list(letters[1:10]))
+#' ## Creating two groups with two states each
+#' groups <- as.data.frame(matrix(data = c(rep(1,5), rep(2,5), rep(c(1,2), 5)),
+#'          nrow = 10, ncol = 2, dimnames = list(letters[1:10], c("g1", "g2"))))
+#' 
+#' ## Creating the dispRity object
+#' multi_groups <- custom.subsets(random_matrix, groups)
+#' 
+#' ## Running the NPMANOVA
+#' adonis.dispRity(multi_groups, matrix ~ g1 + g2)
+#' 
+#' ## Adonis with time
+#' 
+#' ## Creating time series
+#' data(BeckLee_mat50)
+#' data(BeckLee_tree)
+#' data(BeckLee_ages)
+#' time_subsets <- time.subsets(BeckLee_mat50, BeckLee_tree, 
+#'      method = "discrete", inc.nodes = FALSE, time = c(100, 85, 65, 0),
+#'      FADLAD = BeckLee_ages)
+#' 
+#' ## Running the NPMANOVA with time as a predictor
+#' adonis.dispRity(time_subsets, matrix ~ time)
+#' 
+#' ## Running the NPMANOVA with each time bin as a predictor
+#' adonis.dispRity(time_subsets, matrix ~ time.subsets)
+#' 
+#' @seealso \code{\link{test.dispRity}}, \code{\link{custom.subsets}}, \code{\link{time.subsets}}
+#' 
+#' @author Thomas Guillerme
+#' @export
+
+# source("sanitizing.R")
+# source("adonis.dispRity_fun.R")
+# formula = matrix ~ group
+# method = "euclidean"
+# warn = TRUE
+
+adonis.dispRity <- function(data, formula = matrix ~ group, method = "euclidean", ..., warn = TRUE) {
+
+    match_call <- match.call()
+
+    ## data must be dispRity
+    check.class(data, "dispRity")
+
+    ## data must have subsets
+    if(is.null(data$subsets)) {
+        stop("The data must have subsets. Use custom.subsets() or time.subsets() to create some.")
+    } else {
+        if(is.na(match(data$call$subsets[1], "customised"))) {
+            ## Subsets are time
+            time_subsets <- TRUE
+        } else {
+            time_subsets <- FALSE
+        }
+    }
+
+    ## formula must be formula
+    check.class(formula, "formula")
+
+    ## formula must have the right response/predictors
+    formula_error_format <- "Formula must be of type: matrix ~ predictor(s) (where matrix is the response)."
+    check.length(formula, 3, msg = formula_error_format)
+    if(!formula[[1]] == "~") stop(formula_error_format)
+    if(!formula[[2]] == "matrix") stop(formula_error_format)
+    ## Non-default predictors
+    if(!formula[[3]] == "group") {
+
+        ## Predictor is time
+        if(formula[[3]] == "time" || formula[[3]] == "time.subsets") {
+
+            if(!time_subsets) {
+                stop(paste0(match_call$data, " has no time subsets.\nImpossible to use the following formula: ", match_call$formula))
+            }
+
+            ## Set up the model details
+            n_predictors <- ifelse(formula[[3]] == "time", 1, length(data$subsets))
+            group_names <- ifelse(formula[[3]] == "time", "time", NA)
+            group_variables <- NA
+            pool_time <- ifelse(formula[[3]] == "time", TRUE, FALSE)
+
+            ## Update the formula
+            if(formula[[3]] == "time.subsets") {
+                group_names_tmp <- paste0("t", gsub(" - ", "to", names(data$subsets)))
+                series <- paste(group_names_tmp, collapse = " + ")
+                formula <- formula(paste0("matrix ~ ", series))
+            } else {
+                time_f <- ~time
+                formula[[3]] <- time_f[[2]]
+            }
+        } else {
+
+            n_predictors <- length(formula[[3]])-1
+            group_variables <- names(data$subsets)
+            group_names <- unique(unlist(lapply(strsplit(group_variables, split = "\\."), `[[`, 1)))
+            split.variables <- function(one_group_name, group_variables) {
+                return(group_variables[grep(one_group_name, group_variables)])
+
+            }
+            group_variables <- lapply(as.list(group_names), split.variables, group_variables)
+
+            ## Check the predictors
+            for(predictor in 1:n_predictors) {
+                if(is.na(match(as.character(formula[[3]][[predictor + 1]]), group_names))) {
+                    stop(paste0("Predictor ", as.character(formula[[3]][[predictor + 1]]), " not found in ", match_call$data, " subsets.\n"))
+                }
+
+            }
+        }
+    } else {
+
+        n_predictors <- 1
+
+        if(time_subsets) {
+            ## Modifying group to be time
+            time_fun <- ~ time
+            formula[[3]] <- time_fun[[2]]
+            group_names <- "time"
+            group_variables <- NA
+            pool_time <- TRUE
+
+        } else {
+            group_names <- as.character(formula[[3]])
+            group_variables <- names(data$subsets)
+        }
+    }
+
+    ## methods
+    methods_avail <- c("manhattan", "euclidean", "canberra", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup" , "binomial", "chao", "cao", "mahalanobis")
+    check.method(method, methods_avail, "method")
+
+    ## warnings
+    check.class(warn, "logical")
+
+    ## Checking if the data is a distance matrix or not
+    matrix <- check.dist.matrix(data$matrix, method = method)
+    was_dist <- matrix[[2]]
+    matrix <- matrix[[1]]
+    if(warn && !was_dist) {
+        warning("The input data for adonis.dispRity is a distance matrix.\nThe results are thus based on the distance matrix for the input data (i.e. dist(data$matrix)).\nMake sure that this is the desired methodological approach!")
+    }
+
+    ## Making the predictors table
+    predictors <- make.factors(data, group_names, group_variables, time_subsets, pool_time)
+
+    if(any(is.na(predictors))) {
+        ## Select the NA predictor
+        na_predictor <- as.vector(apply(predictors, 1, is.na))
+        warning(paste0("The following element(s) has no associated predictor value and was removed from the analysis: ", paste(attr(matrix, "Labels")[na_predictor], collapse = ", "), "."))
+
+        ## Removing the data from the predictors
+        predictors_tmp <- as.matrix(predictors[!na_predictor,])
+        colnames(predictors_tmp) <- colnames(predictors)
+        for(col in 1:ncol(predictors)) {
+            predictors_tmp[,col] <- factor(predictors_tmp[,col])
+        }
+        predictors <- data.frame(predictors_tmp)
+
+        ## Removing the data from the matrix
+        matrix_tmp <- as.matrix(matrix)
+        matrix_tmp <- matrix_tmp[,-which(na_predictor)]
+        matrix_tmp <- matrix_tmp[-which(na_predictor),] 
+        matrix <- as.dist(matrix_tmp)
+    }
+
+    ## Run adonis
+    adonis_out <- vegan::adonis(formula, predictors, method = method, ...)
+    #adonis_out <- vegan::adonis(formula, predictors, method = method) ; warning("DEBUG adonis.dispRity")
+
+    ## Update the formula
+    if(time_subsets) {
+        if(pool_time) {
+            time_f <- ~time
+        } else {
+            time_f <- ~time.subsets
+        }
+        formula[[3]] <- time_f[[2]]
+    }
+    if(!was_dist) {
+        matrix_f <- ~dist(matrix)
+        formula[[2]] <- matrix_f[[2]]
+    }
+
+    ## Update the call
+    adonis_out$call[[which(names(adonis_out$call) == "formula")]] <- as.expression(formula)[[1]]
+    adonis_out$call[[which(names(adonis_out$call) == "data")]] <- match_call$data
+    if(method == "euclidean") {
+        adonis_out$call[[which(names(adonis_out$call) == "method")]] <- "euclidean"
+    } else {
+        adonis_out$call[[which(names(adonis_out$call) == "method")]] <- match_call$method
+    }
+
+    return(adonis_out)
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,7 +4,7 @@ warnings_are_errors: false @@
     branches:
       only:
-    #    - master  #(deactivated for revisions)
+        - master
         - release
     # Code coverage
@@ Expand Down @@