update to 0.8.0

divDyn · Jun 12, 2019 · a4c911c · a4c911c
1 parent caa9db1
commit a4c911c
Show file tree

Hide file tree

Showing 59 changed files with 4,436 additions and 2,325 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,17 +1,17 @@
 Package: divDyn
 Type: Package
 Title: Diversity Dynamics using Fossil Sampling Data
-Version: 0.7.1
+Version: 0.8.0
 Author: Adam T. Kocsis, John Alroy, Carl J. Reddin, Wolfgang Kiessling
 Maintainer: Adam T. Kocsis <adam.t.kocsis@gmail.com>
 Description: Functions to describe sampling and diversity dynamics of fossil occurrence datasets (e.g. from the Paleobiology Database). The package includes methods to calculate range- and occurrence-based metrics of taxonomic richness, extinction and origination rates, along with traditional sampling measures. A powerful subsampling tool is also included that implements frequently used sampling standardization methods in a multiple-bin framework. The plotting of time series and the occurrence data can be simplified by the functions incorporated in the package, as well as other calculations, such as environmental affinities and extinction selectivity testing. Details can be found in: Kocsis, A.T.; Reddin, C.J.; Alroy, J. and Kiessling, W. (2019) <doi:10.1101/423780>.
 License: CC BY 4.0
-Date: 2019-02-18
+Date: 2019-06-12
 BugReports: https://github.com/divDyn/r_package/issues
 Encoding: UTF-8
 LazyData: false
 Depends:
-    R (>= 3.3.0)
+    R (>= 3.5.0)
 Imports: 
 	Rcpp, 
 	stats,

diff --git a/NAMESPACE b/NAMESPACE
@@ -26,7 +26,9 @@ export(modeltab)
 export(fill)
 export(georange)
 export(tabinate)
-
+export(tsbars)
+export(repmatch)
+export(slice)
 
 importFrom("Rcpp", evalCpp)
 importFrom("grDevices", "col2rgb", "rgb")

diff --git a/R/affinity.R b/R/affinity.R
@@ -8,7 +8,7 @@
 #'
 #' \code{'majority'}: Environmental affinity will be assigned based on the number of occurrences of the taxon in the different environments, without taking sampling of the entire dataset into account. If the taxon has more occurrences in \emph{environment 1}, the function will return \emph{environment 1} as the preferred habitat. 
 #'
-#' \code{'binom'}: The proportion of occurrences of a taxon in \emph{environment 1} and \emph{environment 2} will be compared to a null model, which is based on the distribution of all occurrences from the stratigraphic range of the taxon (in \code{dat} or if provided, in \code{reldat}). Then a binomial test is run on with the numbers of the most likely preference (against all else). The \code{alpha} value indicates the significance of the binomial tests, setting \code{alpha} to \code{1} will effectively switch the testing off: if the ratio of occurrences for the taxon is different from the ratio observed in the dataset, an affinity will be assigned. This is the default method. If an environment is not sampled at all in the dataset to which the taxon's occurrences are compared to, the binomial method returns \code{NA} for the taxon's affinity. 
+#' \code{'binom'}: The proportion of occurrences of a taxon in \emph{environment 1} and \emph{environment 2} will be compared to a null model, which is based on the distribution of all occurrences from the stratigraphic range of the taxon (in \code{x} or if provided, in \code{reldat}). Then a binomial test is run with the numbers of the most likely preference (against all else). The \code{alpha} value indicates the significance level of the binomial tests; setting \code{alpha} to \code{1} will effectively switch the testing off: if the ratio of occurrences for the taxon is different from the ratio observed in the dataset, an affinity will be assigned. This is the default method. If an environment is not sampled at all in the dataset to which the taxon's occurrences are compared, the binomial method returns \code{NA} for the taxon's affinity. 
 #' 
 #' \strong{References}
 #'
@@ -18,16 +18,16 @@
 #'
 #' Kiessling, W., & Kocsis, Á. T. (2015). Biodiversity dynamics and environmental occupancy of fossil azooxanthellate and zooxanthellate scleractinian corals. Paleobiology, 41(3), 402-414.
 #' 
-#' @param dat \code{(data.frame)} The occurrence dataset containing the taxa with unknown environmental affinities.
+#' @param x \code{(data.frame)} The occurrence dataset containing the taxa with unknown environmental affinities.
 #' @param env \code{(character)} The environmental variable of the occurrences.
 #' @param method \code{(character)} The method used for affinity calculations. Can be either \code{"binom"} or \code{"majority"}.
 #' @param tax \code{(character)} The column name of taxon names.
 #' @param bin \code{(character)} The column name of bin names.
 #' @param coll \code{(character)} The column name of collection identifiers (optional). If this is provided, then the multiple entries of a taxon within the collections will be treated as 1.
 #' @param alpha \code{(numeric)} The alpha value of the binomial tests. By default binomial testing is off (\code{alpha=1}) and the method returns the environment with the highest likelihood (odds ratio) as the preferred one. 
-#' @param reldat \code{(data.frame)} Database with the same structure as \code{dat}.  \code{dat} is typically a subset of \code{reldat}. If given, the occurrence distribution of \code{reldat} is used 
-#' as the null model of sampling. Defaults to \code{NULL}, which means that \code{dat} itself will be used as \code{reldat}.
-#' @param na.rm \code{(logical)} Should the \code{NA} entries in the relevant columns of \code{dat} be omitted automatically?
+#' @param reldat \code{(data.frame)} Database with the same structure as \code{x}.  \code{x} is typically a subset of \code{reldat}. If given, the occurrence distribution of \code{reldat} is used 
+#' as the null model of sampling. Defaults to \code{NULL}, which means that \code{x} itself will be used as \code{reldat}.
+#' @param na.rm \code{(logical)} Should the \code{NA} entries in the relevant columns of \code{x} be omitted automatically?
 #' @param bycoll \code{(logical)} If set to \code{TRUE}, the number of collections (or samples, in \code{coll}) will be used rather than the number of occurrences.
 #'
 #' @examples
@@ -39,9 +39,9 @@
 #'	  aff<-affinity(fossilEnv, env="bath", tax="genus", bin="stg", alpha=1)
 #'	
 #' @export
-affinity<-function(dat, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat=NULL, na.rm=FALSE, bycoll=FALSE){
+affinity<-function(x, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat=NULL, na.rm=FALSE, bycoll=FALSE){
 # version 2.0
-#	dat <- fossilEnv
+#	x <- fossilEnv
 #	env <- "bath"
 #	tax <- "genus"
 #	bin <- "stg"
@@ -53,18 +53,18 @@ affinity<-function(dat, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat
 
 	if(method=="majority" & !is.null(reldat)) warning("Majority rule selected, reldat will be ignored.")
 
-	if(bycoll) if(any(!coll%in%colnames(dat))) stop("The \'coll\' argument has to be a column name of \'dat\' if \'bycoll=TRUE\'.")
+	if(bycoll) if(any(!coll%in%colnames(x))) stop("The \'coll\' argument has to be a column name of \'x\' if \'bycoll=TRUE\'.")
 
-	# omit everything from dat that is not necessary
-		dat<-unique(dat[,c(coll, tax,bin, env)]) # this can be faster!!
+	# omit everything from x that is not necessary
+		x<-unique(x[,c(coll, tax,bin, env)]) # this can be faster!!
 
 	match.arg(method, c("binom", "majority"))
 
 	# omit NA bins
-	naBin <- is.na(dat[,bin])
-	naTax <- is.na(dat[,tax])
-	naEnv <- is.na(dat[,env])
-	if(!is.null(coll)) naColl <- is.na(dat[,coll])
+	naBin <- is.na(x[,bin, drop=TRUE])
+	naTax <- is.na(x[,tax, drop=TRUE])
+	naEnv <- is.na(x[,env, drop=TRUE])
+	if(!is.null(coll)) naColl <- is.na(x[,coll, drop=TRUE])
 
 	if(!na.rm){
 		# stop execution
@@ -75,29 +75,29 @@ affinity<-function(dat, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat
 	}else{
 		# go forward
 		if(!is.null(coll)){
-			dat <- dat[!naBin & !naTax & !naEnv & !naColl, ]
+			x <- x[!naBin & !naTax & !naEnv & !naColl, ]
 		}else{
-			dat <- dat[!naBin & !naTax & !naEnv, ]
+			x <- x[!naBin & !naTax & !naEnv, ]
 		}
 	}
 
 	# the affinity variable
-		affLevels<-levels(factor(dat[,env]))
+		affLevels<-levels(factor(x[,env, drop=TRUE]))
 
 	# create an FAD-LAD matrix first
-		dFL<-fadlad(dat, tax, bin)
+		dFL<-fadlad(x, tax, bin)
 
 	# by this time fadLad will probably give a warning, but process this nevertheless
-		if(any(""==dat[,tax])){
+		if(any(""==x[,tax, drop=TRUE])){
 			rownames(dFL)[rownames(dFL)==""] <- "emptyQuotes"
-			dat[dat[,tax]=="",tax] <- "emptyQuotes"
+			x[x[,tax]=="",tax] <- "emptyQuotes"
 		}
 
 	# add the names to the matrix so apply can process it
 		dFL$taxon<-rownames(dFL)
 
 	# the bins
-		allBins<-sort(unique(dat[,bin]))
+		allBins<-sort(unique(x[,bin, drop=TRUE]))
 
 	# make a 3D array - bin, taxon, environment
 		occArr <- array(0, dim=c(length(allBins), nrow(dFL), length(affLevels)))
@@ -107,55 +107,55 @@ affinity<-function(dat, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat
 	# fill it with values
 	for(i in 1:length(affLevels)){
 		# environment-specific subset
-		firstDat<-dat[dat[,env]==affLevels[i],]
+		firstDat<-x[x[,env, drop=TRUE]==affLevels[i],]
 
 		# tabulate
-		fTab<-table(firstDat[,bin], firstDat[,tax])
+		fTab<-table(firstDat[,bin, drop=TRUE], firstDat[,tax, drop=TRUE])
 		class(fTab)<-"matrix"
 
 		# add to the array
 		occArr[rownames(fTab), colnames(fTab),i]<-fTab
 	}
 
 	# reference/relative dataset
-		# as dat is not used from now on, use that to save time and memory
+		# as x is not used from now on, use that to save time and memory
 		# in case a relative dataset is added
 		if(!is.null(reldat)){
-			dat <- reldat
+			x <- reldat
 		}
 
 		# by-collection
 		if(bycoll){
 			# omit occurrence-level data, use only collections
-			dat <- unique(dat[,c(bin, env, coll)])
+			x <- unique(x[,c(bin, env, coll)])
 		}
 
 		# tabulate
-		relTab <- table(dat[,bin], dat[,env])
+		relTab <- table(x[,bin, drop=TRUE], x[,env, drop=TRUE])
 		class(relTab) <-"matrix"
 
 		# check if reldat was a different dataset
 		if(!is.null(reldat)){
 			# POTENTIAL FORKING POINT!
 			# select only those columns that are present in the analyzed dataset
-			if(any(!affLevels%in%colnames(relTab))) stop("The provided \'reldat\' does not contain all \'env\' entries of \'dat\'")
-			if(any(!allBins%in%rownames(relTab))) stop("The provided \'reldat\' does not contain all \'bin\' entries of \'dat\'")
+			if(any(!affLevels%in%colnames(relTab))) stop("The provided \'reldat\' does not contain all \'env\' entries of \'x\'")
+			if(any(!allBins%in%rownames(relTab))) stop("The provided \'reldat\' does not contain all \'bin\' entries of \'x\'")
 		}
 
 		# pass only the relevant entries
 		relTab<-relTab[as.character(allBins),affLevels]
 
 
 	# calculate the affinity of every taxon
-	affVarTaxon<-apply(dFL, 1, FUN=function(x){
+	affVarTaxon<-apply(dFL, 1, FUN=function(w){
 
 #	affVarTaxon<-character(length(dFL$taxon))
 #	for (i in 1:length(affVarTaxon)){
-#		x<- dFL[i,]
+#		w<- dFL[i,]
 
 	#subset of the taxons ranges
 		# bins where the taxon is present
-		vectRange<-as.numeric(x[1]):as.numeric(x[2])
+		vectRange<-as.numeric(w[1]):as.numeric(w[2])
 
 		# what you relate to
 			thisRel <- relTab[as.character(vectRange), , drop=FALSE]
@@ -164,7 +164,7 @@ affinity<-function(dat, tax, bin, env, coll=NULL, method="binom", alpha=1,reldat
 			relProbs<-apply(thisRel, 2, sum)/sum(thisRel)
 
 		# taxon occurrences in the different evnironment	
-			taxOccBin <- occArr[as.character(vectRange), as.character(x[length(x)]),, drop=FALSE]
+			taxOccBin <- occArr[as.character(vectRange), as.character(w[length(w)]),, drop=FALSE]
 			taxOcc <- apply(taxOccBin, c(2,3), sum)
 
 		# all occurrences of the taxon