diff --git a/.Rbuildignore b/.Rbuildignore index 9d6a1c7..a80f58b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,10 +3,11 @@ ^README\.md ^\.github$ ^LICENSE\.md$ -^pics$ ^doc$ ^Meta$ ^\.vscode$ +^\.zenodo\.json$ ^_pkgdown\.yml$ ^docs$ ^pkgdown$ +^vignettes/secsse_performance\.Rmd$ diff --git a/.gitignore b/.gitignore index 8cf1bdc..ea14dc4 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,3 @@ test.R test2.R /doc/ /Meta/ -docs diff --git a/.vscode/launch.json b/.vscode/launch.json index 5c5e94e..28d08ca 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -7,6 +7,7 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ + { "name": "(gbd) devtools::test()", "type": "cppdbg", @@ -68,7 +69,7 @@ "preLaunchTask": "genenv" }, { - "name": "(gbd) test_hanno.R", + "name": "(gbd) acc", "type": "cppdbg", "request": "launch", // The binary, not the script @@ -76,7 +77,7 @@ "args": [ "--vanilla", "-e", - "devtools::load_all(); source('${workspaceFolder}/test_hanno.R')" + "devtools::load_all(); source('${workspaceFolder}/secsse_acc.R')" ], "stopAtEntry": false, // needs to be generated, see below @@ -98,7 +99,7 @@ "preLaunchTask": "genenv" }, { - "name": "(gbd) secsse_acc.R", + "name": "(gbd) cla", "type": "cppdbg", "request": "launch", // The binary, not the script @@ -106,7 +107,37 @@ "args": [ "--vanilla", "-e", - "devtools::load_all(); source('${workspaceFolder}/secsse_acc.R')" + "devtools::load_all(); source('${workspaceFolder}/secsse_cla.R')" + ], + "stopAtEntry": false, + // needs to be generated, see below + "envFile": "${workspaceFolder}/.vscode/.env", + "cwd": "${workspaceFolder}", + "externalConsole": false, + "MIMode": "gdb", + //"miDebuggerPath": "/usr/bin/gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + // 'R' is a script that sets a ton of environment variables + // required by the R binary. This task emulates that part of + // the R script: + "preLaunchTask": "genenv" + }, + { + "name": "(gbd) store", + "type": "cppdbg", + "request": "launch", + // The binary, not the script + "program": "${env:HOME}/opt/bin/Rroot/lib/R/bin/exec/R", + "args": [ + "--vanilla", + "-e", + "devtools::load_all(); source('${workspaceFolder}/secsse_store.R')" ], "stopAtEntry": false, // needs to be generated, see below diff --git a/.vscode/settings.json b/.vscode/settings.json index 9617104..09b7112 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -75,6 +75,12 @@ "__verbose_abort": "cpp", "ios": "cpp", "locale": "cpp", - "version": "cpp" + "version": "cpp", + "__tree": "cpp", + "queue": "cpp", + "span": "cpp", + "charconv": "cpp", + "__errc": "cpp", + "__mutex_base": "cpp" } } \ No newline at end of file diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..1f416f6 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,55 @@ +{ + "title": "secsse: Several Examined and Concealed States-Dependent Speciation and Extinction", + "license": "GPL-3.0", + "upload_type": "software", + "description": "
SecSSE is an R package designed for multistate data sets under a concealed state and speciation (hisse) framework. In this sense, it is parallel to the 'MuSSE' functionality implemented in diversitree, but it accounts for finding possible spurious relationships between traits and diversification rates ('false positives', Rabosky & Goldberg 2015) by testing against a 'hidden trait' (Beaulieu et al. 2013), which is responsible for more variation in diversification rates than the trait being investigated. <\/p>", + "keywords": [ + "Evolving traits", + "macroevolution", + "phylogenetic tools", + "speciation rates", + "model", + "maximum-likelihood", + "parameter estimation" + ], + "access_right": "open", + "language": "eng", + "contributors": [ + { + "name": "Janzen, Thijs", + "affiliation": "University of Groningen", + "orcid": "0000-0002-4162-1140", + "type": "ProjectMember" + }, + { + "name": "Hildenbrandt, Hanno", + "affiliation": "University of Groningen", + "orcid": "0000-0002-6784-1037", + "type": "ProjectMember" + }, + { + "name": "Santos Neves, Pedro", + "affiliation": "University of Groningen", + "orcid": "0000-0003-2561-4677", + "type": "ProjectMember" + } + ], + "creators": [ + { + "name": "Herrera Alsina, Leonel", + "affiliation": "University of Aberdeen", + "orcid": "0000-0003-0474-3592", + }, + { + "name": "van Els, Paul", + "affiliation": "Sovon Dutch Centre for Field Ornithology", + "orcid": "0000-0002-9499-8873", + }, + { + "name": "Etienne, Rampal S.", + "affiliation": "University of Groningen", + "orcid": "0000-0003-2142-7612", + }, + ], + "notes": "Compiled code (*.cpp and *.h files) is licensed under the BSL-1.0. See file COPYRIGHTS and LICENSE.note for mode details", +} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 270190d..37be63b 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: secsse Type: Package Title: Several Examined and Concealed States-Dependent Speciation and Extinction -Version: 2.6.0 -Date: 2023-06-27 +Version: 3.0.0 +Date: 2023-07-27 License: GPL (>= 3) | file LICENSE Authors@R: c( person(given = "Leonel", @@ -50,13 +50,11 @@ Imports: RcppParallel, ggplot2, tibble, - rlang, - stringr + rlang Suggests: diversitree, phytools, testthat, - testit, knitr, rmarkdown LinkingTo: @@ -67,8 +65,9 @@ NeedsCompilation: yes SystemRequirements: C++17 Encoding: UTF-8 LazyData: true -URL: https://github.com/rsetienne/secsse, - https://rsetienne.github.io/secsse/ +URL: https://rsetienne.github.io/secsse/, + https://github.com/rsetienne/secsse BugReports: https://github.com/rsetienne/secsse/issues VignetteBuilder: knitr RoxygenNote: 7.2.3 +Roxygen: list(markdown = TRUE) diff --git a/LICENSE.note b/LICENSE.note index a3bffe1..152bf82 100644 --- a/LICENSE.note +++ b/LICENSE.note @@ -1,18 +1,12 @@ The secsse package as a whole is distributed under >= GPL-3, the license of which can be found in the distributed file LICENSE. The secsse package includes code written by one of the package contributors that is distributed under BSL-1.0: * src/config.h -* src/rhs.h * src/odeint.h -* src/secsse_sim.h -* src/threaded_ll.h -* src/util.h +* src/secsse_rhs.h +* src/secsse_eval.h * src/secsse_sim.cpp -* src/util.cpp +* src/secsse_sim.h * src/secsse_loglik.cpp -* src/cla_loglik.cpp -* src/cla_loglik_threaded.cpp -* src/cla_secsse_store.cpp -* src/secsse_loglik_store.cpp -* src/secsse_loglik_threaded.cpp +* src/secsse_loglik.h Full copies of the BSL-1.0 license used by these files is included in `inst/LICENSE_1_0.txt`, as is a license and copyright notice on said files, while details are also in `inst/COPYRIGHTS`. diff --git a/NAMESPACE b/NAMESPACE index 0260adc..26f3528 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,22 +1,21 @@ # Generated by roxygen2: do not edit by hand export(cla_id_paramPos) -export(cla_secsse_eval) export(cla_secsse_loglik) export(cla_secsse_ml) export(cla_secsse_ml_func_def_pars) -export(create_default_lambda_list) -export(create_default_q_list) -export(create_lambda_matrices) -export(create_mus) -export(create_transition_matrix) +export(create_default_lambda_transition_matrix) +export(create_default_shift_matrix) +export(create_lambda_list) +export(create_mu_vector) +export(create_q_matrix) +export(default_params_doc) export(event_times) export(expand_q_matrix) export(extract_par_vals) export(fill_in) export(id_paramPos) export(plot_state_exact) -export(plot_state_exact_cla) export(prepare_full_lambdas) export(q_doubletrans) export(secsse_loglik) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..c52a4bc --- /dev/null +++ b/NEWS.md @@ -0,0 +1,92 @@ +# secsse 3.0.0 + +Version 3.0.0 extends the C++ code base used for the standard likelihood to the "cla_" +likelihood, harnessing the same computation improvement. + +## Breaking changes +* Function name changes: + * `create_lambda_matrices()` is now called `create_lambda_list()` + * `create_transition_matrix()` is now called `create_q_matrix()` + * `create_mus()` is now called `create_mu_vector()` + * `create_default_q_list()` is now called `create_default_shift_matrix()` + * `create_default_lambda_list ()` is now called `create_default_lambda_transition_matrix()` + * `create_default_q_list()` is now called `create_default_shift_matrix()` +* Package data files renamed: + * `phylo_Vign` is now called `phylo_vignette` + * `traitinfo` is now called `traits` + * `phy` is now called `example_phy_GeoSSE` +* `plot_state_exact()` argument `steps` renamed to `num_steps` and argument +`focal_tree` renamed to `phy` for consistency with other functions. + +## Major changes + +* Vastly improve the computational speed of "cla_" likelihood calculation. +* Optimization of parallelization resulting in better scaling with more threads +and faster run time for standard secsse and cla_secsse likelihood calculations. + +## Minor changes +* Added a `NEWS.md` file to track changes to the package. +* Documentation reworked into `default_params_doc()`. +* Several documentation formatting improvements and linking. Documentation now +follows and allows for roxygen2 markdown. +* A new vignette: + * _Using secsse with complete phylogenies (with extinction)_ `vignette("complete_tree", package = "secsse")` +* A new [pkgdown website](https://rsetienne.github.io/secsse/index.html)! + * It contains all the documentation and vignettes of the package, along with + additional interesting information like the _Secsse versions_ article with + details on performance and the development history of secsse. +* Revise, combine and simplify the _Using SecSSE ML search_ and _Setting up a +secsse analysis_ into the _Starting secsse_ vignette +`vignette("starting_secsse", package = "secsse")`. +* `secsse_sim()` argument `conditioning` now defaults to `"obs_states"` from +`"none"`. +* No longer Import package 'stringr' and Suggest package 'testit'. +* New organisation of code in .R, .cpp and .h files. (Developer side). +* Start archiving in Zenodo, with new .zenodo.json metadata file. + +## Bug fixes +* `secsse_sim()` fix bug causing error when simulating trees with extinct +species. + +# 2.6.0 + +## Major changes +* C++ code base for the standard likelihood, making smarter use of +parallelization, this marks another 10-fold increase in speed. + +## Minor changes +* Add a number of helper functions: `fill_in()`, `create_default_q_list()`, +`create_default_transition_list()`, `create_mus()` +* Implemented necessary changes to comply with CRAN clang16 build and solve +issue with the boost odeint library uninitialized variable +(see https://github.com/boostorg/odeint/issues/59 and more details at +https://github.com/rsetienne/DAISIE/pull/158) +* Updated Copyright license to the Boost Software License, Version 1.0 for +included C++ code (R code remains GPL>=3). + +## Bug fixes +* Fix memory leaks + +# 2.5.0 +Version 2.5.0 appeared in 2021 on GitHub and was published in May 2023 on CRAN. +Version 2.5.0 marks the first version using C++ to perform the integration, +and it used tbb (from the RcppParallel package) to perform multithreading. This +marks a ten fold increase in speed over previous versions. +Secondly, 2.5.0 introduces the function `secsse_sim()` to simulate a +diversification process using the (cla) secsse framework. +Lastly, in version 2.5.0 functions were added to allow visualisation of +inferred rates of speciation across the tree (e.g. `plot_state_exact()` and +`secsse_loglik_eval()`). + +# 2.0.0 +Version 2.0.0 appeared in June of 2019 on CRAN and extended the package with the +cla framework, e.g. including state shifts during speciation / asymmetric +inheritance during speciation. + +# 1.0.0 +The first version of secsse appeared in January of 2019 on CRAN. It used the +package deSolve to solve all integrations, and could switch between either using +a fully R based evaluation, or use FORTRAN to speed up calculations. +Furthermore, using the foreach package, within-R parallelization was +implemented. However, parallelization only situationally improved computation +times, and generally, computation was relatively slow. diff --git a/R/RcppExports.R b/R/RcppExports.R index 13a2890..b388e90 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,35 +1,19 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -ct_condition_cla <- function(y, t, ll, mm, Q, method, atol, rtol) { - .Call(`_secsse_ct_condition_cla`, y, t, ll, mm, Q, method, atol, rtol) +eval_cpp <- function(rhs, ances, states, forTime, lambdas, mus, Q, method, atol, rtol, is_complete_tree, num_steps) { + .Call(`_secsse_eval_cpp`, rhs, ances, states, forTime, lambdas, mus, Q, method, atol, rtol, is_complete_tree, num_steps) } -cla_calThruNodes_cpp <- function(ances, states_R, forTime_R, lambdas, mus, Q, method, atol, rtol, is_complete_tree) { - .Call(`_secsse_cla_calThruNodes_cpp`, ances, states_R, forTime_R, lambdas, mus, Q, method, atol, rtol, is_complete_tree) +calc_ll_cpp <- function(rhs, ances, states, forTime, lambdas, mus, Q, method, atol, rtol, is_complete_tree, see_states) { + .Call(`_secsse_calc_ll_cpp`, rhs, ances, states, forTime, lambdas, mus, Q, method, atol, rtol, is_complete_tree, see_states) } -calc_cla_ll_threaded <- function(ances, states_R, forTime_R, lambdas_R, mus_R, Q, num_threads = 1L, method = "odeint::bulirsch_stoer", is_complete_tree = FALSE) { - .Call(`_secsse_calc_cla_ll_threaded`, ances, states_R, forTime_R, lambdas_R, mus_R, Q, num_threads, method, is_complete_tree) +ct_condition_cpp <- function(rhs, state, t, lambdas, mus, Q, method, atol, rtol) { + .Call(`_secsse_ct_condition_cpp`, rhs, state, t, lambdas, mus, Q, method, atol, rtol) } -cla_calThruNodes_store_cpp <- function(ances, states_R, forTime_R, lambdas, mus, Q, method, atol, rtol, is_complete_tree, num_steps, verbose) { - .Call(`_secsse_cla_calThruNodes_store_cpp`, ances, states_R, forTime_R, lambdas, mus, Q, method, atol, rtol, is_complete_tree, num_steps, verbose) -} - -calThruNodes_cpp <- function(ances, states_R, forTime_R, lambdas, mus, Q, num_threads, abstol, reltol, method, is_complete_tree) { - .Call(`_secsse_calThruNodes_cpp`, ances, states_R, forTime_R, lambdas, mus, Q, num_threads, abstol, reltol, method, is_complete_tree) -} - -ct_condition <- function(y, t, ll, mm, Q, method, atol, rtol) { - .Call(`_secsse_ct_condition`, y, t, ll, mm, Q, method, atol, rtol) -} - -calThruNodes_store_cpp <- function(ances, states_R, forTime_R, lambdas, mus, Q, num_threads, abstol, reltol, method, is_complete_tree, num_steps, verbose) { - .Call(`_secsse_calThruNodes_store_cpp`, ances, states_R, forTime_R, lambdas, mus, Q, num_threads, abstol, reltol, method, is_complete_tree, num_steps, verbose) -} - -secsse_sim_cpp <- function(m_R, lambdas_R, q_R, max_time, max_species, init_states, condition, num_concealed_states, non_extinction, verbose, max_tries, seed) { - .Call(`_secsse_secsse_sim_cpp`, m_R, lambdas_R, q_R, max_time, max_species, init_states, condition, num_concealed_states, non_extinction, verbose, max_tries, seed) +secsse_sim_cpp <- function(m_R, lambdas_R, q_R, max_time, max_species, min_species, init_states, condition, num_concealed_states, non_extinction, verbose, max_tries, seed, conditioning_vec) { + .Call(`_secsse_secsse_sim_cpp`, m_R, lambdas_R, q_R, max_time, max_species, min_species, init_states, condition, num_concealed_states, non_extinction, verbose, max_tries, seed, conditioning_vec) } diff --git a/R/cla_secsse_eval.R b/R/cla_secsse_eval.R deleted file mode 100644 index 775b636..0000000 --- a/R/cla_secsse_eval.R +++ /dev/null @@ -1,98 +0,0 @@ -#' Evaluation of probabilities of observing states along branches. -#' @title Likelihood for SecSSE model, using Rcpp -#' @param parameter list where the first is a table where lambdas across -#' different modes of speciation are shown, the second mus and the third -#' transition rates. -#' @param phy phylogenetic tree of class phylo, ultrametric, fully-resolved, -#' rooted and with branch lengths. -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. -#' @param ancestral_states ancestral states matrix provided by -#' cla_secsse_loglik, this is used as starting points for manual integration -#' @param num_steps number of steps to integrate along a branch -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:'maddison_weigh -#' ,'proper_weights'(default) or 'equal_weights'. It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per trait -#' state. It must have as many elements as trait states. -#' @param setting_calculation argument used internally to speed up calculation. -#' It should be leave blank (default : setting_calculation = NULL) -#' @param loglik_penalty the size of the penalty for all parameters; default is -#' 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species is -#' provided -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param verbose provide intermediate verbose output if TRUE -#' @return The loglikelihood of the data given the parameters -#' @description Using see_ancestral_states = TRUE in the function -#' cla_secsse_loglik will provide posterior probabilities of the states of the -#' model on the nodes of the tree, but will not give the values on the branches. -#' This function evaluates these probabilities at fixed time intervals dt. -#' Because dt is fixed, this may lead to some inaccuracies, and dt is best -#' chosen as small as possible. -#' @export -cla_secsse_eval <- function(parameter, - phy, - traits, - num_concealed_states, - ancestral_states, - num_steps = NULL, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - setting_calculation = NULL, - loglik_penalty = 0, - is_complete_tree = FALSE, - method = "odeint::bulirsch_stoer", - atol = 1e-16, - rtol = 1e-16, - verbose = FALSE) { - lambdas <- parameter[[1]] - mus <- parameter[[2]] - parameter[[3]][is.na(parameter[[3]])] <- 0 - Q <- parameter[[3]] # nolint - - if (is.null(setting_calculation)) { - check_input(traits, - phy, - sampling_fraction, - root_state_weight, - is_complete_tree) - setting_calculation <- build_initStates_time(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus) - } - - forTime <- setting_calculation$forTime # nolint - ances <- setting_calculation$ances - - calcul <- c() - ancescpp <- ances - 1 - forTimecpp <- forTime # nolint - forTimecpp[, c(1, 2)] <- forTimecpp[, c(1, 2)] - 1 # nolint - calcul <- cla_calThruNodes_store_cpp(ancescpp, - ancestral_states, - forTimecpp, - lambdas, - mus, - Q, - method, - atol, - rtol, - is_complete_tree, - ifelse(is.null(num_steps), 0, num_steps), - verbose) - return(calcul) -} diff --git a/R/cla_secsse_loglik.R b/R/cla_secsse_loglik.R deleted file mode 100755 index 43744b9..0000000 --- a/R/cla_secsse_loglik.R +++ /dev/null @@ -1,267 +0,0 @@ -#' Loglikelihood calculation for the cla_SecSSE model given a set of parameters -#' and data using Rcpp -#' @title Likelihood for SecSSE model, using Rcpp -#' @param parameter list where the first is a table where lambdas across -#' different modes of speciation are shown, the second mus and the third -#' transition rates. -#' @param phy phylogenetic tree of class phylo, ultrametric, fully-resolved, -#' rooted and with branch lengths. -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:'maddison_weigh -#' ,'proper_weights'(default) or 'equal_weights'. It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per trait -#' state. It must have as many elements as trait states. -#' @param setting_calculation argument used internally to speed up calculation. -#' It should be leave blank (default : setting_calculation = NULL) -#' @param see_ancestral_states should the ancestral states be shown? Deafault -#' FALSE -#' @param loglik_penalty the size of the penalty for all parameters; default is -#' 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species is -#' provided -#' @param num_threads number of threads to be used, default is 1. Set to -1 to -#' use all available threads. -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @return The loglikelihood of the data given the parameters -#' @note Multithreading might lead to a slightly reduced accuracy -#' (in the order of 1e-8) and is therefore not enabled by default. -#' Please use at your own discretion. -#' @examples -#'rm(list=ls(all=TRUE)) -#'library(secsse) -#'set.seed(13) -#'phylotree <- ape::rcoal(12, tip.label = 1:12) -#'traits <- sample(c(0,1,2),ape::Ntip(phylotree),replace=TRUE) -#'num_concealed_states <- 3 -#'sampling_fraction <- c(1,1,1) -#'phy <- phylotree -#'# the idparlist for a ETD model (dual state inheritance model of evolution) -#'# would be set like this: -#'idparlist <- cla_id_paramPos(traits,num_concealed_states) -#'lambd_and_modeSpe <- idparlist$lambdas -#'lambd_and_modeSpe[1,] <- c(1,1,1,2,2,2,3,3,3) -#'idparlist[[1]] <- lambd_and_modeSpe -#'idparlist[[2]][] <- 0 -#'masterBlock <- matrix(4,ncol=3,nrow=3,byrow=TRUE) -#'diag(masterBlock) <- NA -#'idparlist [[3]] <- q_doubletrans(traits,masterBlock,diff.conceal = FALSE) -#'# Now, internally, clasecsse sorts the lambda matrices, so they look like: -#'prepare_full_lambdas(traits,num_concealed_states,idparlist[[1]]) -#'# which is a list with 9 matrices, corresponding to the 9 states -#'# (0A,1A,2A,0B,etc) -#'# if we want to calculate a single likelihood: -#'parameter <- idparlist -#'lambda_and_modeSpe <- parameter$lambdas -#'lambda_and_modeSpe[1,] <- c(0.2,0.2,0.2,0.4,0.4,0.4,0.01,0.01,0.01) -#'parameter[[1]] <- prepare_full_lambdas(traits,num_concealed_states, -#'lambda_and_modeSpe) -#'parameter[[2]] <- rep(0,9) -#'masterBlock <- matrix(0.07, ncol=3, nrow=3, byrow=TRUE) -#'diag(masterBlock) <- NA -#'parameter [[3]] <- q_doubletrans(traits,masterBlock,diff.conceal = FALSE) -#'cla_secsse_loglik(parameter, phy, traits, num_concealed_states, -#' cond = 'maddison_cond', -#' root_state_weight = 'maddison_weights', sampling_fraction, -#' setting_calculation = NULL, -#' see_ancestral_states = FALSE, -#' loglik_penalty = 0) -#'# LL = -42.18407 -#' @export -cla_secsse_loglik <- function(parameter, - phy, - traits, - num_concealed_states, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - setting_calculation = NULL, - see_ancestral_states = FALSE, - loglik_penalty = 0, - is_complete_tree = FALSE, - num_threads = 1, - method = "odeint::bulirsch_stoer", - atol = 1e-8, - rtol = 1e-7) { - lambdas <- parameter[[1]] - mus <- parameter[[2]] - parameter[[3]][is.na(parameter[[3]])] <- 0 - Q <- parameter[[3]] # nolint - - num_modeled_traits <- ncol(Q) / floor(num_concealed_states) - - if (is.null(setting_calculation)) { - check_input(traits, - phy, - sampling_fraction, - root_state_weight, - is_complete_tree) - setting_calculation <- build_initStates_time(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus, - num_modeled_traits, - first_time = TRUE) - } - states <- setting_calculation$states - - if (is_complete_tree) { - states <- build_states(phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - sampling_fraction = sampling_fraction, - is_complete_tree = is_complete_tree, - mus = mus, - num_unique_traits = num_modeled_traits, - first_time = FALSE) - } - - forTime <- setting_calculation$forTime # nolint - ances <- setting_calculation$ances - - if (num_concealed_states != round(num_concealed_states)) { - # for testing - d <- ncol(states) / 2 - new_states <- states[, c(1:sqrt(d), (d + 1):((d + 1) + sqrt(d) - 1))] - new_states <- states[, c(1, 2, 3, 10, 11, 12)] - states <- new_states - } - - loglik <- 0 - d <- ncol(states) / 2 - - if (see_ancestral_states == TRUE && num_threads != 1) { - warning("see ancestral states only works with one thread, - setting to one thread") - num_threads <- 1 - } - - calcul <- update_using_cpp(ances, states, forTime, lambdas, mus, Q, method, - atol, rtol, is_complete_tree, num_threads) - - mergeBranch <- calcul$mergeBranch # nolint - nodeM <- calcul$nodeM # nolint - loglik <- calcul$loglik - states <- calcul$states - - ## At the root - mergeBranch2 <- mergeBranch # nolint - lmb <- length(mergeBranch2) - - weight_states <- get_weight_states(root_state_weight, - num_concealed_states, - mergeBranch, - lambdas, - nodeM, - d, - is_cla = TRUE) - - if (cond == "maddison_cond") { - pre_cond <- rep(NA, lmb) # nolint - for (j in 1:lmb) { - pre_cond[j] <- sum(weight_states[j] * - lambdas[[j]] * - (1 - nodeM[1:d][j]) ^ 2) - } - mergeBranch2 <- mergeBranch2 / sum(pre_cond) # nolint - } - - if (is_complete_tree) { - timeInte <- max(abs(ape::branching.times(phy))) # nolint - y <- rep(0, lmb) - - nodeM <- ct_condition_cla(y, # nolint - timeInte, - lambdas, - mus, - Q, - method, - atol, - rtol) - nodeM <- c(nodeM, y) # nolint - } - - if (cond == "proper_cond") { - pre_cond <- rep(NA, lmb) # nolint - for (j in 1:lmb) { - pre_cond[j] <- sum(lambdas[[j]] * ((1 - nodeM[1:d]) %o% (1 - nodeM[1:d]))) - } - mergeBranch2 <- mergeBranch2 / pre_cond # nolint - } - - wholeLike_atRoot <- sum(mergeBranch2 * weight_states, na.rm = TRUE) # nolint - LL <- log(wholeLike_atRoot) + # nolint - loglik - - penalty(pars = parameter, - loglik_penalty = loglik_penalty) - - if (see_ancestral_states == TRUE) { - num_tips <- ape::Ntip(phy) - # last row contains safety entry from C++ (all zeros) - ancestral_states <- states[(num_tips + 1):(nrow(states) - 1), ] - ancestral_states <- - ancestral_states[, -1 * (1:(ncol(ancestral_states) / 2))] - rownames(ancestral_states) <- ances - return(list(ancestral_states = ancestral_states, LL = LL, states = states)) - } else { - return(LL) - } -} - -#' @keywords internal -update_using_cpp <- function(ances, states, forTime, lambdas, mus, Q, method, - atol, rtol, is_complete_tree, num_threads) { - calcul <- c() - - ancescpp <- ances - 1 - forTimecpp <- forTime # nolint - forTimecpp[, c(1, 2)] <- forTimecpp[, c(1, 2)] - 1 # nolint - - if (num_threads == 1) { - calcul <- cla_calThruNodes_cpp(ancescpp, - states, - forTimecpp, - lambdas, - mus, - Q, - method, - atol, - rtol, - is_complete_tree) - } else { - if (num_threads == -2) { - calcul <- calc_cla_ll_threaded(ancescpp, - states, - forTimecpp, - lambdas, - mus, - Q, - 1, - method, - is_complete_tree) - } else { - calcul <- calc_cla_ll_threaded(ancescpp, - states, - forTimecpp, - lambdas, - mus, - Q, - num_threads, - method, - is_complete_tree) - } - } - return(calcul) -} diff --git a/R/cla_secsse_ml.R b/R/cla_secsse_ml.R deleted file mode 100755 index 4ab4b6d..0000000 --- a/R/cla_secsse_ml.R +++ /dev/null @@ -1,258 +0,0 @@ -#' Maximum likehood estimation under Several examined and concealed -#' States-dependent Speciation and Extinction (SecSSE) with cladogenetic option -#' @title Maximum likehood estimation for (SecSSE) -#' @param phy phylogenetic tree of class phylo, ultrametric, rooted and with -#' branch lengths. -#' @param traits a vector with trait states for each tip in the phylogeny. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to the number of examined states in the dataset. -#' @param idparslist overview of parameters and their values. -#' @param idparsopt id of parameters to be estimated. -#' @param initparsopt initial guess of the parameters to be estimated. -#' @param idparsfix id of the fixed parameters. -#' @param parsfix value of the fixed parameters. -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states: -#' 'maddison_weights','proper_weights'(default) or 'equal_weights'. -#' It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per -#' trait state. It must have as many elements as there are trait states. -#' @param tol maximum tolerance. Default is 'c(1e-04, 1e-05, 1e-05)'. -#' @param maxiter max number of iterations. Default is -#' '1000*round((1.25)^length(idparsopt))'. -#' @param optimmethod method used for optimization. Available are simplex and -#' subplex, default is 'subplex'. Simplex should only be used for debugging. -#' @param num_cycles number of cycles of the optimization (default is 1). -#' @param loglik_penalty the size of the penalty for all parameters; default is -#' 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param verbose sets verbose output; default is verbose when optimmethod is -#' 'subplex' -#' @param num_threads number of threads. Set to -1 to use all available threads. -#' Default is one thread. -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @return Parameter estimated and maximum likelihood -#' @examples -#'# Example of how to set the arguments for a ML search. -#'library(secsse) -#'library(DDD) -#'set.seed(13) -#'# Check the vignette for a better working exercise. -#'# lambdas for 0A and 1A and 2A are the same but need to be estimated -#'# (CTD model, see Syst Biol paper) -#'# mus are fixed to zero, -#'# the transition rates are constrained to be equal and fixed 0.01 -#'phylotree <- ape::rcoal(31, tip.label = 1:31) -#'#get some traits -#'traits <- sample(c(0,1,2), ape::Ntip(phylotree), replace = TRUE) -#'num_concealed_states <- 3 -#'idparslist <- cla_id_paramPos(traits,num_concealed_states) -#'idparslist$lambdas[1,] <- c(1,1,1,2,2,2,3,3,3) -#'idparslist[[2]][] <- 4 -#'masterBlock <- matrix(5,ncol = 3,nrow = 3,byrow = TRUE) -#'diag(masterBlock) <- NA -#'diff.conceal <- FALSE -#'idparslist[[3]] <- q_doubletrans(traits,masterBlock,diff.conceal) -#'startingpoint <- bd_ML(brts = ape::branching.times(phylotree)) -#'intGuessLamba <- startingpoint$lambda0 -#'intGuessMu <- startingpoint$mu0 -#'idparsopt <- c(1,2,3) -#'initparsopt <- c(rep(intGuessLamba,3)) -#'idparsfix <- c(0,4,5) -#'parsfix <- c(0,0,0.01) -#'tol <- c(1e-04, 1e-05, 1e-07) -#'maxiter <- 1000 * round((1.25) ^ length(idparsopt)) -#'optimmethod <- 'subplex' -#'cond <- 'proper_cond' -#'root_state_weight <- 'proper_weights' -#'sampling_fraction <- c(1,1,1) -#'model <- cla_secsse_ml( -#' phylotree, -#' traits, -#' num_concealed_states, -#' idparslist, -#' idparsopt, -#' initparsopt, -#' idparsfix, -#' parsfix, -#' cond, -#' root_state_weight, -#' sampling_fraction, -#' tol, -#' maxiter, -#' optimmethod, -#' num_cycles = 1, -#' verbose = FALSE) -#' # [1] -90.97626 -#' @export -cla_secsse_ml <- function(phy, - traits, - num_concealed_states, - idparslist, - idparsopt, - initparsopt, - idparsfix, - parsfix, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - tol = c(1e-04, 1e-05, 1e-07), - maxiter = 1000 * round((1.25)^length(idparsopt)), - optimmethod = "subplex", - num_cycles = 1, - loglik_penalty = 0, - is_complete_tree = FALSE, - verbose = (optimmethod == "subplex"), - num_threads = 1, - atol = 1e-8, - rtol = 1e-7, - method = "odeint::bulirsch_stoer") { - - structure_func <- NULL - if (is.matrix(traits)) { - warning("you are setting a model where some species have more - than one trait state") - } - - if (length(initparsopt) != length(idparsopt)) { - stop("initparsopt must be the same length as idparsopt. - Number of parameters to optimize does not match the number of - initial values for the search") - } - - if (length(idparsfix) != length(parsfix)) { - stop("idparsfix and parsfix must be the same length. - Number of fixed elements does not match the fixed figures") - } - - if (anyDuplicated(c(idparsopt, idparsfix)) != 0) { - stop("at least one element was asked to be both fixed and estimated ") - } - - if (identical(as.numeric(sort(c(idparsopt, idparsfix))), - as.numeric(sort(unique(unlist(idparslist))))) == FALSE) { - stop("All elements in idparslist must be included in either - idparsopt or idparsfix ") - } - - if (anyDuplicated(c(unique(sort(as.vector(idparslist[[3]]))), - idparsfix[which(parsfix == 0)])) != 0) { - warning("Note: you set some transitions as impossible to happen.") - } - - if (is.matrix(idparslist[[1]])) { - ## it is a tailor case otherwise - idparslist[[1]] <- prepare_full_lambdas(traits, - num_concealed_states, - idparslist[[1]]) - } - - if (min(initparsopt) <= 0.0) { - stop("All elements in init_parsopt need to be larger than 0") - } - - see_ancestral_states <- FALSE - - trparsopt <- initparsopt / (1 + initparsopt) - trparsopt[which(initparsopt == Inf)] <- 1 - trparsfix <- parsfix / (1 + parsfix) - trparsfix[which(parsfix == Inf)] <- 1 - mus <- calc_mus(is_complete_tree, - idparslist, - idparsfix, - parsfix, - idparsopt, - initparsopt) - optimpars <- c(tol, maxiter) - - num_modeled_traits <- length(idparslist[[1]]) / num_concealed_states - - setting_calculation <- build_initStates_time(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus, - num_modeled_traits) - - initloglik <- secsse_loglik_choosepar(trparsopt = trparsopt, - trparsfix = trparsfix, - idparsopt = idparsopt, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = - num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = - setting_calculation, - see_ancestral_states = - see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - # Function here - print_init_ll(initloglik = initloglik, verbose = verbose) - if (initloglik == -Inf) { - stop("The initial parameter values have a likelihood that is - equal to 0 or below machine precision. - Try again with different initial values.") - } else { - out <- DDD::optimizer(optimmethod = optimmethod, - optimpars = optimpars, - fun = secsse_loglik_choosepar, - trparsopt = trparsopt, - idparsopt = idparsopt, - trparsfix = trparsfix, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = setting_calculation, - see_ancestral_states = see_ancestral_states, - num_cycles = num_cycles, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - if (out$conv != 0) { - stop("Optimization has not converged. - Try again with different initial values.") - } else { - ml_pars1 <- secsse_transform_parameters(as.numeric(unlist(out$par)), - trparsfix, - idparsopt, - idparsfix, - idparslist, - structure_func) - out2 <- list(MLpars = ml_pars1, - ML = as.numeric(unlist(out$fvalues)), - conv = out$conv) - } - } - return(out2) -} diff --git a/R/cla_secsse_ml_func_def_pars.R b/R/cla_secsse_ml_func_def_pars.R deleted file mode 100755 index 0479105..0000000 --- a/R/cla_secsse_ml_func_def_pars.R +++ /dev/null @@ -1,322 +0,0 @@ -#' Maximum likehood estimation under cla Several examined and concealed -#' States-dependent Speciation and Extinction (SecSSE) where some paramaters are -#' functions of other parameters and/or factors. Offers the option of -#' cladogenesis -#' @title Maximum likehood estimation for (SecSSE) with parameter as complex -#' functions. Cladogenetic version -#' @param phy phylogenetic tree of class phylo, ultrametric, rooted and with -#' branch lengths. -#' @param traits a vector with trait states for each tip in the phylogeny. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to the number of examined states in the dataset. -#' @param idparslist overview of parameters and their values. -#' @param idparsopt id of parameters to be estimated. -#' @param initparsopt initial guess of the parameters to be estimated. -#' @param idfactorsopt id of the factors that will be optimized. There are not -#' fixed factors, so use a constant within 'functions_defining_params'. -#' @param initfactors the initial guess for a factor (it should be set to NULL -#' when no factors). -#' @param idparsfix id of the fixed parameters (it should be set to NULL when -#' no factors). -#' @param parsfix value of the fixed parameters. -#' @param idparsfuncdefpar id of the parameters which will be a function of -#' optimized and/or fixed parameters. The order of id should match -#' functions_defining_params -#' @param functions_defining_params a list of functions. Each element will be a -#' function which defines a parameter e.g. id_3 <- (id_1+id_2)/2. See example -#' and vigenette -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:'maddison_weights', -#' 'proper_weights'(default) or 'equal_weights'. It can also be specified the -#' root -#' state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per trait -#' state. It must have as many elements as there are trait states. -#' @param tol maximum tolerance. Default is 'c(1e-04, 1e-05, 1e-05)'. -#' @param maxiter max number of iterations. Default is -#' '1000*round((1.25)^length(idparsopt))'. -#' @param optimmethod method used for optimization. Default is 'simplex'. -#' @param num_cycles number of cycles of the optimization (default is 1). -#' @param loglik_penalty the size of the penalty for all parameters; default -#' is 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param verbose sets verbose output; default is verbose when optimmethod is -#' 'subplex' -#' @param num_threads number of threads. Set to -1 to use all available -#' threads. Default is one thread. -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @return Parameter estimated and maximum likelihood -#' @return Parameter estimated and maximum likelihood -#' @examples -#'# Example of how to set the arguments for a ML search. -#'rm(list=ls(all=TRUE)) -#'library(secsse) -#'library(DDD) -#'set.seed(16) -#'phylotree <- ape::rbdtree(0.07,0.001,Tmax=50) -#'startingpoint <- bd_ML(brts = ape::branching.times(phylotree)) -#'intGuessLamba <- startingpoint$lambda0 -#'intGuessMu <- startingpoint$mu0 -#'traits <- sample(c(0,1,2), -#' ape::Ntip(phylotree), replace = TRUE) # get some traits -#'num_concealed_states <- 3 -#'idparslist <- cla_id_paramPos(traits, num_concealed_states) -#'idparslist$lambdas[1,] <- c(1,2,3,1,2,3,1,2,3) -#'idparslist[[2]][] <- 4 -#'masterBlock <- matrix(c(5,6,5,6,5,6,5,6,5),ncol = 3, nrow=3, byrow = TRUE) -#'diag(masterBlock) <- NA -#'diff.conceal <- FALSE -#'idparslist[[3]] <- q_doubletrans(traits,masterBlock,diff.conceal) -#'idparsfuncdefpar <- c(3,5,6) -#'idparsopt <- c(1,2) -#'idparsfix <- c(0,4) -#'initparsopt <- c(rep(intGuessLamba,2)) -#'parsfix <- c(0,0) -#'idfactorsopt <- 1 -#'initfactors <- 4 -#'# functions_defining_params is a list of functions. Each function has no -#'# arguments and to refer -#'# to parameters ids should be indicated as 'par_' i.e. par_3 refers to -#'# parameter 3. When a -#'# function is defined, be sure that all the parameters involved are either -#'# estimated, fixed or -#'# defined by previous functions (i.e, a function that defines parameter in -#'# 'functions_defining_params'). The user is responsible for this. In this -#'# example, par_3 -#'# (i.e., parameter 3) is needed to calculate par_6. This is correct because -#'# par_3 is defined -#'# in the first function of 'functions_defining_params'. Notice that factor_1 -#'# indicates a value -#'# that will be estimated to satisfy the equation. The same factor can be -#'# shared to define several parameters. -#'functions_defining_params <- list() -#'functions_defining_params[[1]] <- function() { -#' par_3 <- par_1 + par_2 -#'} -#'functions_defining_params[[2]] <- function() { -#' par_5 <- par_1 * factor_1 -#'} -#'functions_defining_params[[3]] <- function() { -#' par_6 <- par_3 * factor_1 -#'} -#' -#'tol = c(1e-02, 1e-03, 1e-04) -#'maxiter = 1000 * round((1.25)^length(idparsopt)) -#'optimmethod = 'subplex' -#'cond <- 'proper_cond' -#'root_state_weight <- 'proper_weights' -#'sampling_fraction <- c(1,1,1) -#'model <- cla_secsse_ml_func_def_pars(phylotree, -#'traits, -#'num_concealed_states, -#'idparslist, -#'idparsopt, -#'initparsopt, -#'idfactorsopt, -#'initfactors, -#'idparsfix, -#'parsfix, -#'idparsfuncdefpar, -#'functions_defining_params, -#'cond, -#'root_state_weight, -#'sampling_fraction, -#'tol, -#'maxiter, -#'optimmethod, -#'num_cycles = 1) -#'# ML -136.5796 -#' @export -cla_secsse_ml_func_def_pars <- function(phy, - traits, - num_concealed_states, - idparslist, - idparsopt, - initparsopt, - idfactorsopt, - initfactors, - idparsfix, - parsfix, - idparsfuncdefpar, - functions_defining_params, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - tol = c(1e-04, 1e-05, 1e-07), - maxiter = 1000 * - round((1.25) ^ length(idparsopt)), - optimmethod = "simplex", - num_cycles = 1, - loglik_penalty = 0, - is_complete_tree = FALSE, - verbose = (optimmethod == "subplex"), - num_threads = 1, - atol = 1e-12, - rtol = 1e-12, - method = "odeint::bulirsch_stoer") { - structure_func <- list() - structure_func[[1]] <- idparsfuncdefpar - structure_func[[2]] <- functions_defining_params - structure_func[[3]] <- idfactorsopt - - see_ancestral_states <- FALSE - if (is.null(idfactorsopt) == FALSE) { - if (length(initfactors) != length(idfactorsopt)) { - stop("idfactorsopt should have the same length than initfactors.") - } - } - - if (is.list(functions_defining_params) == FALSE) { - stop("The argument functions_defining_params should be a - list of functions. See example and vignette") - } - - if (length(functions_defining_params) != length(idparsfuncdefpar)) { - stop("the argument functions_defining_params should have - the same length as idparsfuncdefpar") - } - - if (is.matrix(traits)) { - message("You are setting a model where some species had more - than one trait state \n") - } - - if (length(initparsopt) != length(idparsopt)) { - stop("initparsopt must be the same length as idparsopt. - Number of parameters to optimize does not match the number of - initial values for the search") - } - - if (length(idparsfix) != length(parsfix)) { - stop("idparsfix and parsfix must be the same length. - Number of fixed elements does not match the fixed figures") - } - - if (anyDuplicated(c(idparsopt, idparsfix, idparsfuncdefpar)) != 0) { - stop("At least one element was asked to be fixed, estimated or a - function at the same time") - } - - if (identical(as.numeric(sort(c(idparsopt, idparsfix, idparsfuncdefpar))), - as.numeric(sort(unique(unlist(idparslist))))) == - FALSE) { - stop("All elements in idparslist must be included in either - idparsopt or idparsfix or idparsfuncdefpar.") - } - - if (anyDuplicated(c(unique(sort(as.vector(idparslist[[3]]))), - idparsfix[which(parsfix == 0)])) != 0) { - warning("Warning: you set some transitions as impossible to happen.") - } - - idparslist[[1]] <- prepare_full_lambdas(traits, num_concealed_states, idparslist[[1]]) - see_ancestral_states <- FALSE - - message("Calculating the likelihood for the initial parameters.", "\n") - utils::flush.console() - - initparsopt2 <- c(initparsopt, initfactors) - - trparsopt <- initparsopt2 / (1 + initparsopt2) - trparsopt[which(initparsopt2 == Inf)] <- 1 - trparsfix <- parsfix / (1 + parsfix) - trparsfix[which(parsfix == Inf)] <- 1 - - mus <- calc_mus(is_complete_tree, - idparslist, - idparsfix, - parsfix, - idparsopt, - initparsopt) - - optimpars <- c(tol, maxiter) - - num_modeled_traits <- length(idparslist[[1]]) / num_concealed_states - - setting_calculation <- build_initStates_time(phy, traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus, - num_modeled_traits) - - initloglik <- secsse_loglik_choosepar(trparsopt = trparsopt, - trparsfix = trparsfix, - idparsopt = idparsopt, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = - num_concealed_states, - cond = cond, - root_state_weight = - root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = - setting_calculation, - see_ancestral_states = - see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - print_init_ll(initloglik = initloglik, verbose = verbose) - if (initloglik == -Inf) { - stop("The initial parameter values have a likelihood that is - equal to 0 or below machine precision. - Try again with different initial values.") - } else { - out <- DDD::optimizer(optimmethod = optimmethod, - optimpars = optimpars, - fun = secsse_loglik_choosepar, - trparsopt = trparsopt, - idparsopt = idparsopt, - trparsfix = trparsfix, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = setting_calculation, - see_ancestral_states = see_ancestral_states, - num_cycles = num_cycles, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - if (out$conv != 0) { - stop("Optimization has not converged. - Try again with different initial values.\n") - } else { - ml_pars1 <- secsse_transform_parameters(as.numeric(unlist(out$par)), - trparsfix, - idparsopt, - idparsfix, - idparslist, - structure_func) - out2 <- list(MLpars = ml_pars1, - ML = as.numeric(unlist(out$fvalues)), - conv = out$conv) - } - } - return(out2) -} diff --git a/R/default_params_doc.R b/R/default_params_doc.R new file mode 100644 index 0000000..58aadf6 --- /dev/null +++ b/R/default_params_doc.R @@ -0,0 +1,214 @@ +#' Default parameter documentation +#' +#' This function's purpose is to list all parameter documentation to be +#' inherited by the relevant functions. +#' +#' @param phy phylogenetic tree of class `phylo`, rooted and with +#' branch lengths. +#' @param traits vector with trait states for each tip in the phylogeny. The +#' order of the states must be the same as the tree tips. For help, see +#' `vignette("starting_secsse", package = "secsse")`. +#' @param num_concealed_states number of concealed states, generally equivalent +#' to the number of examined states in the dataset. +#' @param idparslist overview of parameters and their values. +#' @param idparsopt a numeric vector with the ID of parameters to be estimated. +#' @param idfactorsopt id of the factors that will be optimized. There are not +#' fixed factors, so use a constant within `functions_defining_params`. +#' @param initfactors the initial guess for a factor (it should be set to `NULL` +#' when no factors). +#' @param idparsfuncdefpar id of the parameters which will be a function of +#' optimized and/or fixed parameters. The order of id should match +#' `functions_defining_params`. +#' @param functions_defining_params a list of functions. Each element will be a +#' function which defines a parameter e.g. `id_3 <- (id_1 + id_2) / 2`. See +#' example. +#' @param initparsopt a numeric vector with the initial guess of the parameters +#' to be estimated. +#' @param idparsfix a numeric vector with the ID of the fixed parameters. +#' @param parsfix a numeric vector with the value of the fixed parameters. +#' @param cond condition on the existence of a node root: `"maddison_cond"`, +#' `"proper_cond"` (default). For details, see vignette. +#' @param root_state_weight the method to weigh the states: +#' `"maddison_weights"`, `"proper_weights"` (default) or `"equal_weights"`. +#' It can also be specified for the root state: the vector `c(1, 0, 0)` +#' indicates state 1 was the root state. +#' @param sampling_fraction vector that states the sampling proportion per +#' trait state. It must have as many elements as there are trait states. +#' @param tol A numeric vector with the maximum tolerance of the optimization +#' algorithm. Default is `c(1e-04, 1e-05, 1e-05)`. +#' @param maxiter max number of iterations. Default is +#' `1000 * round((1.25) ^ length(idparsopt))`. +#' @param num_cycles Number of cycles of the optimization. When set to `Inf`, +#' the optimization will be repeated until the result is, within the +#' tolerance, equal to the starting values, with a maximum of 10 cycles. +#' @param is_complete_tree logical specifying whether or not a tree with all its +#' extinct species is provided. If set to `TRUE`, it also assumes that all +#' *all* extinct lineages are present on the tree. Defaults to `FALSE`. +#' @param verbose sets verbose output; default is `TRUE` when `optimmethod` is +#' `"simplex"`. If `optimmethod` is set to `"simplex"`, then even if set to +#' `FALSE`, optimizer output will be shown. +#' @param num_threads number of threads to be used. Default is one thread. +#' @param atol A numeric specifying the absolute tolerance of integration. +#' @param rtol A numeric specifying the relative tolerance of integration. +#' @param method integration method used, available are: +#' `"odeint::runge_kutta_cash_karp54"`, `"odeint::runge_kutta_fehlberg78"`, +#' `"odeint::runge_kutta_dopri5"`, `"odeint::bulirsch_stoer"` and +#' `"odeint::runge_kutta4"`. Default method is: `"odeint::bulirsch_stoer"`. +#' @param parameter list where first vector represents lambdas, the second +#' mus and the third transition rates. +#' @param setting_calculation argument used internally to speed up calculation. +#' It should be left blank (default : `setting_calculation = NULL`). +#' @param loglik_penalty the size of the penalty for all parameters; default is +#' 0 (no penalty). +#' @param num_steps number of substeps to show intermediate likelihoods +#' along a branch. +#' @param see_ancestral_states Boolean for whether the ancestral states should +#' be shown? Defaults to `FALSE`. +#' @param lambdas speciation rates, in the form of a list of matrices. +#' @param mus extinction rates, in the form of a vector. +#' @param qs The Q matrix, for example the result of function q_doubletrans, but +#' generally in the form of a matrix. +#' @param crown_age crown age of the tree, tree will be simulated conditional +#' on non-extinction and this crown age. +#' @param pool_init_states pool of initial states at the crown, in case this is +#' different from all available states, otherwise leave at NULL +#' @param max_spec Maximum number of species in the tree (please note that the +#' tree is not conditioned on this number, but that this is a safeguard +#' against generating extremely large trees). +#' @param min_spec Minimum number of species in the tree. +#' @param conditioning can be `"obs_states"`, `"true_states"` or `"none"`, the +#' tree is simulated until one is generated that contains all observed states +#' (`"obs_states"`), all true states (e.g. all combinations of obs and hidden +#' states), or is always returned (`"none"`). Alternatively, a vector with +#' the names of required observed states can be provided, e.g. c("S", "N"). +#' @param non_extinction boolean stating if the tree should be conditioned on +#' non-extinction of the crown lineages. Defaults to `TRUE`. +#' @param max_tries maximum number of simulations to try to obtain a tree. +#' @param drop_extinct boolean stating if extinct species should be dropped from +#' the tree. Defaults to `TRUE`. +#' @param seed pseudo-random number generator seed. +#' @param parameters list where first vector represents lambdas, the second mus +#' and the third transition rates. +#' @param prob_func a function to calculate the probability of interest, see +#' description. +#' @param masterBlock matrix of transitions among only examined states, `NA` in +#' the main diagonal, used to build the full transition rates matrix. +#' @param diff.conceal Boolean stating if the concealed states should be +#' different. E.g. that the transition rates for the concealed +#' states are different from the transition rates for the examined states. +#' Normally it should be `FALSE` in order to avoid having a huge number of +#' parameters. +#' @param trait_info data frame where first column has species ids and the second +#' one is the trait associated information. +#' @param optimmethod A string with method used for optimization. Default is +#' `"subplex"`. Alternative is `"simplex"` and it shouldn't be used in normal +#' conditions (only for debugging). Both are called from [DDD::optimizer()], +#' simplex is implemented natively in [DDD], while subplex is ultimately +#' called from [subplex::subplex()]. +#' @param lambd_and_modeSpe a matrix with the 4 models of speciation possible. +#' @param initloglik A numeric with the value of loglikehood obtained prior to +#' optimisation. Only used internally. +#' @param state_names vector of names of all observed states. +#' @param transition_matrix a matrix containing a description of all speciation +#' events, where the first column indicates the source state, the second and +#' third column indicate the two daughter states, and the fourth column gives +#' the rate indicator used. E.g.: `["SA", "S", "A", 1]` for a trait state +#' `"SA"` which upon speciation generates two daughter species with traits +#' `"S"` and `"A"`, where the number 1 is used as indicator for optimization +#' of the likelihood. +#' @param model used model, choice of `"ETD"` (Examined Traits Diversification), +#' `"CTD"` (Concealed Traits Diversification) or `"CR"` (Constant Rate). +#' @param concealed_spec_rates vector specifying the rate indicators for each +#' concealed state, length should be identical to `num_concealed_states`. If +#' left empty when using the CTD model, it is assumed that all available +#' speciation rates are distributed uniformly over the concealed states. +#' @param shift_matrix matrix of shifts, indicating in order: +#' 1. starting state (typically the column in the transition matrix) +#' 2. ending state (typically the row in the transition matrix) +#' 3. associated rate indicator. +#' @param q_matrix `q_matrix` with only transitions between observed states. +#' @param lambda_list previously generated list of lambda matrices, +#' used to infer the rate number to start with. +#' @param object lambda matrices, `q_matrix` or mu vector. +#' @param params parameters in order, where each value reflects the value +#' of the parameter at that position, e.g. `c(0.3, 0.2, 0.1)` will fill out +#' the value 0.3 for the parameter with rate identifier 1, 0.2 for the +#' parameter with rate identifier 2 and 0.1 for the parameter with rate +#' identifier 3. +#' @param param_posit initial parameter structure, consisting of a list with +#' three entries: +#' 1. lambda matrices +#' 2. mus +#' 3. Q matrix +#' +#' In each entry, integers numbers (1-n) indicate the parameter to be +#' optimized. +#' @param ml_pars resulting parameter estimates as returned by for instance +#' [cla_secsse_ml()], having the same structure as `param_post`. +#' @param mu_vector previously defined mus - used to choose indicator number. +#' +#' @return Nothing +#' @keywords internal +#' @export +default_params_doc <- function(phy, + traits, + num_concealed_states, + idparslist, + initparsopt, + idparsfix, + idparsopt, + idfactorsopt, + parsfix, + cond, + root_state_weight, + sampling_fraction, + tol, + maxiter, + optimethod, + num_cycles, + loglik_penalty, + is_complete_tree, + verbose, + num_threads, + atol, + rtol, + method, + parameter, + setting_calculation, + num_steps, + see_ancestral_states, + lambdas, + mus, + qs, + crown_age, + pool_init_states, + maxSpec, + conditioning, + non_extinction, + max_tries, + drop_extinct, + seed, + prob_func, + parameters, + masterBlock, + diff.conceal, + trait_info, + lambd_and_modeSpe, + initloglik, + initfactors, + idparsfuncdefpar, + functions_defining_params, + state_names, + transition_matrix, + model, + concealed_spec_rates, + shift_matrix, + q_matrix, + lambda_list, + object, + params, + param_posit, + ml_pars, + mu_vector) { + # Nothing +} diff --git a/R/event_times.R b/R/event_times.R deleted file mode 100755 index 02efb01..0000000 --- a/R/event_times.R +++ /dev/null @@ -1,50 +0,0 @@ -#' Times at which speciation or extinction occurs -#' @title Event times of a (possibly non-ultrametric) phylogenetic tree -#' @param phy phylogenetic tree of class phylo, without polytomies, rooted and -#' with branch lengths. Need not be ultrametric. -#' @return times at which speciation or extinction happens. -#' @note This script has been modified from BAMMtools' internal function -#' NU.branching.times -#' @export -event_times <- function(phy) { - if (ape::is.ultrametric(phy)) { - return(ape::branching.times(phy)) - } else { - if (ape::is.binary(phy) == FALSE) { - stop("error. Need fully bifurcating (resolved) tree\n") - } - phy$begin <- rep(0, nrow(phy$edge)) - phy$end <- rep(0, nrow(phy$edge)) - fx <- function(phy, node) { - cur_time <- 0 - root <- length(phy$tip.label) + 1 - if (node > root) { - cur_time <- phy$end[which(phy$edge[, 2] == node)] - } - dset <- phy$edge[, 2][phy$edge[, 1] == node] - i1 <- which(phy$edge[, 2] == dset[1]) - i2 <- which(phy$edge[, 2] == dset[2]) - phy$end[i1] <- cur_time + phy$edge.length[i1] - phy$end[i2] <- cur_time + phy$edge.length[i2] - if (dset[1] > length(phy$tip.label)) { - phy$begin[phy$edge[, 1] == dset[1]] <- phy$end[i1] - phy <- fx(phy, node = dset[1]) - } - if (dset[2] > length(phy$tip.label)) { - phy$begin[phy$edge[, 1] == dset[2]] <- phy$end[i2] - phy <- fx(phy, node = dset[2]) - } - return(phy) - } - phy <- fx(phy, node = length(phy$tip.label) + 1) - maxbt <- max(phy$end) - nodes <- (length(phy$tip.label) + 1):(2 * length(phy$tip.label) - 1) - bt <- numeric(length(nodes)) - names(bt) <- nodes - for (i in seq_along(bt)) { - tt <- phy$begin[phy$edge[, 1] == nodes[i]][1] - bt[i] <- maxbt - tt - } - return(bt) - } -} diff --git a/R/plot_state_exact.R b/R/plot_state_exact.R deleted file mode 100644 index 574f6bc..0000000 --- a/R/plot_state_exact.R +++ /dev/null @@ -1,242 +0,0 @@ -#' function to plot the local probability along the tree, including the branches -#' @param parameters used parameters for the likelihood calculation -#' @param focal_tree used phylogeny -#' @param traits used traits -#' @param num_concealed_states number of concealed states -#' @param sampling_fraction sampling fraction -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:'maddison_weigh -#' ,'proper_weights'(default) or 'equal_weights'. It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param steps number of substeps evaluated per branch, see description. -#' @param prob_func a function to calculate the probability of interest, see -#' description -#' @param is_complete_tree whether or not a tree with all its extinct species is -#' provided -#' @param verbose provides intermediate output (progressbars etc) when TRUE. -#' @return ggplot2 object -#' @description this function will evaluate the log likelihood locally along -#' all branches and plot the result. When steps is left to NULL, all likelihood -#' evaluations during integration are used for plotting. This may work for not -#' too large trees, but may become very memory heavy for larger trees. Instead, -#' the user can indicate a number of steps, which causes the probabilities to be -#' evaluated at a distinct amount of steps along each branch (and the -#' probabilities to be properly integrated in between these steps). This -#' provides an approximation, but generally results look very similar to using -#' the full evaluation. -#' The function used for prob_func will be highly dependent on your system. -#' for instance, for a 3 observed, 2 hidden states model, the probability -#' of state A is prob[1] + prob[2] + prob[3], normalized by the row sum. -#' prob_func will be applied to each row of the 'states' matrix (you can thus -#' test your function on the states matrix returned when -#' 'see_ancestral_states = TRUE'). Please note that the first N columns of the -#' states matrix are the extinction rates, and the (N+1):2N columns belong to -#' the speciation rates, where N = num_obs_states * num_concealed_states. -#' A typical probfunc function will look like: -#' my_prob_func <- function(x) { -#' return(sum(x[5:8]) / sum(x)) -#' } -#' @examples -#' set.seed(5) -#' focal_tree <- ape::rphylo(n = 4, birth = 1, death = 0) -#' traits <- c(0, 1, 1, 0) -#' params <- secsse::id_paramPos(c(0, 1), 2) -#' params[[1]][] <- c(0.2, 0.2, 0.1, 0.1) -#' params[[2]][] <- 0.0 -#' params[[3]][, ] <- 0.1 -#' diag(params[[3]]) <- NA -#' # Thus, we have for both, rates -#' # 0A, 1A, 0B and 1B. If we are interested in the posterior probability of -#' # trait 0,we have to provide a helper function that sums the probabilities of -#' # 0A and 0B, e.g.: -#' helper_function <- function(x) { -#' return(sum(x[c(5, 7)]) / sum(x)) # normalized by total sum, just in case. -#' } -#' -#' out_plot <- plot_state_exact(parameters = params, -#' focal_tree = focal_tree, -#' traits = traits, -#' num_concealed_states = 2, -#' sampling_fraction = c(1, 1), -#' steps = 10, -#' prob_func = helper_function) -#' @export -plot_state_exact <- function(parameters, - focal_tree, - traits, - num_concealed_states, - sampling_fraction, - cond = "proper_cond", - root_state_weight = "proper_weights", - is_complete_tree = FALSE, - method = "odeint::bulirsch_stoer", - atol = 1e-16, - rtol = 1e-16, - steps = NULL, - prob_func = NULL, - verbose = FALSE) { - - if (is.null(prob_func)) { - stop("need to set a probability function, check description to how") - } - - if (verbose) message("collecting all states on nodes") - ll1 <- secsse::secsse_loglik(parameter = parameters, - phy = focal_tree, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - see_ancestral_states = TRUE, - loglik_penalty = 0, - is_complete_tree = is_complete_tree, - num_threads = 1, - atol = atol, - rtol = rtol, - method = method) - - if (verbose) message("collecting branch likelihoods\n") - eval_res <- secsse::secsse_loglik_eval(parameter = parameters, - phy = focal_tree, - traits = traits, - num_concealed_states = - num_concealed_states, - ancestral_states = ll1$states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - is_complete_tree = is_complete_tree, - atol = atol, - rtol = rtol, - method = method, - num_steps = steps, - verbose = verbose) - - if (verbose) - message("\nconverting collected likelihoods to graph positions:\n") - - xs <- ape::node.depth.edgelength(focal_tree) - ys <- ape::node.height(focal_tree) - num_tips <- length(focal_tree$tip.label) - num_nodes <- (1 + num_tips):length(ys) - - nodes <- data.frame(x = xs, y = ys, n = c(1:num_tips, num_nodes)) - - for_plot <- collect_branches(eval_res, nodes, prob_func, verbose) - - node_bars <- collect_node_bars(eval_res, nodes, prob_func, ll1) - - if (verbose) message("\ngenerating ggplot object\n") - focal_plot <- make_ggplot(for_plot, node_bars) - return(focal_plot) -} - -#' @keywords internal -collect_branches <- function(to_plot, - nodes, - prob_func, - verbose) { - num_rows <- length(to_plot[, 1]) - - for_plot <- matrix(nrow = num_rows, ncol = 6) - for_plot_cnt <- 1 - if (verbose) pb <- utils::txtProgressBar(max = length(unique(to_plot[, 1])), - style = 3) - cnt <- 1 - for (parent in unique(to_plot[, 1])) { - if (verbose) utils::setTxtProgressBar(pb, cnt) - cnt <- cnt + 1 - - to_plot2 <- subset(to_plot, to_plot[, 1] == parent) - for (daughter in unique(to_plot2[, 2])) { - indices <- which(to_plot2[, 2] == daughter) - if (length(indices) > 0) { - # we have a branch - focal_branch <- to_plot2[indices, ] - start_x <- nodes$x[which(nodes$n == parent)] - end_x <- nodes$x[which(nodes$n == daughter)] - y <- nodes$y[which(nodes$n == daughter)] - - bl <- end_x - start_x - - probs <- apply(focal_branch[, 4:length(focal_branch[1, ])], - 1, - prob_func) - - for (s in 1:(length(focal_branch[, 1]) - 1)) { - x0 <- start_x + bl - focal_branch[s, 3] - x1 <- start_x + bl - focal_branch[s + 1, 3] - ps <- probs[s] - for_plot[for_plot_cnt, ] <- c(x0, x1, y, ps, parent, daughter) - for_plot_cnt <- for_plot_cnt + 1 - } - } - } - } - colnames(for_plot) <- c("x0", "x1", "y", "prob", "p", "d") - for_plot <- tibble::as_tibble(for_plot) - - return(for_plot) -} - -#' @keywords internal -collect_node_bars <- function(to_plot, - nodes, - prob_func, - ll) { - node_bars <- matrix(nrow = length(unique(to_plot[, 1])), ncol = 4) - node_bars_cnt <- 1 - for (parent in unique(to_plot[, 1])) { - focal_data <- subset(to_plot, to_plot[, 1] == parent) - daughters <- unique(focal_data[, 2]) - start_x <- nodes$x[which(nodes$n == parent)] - y <- c() - for (i in seq_along(daughters)) { - y <- c(y, nodes$y[nodes$n == daughters[i]]) - } - y <- sort(y) - - probs <- ll$states[parent, ] - rel_prob <- prob_func(probs) - node_bars[node_bars_cnt, ] <- c(start_x, y, rel_prob) - node_bars_cnt <- node_bars_cnt + 1 - } - - colnames(node_bars) <- c("x", "y0", "y1", "prob") - node_bars <- tibble::as_tibble(node_bars) - return(node_bars) -} - -#' @importFrom rlang .data -#' @keywords internal -make_ggplot <- function(for_plot, node_bars) { - ggplot_plot <- ggplot2::ggplot(for_plot) + - ggplot2::geom_segment(ggplot2::aes(x = .data[["x0"]], - y = .data[["y"]], - xend = .data[["x1"]], - yend = .data[["y"]], - col = .data[["prob"]])) + - ggplot2::geom_segment(data = node_bars, - ggplot2::aes(x = .data[["x"]], - y = .data[["y0"]], - yend = .data[["y1"]], - xend = .data[["x"]], - col = .data[["prob"]]) - ) + - ggplot2::theme_classic() + - ggplot2::xlab("") + - ggplot2::ylab("") + - ggplot2::theme(axis.text.y = ggplot2::element_blank(), - axis.ticks.y = ggplot2::element_blank(), - axis.line.y = ggplot2::element_blank()) - - return(ggplot_plot) -} diff --git a/R/plot_state_exact_cla.R b/R/plot_state_exact_cla.R deleted file mode 100644 index a41067a..0000000 --- a/R/plot_state_exact_cla.R +++ /dev/null @@ -1,165 +0,0 @@ -#' function to plot the local probability along the tree, -#' including the branches, for the CLA model. -#' @param parameters used parameters for the likelihood calculation -#' @param focal_tree used phylogeny -#' @param traits used traits -#' @param num_concealed_states number of concealed states -#' @param sampling_fraction sampling fraction -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:'maddison_weigh -#' ,'proper_weights'(default) or 'equal_weights'. It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param steps number of substeps evaluated per branch, see description. -#' @param prob_func a function to calculate the probability of interest, see -#' description -#' @param is_complete_tree whether or not a tree with all its extinct species is -#' provided -#' @param verbose return verbose output / progress bars when true. -#' @return ggplot2 object -#' @description this function will evaluate the log likelihood locally along -#' all branches and plot the result. When steps is left to NULL, all likelihood -#' evaluations during integration are used for plotting. This may work for not -#' too large trees, but may become very memory heavy for larger trees. Instead, -#' the user can indicate a number of steps, which causes the probabilities to be -#' evaluated at a distinct amount of steps along each branch (and the -#' probabilities to be properly integrated in between these steps). This -#' provides an approximation, but generally results look very similar to using -#' the full evaluation. -#' The function used for prob_func will be highly dependent on your system. -#' for instance, for a 3 observed, 2 hidden states model, the probability -#' of state A is prob[1] + prob[2] + prob[3], normalized by the row sum. -#' prob_func will be applied to each row of the 'states' matrix (you can thus -#' test your function on the states matrix returned when -#' 'see_ancestral_states = TRUE'). Please note that the first N columns of the -#' states matrix are the extinction rates, and the (N+1):2N columns belong to -#' the speciation rates, where N = num_obs_states * num_concealed_states. -#' A typical probfunc function will look like: -#' my_prob_func <- function(x) { -#' return(sum(x[5:8]) / sum(x)) -#' } -#' -#' @examples -#' set.seed(13) -#'phylotree <- ape::rcoal(12, tip.label = 1:12) -#'traits <- sample(c(0, 1, 2), ape::Ntip(phylotree), replace = TRUE) -#'num_concealed_states <- 3 -#'sampling_fraction <- c(1,1,1) -#'phy <- phylotree -#'# the idparlist for a ETD model (dual state inheritance model of evolution) -#'# would be set like this: -#'idparlist <- secsse::cla_id_paramPos(traits,num_concealed_states) -#'lambd_and_modeSpe <- idparlist$lambdas -#'lambd_and_modeSpe[1,] <- c(1,1,1,2,2,2,3,3,3) -#'idparlist[[1]] <- lambd_and_modeSpe -#'idparlist[[2]][] <- 0 -#'masterBlock <- matrix(4,ncol = 3, nrow = 3, byrow = TRUE) -#'diag(masterBlock) <- NA -#'idparlist[[3]] <- q_doubletrans(traits, masterBlock, diff.conceal = FALSE) -#'# Now, internally, clasecsse sorts the lambda matrices, so they look like -#'# a list with 9 matrices, corresponding to the 9 states -#'# (0A,1A,2A,0B, etc) - -#'parameter <- idparlist -#'lambda_and_modeSpe <- parameter$lambdas -#'lambda_and_modeSpe[1,] <- c(0.2,0.2,0.2,0.4,0.4,0.4,0.01,0.01,0.01) -#'parameter[[1]] <- prepare_full_lambdas(traits,num_concealed_states, -#' lambda_and_modeSpe) -#'parameter[[2]] <- rep(0,9) -#'masterBlock <- matrix(0.07, ncol = 3, nrow = 3, byrow = TRUE) -#'diag(masterBlock) <- NA -#'parameter[[3]] <- q_doubletrans(traits, masterBlock, diff.conceal = FALSE) -#'helper_function <- function(x) { -#' return(sum(x[c(10, 13, 16)]) / sum(x)) -#'} -#'out_plot <- plot_state_exact_cla(parameters = parameter, -#' focal_tree = phy, -#' traits = traits, -#' num_concealed_states = 3, -#' sampling_fraction = sampling_fraction, -#' cond = 'maddison_cond', -#' root_state_weight = 'maddison_weights', -#' is_complete_tree = FALSE, -#' prob_func = helper_function, -#' steps = 10) -#' @export -plot_state_exact_cla <- function(parameters, - focal_tree, - traits, - num_concealed_states, - sampling_fraction, - cond = "proper_cond", - root_state_weight = "proper_weights", - is_complete_tree = FALSE, - method = "odeint::bulirsch_stoer", - atol = 1e-16, - rtol = 1e-16, - steps = 10, - prob_func = NULL, - verbose = FALSE) { - - if (is.null(prob_func)) { - stop("need to set a probability function, check description to how") - } - - if (verbose) message("collecting all states on nodes") - ll1 <- secsse::cla_secsse_loglik(parameter = parameters, - phy = focal_tree, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - see_ancestral_states = TRUE, - loglik_penalty = 0, - is_complete_tree = is_complete_tree, - num_threads = 1, - atol = atol, - rtol = rtol, - method = method) - - if (verbose) message("collecting branch likelihoods\n") - eval_res <- secsse::cla_secsse_eval(parameter = parameters, - phy = focal_tree, - traits = traits, - num_concealed_states = - num_concealed_states, - ancestral_states = ll1$states, - cond = cond, - root_state_weight = root_state_weight, - num_steps = steps, - sampling_fraction = sampling_fraction, - is_complete_tree = is_complete_tree, - atol = atol, - rtol = rtol, - method = method, - verbose = verbose) - - if (verbose) message("\nconverting collected likelihoods - to graph positions:\n") - - xs <- ape::node.depth.edgelength(focal_tree) - ys <- ape::node.height(focal_tree) - num_tips <- length(focal_tree$tip.label) - num_nodes <- (1 + num_tips):length(ys) - - nodes <- data.frame(x = xs, y = ys, n = c(1:num_tips, num_nodes)) - - to_plot <- eval_res - to_plot[, c(1, 2)] <- to_plot[, c(1, 2)] + 1 - - for_plot <- collect_branches(to_plot, nodes, prob_func, verbose) - - node_bars <- collect_node_bars(to_plot, nodes, prob_func, ll1) - - if (verbose) message("\ngenerating ggplot object\n") - - focal_plot <- make_ggplot(for_plot, node_bars) - return(focal_plot) -} diff --git a/R/print_init_ll.R b/R/print_init_ll.R deleted file mode 100644 index d4545d8..0000000 --- a/R/print_init_ll.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Print likelihood for initial parameters -#' -#' @inheritParams default_params_doc -#' @param initloglik A numeric with the value of loglikehood obtained prior to -#' optimisation. Only used internally. -#' -#' @return Invisible `NULL`. Prints a `message()` to the console with the -#' initial loglikelihood if `verbose >= 1` -#' @noRd -print_init_ll <- function(initloglik, - verbose) { - if (isTRUE(verbose >= 1)) { - init_ll_msg1 <- "Calculating the likelihood for the initial parameters." - init_ll_msg2 <- paste0("The loglikelihood for the initial parameter values is ", initloglik) - init_ll_msg3 <- c("Optimizing the likelihood - this may take a while.") - message(paste(init_ll_msg1, init_ll_msg2, init_ll_msg3, sep = "\n")) - - } - - invisible(NULL) -} \ No newline at end of file diff --git a/R/seccse_plot.R b/R/seccse_plot.R new file mode 100644 index 0000000..0b746ba --- /dev/null +++ b/R/seccse_plot.R @@ -0,0 +1,283 @@ +#' @title Likelihood for SecSSE model +#' Logikelihood calculation for the SecSSE model given a set of parameters and +#' data, returning also the likelihoods along the branches +#' +#' @inheritParams default_params_doc +#' +#' @return A list containing: "output", observed states along evaluated time +#' points along all branches, used for plotting. "states" all ancestral states +#' on the nodes and "duration", indicating the time taken for the total +#' evaluation +#' @examples +#' set.seed(5) +#' phy <- ape::rphylo(n = 4, birth = 1, death = 0) +#' traits <- c(0, 1, 1, 0) +#' params <- secsse::id_paramPos(c(0, 1), 2) +#' params[[1]][] <- c(0.2, 0.2, 0.1, 0.1) +#' params[[2]][] <- 0.0 +#' params[[3]][, ] <- 0.1 +#' diag(params[[3]]) <- NA +#' +#' secsse_loglik_eval(parameter = params, +#' phy = phy, +#' traits = traits, +#' num_concealed_states = 2, +#' sampling_fraction = c(1, 1), +#' num_steps = 10) +#' @export +secsse_loglik_eval <- function(parameter, + phy, + traits, + num_concealed_states, + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + setting_calculation = NULL, + loglik_penalty = 0, + is_complete_tree = FALSE, + num_threads = 1, + atol = 1e-8, + rtol = 1e-7, + method = "odeint::bulirsch_stoer", + num_steps = 100) { + RcppParallel::setThreadOptions(numThreads = num_threads) + lambdas <- parameter[[1]] + mus <- parameter[[2]] + parameter[[3]][is.na(parameter[[3]])] <- 0 + q_matrix <- parameter[[3]] + + check_input(traits, + phy, + sampling_fraction, + root_state_weight, + is_complete_tree) + setting_calculation <- build_initStates_time(phy, + traits, + num_concealed_states, + sampling_fraction, + is_complete_tree, + mus) + eval_cpp(rhs = if (is.list(lambdas)) "ode_cla" else "ode_standard", + ances = setting_calculation$ances, + states = setting_calculation$states, + forTime = setting_calculation$forTime, + lambdas = lambdas, + mus = mus, + Q = q_matrix, + method = method, + atol = atol, + rtol = rtol, + is_complete_tree = is_complete_tree, + num_steps = num_steps) +} + +#' Plot the local probability along a tree +#' +#' Plot the local probability along the tree, including the branches +#' +#' @details This function will evaluate the log likelihood locally along +#' all branches and plot the result. When `num_steps` is left to `NULL`, all +#' likelihood evaluations during integration are used for plotting. This may +#' work for not too large trees, but may become very memory heavy for larger +#' trees. Instead, the user can indicate a number of steps, which causes the +#' probabilities to be evaluated at a distinct amount of steps along each branch +#' (and the probabilities to be properly integrated in between these steps). +#' This provides an approximation, but generally results look very similar to +#' using the full evaluation. +#' The function used for `prob_func` will be highly dependent on your system. +#' for instance, for a 3 observed, 2 hidden states model, the probability +#' of state A is `prob[1] + prob[2] + prob[3]`, normalized by the row sum. +#' `prob_func` will be applied to each row of the 'states' matrix (you can thus +#' test your function on the states matrix returned when +#' `'see_ancestral_states = TRUE'`). Please note that the first N columns of the +#' states matrix are the extinction rates, and the `(N+1):2N` columns belong to +#' the speciation rates, where `N = num_obs_states * num_concealed_states`. +#' A typical `prob_func` function will look like: +#' ``` +#' my_prob_func <- function(x) { +#' return(sum(x[5:8]) / sum(x)) +#' } +#' ``` +#' +#' @inheritParams default_params_doc +#' +#' @return ggplot2 object +#' @examples +#' set.seed(5) +#' phy <- ape::rphylo(n = 4, birth = 1, death = 0) +#' traits <- c(0, 1, 1, 0) +#' params <- secsse::id_paramPos(c(0, 1), 2) +#' params[[1]][] <- c(0.2, 0.2, 0.1, 0.1) +#' params[[2]][] <- 0.0 +#' params[[3]][, ] <- 0.1 +#' diag(params[[3]]) <- NA +#' # Thus, we have for both, rates +#' # 0A, 1A, 0B and 1B. If we are interested in the posterior probability of +#' # trait 0,we have to provide a helper function that sums the probabilities of +#' # 0A and 0B, e.g.: +#' helper_function <- function(x) { +#' return(sum(x[c(5, 7)]) / sum(x)) # normalized by total sum, just in case. +#' } +#' +#' out_plot <- plot_state_exact(parameters = params, +#' phy = phy, +#' traits = traits, +#' num_concealed_states = 2, +#' sampling_fraction = c(1, 1), +#' num_steps = 10, +#' prob_func = helper_function) +#' @export +plot_state_exact <- function(parameters, + phy, + traits, + num_concealed_states, + sampling_fraction, + cond = "proper_cond", + root_state_weight = "proper_weights", + is_complete_tree = FALSE, + method = "odeint::bulirsch_stoer", + atol = 1e-16, + rtol = 1e-16, + num_steps = 100, + prob_func = NULL, + verbose = FALSE) { + if (is.null(prob_func)) { + stop("need to set a probability function, check description to how") + } + + eval_res <- secsse_loglik_eval(parameter = parameters, + phy = phy, + traits = traits, + num_concealed_states = + num_concealed_states, + cond = cond, + root_state_weight = root_state_weight, + num_steps = num_steps, + sampling_fraction = sampling_fraction, + is_complete_tree = is_complete_tree, + atol = atol, + rtol = rtol, + method = method) + + if (verbose) message("\nconverting collected likelihoods + to graph positions:\n") + + xs <- ape::node.depth.edgelength(phy) + ys <- ape::node.height(phy) + num_tips <- length(phy$tip.label) + num_nodes <- (1 + num_tips):length(ys) + + nodes <- data.frame(x = xs, y = ys, n = c(1:num_tips, num_nodes)) + + to_plot <- eval_res$output + + for_plot <- collect_branches(to_plot, nodes, prob_func, verbose) + + node_bars <- collect_node_bars(to_plot, nodes, prob_func, eval_res$states) + + if (verbose) message("\ngenerating ggplot object\n") + + focal_plot <- make_ggplot(for_plot, node_bars) + return(focal_plot) +} + + +#' @importFrom rlang .data +#' @keywords internal +make_ggplot <- function(for_plot, node_bars) { + ggplot_plot <- ggplot2::ggplot(for_plot) + + ggplot2::geom_segment(ggplot2::aes(x = .data[["x0"]], + y = .data[["y"]], + xend = .data[["x1"]], + yend = .data[["y"]], + col = .data[["prob"]])) + + ggplot2::geom_segment(data = node_bars, + ggplot2::aes(x = .data[["x"]], + y = .data[["y0"]], + yend = .data[["y1"]], + xend = .data[["x"]], + col = .data[["prob"]]) + ) + + ggplot2::theme_classic() + + ggplot2::xlab("") + + ggplot2::ylab("") + + ggplot2::theme(axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank(), + axis.line.y = ggplot2::element_blank()) + + return(ggplot_plot) +} + +#' @keywords internal +collect_branches <- function(to_plot, + nodes, + prob_func, + verbose) { + num_rows <- length(to_plot[, 1]) + + for_plot <- matrix(nrow = num_rows, ncol = 6) + for_plot_cnt <- 1 + if (verbose) pb <- utils::txtProgressBar(max = length(unique(to_plot[, 1])), + style = 3) + cnt <- 1 + for (parent in unique(to_plot[, 1])) { + if (verbose) utils::setTxtProgressBar(pb, cnt) + cnt <- cnt + 1 + + to_plot2 <- subset(to_plot, to_plot[, 1] == parent) + for (daughter in unique(to_plot2[, 2])) { + indices <- which(to_plot2[, 2] == daughter) + if (length(indices) > 0) { + # we have a branch + focal_branch <- to_plot2[indices, ] + start_x <- nodes$x[which(nodes$n == parent)] + end_x <- nodes$x[which(nodes$n == daughter)] + y <- nodes$y[which(nodes$n == daughter)] + + bl <- end_x - start_x + + probs <- apply(focal_branch[, 4:length(focal_branch[1, ])], + 1, + prob_func) + for (s in 1:(length(focal_branch[, 1]) - 1)) { + x0 <- start_x + bl - focal_branch[s, 3] + x1 <- start_x + bl - focal_branch[s + 1, 3] + ps <- probs[s] + for_plot[for_plot_cnt, ] <- c(x0, x1, y, ps, parent, daughter) + for_plot_cnt <- for_plot_cnt + 1 + } + } + } + } + colnames(for_plot) <- c("x0", "x1", "y", "prob", "p", "d") + for_plot <- tibble::as_tibble(for_plot) + return(for_plot) +} + +#' @keywords internal +collect_node_bars <- function(to_plot, + nodes, + prob_func, + states) { + node_bars <- matrix(nrow = length(unique(to_plot[, 1])), ncol = 4) + node_bars_cnt <- 1 + for (parent in unique(to_plot[, 1])) { + focal_data <- subset(to_plot, to_plot[, 1] == parent) + daughters <- unique(focal_data[, 2]) + start_x <- nodes$x[which(nodes$n == parent)] + y <- c() + for (i in seq_along(daughters)) { + y <- c(y, nodes$y[nodes$n == daughters[i]]) + } + y <- sort(y) + + probs <- states[parent, ] + rel_prob <- prob_func(probs) + node_bars[node_bars_cnt, ] <- c(start_x, y, rel_prob) + node_bars_cnt <- node_bars_cnt + 1 + } + + colnames(node_bars) <- c("x", "y0", "y1", "prob") + node_bars <- tibble::as_tibble(node_bars) + return(node_bars) +} diff --git a/R/data.R b/R/secsse_data.R similarity index 76% rename from R/data.R rename to R/secsse_data.R index 35e8618..bbbd9be 100755 --- a/R/data.R +++ b/R/secsse_data.R @@ -1,19 +1,19 @@ -#' @name phylo_Vign +#' @name phylo_vignette #' @title A phylogenetic reconstuction to run the vignette #' @description An example phylogeny in the right format for secsse -#' @format Phylogenetic tree in format nexus, rooted, including branch lengths -NULL +#' @format Phylogenetic tree in phy format, rooted, including branch lengths +"phylo_vignette" -#' @name traitinfo +#' @name traits #' @title A table with trait info to run the vignette #' @description An example of trait information in the right format for secsse #' @format A data frame where each species has a trait state associated -NULL +"traits" #' @name example_phy_GeoSSE #' @title A phylogeny with traits at the tips #' @description An example phylogeny for testing purposes #' @format A phylogeny as created by GeoSSE (diversitree) -"phy" +"example_phy_GeoSSE" diff --git a/R/secsse_loglik.R b/R/secsse_loglik.R old mode 100755 new mode 100644 index 10b08d7..77e18d6 --- a/R/secsse_loglik.R +++ b/R/secsse_loglik.R @@ -1,69 +1,5 @@ -#' Logikelihood calculation for the SecSSE model given a set of parameters and -#' data -#' @title Likelihood for SecSSE model -#' @param parameter list where first vector represents lambdas, the second mus -#' and the third transition rates. -#' @param phy phylogenetic tree of class phylo, ultrametric, fully-resolved, -#' rooted and with branch lengths. -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. -#' @param cond condition on the existence of a node root: "maddison_cond", -#' "proper_cond"(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states: -#' "maddison_weights","proper_weights"(default) or "equal_weights". -#' It can also be specified the root state:the vector c(1, 0, 0) -#' indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per -#' trait state. It must have as many elements as trait states. -#' @param setting_calculation argument used internally to speed up calculation. -#' It should be left blank (default : setting_calculation = NULL) -#' @param see_ancestral_states should the ancestral states be shown? Default -#' FALSE -#' @param loglik_penalty the size of the penalty for all parameters; default is -#' 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param num_threads number of threads. Set to -1 to use all available threads. -#' Default is one thread. -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @return The loglikelihood of the data given the parameter. -#' @note Multithreading might lead to a slightly reduced accuracy -#' (in the order of 1e-10) and is therefore not enabled by default. -#' Please use at your own discretion. -#' @examples -#' rm(list = ls(all = TRUE)) -#' library(secsse) -#' set.seed(13) -#' phylotree <- ape::rcoal(31, tip.label = 1:31) -#' traits <- sample(c(0,1,2),ape::Ntip(phylotree),replace = TRUE) -#' num_concealed_states <- 2 -#' cond <- "proper_cond" -#' root_state_weight <- "proper_weights" -#' sampling_fraction <- c(1,1,1) -#' drill <- id_paramPos(traits,num_concealed_states) -#' drill[[1]][] <- c(0.12,0.01,0.2,0.21,0.31,0.23) -#' drill[[2]][] <- 0 -#' drill[[3]][,] <- 0.1 -#' diag(drill[[3]]) <- NA -#' secsse_loglik(parameter = drill, -#' phylotree, -#' traits, -#' num_concealed_states, -#' cond, -#' root_state_weight, -#' sampling_fraction, -#' see_ancestral_states = FALSE) -#' -#' #[1] -113.1018 -#' @export -secsse_loglik <- function(parameter, +#' @keywords internal +master_loglik <- function(parameter, phy, traits, num_concealed_states, @@ -83,6 +19,10 @@ secsse_loglik <- function(parameter, parameter[[3]][is.na(parameter[[3]])] <- 0 q_matrix <- parameter[[3]] + using_cla <- is.list(lambdas) + + num_modeled_traits <- ncol(q_matrix) / floor(num_concealed_states) + if (is.null(setting_calculation)) { check_input(traits, phy, @@ -94,101 +34,79 @@ secsse_loglik <- function(parameter, num_concealed_states, sampling_fraction, is_complete_tree, - mus) - } - + mus, + num_modeled_traits) + } + states <- setting_calculation$states - - if (num_concealed_states != round(num_concealed_states)) { # for test case - d <- ncol(states) / 2 - new_states <- states[, c(1:sqrt(d), (d + 1):((d + 1) + sqrt(d) - 1))] - new_states <- states[, c(1, 2, 3, 10, 11, 12)] - states <- new_states - } - + forTime <- setting_calculation$forTime + ances <- setting_calculation$ances + + d <- ncol(states) / 2 + + # with a complete tree, we need to re-calculate the states every time we + # run, because they are dependent on mu. if (is_complete_tree) { states <- build_states(phy = phy, traits = traits, num_concealed_states = num_concealed_states, sampling_fraction = sampling_fraction, is_complete_tree = is_complete_tree, - mus = mus) - } - - forTime <- setting_calculation$forTime - ances <- setting_calculation$ances - - d <- ncol(states) / 2 - - if (see_ancestral_states == TRUE) { - if (num_threads != 1) { - warning("see ancestral states only works with one thread, - setting to one thread") - num_threads <- 1 - } + mus = mus, + num_unique_traits = num_modeled_traits) } RcppParallel::setThreadOptions(numThreads = num_threads) - - calcul <- calThruNodes_cpp(ances, - states, - forTime, - lambdas, - mus, - q_matrix, - num_threads, - atol, - rtol, - method, - is_complete_tree) - + calcul <- calc_ll_cpp(rhs = if (using_cla) "ode_cla" else "ode_standard", + ances = ances, + states = states, + forTime = forTime, + lambdas = lambdas, + mus = mus, + Q = q_matrix, + method = method, + atol = atol, + rtol = rtol, + is_complete_tree = is_complete_tree, + see_states = see_ancestral_states) loglik <- calcul$loglik - nodeM <- calcul$nodeM - mergeBranch <- calcul$mergeBranch - states <- calcul$states + nodeM <- calcul$node_M + mergeBranch <- calcul$merge_branch if (length(nodeM) > 2 * d) nodeM <- nodeM[1:(2 * d)] ## At the root - mergeBranch2 <- (mergeBranch) + weight_states <- get_weight_states(root_state_weight, + num_concealed_states, + mergeBranch, + lambdas, + nodeM, + d, + is_cla = using_cla) + + if (is_complete_tree) nodeM <- update_complete_tree(phy, + lambdas, + mus, + q_matrix, + method, + atol, + rtol, + length(mergeBranch)) + + mergeBranch2 <- condition(cond, + mergeBranch, + weight_states, + lambdas, + nodeM) + + wholeLike <- sum((mergeBranch2) * (weight_states)) - weightStates <- get_weight_states(root_state_weight, - num_concealed_states, - mergeBranch, - lambdas, - nodeM, - d, - is_cla = FALSE) - - if (is_complete_tree) { - time_inte <- max(abs(ape::branching.times(phy))) # nolint - y <- rep(0, 2 * length(mergeBranch2)) - - nodeM <- ct_condition(y, # nolint - time_inte, - lambdas, - mus, - q_matrix, - method, - atol, - rtol) - } - - if (cond == "maddison_cond") { - mergeBranch2 <- - mergeBranch2 / sum(weightStates * lambdas * (1 - nodeM[1:d]) ^ 2) - } - - if (cond == "proper_cond") { - mergeBranch2 <- mergeBranch2 / (lambdas * (1 - nodeM[1:d]) ^ 2) - } - - wholeLike <- sum((mergeBranch2) * (weightStates)) LL <- log(wholeLike) + loglik - penalty(pars = parameter, loglik_penalty = loglik_penalty) if (see_ancestral_states == TRUE) { + states <- calcul$states num_tips <- ape::Ntip(phy) ancestral_states <- states[(num_tips + 1):(nrow(states)), ] ancestral_states <- @@ -200,253 +118,146 @@ secsse_loglik <- function(parameter, } } -#' @keywords internal -check_tree <- function(phy, is_complete_tree) { - if (ape::is.rooted(phy) == FALSE) { - stop("The tree needs to be rooted.") - } - - if (ape::is.binary(phy) == FALSE) { - stop("The tree needs to be fully resolved.") - } - if (ape::is.ultrametric(phy) == FALSE && is_complete_tree == FALSE) { - stop("The tree needs to be ultrametric.") - } - if (any(phy$edge.length == 0)) { - stop("The tree must have internode distancs that are all larger than 0.") - } -} - -check_traits <- function(traits, sampling_fraction) { - if (is.matrix(traits)) { - if (length(sampling_fraction) != length(sort(unique(traits[, 1])))) { - stop("Sampling_fraction must have as many elements - as the number of traits.") - } - - if (all(sort(unique(as.vector(traits))) == sort(unique(traits[, 1]))) == - FALSE) { - stop( - "Check your trait argument; if you have more than one column, - make sure all your states are included in the first column." - ) - } - } else { - if (length(sampling_fraction) != length(sort(unique(traits)))) { - stop("Sampling_fraction must have as many elements as - the number of traits.") - } - } - - if (length(sort(unique(as.vector(traits)))) < 2) { - stop("The trait has only one state.") - } -} - -check_root_state_weight <- function(root_state_weight, traits) { - if (is.numeric(root_state_weight)) { - if (length(root_state_weight) != length(sort(unique(traits)))) { - stop("There need to be as many elements in root_state_weight - as there are traits.") - } - if (length(which(root_state_weight == 1)) != 1) { - stop("The root_state_weight needs only one 1.") - } - } else { - if (any(root_state_weight == "maddison_weights" | - root_state_weight == "equal_weights" | - root_state_weight == "proper_weights") == FALSE) { - stop("The root_state_weight must be any of - maddison_weights, equal_weights, or proper_weights.") - } - } -} - -check_input <- function(traits, - phy, - sampling_fraction, - root_state_weight, - is_complete_tree) { - check_root_state_weight(root_state_weight, sampling_fraction) - - check_tree(phy, is_complete_tree) - - check_traits(traits, sampling_fraction) -} - -create_states <- function(usetraits, +#' @title Likelihood for SecSSE model +#' Loglikelihood calculation for the SecSSE model given a set of parameters and +#' data +#' +#' @inheritParams default_params_doc +#' @return The loglikelihood of the data given the parameter. +#' @examples +#' rm(list = ls(all = TRUE)) +#' library(secsse) +#' set.seed(13) +#' phylotree <- ape::rcoal(31, tip.label = 1:31) +#' traits <- sample(c(0,1,2),ape::Ntip(phylotree),replace = TRUE) +#' num_concealed_states <- 2 +#' cond <- "proper_cond" +#' root_state_weight <- "proper_weights" +#' sampling_fraction <- c(1,1,1) +#' drill <- id_paramPos(traits,num_concealed_states) +#' drill[[1]][] <- c(0.12,0.01,0.2,0.21,0.31,0.23) +#' drill[[2]][] <- 0 +#' drill[[3]][,] <- 0.1 +#' diag(drill[[3]]) <- NA +#' secsse_loglik(parameter = drill, +#' phylotree, +#' traits, +#' num_concealed_states, +#' cond, +#' root_state_weight, +#' sampling_fraction, +#' see_ancestral_states = FALSE) +#' +#' #[1] -113.1018 +#' @export +secsse_loglik <- function(parameter, + phy, traits, - states, - sampling_fraction, num_concealed_states, - d, - traitStates, - is_complete_tree, - phy, - ly, - mus, - nb_tip) { - if (anyNA(usetraits)) { - nas <- which(is.na(traits)) - for (iii in seq_along(nas)) { - states[nas[iii], ] <- c(1 - rep(sampling_fraction, - num_concealed_states), - rep(sampling_fraction, num_concealed_states)) - } - } - - for (iii in seq_along(traitStates)) { # Initial state probabilities - StatesPresents <- d + iii - toPlaceOnes <- StatesPresents + - length(traitStates) * (0:(num_concealed_states - 1)) - tipSampling <- 1 * sampling_fraction - states[which(usetraits == - traitStates[iii]), toPlaceOnes] <- tipSampling[iii] - } - - if (is_complete_tree) { - extinct_species <- geiger::is.extinct(phy) - if (!is.null(extinct_species)) { - for (i in seq_along(extinct_species)) { - states[which(phy$tip.label == extinct_species[i]), (d + 1):ly] <- - mus * states[which(phy$tip.label == extinct_species[i]), (d + 1):ly] - } - } - for (iii in 1:nb_tip) { - states[iii, 1:d] <- 0 - } - } else { - for (iii in 1:nb_tip) { - states[iii, 1:d] <- rep(1 - sampling_fraction, num_concealed_states) - } - } - - return(states) -} - - -build_states <- function(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree = FALSE, - mus = NULL, - num_unique_traits = NULL, - first_time = FALSE) { - if (!is.matrix(traits)) { - traits <- matrix(traits, nrow = length(traits), ncol = 1, byrow = FALSE) - } - - if (length(phy$tip.label) != nrow(traits)) { - stop("Number of species in the tree must be the same as in the trait file") - } - # if there are traits that are not in the observed tree, - # the user passes these themselves. - # yes, this is a weird use-case - - traitStates <- sort(unique(traits[, 1])) - - if (!is.null(num_unique_traits)) { - if (num_unique_traits > length(traitStates)) { - if (first_time) message("found un-observed traits, expanding state space") - traitStates <- 1:num_unique_traits - } - } - - nb_tip <- ape::Ntip(phy) - nb_node <- phy$Nnode - ly <- length(traitStates) * 2 * num_concealed_states - states <- matrix(ncol = ly, nrow = nb_tip + nb_node) - d <- ly / 2 - ## In a example of 3 states, the names of the colums would be like: - ## - ## colnames(states) <- c("E0A","E1A","E2A","E0B","E1B","E2B", - ## "D0A","D1A","D2A","D0B","D1B","D2B") - states[1:nb_tip, ] <- 0 - ## I repeat the process of state assignment as many times as columns I have - for (iv in seq_len(ncol(traits))) { - states <- create_states(traits[, iv], - traits, - states, - sampling_fraction, - num_concealed_states, - d, - traitStates, - is_complete_tree, - phy, - ly, - mus, - nb_tip) - } - return(states) -} - -build_initStates_time <- function(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree = FALSE, - mus = NULL, - num_unique_traits = NULL, - first_time = FALSE) { - states <- build_states(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus, - num_unique_traits, - first_time) - phy$node.label <- NULL - split_times <- sort(event_times(phy), decreasing = FALSE) - ances <- as.numeric(names(split_times)) - - forTime <- cbind(phy$edge, phy$edge.length) - - return(list( - states = states, - ances = ances, - forTime = forTime - )) + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + setting_calculation = NULL, + see_ancestral_states = FALSE, + loglik_penalty = 0, + is_complete_tree = FALSE, + num_threads = 1, + atol = 1e-8, + rtol = 1e-7, + method = "odeint::bulirsch_stoer") { + master_loglik(parameter = parameter, + phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + setting_calculation = setting_calculation, + see_ancestral_states = see_ancestral_states, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method) } - -get_weight_states <- function(root_state_weight, +#' @title Likelihood for SecSSE model, using Rcpp +#' Loglikelihood calculation for the cla_SecSSE model given a set of parameters +#' and data using Rcpp +#' +#' @inheritParams default_params_doc +#' +#' @return The loglikelihood of the data given the parameters +#' @examples +#'rm(list=ls(all=TRUE)) +#'library(secsse) +#'set.seed(13) +#'phylotree <- ape::rcoal(12, tip.label = 1:12) +#'traits <- sample(c(0,1,2),ape::Ntip(phylotree),replace=TRUE) +#'num_concealed_states <- 3 +#'sampling_fraction <- c(1,1,1) +#'phy <- phylotree +#'# the idparlist for a ETD model (dual state inheritance model of evolution) +#'# would be set like this: +#'idparlist <- cla_id_paramPos(traits,num_concealed_states) +#'lambd_and_modeSpe <- idparlist$lambdas +#'lambd_and_modeSpe[1,] <- c(1,1,1,2,2,2,3,3,3) +#'idparlist[[1]] <- lambd_and_modeSpe +#'idparlist[[2]][] <- 0 +#'masterBlock <- matrix(4,ncol=3,nrow=3,byrow=TRUE) +#'diag(masterBlock) <- NA +#'idparlist [[3]] <- q_doubletrans(traits,masterBlock,diff.conceal = FALSE) +#'# Now, internally, clasecsse sorts the lambda matrices, so they look like: +#'prepare_full_lambdas(traits,num_concealed_states,idparlist[[1]]) +#'# which is a list with 9 matrices, corresponding to the 9 states +#'# (0A,1A,2A,0B,etc) +#'# if we want to calculate a single likelihood: +#'parameter <- idparlist +#'lambda_and_modeSpe <- parameter$lambdas +#'lambda_and_modeSpe[1,] <- c(0.2,0.2,0.2,0.4,0.4,0.4,0.01,0.01,0.01) +#'parameter[[1]] <- prepare_full_lambdas(traits,num_concealed_states, +#'lambda_and_modeSpe) +#'parameter[[2]] <- rep(0,9) +#'masterBlock <- matrix(0.07, ncol=3, nrow=3, byrow=TRUE) +#'diag(masterBlock) <- NA +#'parameter [[3]] <- q_doubletrans(traits,masterBlock,diff.conceal = FALSE) +#'cla_secsse_loglik(parameter, phy, traits, num_concealed_states, +#' cond = 'maddison_cond', +#' root_state_weight = 'maddison_weights', sampling_fraction, +#' setting_calculation = NULL, +#' see_ancestral_states = FALSE, +#' loglik_penalty = 0) +#'# LL = -42.18407 +#' @export +cla_secsse_loglik <- function(parameter, + phy, + traits, num_concealed_states, - mergeBranch, - lambdas, - nodeM, - d, - is_cla = FALSE) { - - if (is.numeric(root_state_weight)) { - weight_states <- rep(root_state_weight / num_concealed_states, - num_concealed_states) - } else { - if (root_state_weight == "maddison_weights") { - weight_states <- (mergeBranch) / sum((mergeBranch)) - } - - if (root_state_weight == "proper_weights") { - if (is_cla) { - lmb <- length(mergeBranch) - numerator <- rep(NA, lmb) - for (j in 1:lmb) { - numerator[j] <- mergeBranch[j] / sum(lambdas[[j]] * - ((1 - nodeM[1:d]) %o% (1 - nodeM[1:d]))) - } - weight_states <- numerator / sum(numerator) # nolint - } else { - weight_states <- (mergeBranch / - (lambdas * (1 - nodeM[1:d]) ^ 2)) / - sum((mergeBranch / (lambdas * (1 - nodeM[1:d]) ^ 2))) - } - } - - if (root_state_weight == "equal_weights") { - weight_states <- rep(1 / length(mergeBranch), length(mergeBranch)) - } - } - - return(weight_states) + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + setting_calculation = NULL, + see_ancestral_states = FALSE, + loglik_penalty = 0, + is_complete_tree = FALSE, + num_threads = 1, + method = "odeint::bulirsch_stoer", + atol = 1e-8, + rtol = 1e-7) { + master_loglik(parameter, + phy, + traits, + num_concealed_states, + cond, + root_state_weight, + sampling_fraction, + setting_calculation, + see_ancestral_states, + loglik_penalty, + is_complete_tree, + num_threads, + atol, + rtol, + method) } diff --git a/R/secsse_loglik_eval.R b/R/secsse_loglik_eval.R deleted file mode 100644 index 1e3ee19..0000000 --- a/R/secsse_loglik_eval.R +++ /dev/null @@ -1,124 +0,0 @@ -#' Logikelihood calculation for the SecSSE model given a set of parameters and -#' data, returning also the likelihoods along the branches -#' @title Likelihood for SecSSE model -#' @param parameter list where first vector represents lambdas, the second mus -#' and the third transition rates. -#' @param phy phylogenetic tree of class phylo, ultrametric, fully-resolved, -#' rooted and with branch lengths. -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. -#' @param ancestral_states ancestral states matrix provided by -#' secsse_loglik, this is used as starting points for the branch integration -#' @param cond condition on the existence of a node root: "maddison_cond", -#' "proper_cond"(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states:"maddison_weights", -#' "proper_weights"(default) or "equal_weights". It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per -#' trait state. It must have as many elements as trait states. -#' @param setting_calculation argument used internally to speed up calculation. -#' It should be left blank (default : setting_calculation = NULL) -#' @param loglik_penalty the size of the penalty for all parameters; default is -#' 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @param num_steps number of substeps to show intermediate likelihoods -#' along a branch, if left to NULL, the intermediate likelihoods at every -#' integration evaluation are stored, which is more exact, but can lead to -#' huge datasets / memory usage. -#' @param verbose provides intermediate output if TRUE -#' @return The loglikelihood of the data given the parameters -#' @examples -#' #' set.seed(5) -#' focal_tree <- ape::rphylo(n = 4, birth = 1, death = 0) -#' traits <- c(0, 1, 1, 0) -#' params <- secsse::id_paramPos(c(0, 1), 2) -#' params[[1]][] <- c(0.2, 0.2, 0.1, 0.1) -#' params[[2]][] <- 0.0 -#' params[[3]][, ] <- 0.1 -#' diag(params[[3]]) <- NA -#' # Thus, we have for both, rates -#' # 0A, 1A, 0B and 1B. If we are interested in the posterior probability of -#' # trait 0 we have to provide a helper function that sums the probabilities of -#' # 0A and 0B, e.g.: -#' helper_function <- function(x) { -#' return(sum(x[c(5, 7)]) / sum(x)) # normalized by total sum, just in case. -#' } -#' ll <- secsse::secsse_loglik(parameter = params, -#' phy = focal_tree, -#' traits = traits, -#' num_concealed_states = 2, -#' sampling_fraction = c(1, 1), -#' see_ancestral_states = TRUE) -#' -#' secsse_loglik_eval(parameter = params, -#' phy = focal_tree, -#' traits = traits, -#' ancestral_states = ll$states, -#' num_concealed_states = 2, -#' sampling_fraction = c(1, 1), -#' num_steps = 10) -#' @export -secsse_loglik_eval <- function(parameter, - phy, - traits, - num_concealed_states, - ancestral_states, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - setting_calculation = NULL, - loglik_penalty = 0, - is_complete_tree = FALSE, - atol = 1e-12, - rtol = 1e-12, - method = "odeint::bulirsch_stoer", - num_steps = NULL, - verbose = FALSE) { - lambdas <- parameter[[1]] - mus <- parameter[[2]] - parameter[[3]][is.na(parameter[[3]])] <- 0 - q_matrix <- parameter[[3]] - - if (is.null(setting_calculation)) { - check_input(traits, - phy, - sampling_fraction, - root_state_weight, - is_complete_tree) - setting_calculation <- build_initStates_time(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus) - } - - for_time <- setting_calculation$forTime - ances <- setting_calculation$ances - - - calcul <- calThruNodes_store_cpp(ances, - ancestral_states, - for_time, - lambdas, - mus, - q_matrix, - 1, - atol, - rtol, - method, - is_complete_tree, - ifelse(is.null(num_steps), 0, num_steps), - verbose) - # if the number of steps == NULL, pass a 0. - return(calcul) -} diff --git a/R/secsse_ml.R b/R/secsse_ml.R old mode 100755 new mode 100644 index 0e20345..654d333 --- a/R/secsse_ml.R +++ b/R/secsse_ml.R @@ -1,51 +1,176 @@ +#' @keywords internal +master_ml <- function(phy, + traits, + num_concealed_states, + idparslist, + idparsopt, + initparsopt, + idparsfix, + parsfix, + idfactorsopt = NULL, + initfactors = NULL, + idparsfuncdefpar = NULL, + functions_defining_params = NULL, + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + tol = c(1e-04, 1e-05, 1e-07), + maxiter = 1000 * round((1.25)^length(idparsopt)), + optimmethod = "subplex", + num_cycles = 1, + loglik_penalty = 0, + is_complete_tree = FALSE, + verbose = (optimmethod == "simplex"), + num_threads = 1, + atol = 1e-8, + rtol = 1e-7, + method = "odeint::bulirsch_stoer") { + + structure_func <- NULL + if (!is.null(functions_defining_params)) { + structure_func <- set_and_check_structure_func(idparsfuncdefpar, + functions_defining_params, + idparslist, + idparsopt, + idfactorsopt, + idparsfix, + initfactors) + } else { + if (identical(as.numeric(sort(c(idparsopt, idparsfix))), + as.numeric(sort(unique(unlist(idparslist))))) == FALSE) { + stop("All elements in idparslist must be included in either + idparsopt or idparsfix ") + } + } + + check_ml_conditions(traits, + idparslist, + initparsopt, + idparsopt, + idparsfix, + parsfix) + + if (is.matrix(idparslist[[1]])) { + ## it is a tailor case otherwise + idparslist[[1]] <- prepare_full_lambdas(traits, + num_concealed_states, + idparslist[[1]]) + } + + see_ancestral_states <- FALSE + if (!is.null(structure_func)) { + initparsopt <- c(initparsopt, initfactors) + } + + trparsopt <- initparsopt / (1 + initparsopt) + trparsopt[which(initparsopt == Inf)] <- 1 + trparsfix <- parsfix / (1 + parsfix) + trparsfix[which(parsfix == Inf)] <- 1 + + mus <- calc_mus(is_complete_tree, + idparslist, + idparsfix, + parsfix, + idparsopt, + initparsopt) + optimpars <- c(tol, maxiter) + + num_modeled_traits <- length(idparslist[[1]]) / num_concealed_states + + setting_calculation <- build_initStates_time(phy, + traits, + num_concealed_states, + sampling_fraction, + is_complete_tree, + mus, + num_modeled_traits) + + initloglik <- secsse_loglik_choosepar(trparsopt = trparsopt, + trparsfix = trparsfix, + idparsopt = idparsopt, + idparsfix = idparsfix, + idparslist = idparslist, + structure_func = structure_func, + phy = phy, + traits = traits, + num_concealed_states = + num_concealed_states, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + setting_calculation = + setting_calculation, + see_ancestral_states = + see_ancestral_states, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + verbose = verbose, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method) + # Function here + print_init_ll(initloglik = initloglik, verbose = verbose) + if (initloglik == -Inf) { + stop("The initial parameter values have a likelihood that is + equal to 0 or below machine precision. + Try again with different initial values.") + } else { + out <- DDD::optimizer(optimmethod = optimmethod, + optimpars = optimpars, + fun = secsse_loglik_choosepar, + trparsopt = trparsopt, + idparsopt = idparsopt, + trparsfix = trparsfix, + idparsfix = idparsfix, + idparslist = idparslist, + structure_func = structure_func, + phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + setting_calculation = setting_calculation, + see_ancestral_states = see_ancestral_states, + num_cycles = num_cycles, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + verbose = verbose, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method) + if (out$conv != 0) { + stop("Optimization has not converged. + Try again with different initial values.") + } else { + ml_pars1 <- secsse_transform_parameters(as.numeric(unlist(out$par)), + trparsfix, + idparsopt, + idparsfix, + idparslist, + structure_func) + out2 <- list(MLpars = ml_pars1, + ML = as.numeric(unlist(out$fvalues)), + conv = out$conv) + } + } + return(out2) +} + +#' Maximum likehood estimation for (SecSSE) +#' #' Maximum likehood estimation under Several examined and concealed #' States-dependent Speciation and Extinction (SecSSE) -#' @title Maximum likehood estimation for (SecSSE) -#' @param phy phylogenetic tree of class phylo, ultrametric, rooted and with -#' branch lengths. -#' @param traits a vector with trait states for each tip in the phylogeny. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to the number of examined states in the dataset. -#' @param idparslist overview of parameters and their values. -#' @param idparsopt id of parameters to be estimated. -#' @param initparsopt initial guess of the parameters to be estimated. -#' @param idparsfix id of the fixed parameters. -#' @param parsfix value of the fixed parameters. -#' @param cond condition on the existence of a node root: 'maddison_cond', -#' 'proper_cond'(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states: -#' 'maddison_weights','proper_weights'(default) or 'equal_weights'. -#' It can also be specified the -#' root state:the vector c(1,0,0) indicates state 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per -#' trait state. It must have as many elements as there are trait states. -#' @param tol maximum tolerance. Default is 'c(1e-04, 1e-05, 1e-05)'. -#' @param maxiter max number of iterations. -#' Default is '1000 *round((1.25)^length(idparsopt))'. -#' @param optimmethod method used for optimization. Available are simplex and -#' subplex, default is 'subplex'. Simplex should only be used for debugging. -#' @param num_cycles number of cycles of the optimization (default is 1). -#' @param loglik_penalty the size of the penalty for all parameters; default -#' is 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param verbose sets verbose output; default is verbose when optimmethod is -#' 'simplex' -#' @param num_threads number of threads. Set to -1 to use all available threads. -#' Default is one thread. -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". +#' @inheritParams default_params_doc +#' #' @return Parameter estimated and maximum likelihood #' @examples #'# Example of how to set the arguments for a ML search. #'library(secsse) #'library(DDD) #'set.seed(13) -#'# Check the vignette for a better working exercise. #'# lambdas for 0A and 1A and 2A are the same but need to be estimated #'# mus are fixed to #'# the transition rates are constrained to be equal and fixed 0.01 @@ -61,7 +186,7 @@ #'diag(masterBlock) <- NA #'diff.conceal <- FALSE #'idparslist[[3]] <- q_doubletrans(traits,masterBlock,diff.conceal) -#'startingpoint <- bd_ML(brts = ape::branching.times(phylotree)) +#'startingpoint <- DDD::bd_ML(brts = ape::branching.times(phylotree)) #'intGuessLamba <- startingpoint$lambda0 #'intGuessMu <- startingpoint$mu0 #'idparsopt <- c(1,2,3,5) @@ -111,374 +236,38 @@ secsse_ml <- function(phy, num_cycles = 1, loglik_penalty = 0, is_complete_tree = FALSE, - verbose = (optimmethod == "subplex"), + verbose = (optimmethod == "simplex"), num_threads = 1, atol = 1e-8, rtol = 1e-7, method = "odeint::bulirsch_stoer") { - - structure_func <- NULL - check_input(traits, - phy, - sampling_fraction, - root_state_weight, - is_complete_tree) - - if (is.matrix(traits)) { - warning("You are setting a model where some species had more than - one trait state.") - } - - if (length(initparsopt) != length(idparsopt)) { - stop("initparsopt must be the same length as idparsopt. - Number of parameters to optimize does not match the number of - initial values for the search") - } - - if (length(idparsfix) != length(parsfix)) { - stop("idparsfix and parsfix must be the same length. - Number of fixed elements does not match the fixed figures") - } - - if (anyDuplicated(c(idparsopt, idparsfix)) != 0) { - stop("At least one element was asked to be both fixed and estimated ") - } - - if (identical(as.numeric(sort(c(idparsopt, idparsfix))), - as.numeric(sort(unique(unlist(idparslist))))) == FALSE) { - stop("All elements in idparslist must be included in either - idparsopt or idparsfix ") - } - - if (anyDuplicated(c(unique(sort(as.vector(idparslist[[3]]))), - idparsfix[which(parsfix == 0)])) != 0) { - warning("You set some transitions as impossible to happen") - } - - see_ancestral_states <- FALSE - - utils::flush.console() - trparsopt <- initparsopt / (1 + initparsopt) - trparsopt[which(initparsopt == Inf)] <- 1 - trparsfix <- parsfix / (1 + parsfix) - trparsfix[which(parsfix == Inf)] <- 1 - mus <- calc_mus(is_complete_tree, - idparslist, - idparsfix, - parsfix, - idparsopt, - initparsopt) - optimpars <- c(tol, maxiter) - - setting_calculation <- build_initStates_time(phy, - traits, - num_concealed_states, - sampling_fraction, - is_complete_tree, - mus) - - initloglik <- secsse_loglik_choosepar(trparsopt = trparsopt, - trparsfix = trparsfix, - idparsopt = idparsopt, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = - num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = - setting_calculation, - see_ancestral_states = - see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - - print_init_ll(initloglik = initloglik, verbose = verbose) - - if (initloglik == -Inf) { - stop("The initial parameter values have a likelihood that is - equal to 0 or below machine precision. - Try again with different initial values.") - } else { - if (is_complete_tree == TRUE) { - setting_calculation <- NULL - } - out <- DDD::optimizer(optimmethod = optimmethod, - optimpars = optimpars, - fun = secsse_loglik_choosepar, - trparsopt = trparsopt, - idparsopt = idparsopt, - trparsfix = trparsfix, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = setting_calculation, - see_ancestral_states = see_ancestral_states, - num_cycles = num_cycles, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - if (out$conv != 0) { - stop("Optimization has not converged. - Try again with different initial values.\n") - } else { - MLpars1 <- secsse_transform_parameters(as.numeric(unlist(out$par)), - trparsfix, - idparsopt, - idparsfix, - idparslist, - structure_func) - out2 <- list(MLpars = MLpars1, - ML = as.numeric(unlist(out$fvalues)), - conv = out$conv) - } - } - return(out2) -} - -#' @keywords internal -transf_funcdefpar <- function(idparsfuncdefpar, - functions_defining_params, - idfactorsopt, - trparsfix, - trparsopt, - idparsfix, - idparsopt) { - trparfuncdefpar <- NULL - ids_all <- c(idparsfix, idparsopt) - - values_all <- c(trparsfix / (1 - trparsfix), - trparsopt / (1 - trparsopt)) - a_new_envir <- new.env() - x <- as.list(values_all) ## To declare all the ids as variables - - if (is.null(idfactorsopt)) { - names(x) <- paste0("par_", ids_all) - } else { - names(x) <- c(paste0("par_", ids_all), paste0("factor_", idfactorsopt)) - } - list2env(x, envir = a_new_envir) - - for (jj in seq_along(functions_defining_params)) { - myfunc <- functions_defining_params[[jj]] - environment(myfunc) <- a_new_envir - value_func_defining_parm <- local(myfunc(), envir = a_new_envir) - - ## Now, declare the variable that is just calculated, so it is available - ## for the next calculation if needed - y <- as.list(value_func_defining_parm) - names(y) <- paste0("par_", idparsfuncdefpar[jj]) - list2env(y, envir = a_new_envir) - - if (is.numeric(value_func_defining_parm) == FALSE) { - stop("Something went wrong with the calculation of - parameters in 'functions_param_struct'") - } - trparfuncdefpar <- c(trparfuncdefpar, value_func_defining_parm) - } - trparfuncdefpar <- trparfuncdefpar / (1 + trparfuncdefpar) - rm(a_new_envir) - return(trparfuncdefpar) -} - -#' @keywords internal -update_values_transform_cla <- function(trpars, - idparslist, - idpars, - parvals) { - for (i in seq_along(idpars)) { - for (j in seq_len(nrow(trpars[[3]]))) { - id <- which(idparslist[[1]][[j]] == idpars[i]) - trpars[[1]][[j]][id] <- parvals[i] - } - for (j in 2:3) { - id <- which(idparslist[[j]] == idpars[i]) - trpars[[j]][id] <- parvals[i] - } - } - return(trpars) -} - -#' @keywords internal -transform_params_cla <- function(idparslist, - idparsfix, - trparsfix, - idparsopt, - trparsopt, - structure_func, - idparsfuncdefpar, - trparfuncdefpar) { - trpars1 <- idparslist - for (j in seq_len(nrow(trpars1[[3]]))) { - trpars1[[1]][[j]][, ] <- NA - } - - for (j in 2:3) { - trpars1[[j]][] <- NA - } - - if (length(idparsfix) != 0) { - trpars1 <- update_values_transform_cla(trpars1, - idparslist, - idparsfix, - trparsfix) - } - - trpars1 <- update_values_transform_cla(trpars1, - idparslist, - idparsopt, - trparsopt) - ## structure_func part - if (!is.null(structure_func)) { - trpars1 <- update_values_transform_cla(trpars1, - idparslist, - idparsfuncdefpar, - trparfuncdefpar) - } - - pre_pars1 <- list() - pars1 <- list() - - for (j in seq_len(nrow(trpars1[[3]]))) { - pre_pars1[[j]] <- trpars1[[1]][[j]][, ] / (1 - trpars1[[1]][[j]][, ]) - } - - pars1[[1]] <- pre_pars1 - for (j in 2:3) { - pars1[[j]] <- trpars1[[j]] / (1 - trpars1[[j]]) - } - - return(pars1) -} - -#' @keywords internal -update_values_transform <- function(trpars, - idparslist, - idpars, - parvals) { - for (i in seq_along(idpars)) { - for (j in 1:3) { - id <- which(idparslist[[j]] == idpars[i]) - trpars[[j]][id] <- parvals[i] - } - } - return(trpars) + master_ml(phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + idparslist = idparslist, + idparsopt = idparsopt, + initparsopt = initparsopt, + idparsfix = idparsfix, + parsfix = parsfix, + initfactors = NULL, + idparsfuncdefpar = NULL, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + tol = tol, + maxiter = maxiter, + optimmethod = optimmethod, + num_cycles = num_cycles, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + verbose = verbose, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method) } #' @keywords internal -transform_params_normal <- function(idparslist, - idparsfix, - trparsfix, - idparsopt, - trparsopt, - structure_func, - idparsfuncdefpar, - trparfuncdefpar) { - trpars1 <- idparslist - for (j in 1:3) { - trpars1[[j]][] <- NA - } - if (length(idparsfix) != 0) { - trpars1 <- update_values_transform(trpars1, - idparslist, - idparsfix, - trparsfix) - } - - trpars1 <- update_values_transform(trpars1, - idparslist, - idparsopt, - trparsopt) - - ## if structure_func part - if (is.null(structure_func) == FALSE) { - trpars1 <- update_values_transform(trpars1, - idparslist, - idparsfuncdefpar, - trparfuncdefpar) - } - pars1 <- list() - for (j in 1:3) { - pars1[[j]] <- trpars1[[j]] / (1 - trpars1[[j]]) - } - return(pars1) -} - -#' @keywords internal -secsse_transform_parameters <- function(trparsopt, - trparsfix, - idparsopt, - idparsfix, - idparslist, - structure_func) { - if (!is.null(structure_func)) { - idparsfuncdefpar <- structure_func[[1]] - functions_defining_params <- structure_func[[2]] - - if (length(structure_func[[3]]) > 1) { - idfactorsopt <- structure_func[[3]] - } else { - if (structure_func[[3]] == "noFactor") { - idfactorsopt <- NULL - } else { - idfactorsopt <- structure_func[[3]] - } - } - - trparfuncdefpar <- transf_funcdefpar(idparsfuncdefpar = - idparsfuncdefpar, - functions_defining_params = - functions_defining_params, - idfactorsopt = idfactorsopt, - trparsfix = trparsfix, - trparsopt = trparsopt, - idparsfix = idparsfix, - idparsopt = idparsopt) - } - - if (is.list(idparslist[[1]])) { - # when the ml function is called from cla_secsse - pars1 <- transform_params_cla(idparslist, - idparsfix, - trparsfix, - idparsopt, - trparsopt, - structure_func, - idparsfuncdefpar, - trparfuncdefpar) - } else { - # when non-cla option is called - pars1 <- transform_params_normal(idparslist, - idparsfix, - trparsfix, - idparsopt, - trparsopt, - structure_func, - idparsfuncdefpar, - trparfuncdefpar) - } - return(pars1) -} - secsse_loglik_choosepar <- function(trparsopt, trparsfix, idparsopt, @@ -500,63 +289,152 @@ secsse_loglik_choosepar <- function(trparsopt, atol = atol, rtol = rtol, method = method) { - alltrpars <- c(trparsopt, trparsfix) - if (max(alltrpars) > 1 || min(alltrpars) < 0) { - loglik <- -Inf - } else { - pars1 <- secsse_transform_parameters(trparsopt, trparsfix, - idparsopt, idparsfix, - idparslist, structure_func) - - if (is.list(pars1[[1]])) { - # is the cla_ used? - loglik <- secsse::cla_secsse_loglik(parameter = pars1, - phy = phy, - traits = traits, - num_concealed_states = - num_concealed_states, - cond = cond, - root_state_weight = - root_state_weight, - sampling_fraction = - sampling_fraction, - setting_calculation = - setting_calculation, - see_ancestral_states = - see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = - is_complete_tree, - num_threads = num_threads, - method = method, - atol = atol, - rtol = rtol) - } else { - loglik <- secsse_loglik(parameter = pars1, - phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = setting_calculation, - see_ancestral_states = see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method) - } - if (is.nan(loglik) || is.na(loglik)) { - warning("There are parameter values used which cause + alltrpars <- c(trparsopt, trparsfix) + if (max(alltrpars) > 1 || min(alltrpars) < 0) { + loglik <- -Inf + } else { + pars1 <- secsse_transform_parameters(trparsopt, trparsfix, + idparsopt, idparsfix, + idparslist, structure_func) + + loglik <- master_loglik(parameter = pars1, + phy = phy, + traits = traits, + num_concealed_states = + num_concealed_states, + cond = cond, + root_state_weight = + root_state_weight, + sampling_fraction = + sampling_fraction, + setting_calculation = + setting_calculation, + see_ancestral_states = + see_ancestral_states, + loglik_penalty = loglik_penalty, + is_complete_tree = + is_complete_tree, + num_threads = num_threads, + method = method, + atol = atol, + rtol = rtol) + + if (is.nan(loglik) || is.na(loglik)) { + warning("There are parameter values used which cause numerical problems.") - loglik <- -Inf - } - } - if (verbose) { - out_print <- c(trparsopt / (1 - trparsopt), loglik) - message(paste(out_print, collapse = " ")) + loglik <- -Inf } - return(loglik) + } + if (verbose) { + out_print <- c(trparsopt / (1 - trparsopt), loglik) + message(paste(out_print, collapse = " ")) + } + return(loglik) +} + +#' Maximum likehood estimation for (SecSSE) +#' +#' Maximum likehood estimation under Several examined and concealed +#' States-dependent Speciation and Extinction (SecSSE) with cladogenetic option +#' +#' @inheritParams default_params_doc +#' +#' @return Parameter estimated and maximum likelihood +#' @examples +#'# Example of how to set the arguments for a ML search. +#'library(secsse) +#'library(DDD) +#'set.seed(13) +#'# Check the vignette for a better working exercise. +#'# lambdas for 0A and 1A and 2A are the same but need to be estimated +#'# (CTD model, see Syst Biol paper) +#'# mus are fixed to zero, +#'# the transition rates are constrained to be equal and fixed 0.01 +#'phylotree <- ape::rcoal(31, tip.label = 1:31) +#'#get some traits +#'traits <- sample(c(0,1,2), ape::Ntip(phylotree), replace = TRUE) +#'num_concealed_states <- 3 +#'idparslist <- cla_id_paramPos(traits,num_concealed_states) +#'idparslist$lambdas[1,] <- c(1,1,1,2,2,2,3,3,3) +#'idparslist[[2]][] <- 4 +#'masterBlock <- matrix(5,ncol = 3,nrow = 3,byrow = TRUE) +#'diag(masterBlock) <- NA +#'diff.conceal <- FALSE +#'idparslist[[3]] <- q_doubletrans(traits,masterBlock,diff.conceal) +#'startingpoint <- bd_ML(brts = ape::branching.times(phylotree)) +#'intGuessLamba <- startingpoint$lambda0 +#'intGuessMu <- startingpoint$mu0 +#'idparsopt <- c(1,2,3) +#'initparsopt <- c(rep(intGuessLamba,3)) +#'idparsfix <- c(0,4,5) +#'parsfix <- c(0,0,0.01) +#'tol <- c(1e-04, 1e-05, 1e-07) +#'maxiter <- 1000 * round((1.25) ^ length(idparsopt)) +#'optimmethod <- 'subplex' +#'cond <- 'proper_cond' +#'root_state_weight <- 'proper_weights' +#'sampling_fraction <- c(1,1,1) +#'model <- cla_secsse_ml( +#' phylotree, +#' traits, +#' num_concealed_states, +#' idparslist, +#' idparsopt, +#' initparsopt, +#' idparsfix, +#' parsfix, +#' cond, +#' root_state_weight, +#' sampling_fraction, +#' tol, +#' maxiter, +#' optimmethod, +#' num_cycles = 1, +#' verbose = FALSE) +#' # [1] -90.97626 +#' @export +cla_secsse_ml <- function(phy, + traits, + num_concealed_states, + idparslist, + idparsopt, + initparsopt, + idparsfix, + parsfix, + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + tol = c(1e-04, 1e-05, 1e-07), + maxiter = 1000 * round((1.25)^length(idparsopt)), + optimmethod = "subplex", + num_cycles = 1, + loglik_penalty = 0, + is_complete_tree = FALSE, + verbose = (optimmethod == "simplex"), + num_threads = 1, + atol = 1e-8, + rtol = 1e-7, + method = "odeint::bulirsch_stoer") { + master_ml(phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + idparslist = idparslist, + idparsopt = idparsopt, + initparsopt = initparsopt, + idparsfix = idparsfix, + parsfix = parsfix, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + tol = tol, + maxiter = maxiter, + optimmethod = optimmethod, + num_cycles = num_cycles, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + verbose = verbose, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method) } diff --git a/R/secsse_ml_func_def_pars.R b/R/secsse_ml_func_def_pars.R old mode 100755 new mode 100644 index a693127..ea0e238 --- a/R/secsse_ml_func_def_pars.R +++ b/R/secsse_ml_func_def_pars.R @@ -1,55 +1,12 @@ +#' Maximum likehood estimation for (SecSSE) with parameter as complex +#' functions. +#' #' Maximum likehood estimation under Several examined and concealed #' States-dependent Speciation and Extinction (SecSSE) where some paramaters #' are functions of other parameters and/or factors. -#' @title Maximum likehood estimation for (SecSSE) with parameter as complex -#' functions. -#' @param phy phylogenetic tree of class phylo, ultrametric, rooted and with -#' branch lengths. -#' @param traits a vector with trait states for each tip in the phylogeny. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to the number of examined states in the dataset. -#' @param idparslist overview of parameters and their values. -#' @param idparsopt id of parameters to be estimated. -#' @param initparsopt initial guess of the parameters to be estimated. -#' @param idfactorsopt id of the factors that will be optimized. There are not -#' fixed factors, so use a constant within 'functions_defining_params'. -#' @param initfactors the initial guess for a factor (it should be set to NULL -#' when no factors). -#' @param idparsfix id of the fixed parameters (it should be set to NULL when -#' there are no factors). -#' @param parsfix value of the fixed parameters. -#' @param idparsfuncdefpar id of the parameters which will be a function of -#' optimized and/or fixed parameters. The order of id should match -#' functions_defining_params -#' @param functions_defining_params a list of functions. Each element will be a -#' function which defines a parameter e.g. id_3 <- (id_1+id_2)/2. See example -#' and vigenette -#' @param cond condition on the existence of a node root: -#' "maddison_cond","proper_cond"(default). For details, see vignette. -#' @param root_state_weight the method to weigh the states: -#' "maddison_weights","proper_weights"(default) or "equal_weights". It can also -#' be specified the root state:the vector c(1, 0, 0) indicates state -#' 1 was the root state. -#' @param sampling_fraction vector that states the sampling proportion per trait -#' state. It must have as many elements as there are trait states. -#' @param tol maximum tolerance. Default is "c(1e-04, 1e-05, 1e-05)". -#' @param maxiter max number of iterations. Default is -#' "1000 *round((1.25)^length(idparsopt))". -#' @param optimmethod method used for optimization. Default is "simplex". -#' @param num_cycles number of cycles of the optimization (default is 1). -#' @param loglik_penalty the size of the penalty for all parameters; -#' default is 0 (no penalty) -#' @param is_complete_tree whether or not a tree with all its extinct species -#' is provided -#' @param num_threads number of threads. Set to -1 to use all available threads. -#' Default is one thread. -#' @param atol absolute tolerance of integration -#' @param rtol relative tolerance of integration -#' @param method integration method used, available are: -#' "odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -#' "odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -#' "odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer". -#' @return Parameter estimated and maximum likelihood +#' +#' @inheritParams default_params_doc +#' #' @return Parameter estimated and maximum likelihood #' @examples #'# Example of how to set the arguments for a ML search. @@ -140,14 +97,14 @@ secsse_ml_func_def_pars <- function(phy, idparsfix, parsfix, idparsfuncdefpar, - functions_defining_params, + functions_defining_params = NULL, cond = "proper_cond", root_state_weight = "proper_weights", sampling_fraction, tol = c(1E-4, 1E-5, 1E-7), maxiter = 1000 * - round((1.25) ^ length(idparsopt)), - optimmethod = "simplex", + round((1.25) ^ length(idparsopt)), + optimmethod = "subplex", num_cycles = 1, loglik_penalty = 0, is_complete_tree = FALSE, @@ -155,178 +112,175 @@ secsse_ml_func_def_pars <- function(phy, atol = 1e-8, rtol = 1e-6, method = "odeint::bulirsch_stoer") { + return(master_ml(phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + idparslist = idparslist, + idparsopt = idparsopt, + initparsopt = initparsopt, + idparsfix = idparsfix, + parsfix = parsfix, + idfactorsopt = idfactorsopt, + initfactors = initfactors, + idparsfuncdefpar = idparsfuncdefpar, + functions_defining_params = functions_defining_params, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + tol = tol, + maxiter = maxiter, + optimmethod = optimmethod, + num_cycles = num_cycles, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method)) +} - structure_func <- list() - structure_func[[1]] <- idparsfuncdefpar - structure_func[[2]] <- functions_defining_params - if (is.null(idfactorsopt)) { - structure_func[[3]] <- "noFactor" - } else { - structure_func[[3]] <- idfactorsopt - } - - see_ancestral_states <- FALSE - if (is.null(idfactorsopt) == FALSE) { - if (length(initfactors) != length(idfactorsopt)) { - stop("idfactorsopt should have the same length as initfactors.") - } - } - - if (is.list(functions_defining_params) == FALSE) { - stop( - "The argument functions_defining_params should be a list of - functions. See example and vignette" - ) - } - - if (length(functions_defining_params) != length(idparsfuncdefpar)) { - stop( - "The argument functions_defining_params should have the same - length than idparsfuncdefpar" - ) - } - - if (is.matrix(traits)) { - warning("You are setting a model where some species had more than - one trait state") - } - - if (length(initparsopt) != length(idparsopt)) { - stop( - "initparsopt must be the same length as idparsopt. - Number of parameters to optimize does not match the number of - initial values for the search" - ) - } - - if (length(idparsfix) != length(parsfix)) { - stop( - "idparsfix and parsfix must be the same length. - Number of fixed elements does not match the fixed figures" - ) - } - - if (anyDuplicated(c(idparsopt, idparsfix, idparsfuncdefpar)) != 0) { - stop("At least one element was asked to be fixed, - estimated or a function at the same time") - } - - if (identical(as.numeric(sort( - c(idparsopt, idparsfix, idparsfuncdefpar) - )), as.numeric(sort(unique( - unlist(idparslist) - )))) == FALSE) { - stop( - "All elements in idparslist must be included in either - idparsopt or idparsfix or idparsfuncdefpar " - ) - } - - if (anyDuplicated(c(unique(sort( - as.vector(idparslist[[3]]) - )), idparsfix[which(parsfix == 0)])) != 0) { - warning("You set some transitions as impossible to happen") - } - - initparsopt2 <- c(initparsopt, initfactors) - - trparsopt <- initparsopt2 / (1 + initparsopt2) - trparsopt[which(initparsopt2 == Inf)] <- 1 - trparsfix <- parsfix / (1 + parsfix) - trparsfix[which(parsfix == Inf)] <- 1 - - mus <- calc_mus(is_complete_tree, idparslist, idparsfix, - parsfix, idparsopt, initparsopt) - - optimpars <- c(tol, maxiter) - - setting_calculation <- - build_initStates_time(phy, traits, num_concealed_states, - sampling_fraction, is_complete_tree, mus) - - if (optimmethod == "subplex") { - verbose <- TRUE - } else { - verbose <- FALSE - } - - initloglik <- - secsse_loglik_choosepar( - trparsopt = trparsopt, - trparsfix = trparsfix, - idparsopt = idparsopt, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = - num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = - setting_calculation, - see_ancestral_states = see_ancestral_states, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method - ) - print_init_ll(initloglik = initloglik, verbose = verbose) - if (initloglik == -Inf) { - stop("The initial parameter values have a likelihood that is equal to 0 - or below machine precision. Try again with different initial values." - ) - } else { - out <- - DDD::optimizer( - optimmethod = optimmethod, - optimpars = optimpars, - fun = secsse_loglik_choosepar, - trparsopt = trparsopt, - idparsopt = idparsopt, - trparsfix = trparsfix, - idparsfix = idparsfix, - idparslist = idparslist, - structure_func = structure_func, - phy = phy, - traits = traits, - num_concealed_states = num_concealed_states, - cond = cond, - root_state_weight = root_state_weight, - sampling_fraction = sampling_fraction, - setting_calculation = setting_calculation, - see_ancestral_states = see_ancestral_states, - num_cycles = num_cycles, - loglik_penalty = loglik_penalty, - is_complete_tree = is_complete_tree, - verbose = verbose, - num_threads = num_threads, - atol = atol, - rtol = rtol, - method = method - ) - if (out$conv != 0) { - stop("Optimization has not converged. - Try again with different initial values.\n") - } else { - ml_pars1 <- - secsse_transform_parameters( - as.numeric(unlist(out$par)), - trparsfix, - idparsopt, - idparsfix, - idparslist, - structure_func - ) - out2 <- - list(MLpars = ml_pars1, - ML = as.numeric(unlist(out$fvalues)), conv = out$conv) - } - } - return(out2) +#' Maximum likehood estimation for (SecSSE) with parameter as complex +#' functions. Cladogenetic version +#' +#' Maximum likehood estimation under cla Several examined and concealed +#' States-dependent Speciation and Extinction (SecSSE) where some paramaters are +#' functions of other parameters and/or factors. Offers the option of +#' cladogenesis +#' +#' @inheritParams default_params_doc +#' +#' @return Parameter estimated and maximum likelihood +#' @examples +#'# Example of how to set the arguments for a ML search. +#'rm(list=ls(all=TRUE)) +#'library(secsse) +#'library(DDD) +#'set.seed(16) +#'phylotree <- ape::rbdtree(0.07,0.001,Tmax=50) +#'startingpoint <- bd_ML(brts = ape::branching.times(phylotree)) +#'intGuessLamba <- startingpoint$lambda0 +#'intGuessMu <- startingpoint$mu0 +#'traits <- sample(c(0,1,2), +#' ape::Ntip(phylotree), replace = TRUE) # get some traits +#'num_concealed_states <- 3 +#'idparslist <- cla_id_paramPos(traits, num_concealed_states) +#'idparslist$lambdas[1,] <- c(1,2,3,1,2,3,1,2,3) +#'idparslist[[2]][] <- 4 +#'masterBlock <- matrix(c(5,6,5,6,5,6,5,6,5),ncol = 3, nrow=3, byrow = TRUE) +#'diag(masterBlock) <- NA +#'diff.conceal <- FALSE +#'idparslist[[3]] <- q_doubletrans(traits,masterBlock,diff.conceal) +#'idparsfuncdefpar <- c(3,5,6) +#'idparsopt <- c(1,2) +#'idparsfix <- c(0,4) +#'initparsopt <- c(rep(intGuessLamba,2)) +#'parsfix <- c(0,0) +#'idfactorsopt <- 1 +#'initfactors <- 4 +#'# functions_defining_params is a list of functions. Each function has no +#'# arguments and to refer +#'# to parameters ids should be indicated as 'par_' i.e. par_3 refers to +#'# parameter 3. When a +#'# function is defined, be sure that all the parameters involved are either +#'# estimated, fixed or +#'# defined by previous functions (i.e, a function that defines parameter in +#'# 'functions_defining_params'). The user is responsible for this. In this +#'# example, par_3 +#'# (i.e., parameter 3) is needed to calculate par_6. This is correct because +#'# par_3 is defined +#'# in the first function of 'functions_defining_params'. Notice that factor_1 +#'# indicates a value +#'# that will be estimated to satisfy the equation. The same factor can be +#'# shared to define several parameters. +#'functions_defining_params <- list() +#'functions_defining_params[[1]] <- function() { +#' par_3 <- par_1 + par_2 +#'} +#'functions_defining_params[[2]] <- function() { +#' par_5 <- par_1 * factor_1 +#'} +#'functions_defining_params[[3]] <- function() { +#' par_6 <- par_3 * factor_1 +#'} +#' +#'tol = c(1e-02, 1e-03, 1e-04) +#'maxiter = 1000 * round((1.25)^length(idparsopt)) +#'optimmethod = 'subplex' +#'cond <- 'proper_cond' +#'root_state_weight <- 'proper_weights' +#'sampling_fraction <- c(1,1,1) +#'model <- cla_secsse_ml_func_def_pars(phylotree, +#'traits, +#'num_concealed_states, +#'idparslist, +#'idparsopt, +#'initparsopt, +#'idfactorsopt, +#'initfactors, +#'idparsfix, +#'parsfix, +#'idparsfuncdefpar, +#'functions_defining_params, +#'cond, +#'root_state_weight, +#'sampling_fraction, +#'tol, +#'maxiter, +#'optimmethod, +#'num_cycles = 1) +#'# ML -136.5796 +#' @export +cla_secsse_ml_func_def_pars <- function(phy, + traits, + num_concealed_states, + idparslist, + idparsopt, + initparsopt, + idfactorsopt, + initfactors, + idparsfix, + parsfix, + idparsfuncdefpar, + functions_defining_params, + cond = "proper_cond", + root_state_weight = "proper_weights", + sampling_fraction, + tol = c(1e-04, 1e-05, 1e-07), + maxiter = 1000 * + round((1.25) ^ length(idparsopt)), + optimmethod = "subplex", + num_cycles = 1, + loglik_penalty = 0, + is_complete_tree = FALSE, + verbose = (optimmethod == "simplex"), + num_threads = 1, + atol = 1e-12, + rtol = 1e-12, + method = "odeint::bulirsch_stoer") { + return(master_ml(phy = phy, + traits = traits, + num_concealed_states = num_concealed_states, + idparslist = idparslist, + idparsopt = idparsopt, + initparsopt = initparsopt, + idparsfix = idparsfix, + parsfix = parsfix, + idfactorsopt = idfactorsopt, + initfactors = initfactors, + idparsfuncdefpar = idparsfuncdefpar, + functions_defining_params = functions_defining_params, + cond = cond, + root_state_weight = root_state_weight, + sampling_fraction = sampling_fraction, + tol = tol, + maxiter = maxiter, + optimmethod = optimmethod, + num_cycles = num_cycles, + loglik_penalty = loglik_penalty, + is_complete_tree = is_complete_tree, + verbose = verbose, + num_threads = num_threads, + atol = atol, + rtol = rtol, + method = method)) } diff --git a/R/secsse_prep.R b/R/secsse_prep.R index f03566e..04c55c6 100644 --- a/R/secsse_prep.R +++ b/R/secsse_prep.R @@ -44,27 +44,24 @@ get_state_names <- function(state_names, num_concealed_states) { return(all_state_names) } -#' helper function to automatically create lambda matrices, based on input -#' @param state_names vector of names of all observed states -#' @param num_concealed_states number of hidden states -#' @param transition_list a matrix containing a description of all speciation -#' events, where the first column indicates the source state, the second and -#' third column indicate the two daughter states, and the fourth column gives -#' the rate indicator used. E.g.: ["SA", "S", "A", 1] for a trait state "SA" -#' which upon speciation generates two daughter species with traits "S" and "A", -#' where the number 1 is used as indicator for optimization of the likelihood. -#' @param model used model, choice of "ETD" (Examined Traits Diversification) or -#' "CTD" (Concealed Traits Diversification). -#' @param concealed_spec_rates vector specifying the rate indicators for each -#' concealed state, length should be identical to num_concealed_states. If left -#' empty when using the CTD model, it is assumed that all available speciation -#' rates are distributed uniformly over the concealed states. +#' Helper function to automatically create lambda matrices, based on input +#' +#' @inheritParams default_params_doc +#' +#' @examples +#' trans_matrix <- c(0, 0, 0, 1) +#' trans_matrix <- rbind(trans_matrix, c(1, 1, 1, 2)) +#' lambda_list <- create_lambda_list(state_names = c(0, 1), +#' num_concealed_states = 2, +#' transition_matrix = trans_matrix, +#' model = "ETD") +#' #' @export -create_lambda_matrices <- function(state_names, - num_concealed_states, - transition_list, - model = "ETD", - concealed_spec_rates = NULL) { +create_lambda_list <- function(state_names = c(0, 1), + num_concealed_states = 2, + transition_matrix, + model = "ETD", + concealed_spec_rates = NULL) { if (!(model %in% c("CR", "ETD", "CTD"))) { stop("only CR, ETD or CTD are specified") } @@ -77,12 +74,13 @@ create_lambda_matrices <- function(state_names, lambdas <- list() for (i in 1:total_num_states) { lambdas[[i]] <- matrix(0, nrow = total_num_states, - ncol = total_num_states) + ncol = total_num_states) rownames(lambdas[[i]]) <- all_state_names colnames(lambdas[[i]]) <- all_state_names } + names(lambdas) <- all_state_names - transition_list <- convert_transition_list(transition_list, state_names) + transition_list <- convert_transition_list(transition_matrix, state_names) if (model == "CTD") { if (is.null(concealed_spec_rates)) { @@ -106,7 +104,6 @@ create_lambda_matrices <- function(state_names, incr <- (j - 1) * num_obs_states focal_rate <- target_rate if (model == "CTD") focal_rate <- concealed_spec_rates[j] - # if (model == "CR") focal_rate <- 1 lambdas[[focal_state + incr]][daughter1 + incr, daughter2 + incr] <- focal_rate @@ -117,31 +114,30 @@ create_lambda_matrices <- function(state_names, return(lambdas) } - -#' helper function to neatly setup a Q matrix, without transitions to +#' Helper function to neatly setup a Q matrix, without transitions to #' concealed states (only observed transitions shown) -#' @param state_names names of observed states -#' @param num_concealed_states number of concealed states -#' @param transition_list matrix of transitions, indicating in order: 1) -#' starting state (typically the column in the transition matrix), 2) ending -#' state (typically the row in the transition matrix) and 3) associated rate -#' indicator -#' @param diff.conceal should we use the same number of rates for the -#' concealed state transitions, or should all concealed state transitions -#' have separate rates? Typically, FALSE is fine and should be used in order -#' to avoid having a huge number of parameters. +#' +#' @inheritParams default_params_doc +#' #' @return transition matrix +#' @examples +#' shift_matrix <- c(0, 1, 5) +#' shift_matrix <- rbind(shift_matrix, c(1, 0, 6)) +#' q_matrix <- secsse::create_q_matrix(state_names = c(0, 1), +#' num_concealed_states = 2, +#' shift_matrix = shift_matrix, +#' diff.conceal = TRUE) #' @export -create_transition_matrix <- function(state_names, - num_concealed_states, - transition_list, - diff.conceal = FALSE) { +create_q_matrix <- function(state_names, + num_concealed_states, + shift_matrix, + diff.conceal = FALSE) { total_num_states <- length(state_names) trans_matrix <- matrix(0, ncol = total_num_states, nrow = total_num_states) - - transition_list <- convert_transition_list_q(transition_list, state_names) + + transition_list <- convert_transition_list_q(shift_matrix, state_names) for (i in seq_len(nrow(transition_list))) { parent_state <- transition_list[i, 1] daughter_state <- transition_list[i, 2] @@ -150,10 +146,9 @@ create_transition_matrix <- function(state_names, } diag(trans_matrix) <- NA - - + trans_matrix <- secsse::expand_q_matrix(q_matrix = trans_matrix, - num_concealed_states = + num_concealed_states = num_concealed_states, diff.conceal = diff.conceal) @@ -161,7 +156,7 @@ create_transition_matrix <- function(state_names, colnames(trans_matrix) <- all_state_names rownames(trans_matrix) <- all_state_names diag(trans_matrix) <- NA - + return(trans_matrix) } @@ -207,7 +202,7 @@ get_chosen_rates <- function(q_matrix, num_concealed_states) { existing_rates <- existing_rates[existing_rates > 0] existing_rates <- existing_rates[!is.na(existing_rates)] existing_rates <- sort(existing_rates) - + num_transitions <- num_concealed_states * (num_concealed_states - 1) chosen_rates <- existing_rates while (num_transitions > length(chosen_rates)) { @@ -221,8 +216,10 @@ get_chosen_rates <- function(q_matrix, num_concealed_states) { } #' @keywords internal -fill_from_rates <- function(new_q_matrix, chosen_rates, - num_traits, num_concealed_states, +fill_from_rates <- function(new_q_matrix, + chosen_rates, + num_traits, + num_concealed_states, rate_indic) { for (i in 1:num_concealed_states) { for (j in i:num_concealed_states) { @@ -240,14 +237,12 @@ fill_from_rates <- function(new_q_matrix, chosen_rates, return(new_q_matrix) } -#' function to expand an existing q_matrix to a number of -#' concealed states -#' @param q_matrix q_matrix with only transitions between observed states -#' @param num_concealed_states number of concealed states -#' @param diff.conceal should we use the same number of rates for the -#' concealed state transitions, or should all concealed state transitions -#' have separate rates? Typically, FALSE is fine and should be used in order -#' to avoid having a huge number of parameters. +#' Function to expand an existing q_matrix to a number of concealed states +#' +#' @inheritParams default_params_doc +#' +#' @note This is highly similar to [q_doubletrans()]. +#' #' @return updated q matrix #' @export expand_q_matrix <- function(q_matrix, @@ -268,25 +263,36 @@ expand_q_matrix <- function(q_matrix, # we now re-use the existing rates chosen_rates <- get_chosen_rates(q_matrix, num_concealed_states) rate_indic <- 1 - new_q_matrix <- fill_from_rates(new_q_matrix, chosen_rates, - num_traits, num_concealed_states, + new_q_matrix <- fill_from_rates(new_q_matrix, + chosen_rates, + num_traits, + num_concealed_states, rate_indic) } + return(new_q_matrix) } - -#' helper function to create a default q_matrix list -#' @param state_names names of the observed states -#' @param num_concealed_states number of concealed states -#' @param mus previously defined mus - used to choose indicator number -#' @description -#' This function generates a generic transition list +#' Helper function to create a default `shift_matrix` list +#' +#' This function generates a generic shift matrix to be used with the function +#' [create_q_matrix()]. +#' +#' @inheritParams default_params_doc +#' +#' @examples +#' shift_matrix <- create_default_shift_matrix(state_names = c(0, 1), +#' num_concealed_states = 2, +#' mu_vector = c(1, 2, 1, 2)) +#' q_matrix <- create_q_matrix(state_names = c(0, 1), +#' num_concealed_states = 2, +#' shift_matrix = shift_matrix, +#' diff.conceal = FALSE) #' @export -create_default_q_list <- function(state_names = c("0", "1"), - num_concealed_states, - mus = NULL) { - lm <- unlist(mus) +create_default_shift_matrix <- function(state_names = c("0", "1"), + num_concealed_states = 2, + mu_vector = NULL) { + lm <- unlist(mu_vector) focal_rate <- max(lm) + 1 num_obs_states <- length(state_names) transition_list <- c() @@ -298,7 +304,7 @@ create_default_q_list <- function(state_names = c("0", "1"), to_add <- c(start_state, end_state, focal_rate) transition_list <- rbind(transition_list, to_add) focal_rate <- focal_rate + 1 - } + } } } @@ -306,18 +312,25 @@ create_default_q_list <- function(state_names = c("0", "1"), return(transition_list) } -#' helper function to create a default lambda list -#' @param state_names names of the observed states -#' @param model chosen model of interest, either "CR" (Constant Rates), "ETD" -#' (Examined Trait Diversification) or "CTD" ("Concealed Trait Diversification). -#' @description +#' Helper function to create a default lambda list +#' #' This function generates a generic lambda list, assuming no transitions #' between states, e.g. a species of observed state 0 generates daughter #' species with state 0 as well. +#' +#' @inheritParams default_params_doc +#' +#' @examples +#' lambda_matrix <- +#' create_default_lambda_transition_matrix(state_names = c(0, 1), +#' model = "ETD") +#' lambda_list <- create_lambda_list(state_names = c(0, 1), +#' num_concealed_states = 2, +#' transition_matrix = lambda_matrix, +#' model = "ETD") #' @export -create_default_lambda_list <- function(state_names = c("0", "1"), - model = "ETD") { - +create_default_lambda_transition_matrix <- function(state_names = c("0", "1"), + model = "ETD") { transition_list <- c() for (i in seq_along(state_names)) { focal_rate <- i @@ -325,27 +338,24 @@ create_default_lambda_list <- function(state_names = c("0", "1"), transition_list <- rbind(transition_list, c(state_names[i], state_names[i], - state_names[i], + state_names[i], focal_rate)) } rownames(transition_list) <- rep("", nrow(transition_list)) return(transition_list) } -#' function to generate generic mus vector -#' @param state_names full state names, including concealed states, for example -#' c("0A", "1A", "0B", "1B") -#' @param num_concealed_states number of concealed states -#' @param model model replicated, available are "CR", "ETD" and "CTD" -#' @param lambdas previously generated lambda matrices, used to infer the rate -#' number to start with +#' Generate mus vector +#' +#' @inheritParams default_params_doc +#' #' @return mu vector #' @export -create_mus <- function(state_names, - num_concealed_states, - model = "CR", - lambdas) { - focal_rate <- 1 + max(unlist(lambdas), na.rm = TRUE) +create_mu_vector <- function(state_names, + num_concealed_states, + model = "CR", + lambda_list) { + focal_rate <- 1 + max(unlist(lambda_list), na.rm = TRUE) if (!(model %in% c("CR", "ETD", "CTD"))) { stop("only CR, ETD or CTD are specified") @@ -390,13 +400,11 @@ replace_matrix <- function(focal_matrix, return(focal_matrix) } -#' helper function to enter parameter value on their right place -#' @param object lambda matrices, q_matrix or mu vector -#' @param params parameters in order, where each value reflects the value -#' of the parameter at that position, e.g. c(0.3, 0.2, 0.1) will fill out -#' the value 0.3 for the parameter with rate indentifier 1, 0.2 for the -#' parameter with rate identifier 2 and 0.1 for the parameter with rate -#' identifier 3 +#' Helper function to enter parameter value on their right place +#' +#' @inheritParams default_params_doc +#' @return lambda matrices, `q_matrix` or mu vector with the correct values in +#' their right place. #' @export fill_in <- function(object, params) { @@ -432,14 +440,12 @@ extract_answ <- function(indic_mat, } -#' function to extract parameter values out of the result of a maximum -#' likelihood inference run. -#' @param param_posit initial parameter structure, consisting of a list with -#' three entries: 1) lambda matrices, 2) mus and 3) Q matrix. In each entry, -#' integers numbers (1-n) indicate the parameter to be optimized -#' @param ml_pars resulting parameter estimates as returned by for instance -#' cla_secsse_ml, having the same structure as param_post -#' @return vector of parameter estimates +#' Extract parameter values out of the result of a maximum likelihood inference +#' run +#' +#' @inheritParams default_params_doc + +#' @return Vector of parameter estimates. #' @export extract_par_vals <- function(param_posit, ml_pars) { @@ -457,7 +463,6 @@ extract_par_vals <- function(param_posit, answ <- extract_answ(param_posit[[3]], # Q matrix ml_pars[[3]], answ) - for (i in seq_along(param_posit[[2]])) { if (param_posit[[2]][i] > 0 && !is.na(param_posit[[2]][i])) { answ[param_posit[[2]][i]] <- ml_pars[[2]][i] diff --git a/R/secsse_sim.R b/R/secsse_sim.R index 9b24a15..fe74064 100644 --- a/R/secsse_sim.R +++ b/R/secsse_sim.R @@ -1,27 +1,7 @@ #' Function to simulate a tree, conditional on observing all states. -#' @param lambdas speciation rates, in the form of a list of matrices -#' @param mus extinction rates, in the form of a vector -#' @param qs The Q matrix, for example the result of function q_doubletrans, but -#' generally in the form of a matrix. -#' @param num_concealed_states number of concealed states -#' @param crown_age crown age of the tree, tree will be simulated conditional -#' on non-extinction and this crown age. -#' @param pool_init_states pool of initial states at the crown, in case this is -#' different from all available states, otherwise leave at NULL -#' @param maxSpec Maximum number of species in the tree (please note that the -#' tree is not conditioned on this number, but that this is a safeguard against -#' generating extremely large trees). -#' @param conditioning can be 'obs_states', 'true_states' or 'none', the tree is -#' simulated until one is generated that contains all observed states -#' ('obs_states'), all true states (e.g. all combinations of obs and hidden -#' states), or is always returned ('none'). -#' @param non_extinction should the tree be conditioned on non-extinction of the -#' crown lineages? Default is TRUE. -#' @param verbose provide intermediate output. -#' @param max_tries maximum number of simulations to try to obtain a tree. -#' @param drop_extinct should extinct species be dropped from the tree? default -#' is TRUE. -#' @param seed pseudo-random number generator seed +#' +#' @inheritParams default_params_doc +#' #' @return a list with four properties: phy: reconstructed phylogeny, #' true_traits: the true traits in order of tip label, obs_traits: observed #' traits, ignoring hidden traits and lastly: @@ -34,7 +14,7 @@ #' Simulation is performed with a randomly #' sampled initial trait at the crown - if you, however - want a specific, #' single, trait used at the crown, you can reduce the possible traits by -#' modifying 'pool_init_states'. +#' modifying `pool_init_states`. #' #' By default, the algorithm keeps simulating until it generates a tree where #' both crown lineages survive to the present - this is to ensure that the tree @@ -47,8 +27,9 @@ secsse_sim <- function(lambdas, crown_age, num_concealed_states, pool_init_states = NULL, - maxSpec = 1e5, - conditioning = "none", + max_spec = 1e5, + min_spec = 2, + conditioning = "obs_states", non_extinction = TRUE, verbose = FALSE, max_tries = 1e6, @@ -85,27 +66,41 @@ secsse_sim <- function(lambdas, pool_init_states <- -1 + indices } - if (!conditioning %in% c("none", "true_states", "obs_states")) { - stop("unknown conditioning, please pick from - 'none', 'obs_states', 'true_states'") - } - if (is.null(seed)) seed <- -1 + condition_vec <- vector() + if (length(conditioning) > 1) { + condition_vec <- conditioning + conditioning <- "custom" + true_traits <- names(mus) + all_states <- c() + for (i in seq_along(true_traits)) { + all_states[i] <- substr(true_traits[i], 1, (nchar(true_traits[i]) - 1)) + } + all_states <- unique(all_states) + + indices <- which(all_states %in% condition_vec) + condition_vec <- -1 + indices + } + res <- secsse_sim_cpp(mus, lambdas, qs, crown_age, - maxSpec, + max_spec, + min_spec, pool_init_states, conditioning, num_concealed_states, non_extinction, verbose, max_tries, - seed) + seed, + condition_vec) - if (length(res$traits) < 1) { + Ltable <- res$ltable + + if (sum(Ltable[, 4] == -1) < 2) { warning("crown lineages died out") return(list(phy = "ds", traits = 0, @@ -114,9 +109,18 @@ secsse_sim <- function(lambdas, conditioning = res$tracker[4])) } - Ltable <- res$ltable + if (sum(res$tracker) >= max_tries) { + warning("Couldn't simulate a tree in enough tries, + try increasing max_tries") + return(list(phy = "ds", + traits = 0, + extinct = res$tracker[2], + overshoot = res$tracker[3], + conditioning = res$tracker[4])) + } + + - speciesID <- res$traits[seq(2, length(res$traits), by = 2)] initialState <- res$initial_state Ltable[, 1] <- crown_age - Ltable[, 1] # simulation starts at 0, # not at crown age @@ -124,22 +128,32 @@ secsse_sim <- function(lambdas, Ltable[notmin1, 4] <- crown_age - c(Ltable[notmin1, 4]) Ltable[which(Ltable[, 4] == crown_age + 1), 4] <- -1 - indices <- seq(1, length(res$traits), by = 2) - speciesTraits <- 1 + res$traits[indices] + # indices <- seq(1, length(res$traits), by = 2) + speciesTraits <- 1 + Ltable[, 5] + used_id <- abs(Ltable[, 3]) phy <- DDD::L2phylo(Ltable, dropextinct = drop_extinct) + + + if (drop_extinct) { + to_drop <- which(Ltable[, 4] != -1) + if (length(to_drop) > 0) { + used_id <- used_id[-to_drop] + speciesTraits <- speciesTraits[-to_drop] + } + } - true_traits <- sortingtraits(data.frame(cbind(paste0("t", abs(speciesID)), + true_traits <- sortingtraits(data.frame(cbind(paste0("t", used_id), speciesTraits), - row.names = NULL), - phy) + row.names = NULL), + phy) true_traits <- names(mus)[true_traits] obs_traits <- c() for (i in seq_along(true_traits)) { - obs_traits[i] <- stringr::str_sub(true_traits[i], 1, -2) + obs_traits[i] <- substr(true_traits[i], 1, (nchar(true_traits[i]) - 1)) } - + if (sum(Ltable[, 4] < 0)) { return(list(phy = phy, true_traits = true_traits, diff --git a/R/secsse_utils.R b/R/secsse_utils.R index a1b358d..b0ad32f 100755 --- a/R/secsse_utils.R +++ b/R/secsse_utils.R @@ -1,22 +1,24 @@ -#' It sets the parameters (speciation, extinction and transition) -#' ids. Needed for ML calculation (secsse_ml) #' @title Parameter structure setting -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. +#' Sets the parameters (speciation, extinction and transition) ids. Needed for +#' ML calculation ([secsse_ml()]). +#' +#' @inheritParams default_params_doc +#' #' @return A list that includes the ids of the parameters for ML analysis. #' @examples #' traits <- sample(c(0,1,2), 45,replace = TRUE) #get some traits #' num_concealed_states <- 3 #' param_posit <- id_paramPos(traits,num_concealed_states) #' @export +#' @rawNamespace useDynLib(secsse, .registration = TRUE) +#' @rawNamespace import(Rcpp) +#' @rawNamespace importFrom(RcppParallel, RcppParallelLibs) id_paramPos <- function(traits, num_concealed_states) { #noLint idparslist <- list() if (is.matrix(traits)) { traits <- traits[, 1] } - + ly <- length(sort(unique(traits))) * 2 * num_concealed_states d <- ly / 2 idparslist[[1]] <- 1:d @@ -31,23 +33,22 @@ id_paramPos <- function(traits, num_concealed_states) { #noLint Q <- matrix(toMatrix, ncol = d, nrow = d, byrow = TRUE) diag(Q) <- NA idparslist[[3]] <- Q - + lab_states <- rep(as.character(sort(unique(traits))), num_concealed_states) - + lab_conceal <- NULL for (i in 1:num_concealed_states) { - + lab_conceal <- c(lab_conceal, rep(LETTERS[i], length(sort(unique(traits))))) } - + statesCombiNames <- character() for (i in seq_along(lab_states)) { statesCombiNames <- c(statesCombiNames, paste0(lab_states[i], lab_conceal[i])) - } colnames(idparslist[[3]]) <- statesCombiNames rownames(idparslist[[3]]) <- statesCombiNames @@ -57,10 +58,11 @@ id_paramPos <- function(traits, num_concealed_states) { #noLint return(idparslist) } -create_q_matrix <- function(masterBlock, - concealnewQMatr, - ntraits, - diff.conceal) { +#' @keywords internal +create_q_matrix_int <- function(masterBlock, + concealnewQMatr, + ntraits, + diff.conceal) { Q <- NULL for (i in 1:ntraits) { Qrow <- NULL @@ -72,7 +74,7 @@ create_q_matrix <- function(masterBlock, if (diff.conceal == TRUE) { entry <- concealnewQMatr[i, ii] } - + outDiagBlock <- matrix(0, ncol = ntraits, nrow = ntraits, @@ -87,19 +89,15 @@ create_q_matrix <- function(masterBlock, } -#' Sets a Q matrix where double transitions are not allowed #' @title Basic Qmatrix -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param masterBlock matrix of transitions among only examined states, NA in -#' the main diagonal, used to build the full transition rates matrix. -#' @param diff.conceal should the concealed states be different? Normally it -#' should be FALSE. E.g. that the transition rates for the concealed states -#' are different from the transition rates for the examined states. +#' Sets a Q matrix where double transitions are not allowed +#' +#' @inheritParams default_params_doc +#' #' @return Q matrix that includes both examined and concealed states, it should #' be declared as the third element of idparslist. #' @description This function expands the Q_matrix, but it does so assuming -#' that the number of concealed traits is equal to the number of examined +#' that the number of concealed traits is equal to the number of examined #' traits, if you have a different number, you should consider looking at #' the function [expand_q_matrix()]. #' @examples @@ -124,34 +122,34 @@ q_doubletrans <- function(traits, masterBlock, diff.conceal) { all(floor(masterBlock) == masterBlock, na.rm = TRUE) == FALSE) { integersmasterBlock <- floor(masterBlock) factorBlock <- signif(masterBlock - integersmasterBlock, digits = 2) - + factorstoExpand <- unique(sort(c(factorBlock))) factorstoExpand <- factorstoExpand[factorstoExpand > 0] newshareFac <- (max(factorstoExpand * 10) + 1):(max(factorstoExpand * 10) + length(factorstoExpand)) newshareFac <- newshareFac / 10 - + for (iii in seq_along(newshareFac)) { factorBlock[which(factorBlock == factorstoExpand[iii])] <- newshareFac[iii] } - + ntraits <- length(sort(unique(traits))) uniqParQ <- sort(unique(c(floor(masterBlock)))) uniqParQ2 <- uniqParQ[which(uniqParQ > 0)] concealnewQ <- (max(uniqParQ2) + 1):(max(uniqParQ2) + length(uniqParQ2)) - + for (iii in seq_along(concealnewQ)) { integersmasterBlock[which(integersmasterBlock == uniqParQ2[iii])] <- concealnewQ[iii] } concealnewQMatr <- integersmasterBlock + factorBlock - - Q <- create_q_matrix(masterBlock, - concealnewQMatr, - ntraits, - diff.conceal) + + Q <- create_q_matrix_int(masterBlock, + concealnewQMatr, + ntraits, + diff.conceal) } else { ntraits <- length(sort(unique(traits))) uniqParQ <- sort(unique(c(masterBlock))) @@ -162,83 +160,87 @@ q_doubletrans <- function(traits, masterBlock, diff.conceal) { uniqParQ2 concealnewQMatr[concealnewQMatr == uniqParQ2[I]] <- concealnewQ[I] } - - Q <- create_q_matrix(masterBlock, - concealnewQMatr, - ntraits, - diff.conceal) + + Q <- create_q_matrix_int(masterBlock, + concealnewQMatr, + ntraits, + diff.conceal) } + uniq_traits <- unique(traits) + uniq_traits <- uniq_traits[!is.na(uniq_traits)] + all_names <- get_state_names(state_names = uniq_traits, + num_concealed_states = length(uniq_traits)) + colnames(Q) <- all_names + rownames(Q) <- all_names return(Q) } +#' @title Data checking and trait sorting #' In preparation for likelihood calculation, it orders trait data according #' the tree tips -#' @title Data checking and trait sorting -#' @param traitinfo data frame where first column has species ids and the second -#' one is the trait associated information. -#' @param phy phy phylogenetic tree of class phylo, ultrametric, fully-resolved, -#' rooted and with branch lengths. +#' +#' @inheritParams default_params_doc +#' #' @return Vector of traits #' @examples #' # Some data we have prepared -#' data(traitinfo) -#' data('phylo_Vign') -#' traits <- sortingtraits(traitinfo,phylo_Vign) +#' data(traits) +#' data('phylo_vignette') +#' traits <- sortingtraits(traits, phylo_vignette) #' @export -sortingtraits <- function(traitinfo, phy) { - traitinfo <- as.matrix(traitinfo) - if (length(phy$tip.label) != nrow(traitinfo)) { +sortingtraits <- function(trait_info, phy) { + trait_info <- as.matrix(trait_info) + if (length(phy$tip.label) != nrow(trait_info)) { stop("Number of species in the tree must be the same as in the trait file") } - + if (identical(as.character(sort(phy$tip.label)), - as.character(sort(traitinfo[, 1]))) == FALSE) { - mismatch <- match(as.character(sort(traitinfo[, 1])), + as.character(sort(trait_info[, 1]))) == FALSE) { + mismatch <- match(as.character(sort(trait_info[, 1])), as.character(sort(phy$tip.label))) - mismatched <- (sort(traitinfo[, 1]))[which(is.na(mismatch))] + mismatched <- (sort(trait_info[, 1]))[which(is.na(mismatch))] stop( paste(c("Mismatch on tip labels and taxa names, check the species:", mismatched), collapse = " ") ) } - - traitinfo <- traitinfo[match(phy$tip.label, traitinfo[, 1]), ] - traitinfo[, 1] == phy$tip.label - - if (ncol(traitinfo) == 2) { - traits <- as.numeric(traitinfo[, 2]) + + trait_info <- trait_info[match(phy$tip.label, trait_info[, 1]), ] + trait_info[, 1] == phy$tip.label + + if (ncol(trait_info) == 2) { + traits <- as.numeric(trait_info[, 2]) } - - if (ncol(traitinfo) > 2) { + + if (ncol(trait_info) > 2) { traits <- NULL - for (i in 1:(ncol(traitinfo) - 1)) { - traits <- cbind(traits, as.numeric(traitinfo[, 1 + i])) + for (i in 1:(ncol(trait_info) - 1)) { + traits <- cbind(traits, as.numeric(trait_info[, 1 + i])) } } return(traits) } -#' It sets the parameters (speciation, extinction and transition) -#' ids. Needed for ML calculation with cladogenetic options (cla_secsse_ml) #' @title Parameter structure setting for cla_secsse -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. +#' It sets the parameters (speciation, extinction and transition) +#' IDs. Needed for ML calculation with cladogenetic options (cla_secsse_ml) +#' +#' @inheritParams default_params_doc +#' #' @return A list that includes the ids of the parameters for ML analysis. #' @examples #'traits <- sample(c(0,1,2), 45,replace = TRUE) #get some traits #'num_concealed_states <- 3 -#'param_posit <- cla_id_paramPos(traits,num_concealed_states) +#'param_posit <- cla_id_paramPos(traits, num_concealed_states) #' @export cla_id_paramPos <- function(traits, num_concealed_states) { idparslist <- list() if (is.matrix(traits)) { traits <- traits[, 1] } - + ly <- length(sort(unique(traits))) * 2 * num_concealed_states d <- ly / 2 toMatrix <- 1 @@ -246,38 +248,35 @@ cla_id_paramPos <- function(traits, num_concealed_states) { for (i in 1:d) { toMatrix <- c(toMatrix, matPos[(i * d - (d - 1)):((i * d - (d - 1)) + d)]) - } toMatrix <- toMatrix[1:d^2] Q <- matrix(toMatrix, ncol = d, nrow = d, byrow = TRUE) diag(Q) <- NA lab_states <- rep(as.character(sort(unique(traits))), num_concealed_states) - + lab_conceal <- NULL for (i in 1:num_concealed_states) { - lab_conceal <- c(lab_conceal, rep(LETTERS[i], length(sort(unique(traits))))) } - - + statesCombiNames <- character() for (i in seq_along(lab_states)) { statesCombiNames <- c(statesCombiNames, paste0(lab_states[i], lab_conceal[i])) } - + idparslist[[1]] <- matrix(0, ncol = d, nrow = 4) idparslist[[2]] <- (d + 1):ly idparslist[[3]] <- Q - + rownames(idparslist[[1]]) <- c("dual_inheritance", "single_inheritance", "dual_symmetric_transition", "dual_asymmetric_transition") - + colnames(idparslist[[1]]) <- statesCombiNames colnames(idparslist[[3]]) <- statesCombiNames rownames(idparslist[[3]]) <- statesCombiNames @@ -286,13 +285,11 @@ cla_id_paramPos <- function(traits, num_concealed_states) { return(idparslist) } -#' It provides the set of matrices containing all the speciation rates #' @title Prepares the entire set of lambda matrices for cla_secsse. -#' @param traits vector with trait states, order of states must be the same as -#' tree tips, for help, see vignette. -#' @param num_concealed_states number of concealed states, generally equivalent -#' to number of examined states. -#' @param lambd_and_modeSpe a matrix with the 4 models of speciation possible. +#' It provides the set of matrices containing all the speciation rates +#' +#' @inheritParams default_params_doc +#' #' @return A list of lambdas, its length would be the same than the number of #' trait states * num_concealed_states.. #' @export @@ -315,7 +312,7 @@ cla_id_paramPos <- function(traits, num_concealed_states) { #' # Now, internally, clasecsse sorts the lambda matrices, so they look like #' # a list with 9 matrices, corresponding to the 9 states #' # (0A,1A,2A,0B, etc) -#' +#' #' parameter <- idparlist #' lambda_and_modeSpe <- parameter$lambdas #' lambda_and_modeSpe[1, ] <- c(0.2, 0.2, 0.2, 0.4, 0.4, 0.4, 0.01, 0.01, 0.01) @@ -325,7 +322,7 @@ prepare_full_lambdas <- function(traits, num_concealed_states, lambd_and_modeSpe) { if (is.list(lambd_and_modeSpe)) return(lambd_and_modeSpe) - + num_exami <- length(sort(unique(traits))) mat_size <- num_exami * num_concealed_states posib_trans <- matrix(1, @@ -336,27 +333,27 @@ prepare_full_lambdas <- function(traits, posib_trans <- q_doubletrans(traits, masterBlock = posib_trans, diff.conceal = FALSE) - + full_lambdas <- list() for (jj in 1:mat_size) { # dual_state_inhe m1 <- matrix(0, ncol = mat_size, nrow = mat_size) m1[jj, jj] <- as.numeric(lambd_and_modeSpe[, jj][1]) - + # single_state_inhe m2 <- matrix(0, ncol = mat_size, nrow = mat_size) m2[, jj] <- posib_trans[jj, ] m2[jj, jj] <- 0 m2[m2 == 1] <- as.numeric(lambd_and_modeSpe[, jj][2]) # symet_state_emerge - + m3 <- matrix(0, ncol = mat_size, nrow = mat_size) - + diag(m3) <- posib_trans[jj, ] m3[jj, jj] <- 0 m3[m3 == 1] <- as.numeric(lambd_and_modeSpe[, jj][3]) # symet_state_emerge - + m4 <- matrix(0, ncol = mat_size, nrow = mat_size) for (i in seq_along(which(posib_trans[jj, ] == 1))) { m4[which(posib_trans[jj, ] == 1)[i], ] <- posib_trans[jj, ] @@ -366,24 +363,11 @@ prepare_full_lambdas <- function(traits, diag(m4) <- 0 m4[is.na(m4)] <- 0 m4[m4 == 1] <- as.numeric(lambd_and_modeSpe[, jj][4]) - full_lambdas[[jj]] <- m1 + m2 + m3 + m4 } return(full_lambdas) } -#' @rawNamespace useDynLib(secsse, .registration = TRUE) -#' @rawNamespace import(Rcpp) -#' @rawNamespace importFrom(RcppParallel, RcppParallelLibs) -#' @keywords internal -normalize_loglik <- function(probs, loglik) { - sumabsprobs <- sum(abs(probs)) - probs <- probs / sumabsprobs - loglik <- loglik + log(sumabsprobs) - message(paste(c(probs, loglik), collapse = " ")) - return(list(probs = probs, loglik = loglik)) -} - #' @keywords internal penalty <- function(pars, loglik_penalty = 0) { pars <- unlist(unlist(pars)) @@ -407,3 +391,733 @@ calc_mus <- function(is_complete_tree, } return(mus) } + +#' @keywords internal +check_tree <- function(phy, is_complete_tree) { + if (ape::is.rooted(phy) == FALSE) { + stop("The tree needs to be rooted.") + } + + if (ape::is.binary(phy) == FALSE) { + stop("The tree needs to be fully resolved.") + } + if (ape::is.ultrametric(phy) == FALSE && is_complete_tree == FALSE) { + stop("The tree needs to be ultrametric.") + } + if (any(phy$edge.length == 0)) { + stop("The tree must have internode distancs that are all larger than 0.") + } +} + +#' @keywords internal +check_traits <- function(traits, sampling_fraction) { + if (is.matrix(traits)) { + if (length(sampling_fraction) != length(sort(unique(traits[, 1])))) { + stop("Sampling_fraction must have as many elements + as the number of traits.") + } + + if (all(sort(unique(as.vector(traits))) == sort(unique(traits[, 1]))) == + FALSE) { + stop( + "Check your trait argument; if you have more than one column, + make sure all your states are included in the first column." + ) + } + } else { + if (length(sampling_fraction) != length(sort(unique(traits)))) { + stop("Sampling_fraction must have as many elements as + the number of traits.") + } + } + + if (length(sort(unique(as.vector(traits)))) < 2) { + stop("The trait has only one state.") + } +} + +#' @keywords internal +check_root_state_weight <- function(root_state_weight, traits) { + if (is.numeric(root_state_weight)) { + if (length(root_state_weight) != length(sort(unique(traits)))) { + stop("There need to be as many elements in root_state_weight + as there are traits.") + } + if (length(which(root_state_weight == 1)) != 1) { + stop("The root_state_weight needs only one 1.") + } + } else { + if (any(root_state_weight == "maddison_weights" | + root_state_weight == "equal_weights" | + root_state_weight == "proper_weights") == FALSE) { + stop("The root_state_weight must be any of + maddison_weights, equal_weights, or proper_weights.") + } + } +} + +#' @keywords internal +check_input <- function(traits, + phy, + sampling_fraction, + root_state_weight, + is_complete_tree) { + check_root_state_weight(root_state_weight, sampling_fraction) + + check_tree(phy, is_complete_tree) + + check_traits(traits, sampling_fraction) +} + + +#' @keywords internal +transf_funcdefpar <- function(idparsfuncdefpar, + functions_defining_params, + idfactorsopt, + trparsfix, + trparsopt, + idparsfix, + idparsopt) { + trparfuncdefpar <- NULL + ids_all <- c(idparsfix, idparsopt) + + values_all <- c(trparsfix / (1 - trparsfix), + trparsopt / (1 - trparsopt)) + a_new_envir <- new.env() + x <- as.list(values_all) ## To declare all the ids as variables + + if (is.null(idfactorsopt)) { + names(x) <- paste0("par_", ids_all) + } else { + names(x) <- c(paste0("par_", ids_all), paste0("factor_", idfactorsopt)) + } + list2env(x, envir = a_new_envir) + + for (jj in seq_along(functions_defining_params)) { + myfunc <- functions_defining_params[[jj]] + environment(myfunc) <- a_new_envir + value_func_defining_parm <- local(myfunc(), envir = a_new_envir) + + ## Now, declare the variable that is just calculated, so it is available + ## for the next calculation if needed + y <- as.list(value_func_defining_parm) + names(y) <- paste0("par_", idparsfuncdefpar[jj]) + list2env(y, envir = a_new_envir) + + if (is.numeric(value_func_defining_parm) == FALSE) { + stop("Something went wrong with the calculation of + parameters in 'functions_param_struct'") + } + trparfuncdefpar <- c(trparfuncdefpar, value_func_defining_parm) + } + trparfuncdefpar <- trparfuncdefpar / (1 + trparfuncdefpar) + rm(a_new_envir) + return(trparfuncdefpar) +} + +#' @keywords internal +update_values_transform_cla <- function(trpars, + idparslist, + idpars, + parvals) { + for (i in seq_along(idpars)) { + for (j in seq_len(nrow(trpars[[3]]))) { + id <- which(idparslist[[1]][[j]] == idpars[i]) + trpars[[1]][[j]][id] <- parvals[i] + } + for (j in 2:3) { + id <- which(idparslist[[j]] == idpars[i]) + trpars[[j]][id] <- parvals[i] + } + } + return(trpars) +} + +#' @keywords internal +transform_params_cla <- function(idparslist, + idparsfix, + trparsfix, + idparsopt, + trparsopt, + structure_func, + idparsfuncdefpar, + trparfuncdefpar) { + trpars1 <- idparslist + for (j in seq_len(nrow(trpars1[[3]]))) { + trpars1[[1]][[j]][, ] <- NA + } + + for (j in 2:3) { + trpars1[[j]][] <- NA + } + + if (length(idparsfix) != 0) { + trpars1 <- update_values_transform_cla(trpars1, + idparslist, + idparsfix, + trparsfix) + } + + trpars1 <- update_values_transform_cla(trpars1, + idparslist, + idparsopt, + trparsopt) + ## structure_func part + if (!is.null(structure_func)) { + trpars1 <- update_values_transform_cla(trpars1, + idparslist, + idparsfuncdefpar, + trparfuncdefpar) + } + + pre_pars1 <- list() + pars1 <- list() + + for (j in seq_len(nrow(trpars1[[3]]))) { + pre_pars1[[j]] <- trpars1[[1]][[j]][, ] / (1 - trpars1[[1]][[j]][, ]) + } + + pars1[[1]] <- pre_pars1 + for (j in 2:3) { + pars1[[j]] <- trpars1[[j]] / (1 - trpars1[[j]]) + } + + return(pars1) +} + +#' @keywords internal +update_values_transform <- function(trpars, + idparslist, + idpars, + parvals) { + for (i in seq_along(idpars)) { + for (j in 1:3) { + id <- which(idparslist[[j]] == idpars[i]) + trpars[[j]][id] <- parvals[i] + } + } + return(trpars) +} + +#' @keywords internal +transform_params_normal <- function(idparslist, + idparsfix, + trparsfix, + idparsopt, + trparsopt, + structure_func, + idparsfuncdefpar, + trparfuncdefpar) { + trpars1 <- idparslist + for (j in 1:3) { + trpars1[[j]][] <- NA + } + if (length(idparsfix) != 0) { + trpars1 <- update_values_transform(trpars1, + idparslist, + idparsfix, + trparsfix) + } + + trpars1 <- update_values_transform(trpars1, + idparslist, + idparsopt, + trparsopt) + + ## if structure_func part + if (is.null(structure_func) == FALSE) { + trpars1 <- update_values_transform(trpars1, + idparslist, + idparsfuncdefpar, + trparfuncdefpar) + } + pars1 <- list() + for (j in 1:3) { + pars1[[j]] <- trpars1[[j]] / (1 - trpars1[[j]]) + } + return(pars1) +} + +#' @keywords internal +secsse_transform_parameters <- function(trparsopt, + trparsfix, + idparsopt, + idparsfix, + idparslist, + structure_func) { + if (!is.null(structure_func)) { + idparsfuncdefpar <- structure_func[[1]] + functions_defining_params <- structure_func[[2]] + + if (length(structure_func[[3]]) > 1) { + idfactorsopt <- structure_func[[3]] + } else { + if (structure_func[[3]] == "noFactor") { + idfactorsopt <- NULL + } else { + idfactorsopt <- structure_func[[3]] + } + } + + trparfuncdefpar <- transf_funcdefpar(idparsfuncdefpar = + idparsfuncdefpar, + functions_defining_params = + functions_defining_params, + idfactorsopt = idfactorsopt, + trparsfix = trparsfix, + trparsopt = trparsopt, + idparsfix = idparsfix, + idparsopt = idparsopt) + } + + if (is.list(idparslist[[1]])) { + # when the ml function is called from cla_secsse + pars1 <- transform_params_cla(idparslist, + idparsfix, + trparsfix, + idparsopt, + trparsopt, + structure_func, + idparsfuncdefpar, + trparfuncdefpar) + } else { + # when non-cla option is called + pars1 <- transform_params_normal(idparslist, + idparsfix, + trparsfix, + idparsopt, + trparsopt, + structure_func, + idparsfuncdefpar, + trparfuncdefpar) + } + return(pars1) +} + + +condition <- function(cond, + mergeBranch2, + weight_states, + lambdas, + nodeM) { + lmb <- length(mergeBranch2) + d <- length(lambdas) + if (is.list(lambdas)) { + if (cond == "maddison_cond") { + pre_cond <- rep(NA, lmb) # nolint + for (j in 1:lmb) { + pre_cond[j] <- sum(weight_states[j] * + lambdas[[j]] * + (1 - nodeM[1:d][j]) ^ 2) + } + mergeBranch2 <- mergeBranch2 / sum(pre_cond) # nolint + } + + if (cond == "proper_cond") { + pre_cond <- rep(NA, lmb) # nolint + for (j in 1:lmb) { + pre_cond[j] <- sum(lambdas[[j]] * + ((1 - nodeM[1:d]) %o% (1 - nodeM[1:d]))) + } + mergeBranch2 <- mergeBranch2 / pre_cond # nolint + } + + } else { + if (cond == "maddison_cond") { + mergeBranch2 <- + mergeBranch2 / sum(weight_states * lambdas * + (1 - nodeM[1:d]) ^ 2) + } + + if (cond == "proper_cond") { + mergeBranch2 <- mergeBranch2 / (lambdas * (1 - nodeM[1:d]) ^ 2) + } + } + return(mergeBranch2) +} + +#' @keywords internal +update_complete_tree <- function(phy, + lambdas, + mus, + q_matrix, + method, + atol, + rtol, + lmb) { + time_inte <- max(abs(ape::branching.times(phy))) # nolint + + if (is.list(lambdas)) { + y <- rep(0, lmb) + nodeM <- ct_condition_cpp(rhs = "ode_cla", + y, # nolint + time_inte, + lambdas, + mus, + q_matrix, + method, + atol, + rtol) + nodeM <- c(nodeM, y) # nolint + } else { + y <- rep(0, 2 * lmb) + nodeM <- ct_condition_cpp(rhs = "ode_standard", + y, # nolint + time_inte, + lambdas, + mus, + q_matrix, + method, + atol, + rtol) + } + return(nodeM) +} + + +#' @keywords internal +create_states <- function(usetraits, + traits, + states, + sampling_fraction, + num_concealed_states, + d, + traitStates, + is_complete_tree, + phy, + ly, + mus, + nb_tip) { + if (anyNA(usetraits)) { + nas <- which(is.na(traits)) + for (iii in seq_along(nas)) { + states[nas[iii], ] <- c(1 - rep(sampling_fraction, + num_concealed_states), + rep(sampling_fraction, num_concealed_states)) + } + } + + for (iii in seq_along(traitStates)) { # Initial state probabilities + StatesPresents <- d + iii + toPlaceOnes <- StatesPresents + + length(traitStates) * (0:(num_concealed_states - 1)) + tipSampling <- 1 * sampling_fraction + states[which(usetraits == + traitStates[iii]), toPlaceOnes] <- tipSampling[iii] + } + + if (is_complete_tree) { + extinct_species <- geiger::is.extinct(phy) + if (!is.null(extinct_species)) { + for (i in seq_along(extinct_species)) { + states[which(phy$tip.label == extinct_species[i]), (d + 1):ly] <- + mus * states[which(phy$tip.label == extinct_species[i]), (d + 1):ly] + } + } + for (iii in 1:nb_tip) { + states[iii, 1:d] <- 0 + } + } else { + for (iii in 1:nb_tip) { + states[iii, 1:d] <- rep(1 - sampling_fraction, num_concealed_states) + } + } + return(states) +} + +#' @keywords internal +build_states <- function(phy, + traits, + num_concealed_states, + sampling_fraction, + is_complete_tree = FALSE, + mus = NULL, + num_unique_traits = NULL, + first_time = FALSE) { + if (!is.matrix(traits)) { + traits <- matrix(traits, nrow = length(traits), ncol = 1, byrow = FALSE) + } + + if (length(phy$tip.label) != nrow(traits)) { + stop("Number of species in the tree must be the same as in the trait file") + } + # if there are traits that are not in the observed tree, + # the user passes these themselves. + # yes, this is a weird use-case + + traitStates <- sort(unique(traits[, 1])) + + if (!is.null(num_unique_traits)) { + if (num_unique_traits > length(traitStates)) { + if (first_time) + message("found un-observed traits, expanding state space") + traitStates <- 1:num_unique_traits + } + } + + nb_tip <- ape::Ntip(phy) + nb_node <- phy$Nnode + ly <- length(traitStates) * 2 * num_concealed_states + states <- matrix(ncol = ly, nrow = nb_tip + nb_node) + d <- ly / 2 + ## In a example of 3 states, the names of the colums would be like: + ## + ## colnames(states) <- c("E0A","E1A","E2A","E0B","E1B","E2B", + ## "D0A","D1A","D2A","D0B","D1B","D2B") + states[1:nb_tip, ] <- 0 + ## I repeat the process of state assignment as many times as columns I have + for (iv in seq_len(ncol(traits))) { + states <- create_states(traits[, iv], + traits, + states, + sampling_fraction, + num_concealed_states, + d, + traitStates, + is_complete_tree, + phy, + ly, + mus, + nb_tip) + } + return(states) +} + +#' @keywords internal +build_initStates_time <- function(phy, + traits, + num_concealed_states, + sampling_fraction, + is_complete_tree = FALSE, + mus = NULL, + num_unique_traits = NULL, + first_time = FALSE) { + states <- build_states(phy, + traits, + num_concealed_states, + sampling_fraction, + is_complete_tree, + mus, + num_unique_traits, + first_time) + phy$node.label <- NULL + split_times <- sort(event_times(phy), decreasing = FALSE) + ances <- as.numeric(names(split_times)) + + forTime <- cbind(phy$edge, phy$edge.length) + + return(list( + states = states, + ances = ances, + forTime = forTime + )) +} + +#' @keywords internal +get_weight_states <- function(root_state_weight, + num_concealed_states, + mergeBranch, + lambdas, + nodeM, + d, + is_cla = FALSE) { + + if (is.numeric(root_state_weight)) { + weight_states <- rep(root_state_weight / num_concealed_states, + num_concealed_states) + } else { + if (root_state_weight == "maddison_weights") { + weight_states <- (mergeBranch) / sum((mergeBranch)) + } + + if (root_state_weight == "proper_weights") { + if (is_cla) { + lmb <- length(mergeBranch) + numerator <- rep(NA, lmb) + for (j in 1:lmb) { + numerator[j] <- + mergeBranch[j] / sum(lambdas[[j]] * + ((1 - nodeM[1:d]) %o% (1 - nodeM[1:d]))) + } + weight_states <- numerator / sum(numerator) # nolint + } else { + weight_states <- (mergeBranch / + (lambdas * (1 - nodeM[1:d]) ^ 2)) / + sum((mergeBranch / (lambdas * (1 - nodeM[1:d]) ^ 2))) + } + } + + if (root_state_weight == "equal_weights") { + weight_states <- rep(1 / length(mergeBranch), length(mergeBranch)) + } + } + + return(weight_states) +} + +#' Times at which speciation or extinction occurs +#' @title Event times of a (possibly non-ultrametric) phylogenetic tree +#' @param phy phylogenetic tree of class phylo, without polytomies, rooted and +#' with branch lengths. Need not be ultrametric. +#' @return times at which speciation or extinction happens. +#' @note This script has been modified from BAMMtools' internal function +#' NU.branching.times +#' @export +event_times <- function(phy) { + if (ape::is.ultrametric(phy)) { + return(ape::branching.times(phy)) + } else { + if (ape::is.binary(phy) == FALSE) { + stop("error. Need fully bifurcating (resolved) tree\n") + } + phy$begin <- rep(0, nrow(phy$edge)) + phy$end <- rep(0, nrow(phy$edge)) + fx <- function(phy, node) { + cur_time <- 0 + root <- length(phy$tip.label) + 1 + if (node > root) { + cur_time <- phy$end[which(phy$edge[, 2] == node)] + } + dset <- phy$edge[, 2][phy$edge[, 1] == node] + i1 <- which(phy$edge[, 2] == dset[1]) + i2 <- which(phy$edge[, 2] == dset[2]) + phy$end[i1] <- cur_time + phy$edge.length[i1] + phy$end[i2] <- cur_time + phy$edge.length[i2] + if (dset[1] > length(phy$tip.label)) { + phy$begin[phy$edge[, 1] == dset[1]] <- phy$end[i1] + phy <- fx(phy, node = dset[1]) + } + if (dset[2] > length(phy$tip.label)) { + phy$begin[phy$edge[, 1] == dset[2]] <- phy$end[i2] + phy <- fx(phy, node = dset[2]) + } + return(phy) + } + phy <- fx(phy, node = length(phy$tip.label) + 1) + maxbt <- max(phy$end) + nodes <- (length(phy$tip.label) + 1):(2 * length(phy$tip.label) - 1) + bt <- numeric(length(nodes)) + names(bt) <- nodes + for (i in seq_along(bt)) { + tt <- phy$begin[phy$edge[, 1] == nodes[i]][1] + bt[i] <- maxbt - tt + } + return(bt) + } +} + +#' Print likelihood for initial parameters +#' +#' @inheritParams default_params_doc +#' +#' @return Invisible `NULL`. Prints a `message()` to the console with the +#' initial loglikelihood if `verbose >= 1` +#' @noRd +print_init_ll <- function(initloglik, + verbose) { + if (isTRUE(verbose >= 1)) { + init_ll_msg1 <- "Calculating the likelihood for the initial parameters." + init_ll_msg2 <- + paste0("The loglikelihood for the initial parameter values is ", + initloglik) + init_ll_msg3 <- c("Optimizing the likelihood - this may take a while.") + message(paste(init_ll_msg1, init_ll_msg2, init_ll_msg3, sep = "\n")) + } + + invisible(NULL) +} + +#' @keywords internal +set_and_check_structure_func <- function(idparsfuncdefpar, + functions_defining_params, + idparslist, + idparsopt, + idfactorsopt, + idparsfix, + initfactors) { + structure_func <- list() + structure_func[[1]] <- idparsfuncdefpar + structure_func[[2]] <- functions_defining_params + + # checks specific to when the user has specified factors: + + if (is.null(idfactorsopt) == FALSE) { + if (length(initfactors) != length(idfactorsopt)) { + stop("idfactorsopt should have the same length as initfactors.") + } + } + + if (is.list(functions_defining_params) == FALSE) { + stop( + "The argument functions_defining_params should be a list of + functions. See example and vignette" + ) + } + + if (length(functions_defining_params) != length(idparsfuncdefpar)) { + stop( + "The argument functions_defining_params should have the same + length than idparsfuncdefpar" + ) + } + + if (anyDuplicated(c(idparsopt, idparsfix, idparsfuncdefpar)) != 0) { + stop("At least one element was asked to be fixed, + estimated or a function at the same time") + } + + if (identical(as.numeric(sort( + c(idparsopt, idparsfix, idparsfuncdefpar) + )), as.numeric(sort(unique( + unlist(idparslist) + )))) == FALSE) { + stop( + "All elements in idparslist must be included in either + idparsopt or idparsfix or idparsfuncdefpar " + ) + } + if (is.null(idfactorsopt)) { + structure_func[[3]] <- "noFactor" + } else { + structure_func[[3]] <- idfactorsopt + } + + return(structure_func) +} + +#' @keywords internal +check_ml_conditions <- function(traits, + idparslist, + initparsopt, + idparsopt, + idparsfix, + parsfix) { + if (is.matrix(traits)) { + warning("you are setting a model where some species have more + than one trait state") + } + + if (length(initparsopt) != length(idparsopt)) { + stop("initparsopt must be the same length as idparsopt. + Number of parameters to optimize does not match the number of + initial values for the search") + } + + if (length(idparsfix) != length(parsfix)) { + stop("idparsfix and parsfix must be the same length. + Number of fixed elements does not match the fixed figures") + } + + if (anyDuplicated(c(idparsopt, idparsfix)) != 0) { + stop("at least one element was asked to be both fixed and estimated ") + } + + if (anyDuplicated(c(unique(sort(as.vector(idparslist[[3]]))), + idparsfix[which(parsfix == 0)])) != 0) { + warning("Note: you set some transitions as impossible to happen.") + } + + if (min(initparsopt) <= 0.0) { + stop("All elements in init_parsopt need to be larger than 0") + } +} \ No newline at end of file diff --git a/README.md b/README.md index 6db2b38..9922c01 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ [![](http://cranlogs.r-pkg.org/badges/secsse)](https://CRAN.R-project.org/package=secsse) -Branch|[![GitHub Actions logo](pics/github_actions_logo.png)](https://github.com/features/actions)|[![Codecov logo](pics/Codecov.png)](https://www.codecov.io) +Branch|[![GitHub Actions logo](man/figures/github_actions_logo.png)](https://github.com/features/actions)|[![Codecov logo](man/figures/Codecov.png)](https://www.codecov.io) --------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------- -`master`|[![R build status](https://github.com/rsetienne/secsse/workflows/R-CMD-check/badge.svg?branch=master)](https://github.com/rsetienne/secsse/actions)|[![codecov.io](https://codecov.io/github/rsetienne/secsse/coverage.svg?branch=master)](https://codecov.io/github/rsetienne/secsse/branch/master) -`develop`|[![R build status](https://github.com/rsetienne/secsse/workflows/R-CMD-check/badge.svg?branch=develop)](https://github.com/rsetienne/secsse/actions)|[![codecov.io](https://codecov.io/github/rsetienne/secsse/coverage.svg?branch=develop)](https://codecov.io/github/rsetienne/secsse/branch/develop) +`master`|[![R build status](https://github.com/rsetienne/secsse/workflows/R-CMD-check/badge.svg?branch=master)](https://github.com/rsetienne/secsse/actions)|[![codecov.io](https://codecov.io/gh/rsetienne/secsse/branch/master/graph/badge.svg)](https://codecov.io/github/rsetienne/secsse/branch/master) +`develop`|[![R build status](https://github.com/rsetienne/secsse/workflows/R-CMD-check/badge.svg?branch=develop)](https://github.com/rsetienne/secsse/actions)|[![codecov.io](https://codecov.io/gh/rsetienne/secsse/branch/develop/graph/badge.svg)](https://codecov.io/github/rsetienne/secsse/branch/develop) ## What is SecSSE? SecSSE is an R package designed for multistate data sets under a concealed state and speciation (`hisse`) framework. In this sense, it is parallel to the 'MuSSE' functionality implemented in `diversitree`, but it accounts for finding possible spurious relationships between traits and diversification rates ("false positives", Rabosky & Goldberg 2015) by testing against a "hidden trait" (Beaulieu et al. 2013), which is responsible for more variation in diversification rates than the trait being investigated. @@ -72,4 +72,4 @@ If you use `secsse` in your publications, please cite: * Beaulieu, Jeremy M, and Brian C O'Meara. “Detecting Hidden Diversification Shifts in Models of Trait-Dependent Speciation and Extinction.” Systematic biology vol. 65,4 (2016): 583-601. https://doi.org/10.1093/sysbio/syw022 -* Rabosky, Daniel L., and Emma E. Goldberg. "Model inadequacy and mistaken inferences of trait-dependent speciation." Systematic biology 64.2 (2015): 340-355. https://doi.org/10.1093/sysbio/syu131 \ No newline at end of file +* Rabosky, Daniel L., and Emma E. Goldberg. "Model inadequacy and mistaken inferences of trait-dependent speciation." Systematic biology 64.2 (2015): 340-355. https://doi.org/10.1093/sysbio/syu131 diff --git a/_pkgdown.yml b/_pkgdown.yml index 603bfca..35fcc57 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -2,3 +2,16 @@ url: https://rsetienne.github.io/secsse/ template: bootstrap: 5 +resource_files: + - man/figures/Codecov.png + - man/figures/github_actions_logo.png + +articles: +- title: Articles + navbar: ~ + contents: + - starting_secsse + - plotting_states + - sim_with_secsse + - complete_tree + - secsse_performance diff --git a/data/example_phy_GeoSSE.RData b/data/example_phy_GeoSSE.RData deleted file mode 100644 index f50c150..0000000 Binary files a/data/example_phy_GeoSSE.RData and /dev/null differ diff --git a/data/example_phy_GeoSSE.rda b/data/example_phy_GeoSSE.rda new file mode 100644 index 0000000..c7f778e Binary files /dev/null and b/data/example_phy_GeoSSE.rda differ diff --git a/data/phylo_Vign.RData b/data/phylo_Vign.RData deleted file mode 100644 index ac73801..0000000 Binary files a/data/phylo_Vign.RData and /dev/null differ diff --git a/data/phylo_vignette.rda b/data/phylo_vignette.rda new file mode 100644 index 0000000..f66281d Binary files /dev/null and b/data/phylo_vignette.rda differ diff --git a/data/traitinfo.RData b/data/traitinfo.RData deleted file mode 100644 index ec667ae..0000000 Binary files a/data/traitinfo.RData and /dev/null differ diff --git a/data/traits.rda b/data/traits.rda new file mode 100644 index 0000000..de7ee45 Binary files /dev/null and b/data/traits.rda differ diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..3c01d6c --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,17 @@ +citHeader("To cite secsse in publications use:") + +citEntry( + entry = "Article", + title = "Detecting the Dependence of Diversification on Multiple Traits from Phylogenetic Trees and Trait Data", + author = "Leonel Herrera-Alsina, Paul van Els, Rampal S. Etienne", + journal = "Systematic Biology", + year = 2019, + volume = 68, + number = 2, + pages = "317-328", + url = "https://academic.oup.com/sysbio/article/68/2/317/5107025", + doi = "10.1093/sysbio/syy057", + textVersion = "Leonel Herrera-Alsina, Paul van Els and Rampal S. Etienne, Detecting the Dependence of Diversification on Multiple Traits from Phylogenetic Trees and Trait Data, Systematic Biology, Volume 68, Issue 2, March 2019, Pages 317–328, https://doi.org/10.1093/sysbio/syy057", + footer = "secsse is continually being developed, so you may also want to cite its version number (found with 'library(help = secsse)' or 'packageVersion(\"secsse\")')." +) + diff --git a/inst/COPYRIGHTS b/inst/COPYRIGHTS index 3e5e1af..9f7c807 100644 --- a/inst/COPYRIGHTS +++ b/inst/COPYRIGHTS @@ -1,15 +1,18 @@ -Files: src/config.h, src/odeint.h +Files: src/config.h, src/secsse_eval.cpp, src/secsse_loglik.cpp, +src/secsse_loglik.h Copyright: 2023 Hanno Hildenbrandt License: BSL-1.0 -Files: src/secsse_sim.h, src/rhs.h, src/threaded_ll.h src/util.h, src/util.cpp, -src/secsse_sim.cpp, src/cla_loglik.cpp, src/cla_loglik_threaded.cpp, -src/cla_secsse_store.cpp, src/secsse_loglik_store.cpp, src/secsse_loglik_threaded.cpp +src/odeint.h +Copyright: 2021-2023 Hanno Hildenbrandt +License: BSL-1.0 + +Files: src/secsse_sim.h, src/secsse_sim.cpp Copyright: 2022 - 2023 Thijs Janzen License: BSL-1.0 -Files: src/secsse_loglik.cpp -Copyright: 2022 - 2023 Thijs Janzen and Hanno Hildenbrandt +src/secsse_rhs.h +Copyright: 2021 - 2023 Thijs Janzen, 2023 Hanno Hildenbrandt License: BSL-1.0 Boost Software License - Version 1.0 - August 17th, 2003 diff --git a/man/cla_id_paramPos.Rd b/man/cla_id_paramPos.Rd index e649643..af7c6aa 100644 --- a/man/cla_id_paramPos.Rd +++ b/man/cla_id_paramPos.Rd @@ -2,26 +2,30 @@ % Please edit documentation in R/secsse_utils.R \name{cla_id_paramPos} \alias{cla_id_paramPos} -\title{Parameter structure setting for cla_secsse} +\title{Parameter structure setting for cla_secsse +It sets the parameters (speciation, extinction and transition) +IDs. Needed for ML calculation with cladogenetic options (cla_secsse_ml)} \usage{ cla_id_paramPos(traits, num_concealed_states) } \arguments{ -\item{traits}{vector with trait states, order of states must be the same as -tree tips, for help, see vignette.} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} \item{num_concealed_states}{number of concealed states, generally equivalent -to number of examined states.} +to the number of examined states in the dataset.} } \value{ A list that includes the ids of the parameters for ML analysis. } \description{ +Parameter structure setting for cla_secsse It sets the parameters (speciation, extinction and transition) -ids. Needed for ML calculation with cladogenetic options (cla_secsse_ml) +IDs. Needed for ML calculation with cladogenetic options (cla_secsse_ml) } \examples{ traits <- sample(c(0,1,2), 45,replace = TRUE) #get some traits num_concealed_states <- 3 -param_posit <- cla_id_paramPos(traits,num_concealed_states) +param_posit <- cla_id_paramPos(traits, num_concealed_states) } diff --git a/man/cla_secsse_eval.Rd b/man/cla_secsse_eval.Rd deleted file mode 100644 index 0e44674..0000000 --- a/man/cla_secsse_eval.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cla_secsse_eval.R -\name{cla_secsse_eval} -\alias{cla_secsse_eval} -\title{Likelihood for SecSSE model, using Rcpp} -\usage{ -cla_secsse_eval( - parameter, - phy, - traits, - num_concealed_states, - ancestral_states, - num_steps = NULL, - cond = "proper_cond", - root_state_weight = "proper_weights", - sampling_fraction, - setting_calculation = NULL, - loglik_penalty = 0, - is_complete_tree = FALSE, - method = "odeint::bulirsch_stoer", - atol = 1e-16, - rtol = 1e-16, - verbose = FALSE -) -} -\arguments{ -\item{parameter}{list where the first is a table where lambdas across -different modes of speciation are shown, the second mus and the third - transition rates.} - -\item{phy}{phylogenetic tree of class phylo, ultrametric, fully-resolved, -rooted and with branch lengths.} - -\item{traits}{vector with trait states, order of states must be the same as -tree tips, for help, see vignette.} - -\item{num_concealed_states}{number of concealed states, generally equivalent -to number of examined states.} - -\item{ancestral_states}{ancestral states matrix provided by -cla_secsse_loglik, this is used as starting points for manual integration} - -\item{num_steps}{number of steps to integrate along a branch} - -\item{cond}{condition on the existence of a node root: 'maddison_cond', -'proper_cond'(default). For details, see vignette.} - -\item{root_state_weight}{the method to weigh the states:'maddison_weigh -,'proper_weights'(default) or 'equal_weights'. It can also be specified the -root state:the vector c(1,0,0) indicates state 1 was the root state.} - -\item{sampling_fraction}{vector that states the sampling proportion per trait -state. It must have as many elements as trait states.} - -\item{setting_calculation}{argument used internally to speed up calculation. -It should be leave blank (default : setting_calculation = NULL)} - -\item{loglik_penalty}{the size of the penalty for all parameters; default is -0 (no penalty)} - -\item{is_complete_tree}{whether or not a tree with all its extinct species is -provided} - -\item{method}{integration method used, available are: -"odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -"odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -"odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer".} - -\item{atol}{absolute tolerance of integration} - -\item{rtol}{relative tolerance of integration} - -\item{verbose}{provide intermediate verbose output if TRUE} -} -\value{ -The loglikelihood of the data given the parameters -} -\description{ -Using see_ancestral_states = TRUE in the function -cla_secsse_loglik will provide posterior probabilities of the states of the -model on the nodes of the tree, but will not give the values on the branches. -This function evaluates these probabilities at fixed time intervals dt. -Because dt is fixed, this may lead to some inaccuracies, and dt is best -chosen as small as possible. -} -\details{ -Evaluation of probabilities of observing states along branches. -} diff --git a/man/cla_secsse_loglik.Rd b/man/cla_secsse_loglik.Rd index c6f7ba5..4dd26de 100644 --- a/man/cla_secsse_loglik.Rd +++ b/man/cla_secsse_loglik.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cla_secsse_loglik.R +% Please edit documentation in R/secsse_loglik.R \name{cla_secsse_loglik} \alias{cla_secsse_loglik} -\title{Likelihood for SecSSE model, using Rcpp} +\title{Likelihood for SecSSE model, using Rcpp +Loglikelihood calculation for the cla_SecSSE model given a set of parameters +and data using Rcpp} \usage{ cla_secsse_loglik( parameter, @@ -23,65 +25,62 @@ cla_secsse_loglik( ) } \arguments{ -\item{parameter}{list where the first is a table where lambdas across -different modes of speciation are shown, the second mus and the third - transition rates.} +\item{parameter}{list where first vector represents lambdas, the second +mus and the third transition rates.} -\item{phy}{phylogenetic tree of class phylo, ultrametric, fully-resolved, -rooted and with branch lengths.} +\item{phy}{phylogenetic tree of class \code{phylo}, rooted and with +branch lengths.} -\item{traits}{vector with trait states, order of states must be the same as -tree tips, for help, see vignette.} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} \item{num_concealed_states}{number of concealed states, generally equivalent -to number of examined states.} +to the number of examined states in the dataset.} -\item{cond}{condition on the existence of a node root: 'maddison_cond', -'proper_cond'(default). For details, see vignette.} +\item{cond}{condition on the existence of a node root: \code{"maddison_cond"}, +\code{"proper_cond"} (default). For details, see vignette.} -\item{root_state_weight}{the method to weigh the states:'maddison_weigh -,'proper_weights'(default) or 'equal_weights'. It can also be specified the -root state:the vector c(1,0,0) indicates state 1 was the root state.} +\item{root_state_weight}{the method to weigh the states: +\code{"maddison_weights"}, \code{"proper_weights"} (default) or \code{"equal_weights"}. +It can also be specified for the root state: the vector \code{c(1, 0, 0)} +indicates state 1 was the root state.} -\item{sampling_fraction}{vector that states the sampling proportion per trait -state. It must have as many elements as trait states.} +\item{sampling_fraction}{vector that states the sampling proportion per +trait state. It must have as many elements as there are trait states.} \item{setting_calculation}{argument used internally to speed up calculation. -It should be leave blank (default : setting_calculation = NULL)} +It should be left blank (default : \code{setting_calculation = NULL}).} -\item{see_ancestral_states}{should the ancestral states be shown? Deafault -FALSE} +\item{see_ancestral_states}{Boolean for whether the ancestral states should +be shown? Defaults to \code{FALSE}.} \item{loglik_penalty}{the size of the penalty for all parameters; default is -0 (no penalty)} +0 (no penalty).} -\item{is_complete_tree}{whether or not a tree with all its extinct species is -provided} +\item{is_complete_tree}{logical specifying whether or not a tree with all its +extinct species is provided. If set to \code{TRUE}, it also assumes that all +\emph{all} extinct lineages are present on the tree. Defaults to \code{FALSE}.} -\item{num_threads}{number of threads to be used, default is 1. Set to -1 to -use all available threads.} +\item{num_threads}{number of threads to be used. Default is one thread.} \item{method}{integration method used, available are: -"odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -"odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -"odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer".} +\code{"odeint::runge_kutta_cash_karp54"}, \code{"odeint::runge_kutta_fehlberg78"}, +\code{"odeint::runge_kutta_dopri5"}, \code{"odeint::bulirsch_stoer"} and +\code{"odeint::runge_kutta4"}. Default method is: \code{"odeint::bulirsch_stoer"}.} -\item{atol}{absolute tolerance of integration} +\item{atol}{A numeric specifying the absolute tolerance of integration.} -\item{rtol}{relative tolerance of integration} +\item{rtol}{A numeric specifying the relative tolerance of integration.} } \value{ The loglikelihood of the data given the parameters } \description{ +Likelihood for SecSSE model, using Rcpp Loglikelihood calculation for the cla_SecSSE model given a set of parameters and data using Rcpp } -\note{ -Multithreading might lead to a slightly reduced accuracy -(in the order of 1e-8) and is therefore not enabled by default. -Please use at your own discretion. -} \examples{ rm(list=ls(all=TRUE)) library(secsse) diff --git a/man/cla_secsse_ml.Rd b/man/cla_secsse_ml.Rd index bcee1ee..e0d92fb 100644 --- a/man/cla_secsse_ml.Rd +++ b/man/cla_secsse_ml.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cla_secsse_ml.R +% Please edit documentation in R/secsse_ml.R \name{cla_secsse_ml} \alias{cla_secsse_ml} \title{Maximum likehood estimation for (SecSSE)} @@ -22,7 +22,7 @@ cla_secsse_ml( num_cycles = 1, loglik_penalty = 0, is_complete_tree = FALSE, - verbose = (optimmethod == "subplex"), + verbose = (optimmethod == "simplex"), num_threads = 1, atol = 1e-08, rtol = 1e-07, @@ -30,65 +30,75 @@ cla_secsse_ml( ) } \arguments{ -\item{phy}{phylogenetic tree of class phylo, ultrametric, rooted and with +\item{phy}{phylogenetic tree of class \code{phylo}, rooted and with branch lengths.} -\item{traits}{a vector with trait states for each tip in the phylogeny.} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} \item{num_concealed_states}{number of concealed states, generally equivalent to the number of examined states in the dataset.} \item{idparslist}{overview of parameters and their values.} -\item{idparsopt}{id of parameters to be estimated.} +\item{idparsopt}{a numeric vector with the ID of parameters to be estimated.} -\item{initparsopt}{initial guess of the parameters to be estimated.} +\item{initparsopt}{a numeric vector with the initial guess of the parameters +to be estimated.} -\item{idparsfix}{id of the fixed parameters.} +\item{idparsfix}{a numeric vector with the ID of the fixed parameters.} -\item{parsfix}{value of the fixed parameters.} +\item{parsfix}{a numeric vector with the value of the fixed parameters.} -\item{cond}{condition on the existence of a node root: 'maddison_cond', -'proper_cond'(default). For details, see vignette.} +\item{cond}{condition on the existence of a node root: \code{"maddison_cond"}, +\code{"proper_cond"} (default). For details, see vignette.} \item{root_state_weight}{the method to weigh the states: -'maddison_weights','proper_weights'(default) or 'equal_weights'. -It can also be specified the -root state:the vector c(1,0,0) indicates state 1 was the root state.} +\code{"maddison_weights"}, \code{"proper_weights"} (default) or \code{"equal_weights"}. +It can also be specified for the root state: the vector \code{c(1, 0, 0)} +indicates state 1 was the root state.} \item{sampling_fraction}{vector that states the sampling proportion per trait state. It must have as many elements as there are trait states.} -\item{tol}{maximum tolerance. Default is 'c(1e-04, 1e-05, 1e-05)'.} +\item{tol}{A numeric vector with the maximum tolerance of the optimization +algorithm. Default is \code{c(1e-04, 1e-05, 1e-05)}.} \item{maxiter}{max number of iterations. Default is -'1000*round((1.25)^length(idparsopt))'.} +\code{1000 * round((1.25) ^ length(idparsopt))}.} -\item{optimmethod}{method used for optimization. Available are simplex and -subplex, default is 'subplex'. Simplex should only be used for debugging.} +\item{optimmethod}{A string with method used for optimization. Default is +\code{"subplex"}. Alternative is \code{"simplex"} and it shouldn't be used in normal +conditions (only for debugging). Both are called from \code{\link[DDD:optimizer]{DDD::optimizer()}}, +simplex is implemented natively in \link{DDD}, while subplex is ultimately +called from \code{\link[subplex:subplex]{subplex::subplex()}}.} -\item{num_cycles}{number of cycles of the optimization (default is 1).} +\item{num_cycles}{Number of cycles of the optimization. When set to \code{Inf}, +the optimization will be repeated until the result is, within the +tolerance, equal to the starting values, with a maximum of 10 cycles.} \item{loglik_penalty}{the size of the penalty for all parameters; default is -0 (no penalty)} +0 (no penalty).} -\item{is_complete_tree}{whether or not a tree with all its extinct species -is provided} +\item{is_complete_tree}{logical specifying whether or not a tree with all its +extinct species is provided. If set to \code{TRUE}, it also assumes that all +\emph{all} extinct lineages are present on the tree. Defaults to \code{FALSE}.} -\item{verbose}{sets verbose output; default is verbose when optimmethod is -'subplex'} +\item{verbose}{sets verbose output; default is \code{TRUE} when \code{optimmethod} is +\code{"simplex"}. If \code{optimmethod} is set to \code{"simplex"}, then even if set to +\code{FALSE}, optimizer output will be shown.} -\item{num_threads}{number of threads. Set to -1 to use all available threads. -Default is one thread.} +\item{num_threads}{number of threads to be used. Default is one thread.} -\item{atol}{absolute tolerance of integration} +\item{atol}{A numeric specifying the absolute tolerance of integration.} -\item{rtol}{relative tolerance of integration} +\item{rtol}{A numeric specifying the relative tolerance of integration.} \item{method}{integration method used, available are: -"odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -"odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -"odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer".} +\code{"odeint::runge_kutta_cash_karp54"}, \code{"odeint::runge_kutta_fehlberg78"}, +\code{"odeint::runge_kutta_dopri5"}, \code{"odeint::bulirsch_stoer"} and +\code{"odeint::runge_kutta4"}. Default method is: \code{"odeint::bulirsch_stoer"}.} } \value{ Parameter estimated and maximum likelihood diff --git a/man/cla_secsse_ml_func_def_pars.Rd b/man/cla_secsse_ml_func_def_pars.Rd index 18bcb14..c448a0f 100644 --- a/man/cla_secsse_ml_func_def_pars.Rd +++ b/man/cla_secsse_ml_func_def_pars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cla_secsse_ml_func_def_pars.R +% Please edit documentation in R/secsse_ml_func_def_pars.R \name{cla_secsse_ml_func_def_pars} \alias{cla_secsse_ml_func_def_pars} \title{Maximum likehood estimation for (SecSSE) with parameter as complex @@ -23,11 +23,11 @@ cla_secsse_ml_func_def_pars( sampling_fraction, tol = c(1e-04, 1e-05, 1e-07), maxiter = 1000 * round((1.25)^length(idparsopt)), - optimmethod = "simplex", + optimmethod = "subplex", num_cycles = 1, loglik_penalty = 0, is_complete_tree = FALSE, - verbose = (optimmethod == "subplex"), + verbose = (optimmethod == "simplex"), num_threads = 1, atol = 1e-12, rtol = 1e-12, @@ -35,83 +35,91 @@ cla_secsse_ml_func_def_pars( ) } \arguments{ -\item{phy}{phylogenetic tree of class phylo, ultrametric, rooted and with +\item{phy}{phylogenetic tree of class \code{phylo}, rooted and with branch lengths.} -\item{traits}{a vector with trait states for each tip in the phylogeny.} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} \item{num_concealed_states}{number of concealed states, generally equivalent to the number of examined states in the dataset.} \item{idparslist}{overview of parameters and their values.} -\item{idparsopt}{id of parameters to be estimated.} +\item{idparsopt}{a numeric vector with the ID of parameters to be estimated.} -\item{initparsopt}{initial guess of the parameters to be estimated.} +\item{initparsopt}{a numeric vector with the initial guess of the parameters +to be estimated.} \item{idfactorsopt}{id of the factors that will be optimized. There are not -fixed factors, so use a constant within 'functions_defining_params'.} +fixed factors, so use a constant within \code{functions_defining_params}.} -\item{initfactors}{the initial guess for a factor (it should be set to NULL +\item{initfactors}{the initial guess for a factor (it should be set to \code{NULL} when no factors).} -\item{idparsfix}{id of the fixed parameters (it should be set to NULL when -no factors).} +\item{idparsfix}{a numeric vector with the ID of the fixed parameters.} -\item{parsfix}{value of the fixed parameters.} +\item{parsfix}{a numeric vector with the value of the fixed parameters.} \item{idparsfuncdefpar}{id of the parameters which will be a function of optimized and/or fixed parameters. The order of id should match -functions_defining_params} +\code{functions_defining_params}.} \item{functions_defining_params}{a list of functions. Each element will be a -function which defines a parameter e.g. id_3 <- (id_1+id_2)/2. See example -and vigenette} +function which defines a parameter e.g. \code{id_3 <- (id_1 + id_2) / 2}. See +example.} -\item{cond}{condition on the existence of a node root: 'maddison_cond', -'proper_cond'(default). For details, see vignette.} +\item{cond}{condition on the existence of a node root: \code{"maddison_cond"}, +\code{"proper_cond"} (default). For details, see vignette.} -\item{root_state_weight}{the method to weigh the states:'maddison_weights', -'proper_weights'(default) or 'equal_weights'. It can also be specified the -root -state:the vector c(1,0,0) indicates state 1 was the root state.} +\item{root_state_weight}{the method to weigh the states: +\code{"maddison_weights"}, \code{"proper_weights"} (default) or \code{"equal_weights"}. +It can also be specified for the root state: the vector \code{c(1, 0, 0)} +indicates state 1 was the root state.} -\item{sampling_fraction}{vector that states the sampling proportion per trait -state. It must have as many elements as there are trait states.} +\item{sampling_fraction}{vector that states the sampling proportion per +trait state. It must have as many elements as there are trait states.} -\item{tol}{maximum tolerance. Default is 'c(1e-04, 1e-05, 1e-05)'.} +\item{tol}{A numeric vector with the maximum tolerance of the optimization +algorithm. Default is \code{c(1e-04, 1e-05, 1e-05)}.} \item{maxiter}{max number of iterations. Default is -'1000*round((1.25)^length(idparsopt))'.} +\code{1000 * round((1.25) ^ length(idparsopt))}.} -\item{optimmethod}{method used for optimization. Default is 'simplex'.} +\item{optimmethod}{A string with method used for optimization. Default is +\code{"subplex"}. Alternative is \code{"simplex"} and it shouldn't be used in normal +conditions (only for debugging). Both are called from \code{\link[DDD:optimizer]{DDD::optimizer()}}, +simplex is implemented natively in \link{DDD}, while subplex is ultimately +called from \code{\link[subplex:subplex]{subplex::subplex()}}.} -\item{num_cycles}{number of cycles of the optimization (default is 1).} +\item{num_cycles}{Number of cycles of the optimization. When set to \code{Inf}, +the optimization will be repeated until the result is, within the +tolerance, equal to the starting values, with a maximum of 10 cycles.} -\item{loglik_penalty}{the size of the penalty for all parameters; default -is 0 (no penalty)} +\item{loglik_penalty}{the size of the penalty for all parameters; default is +0 (no penalty).} -\item{is_complete_tree}{whether or not a tree with all its extinct species -is provided} +\item{is_complete_tree}{logical specifying whether or not a tree with all its +extinct species is provided. If set to \code{TRUE}, it also assumes that all +\emph{all} extinct lineages are present on the tree. Defaults to \code{FALSE}.} -\item{verbose}{sets verbose output; default is verbose when optimmethod is -'subplex'} +\item{verbose}{sets verbose output; default is \code{TRUE} when \code{optimmethod} is +\code{"simplex"}. If \code{optimmethod} is set to \code{"simplex"}, then even if set to +\code{FALSE}, optimizer output will be shown.} -\item{num_threads}{number of threads. Set to -1 to use all available -threads. Default is one thread.} +\item{num_threads}{number of threads to be used. Default is one thread.} -\item{atol}{absolute tolerance of integration} +\item{atol}{A numeric specifying the absolute tolerance of integration.} -\item{rtol}{relative tolerance of integration} +\item{rtol}{A numeric specifying the relative tolerance of integration.} \item{method}{integration method used, available are: -"odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -"odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -"odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer".} +\code{"odeint::runge_kutta_cash_karp54"}, \code{"odeint::runge_kutta_fehlberg78"}, +\code{"odeint::runge_kutta_dopri5"}, \code{"odeint::bulirsch_stoer"} and +\code{"odeint::runge_kutta4"}. Default method is: \code{"odeint::bulirsch_stoer"}.} } \value{ -Parameter estimated and maximum likelihood - Parameter estimated and maximum likelihood } \description{ diff --git a/man/create_default_lambda_list.Rd b/man/create_default_lambda_list.Rd deleted file mode 100644 index 5b26651..0000000 --- a/man/create_default_lambda_list.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/secsse_prep.R -\name{create_default_lambda_list} -\alias{create_default_lambda_list} -\title{helper function to create a default lambda list} -\usage{ -create_default_lambda_list(state_names = c("0", "1"), model = "ETD") -} -\arguments{ -\item{state_names}{names of the observed states} - -\item{model}{chosen model of interest, either "CR" (Constant Rates), "ETD" -(Examined Trait Diversification) or "CTD" ("Concealed Trait Diversification).} -} -\description{ -This function generates a generic lambda list, assuming no transitions -between states, e.g. a species of observed state 0 generates daughter -species with state 0 as well. -} diff --git a/man/create_default_lambda_transition_matrix.Rd b/man/create_default_lambda_transition_matrix.Rd new file mode 100644 index 0000000..50fcdc7 --- /dev/null +++ b/man/create_default_lambda_transition_matrix.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_prep.R +\name{create_default_lambda_transition_matrix} +\alias{create_default_lambda_transition_matrix} +\title{Helper function to create a default lambda list} +\usage{ +create_default_lambda_transition_matrix( + state_names = c("0", "1"), + model = "ETD" +) +} +\arguments{ +\item{state_names}{vector of names of all observed states.} + +\item{model}{used model, choice of \code{"ETD"} (Examined Traits Diversification), +\code{"CTD"} (Concealed Traits Diversification) or \code{"CR"} (Constant Rate).} +} +\description{ +This function generates a generic lambda list, assuming no transitions +between states, e.g. a species of observed state 0 generates daughter +species with state 0 as well. +} +\examples{ +lambda_matrix <- + create_default_lambda_transition_matrix(state_names = c(0, 1), + model = "ETD") +lambda_list <- create_lambda_list(state_names = c(0, 1), + num_concealed_states = 2, + transition_matrix = lambda_matrix, + model = "ETD") +} diff --git a/man/create_default_q_list.Rd b/man/create_default_q_list.Rd deleted file mode 100644 index e0f4cea..0000000 --- a/man/create_default_q_list.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/secsse_prep.R -\name{create_default_q_list} -\alias{create_default_q_list} -\title{helper function to create a default q_matrix list} -\usage{ -create_default_q_list( - state_names = c("0", "1"), - num_concealed_states, - mus = NULL -) -} -\arguments{ -\item{state_names}{names of the observed states} - -\item{num_concealed_states}{number of concealed states} - -\item{mus}{previously defined mus - used to choose indicator number} -} -\description{ -This function generates a generic transition list -} diff --git a/man/create_default_shift_matrix.Rd b/man/create_default_shift_matrix.Rd new file mode 100644 index 0000000..01c5281 --- /dev/null +++ b/man/create_default_shift_matrix.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_prep.R +\name{create_default_shift_matrix} +\alias{create_default_shift_matrix} +\title{Helper function to create a default \code{shift_matrix} list} +\usage{ +create_default_shift_matrix( + state_names = c("0", "1"), + num_concealed_states = 2, + mu_vector = NULL +) +} +\arguments{ +\item{state_names}{vector of names of all observed states.} + +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} + +\item{mu_vector}{previously defined mus - used to choose indicator number.} +} +\description{ +This function generates a generic shift matrix to be used with the function +\code{\link[=create_q_matrix]{create_q_matrix()}}. +} +\examples{ +shift_matrix <- create_default_shift_matrix(state_names = c(0, 1), + num_concealed_states = 2, + mu_vector = c(1, 2, 1, 2)) +q_matrix <- create_q_matrix(state_names = c(0, 1), + num_concealed_states = 2, + shift_matrix = shift_matrix, + diff.conceal = FALSE) +} diff --git a/man/create_lambda_list.Rd b/man/create_lambda_list.Rd new file mode 100644 index 0000000..db8021d --- /dev/null +++ b/man/create_lambda_list.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_prep.R +\name{create_lambda_list} +\alias{create_lambda_list} +\title{Helper function to automatically create lambda matrices, based on input} +\usage{ +create_lambda_list( + state_names = c(0, 1), + num_concealed_states = 2, + transition_matrix, + model = "ETD", + concealed_spec_rates = NULL +) +} +\arguments{ +\item{state_names}{vector of names of all observed states.} + +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} + +\item{transition_matrix}{a matrix containing a description of all speciation +events, where the first column indicates the source state, the second and +third column indicate the two daughter states, and the fourth column gives +the rate indicator used. E.g.: \verb{["SA", "S", "A", 1]} for a trait state +\code{"SA"} which upon speciation generates two daughter species with traits +\code{"S"} and \code{"A"}, where the number 1 is used as indicator for optimization +of the likelihood.} + +\item{model}{used model, choice of \code{"ETD"} (Examined Traits Diversification), +\code{"CTD"} (Concealed Traits Diversification) or \code{"CR"} (Constant Rate).} + +\item{concealed_spec_rates}{vector specifying the rate indicators for each +concealed state, length should be identical to \code{num_concealed_states}. If +left empty when using the CTD model, it is assumed that all available +speciation rates are distributed uniformly over the concealed states.} +} +\description{ +Helper function to automatically create lambda matrices, based on input +} +\examples{ +trans_matrix <- c(0, 0, 0, 1) +trans_matrix <- rbind(trans_matrix, c(1, 1, 1, 2)) +lambda_list <- create_lambda_list(state_names = c(0, 1), + num_concealed_states = 2, + transition_matrix = trans_matrix, + model = "ETD") + +} diff --git a/man/create_lambda_matrices.Rd b/man/create_lambda_matrices.Rd deleted file mode 100644 index 65405c8..0000000 --- a/man/create_lambda_matrices.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/secsse_prep.R -\name{create_lambda_matrices} -\alias{create_lambda_matrices} -\title{helper function to automatically create lambda matrices, based on input} -\usage{ -create_lambda_matrices( - state_names, - num_concealed_states, - transition_list, - model = "ETD", - concealed_spec_rates = NULL -) -} -\arguments{ -\item{state_names}{vector of names of all observed states} - -\item{num_concealed_states}{number of hidden states} - -\item{transition_list}{a matrix containing a description of all speciation -events, where the first column indicates the source state, the second and -third column indicate the two daughter states, and the fourth column gives -the rate indicator used. E.g.: ["SA", "S", "A", 1] for a trait state "SA" -which upon speciation generates two daughter species with traits "S" and "A", -where the number 1 is used as indicator for optimization of the likelihood.} - -\item{model}{used model, choice of "ETD" (Examined Traits Diversification) or -"CTD" (Concealed Traits Diversification).} - -\item{concealed_spec_rates}{vector specifying the rate indicators for each -concealed state, length should be identical to num_concealed_states. If left -empty when using the CTD model, it is assumed that all available speciation -rates are distributed uniformly over the concealed states.} -} -\description{ -helper function to automatically create lambda matrices, based on input -} diff --git a/man/create_mu_vector.Rd b/man/create_mu_vector.Rd new file mode 100644 index 0000000..16455d1 --- /dev/null +++ b/man/create_mu_vector.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_prep.R +\name{create_mu_vector} +\alias{create_mu_vector} +\title{Generate mus vector} +\usage{ +create_mu_vector(state_names, num_concealed_states, model = "CR", lambda_list) +} +\arguments{ +\item{state_names}{vector of names of all observed states.} + +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} + +\item{model}{used model, choice of \code{"ETD"} (Examined Traits Diversification), +\code{"CTD"} (Concealed Traits Diversification) or \code{"CR"} (Constant Rate).} + +\item{lambda_list}{previously generated list of lambda matrices, +used to infer the rate number to start with.} +} +\value{ +mu vector +} +\description{ +Generate mus vector +} diff --git a/man/create_mus.Rd b/man/create_mus.Rd deleted file mode 100644 index 50def63..0000000 --- a/man/create_mus.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/secsse_prep.R -\name{create_mus} -\alias{create_mus} -\title{function to generate generic mus vector} -\usage{ -create_mus(state_names, num_concealed_states, model = "CR", lambdas) -} -\arguments{ -\item{state_names}{full state names, including concealed states, for example -c("0A", "1A", "0B", "1B")} - -\item{num_concealed_states}{number of concealed states} - -\item{model}{model replicated, available are "CR", "ETD" and "CTD"} - -\item{lambdas}{previously generated lambda matrices, used to infer the rate -number to start with} -} -\value{ -mu vector -} -\description{ -function to generate generic mus vector -} diff --git a/man/create_q_matrix.Rd b/man/create_q_matrix.Rd new file mode 100644 index 0000000..4ab64f5 --- /dev/null +++ b/man/create_q_matrix.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_prep.R +\name{create_q_matrix} +\alias{create_q_matrix} +\title{Helper function to neatly setup a Q matrix, without transitions to +concealed states (only observed transitions shown)} +\usage{ +create_q_matrix( + state_names, + num_concealed_states, + shift_matrix, + diff.conceal = FALSE +) +} +\arguments{ +\item{state_names}{vector of names of all observed states.} + +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} + +\item{shift_matrix}{matrix of shifts, indicating in order: +\enumerate{ +\item starting state (typically the column in the transition matrix) +\item ending state (typically the row in the transition matrix) +\item associated rate indicator. +}} + +\item{diff.conceal}{Boolean stating if the concealed states should be +different. E.g. that the transition rates for the concealed +states are different from the transition rates for the examined states. +Normally it should be \code{FALSE} in order to avoid having a huge number of +parameters.} +} +\value{ +transition matrix +} +\description{ +Helper function to neatly setup a Q matrix, without transitions to +concealed states (only observed transitions shown) +} +\examples{ +shift_matrix <- c(0, 1, 5) +shift_matrix <- rbind(shift_matrix, c(1, 0, 6)) +q_matrix <- secsse::create_q_matrix(state_names = c(0, 1), + num_concealed_states = 2, + shift_matrix = shift_matrix, + diff.conceal = TRUE) +} diff --git a/man/create_transition_matrix.Rd b/man/create_transition_matrix.Rd deleted file mode 100644 index a7d229c..0000000 --- a/man/create_transition_matrix.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/secsse_prep.R -\name{create_transition_matrix} -\alias{create_transition_matrix} -\title{helper function to neatly setup a Q matrix, without transitions to -concealed states (only observed transitions shown)} -\usage{ -create_transition_matrix( - state_names, - num_concealed_states, - transition_list, - diff.conceal = FALSE -) -} -\arguments{ -\item{state_names}{names of observed states} - -\item{num_concealed_states}{number of concealed states} - -\item{transition_list}{matrix of transitions, indicating in order: 1) -starting state (typically the column in the transition matrix), 2) ending -state (typically the row in the transition matrix) and 3) associated rate -indicator} - -\item{diff.conceal}{should we use the same number of rates for the -concealed state transitions, or should all concealed state transitions -have separate rates? Typically, FALSE is fine and should be used in order -to avoid having a huge number of parameters.} -} -\value{ -transition matrix -} -\description{ -helper function to neatly setup a Q matrix, without transitions to -concealed states (only observed transitions shown) -} diff --git a/man/default_params_doc.Rd b/man/default_params_doc.Rd new file mode 100644 index 0000000..b074c75 --- /dev/null +++ b/man/default_params_doc.Rd @@ -0,0 +1,286 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default_params_doc.R +\name{default_params_doc} +\alias{default_params_doc} +\title{Default parameter documentation} +\usage{ +default_params_doc( + phy, + traits, + num_concealed_states, + idparslist, + initparsopt, + idparsfix, + idparsopt, + idfactorsopt, + parsfix, + cond, + root_state_weight, + sampling_fraction, + tol, + maxiter, + optimethod, + num_cycles, + loglik_penalty, + is_complete_tree, + verbose, + num_threads, + atol, + rtol, + method, + parameter, + setting_calculation, + num_steps, + see_ancestral_states, + lambdas, + mus, + qs, + crown_age, + pool_init_states, + maxSpec, + conditioning, + non_extinction, + max_tries, + drop_extinct, + seed, + prob_func, + parameters, + masterBlock, + diff.conceal, + trait_info, + lambd_and_modeSpe, + initloglik, + initfactors, + idparsfuncdefpar, + functions_defining_params, + state_names, + transition_matrix, + model, + concealed_spec_rates, + shift_matrix, + q_matrix, + lambda_list, + object, + params, + param_posit, + ml_pars, + mu_vector +) +} +\arguments{ +\item{phy}{phylogenetic tree of class \code{phylo}, rooted and with +branch lengths.} + +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} + +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} + +\item{idparslist}{overview of parameters and their values.} + +\item{initparsopt}{a numeric vector with the initial guess of the parameters +to be estimated.} + +\item{idparsfix}{a numeric vector with the ID of the fixed parameters.} + +\item{idparsopt}{a numeric vector with the ID of parameters to be estimated.} + +\item{idfactorsopt}{id of the factors that will be optimized. There are not +fixed factors, so use a constant within \code{functions_defining_params}.} + +\item{parsfix}{a numeric vector with the value of the fixed parameters.} + +\item{cond}{condition on the existence of a node root: \code{"maddison_cond"}, +\code{"proper_cond"} (default). For details, see vignette.} + +\item{root_state_weight}{the method to weigh the states: +\code{"maddison_weights"}, \code{"proper_weights"} (default) or \code{"equal_weights"}. +It can also be specified for the root state: the vector \code{c(1, 0, 0)} +indicates state 1 was the root state.} + +\item{sampling_fraction}{vector that states the sampling proportion per +trait state. It must have as many elements as there are trait states.} + +\item{tol}{A numeric vector with the maximum tolerance of the optimization +algorithm. Default is \code{c(1e-04, 1e-05, 1e-05)}.} + +\item{maxiter}{max number of iterations. Default is +\code{1000 * round((1.25) ^ length(idparsopt))}.} + +\item{num_cycles}{Number of cycles of the optimization. When set to \code{Inf}, +the optimization will be repeated until the result is, within the +tolerance, equal to the starting values, with a maximum of 10 cycles.} + +\item{loglik_penalty}{the size of the penalty for all parameters; default is +0 (no penalty).} + +\item{is_complete_tree}{logical specifying whether or not a tree with all its +extinct species is provided. If set to \code{TRUE}, it also assumes that all +\emph{all} extinct lineages are present on the tree. Defaults to \code{FALSE}.} + +\item{verbose}{sets verbose output; default is \code{TRUE} when \code{optimmethod} is +\code{"simplex"}. If \code{optimmethod} is set to \code{"simplex"}, then even if set to +\code{FALSE}, optimizer output will be shown.} + +\item{num_threads}{number of threads to be used. Default is one thread.} + +\item{atol}{A numeric specifying the absolute tolerance of integration.} + +\item{rtol}{A numeric specifying the relative tolerance of integration.} + +\item{method}{integration method used, available are: +\code{"odeint::runge_kutta_cash_karp54"}, \code{"odeint::runge_kutta_fehlberg78"}, +\code{"odeint::runge_kutta_dopri5"}, \code{"odeint::bulirsch_stoer"} and +\code{"odeint::runge_kutta4"}. Default method is: \code{"odeint::bulirsch_stoer"}.} + +\item{parameter}{list where first vector represents lambdas, the second +mus and the third transition rates.} + +\item{setting_calculation}{argument used internally to speed up calculation. +It should be left blank (default : \code{setting_calculation = NULL}).} + +\item{num_steps}{number of substeps to show intermediate likelihoods +along a branch.} + +\item{see_ancestral_states}{Boolean for whether the ancestral states should +be shown? Defaults to \code{FALSE}.} + +\item{lambdas}{speciation rates, in the form of a list of matrices.} + +\item{mus}{extinction rates, in the form of a vector.} + +\item{qs}{The Q matrix, for example the result of function q_doubletrans, but +generally in the form of a matrix.} + +\item{crown_age}{crown age of the tree, tree will be simulated conditional +on non-extinction and this crown age.} + +\item{pool_init_states}{pool of initial states at the crown, in case this is +different from all available states, otherwise leave at NULL} + +\item{conditioning}{can be \code{"obs_states"}, \code{"true_states"} or \code{"none"}, the +tree is simulated until one is generated that contains all observed states +(\code{"obs_states"}), all true states (e.g. all combinations of obs and hidden +states), or is always returned (\code{"none"}). Alternatively, a vector with +the names of required observed states can be provided, e.g. c("S", "N").} + +\item{non_extinction}{boolean stating if the tree should be conditioned on +non-extinction of the crown lineages. Defaults to \code{TRUE}.} + +\item{max_tries}{maximum number of simulations to try to obtain a tree.} + +\item{drop_extinct}{boolean stating if extinct species should be dropped from +the tree. Defaults to \code{TRUE}.} + +\item{seed}{pseudo-random number generator seed.} + +\item{prob_func}{a function to calculate the probability of interest, see +description.} + +\item{parameters}{list where first vector represents lambdas, the second mus +and the third transition rates.} + +\item{masterBlock}{matrix of transitions among only examined states, \code{NA} in +the main diagonal, used to build the full transition rates matrix.} + +\item{diff.conceal}{Boolean stating if the concealed states should be +different. E.g. that the transition rates for the concealed +states are different from the transition rates for the examined states. +Normally it should be \code{FALSE} in order to avoid having a huge number of +parameters.} + +\item{trait_info}{data frame where first column has species ids and the second +one is the trait associated information.} + +\item{lambd_and_modeSpe}{a matrix with the 4 models of speciation possible.} + +\item{initloglik}{A numeric with the value of loglikehood obtained prior to +optimisation. Only used internally.} + +\item{initfactors}{the initial guess for a factor (it should be set to \code{NULL} +when no factors).} + +\item{idparsfuncdefpar}{id of the parameters which will be a function of +optimized and/or fixed parameters. The order of id should match +\code{functions_defining_params}.} + +\item{functions_defining_params}{a list of functions. Each element will be a +function which defines a parameter e.g. \code{id_3 <- (id_1 + id_2) / 2}. See +example.} + +\item{state_names}{vector of names of all observed states.} + +\item{transition_matrix}{a matrix containing a description of all speciation +events, where the first column indicates the source state, the second and +third column indicate the two daughter states, and the fourth column gives +the rate indicator used. E.g.: \verb{["SA", "S", "A", 1]} for a trait state +\code{"SA"} which upon speciation generates two daughter species with traits +\code{"S"} and \code{"A"}, where the number 1 is used as indicator for optimization +of the likelihood.} + +\item{model}{used model, choice of \code{"ETD"} (Examined Traits Diversification), +\code{"CTD"} (Concealed Traits Diversification) or \code{"CR"} (Constant Rate).} + +\item{concealed_spec_rates}{vector specifying the rate indicators for each +concealed state, length should be identical to \code{num_concealed_states}. If +left empty when using the CTD model, it is assumed that all available +speciation rates are distributed uniformly over the concealed states.} + +\item{shift_matrix}{matrix of shifts, indicating in order: +\enumerate{ +\item starting state (typically the column in the transition matrix) +\item ending state (typically the row in the transition matrix) +\item associated rate indicator. +}} + +\item{q_matrix}{\code{q_matrix} with only transitions between observed states.} + +\item{lambda_list}{previously generated list of lambda matrices, +used to infer the rate number to start with.} + +\item{object}{lambda matrices, \code{q_matrix} or mu vector.} + +\item{params}{parameters in order, where each value reflects the value +of the parameter at that position, e.g. \code{c(0.3, 0.2, 0.1)} will fill out +the value 0.3 for the parameter with rate identifier 1, 0.2 for the +parameter with rate identifier 2 and 0.1 for the parameter with rate +identifier 3.} + +\item{param_posit}{initial parameter structure, consisting of a list with +three entries: +\enumerate{ +\item lambda matrices +\item mus +\item Q matrix +} + +In each entry, integers numbers (1-n) indicate the parameter to be +optimized.} + +\item{ml_pars}{resulting parameter estimates as returned by for instance +\code{\link[=cla_secsse_ml]{cla_secsse_ml()}}, having the same structure as \code{param_post}.} + +\item{mu_vector}{previously defined mus - used to choose indicator number.} + +\item{max_spec}{Maximum number of species in the tree (please note that the +tree is not conditioned on this number, but that this is a safeguard +against generating extremely large trees).} + +\item{min_spec}{Minimum number of species in the tree.} + +\item{optimmethod}{A string with method used for optimization. Default is +\code{"subplex"}. Alternative is \code{"simplex"} and it shouldn't be used in normal +conditions (only for debugging). Both are called from \code{\link[DDD:optimizer]{DDD::optimizer()}}, +simplex is implemented natively in \link{DDD}, while subplex is ultimately +called from \code{\link[subplex:subplex]{subplex::subplex()}}.} +} +\value{ +Nothing +} +\description{ +This function's purpose is to list all parameter documentation to be +inherited by the relevant functions. +} +\keyword{internal} diff --git a/man/event_times.Rd b/man/event_times.Rd index f80dfbe..deb63ce 100755 --- a/man/event_times.Rd +++ b/man/event_times.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/event_times.R +% Please edit documentation in R/secsse_utils.R \name{event_times} \alias{event_times} \title{Event times of a (possibly non-ultrametric) phylogenetic tree} diff --git a/man/example_phy_GeoSSE.Rd b/man/example_phy_GeoSSE.Rd index ec0f2c1..07f13d9 100755 --- a/man/example_phy_GeoSSE.Rd +++ b/man/example_phy_GeoSSE.Rd @@ -1,15 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R +% Please edit documentation in R/secsse_data.R \docType{data} \name{example_phy_GeoSSE} \alias{example_phy_GeoSSE} -\alias{phy} \title{A phylogeny with traits at the tips} \format{ A phylogeny as created by GeoSSE (diversitree) } \usage{ -phy +example_phy_GeoSSE } \description{ An example phylogeny for testing purposes diff --git a/man/expand_q_matrix.Rd b/man/expand_q_matrix.Rd index 25beafe..324dd93 100644 --- a/man/expand_q_matrix.Rd +++ b/man/expand_q_matrix.Rd @@ -2,25 +2,28 @@ % Please edit documentation in R/secsse_prep.R \name{expand_q_matrix} \alias{expand_q_matrix} -\title{function to expand an existing q_matrix to a number of -concealed states} +\title{Function to expand an existing q_matrix to a number of concealed states} \usage{ expand_q_matrix(q_matrix, num_concealed_states, diff.conceal = FALSE) } \arguments{ -\item{q_matrix}{q_matrix with only transitions between observed states} +\item{q_matrix}{\code{q_matrix} with only transitions between observed states.} -\item{num_concealed_states}{number of concealed states} +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} -\item{diff.conceal}{should we use the same number of rates for the -concealed state transitions, or should all concealed state transitions -have separate rates? Typically, FALSE is fine and should be used in order -to avoid having a huge number of parameters.} +\item{diff.conceal}{Boolean stating if the concealed states should be +different. E.g. that the transition rates for the concealed +states are different from the transition rates for the examined states. +Normally it should be \code{FALSE} in order to avoid having a huge number of +parameters.} } \value{ updated q matrix } \description{ -function to expand an existing q_matrix to a number of -concealed states +Function to expand an existing q_matrix to a number of concealed states +} +\note{ +This is highly similar to \code{\link[=q_doubletrans]{q_doubletrans()}}. } diff --git a/man/extract_par_vals.Rd b/man/extract_par_vals.Rd index e3937e8..9feb080 100644 --- a/man/extract_par_vals.Rd +++ b/man/extract_par_vals.Rd @@ -2,23 +2,30 @@ % Please edit documentation in R/secsse_prep.R \name{extract_par_vals} \alias{extract_par_vals} -\title{function to extract parameter values out of the result of a maximum -likelihood inference run.} +\title{Extract parameter values out of the result of a maximum likelihood inference +run} \usage{ extract_par_vals(param_posit, ml_pars) } \arguments{ \item{param_posit}{initial parameter structure, consisting of a list with -three entries: 1) lambda matrices, 2) mus and 3) Q matrix. In each entry, -integers numbers (1-n) indicate the parameter to be optimized} +three entries: +\enumerate{ +\item lambda matrices +\item mus +\item Q matrix +} + +In each entry, integers numbers (1-n) indicate the parameter to be +optimized.} \item{ml_pars}{resulting parameter estimates as returned by for instance -cla_secsse_ml, having the same structure as param_post} +\code{\link[=cla_secsse_ml]{cla_secsse_ml()}}, having the same structure as \code{param_post}.} } \value{ -vector of parameter estimates +Vector of parameter estimates. } \description{ -function to extract parameter values out of the result of a maximum -likelihood inference run. +Extract parameter values out of the result of a maximum likelihood inference +run } diff --git a/pics/Codecov.png b/man/figures/Codecov.png similarity index 100% rename from pics/Codecov.png rename to man/figures/Codecov.png diff --git a/pics/github_actions_logo.png b/man/figures/github_actions_logo.png similarity index 100% rename from pics/github_actions_logo.png rename to man/figures/github_actions_logo.png diff --git a/man/fill_in.Rd b/man/fill_in.Rd index 4d6bbfa..a132540 100644 --- a/man/fill_in.Rd +++ b/man/fill_in.Rd @@ -2,19 +2,23 @@ % Please edit documentation in R/secsse_prep.R \name{fill_in} \alias{fill_in} -\title{helper function to enter parameter value on their right place} +\title{Helper function to enter parameter value on their right place} \usage{ fill_in(object, params) } \arguments{ -\item{object}{lambda matrices, q_matrix or mu vector} +\item{object}{lambda matrices, \code{q_matrix} or mu vector.} \item{params}{parameters in order, where each value reflects the value -of the parameter at that position, e.g. c(0.3, 0.2, 0.1) will fill out -the value 0.3 for the parameter with rate indentifier 1, 0.2 for the +of the parameter at that position, e.g. \code{c(0.3, 0.2, 0.1)} will fill out +the value 0.3 for the parameter with rate identifier 1, 0.2 for the parameter with rate identifier 2 and 0.1 for the parameter with rate -identifier 3} +identifier 3.} +} +\value{ +lambda matrices, \code{q_matrix} or mu vector with the correct values in +their right place. } \description{ -helper function to enter parameter value on their right place +Helper function to enter parameter value on their right place } diff --git a/man/id_paramPos.Rd b/man/id_paramPos.Rd index df14c2b..fa4e6ef 100644 --- a/man/id_paramPos.Rd +++ b/man/id_paramPos.Rd @@ -2,23 +2,27 @@ % Please edit documentation in R/secsse_utils.R \name{id_paramPos} \alias{id_paramPos} -\title{Parameter structure setting} +\title{Parameter structure setting +Sets the parameters (speciation, extinction and transition) ids. Needed for +ML calculation (\code{\link[=secsse_ml]{secsse_ml()}}).} \usage{ id_paramPos(traits, num_concealed_states) } \arguments{ -\item{traits}{vector with trait states, order of states must be the same as -tree tips, for help, see vignette.} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} \item{num_concealed_states}{number of concealed states, generally equivalent -to number of examined states.} +to the number of examined states in the dataset.} } \value{ A list that includes the ids of the parameters for ML analysis. } \description{ -It sets the parameters (speciation, extinction and transition) -ids. Needed for ML calculation (secsse_ml) +Parameter structure setting +Sets the parameters (speciation, extinction and transition) ids. Needed for +ML calculation (\code{\link[=secsse_ml]{secsse_ml()}}). } \examples{ traits <- sample(c(0,1,2), 45,replace = TRUE) #get some traits diff --git a/man/phylo_Vign.Rd b/man/phylo_Vign.Rd deleted file mode 100755 index 460b5d9..0000000 --- a/man/phylo_Vign.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\name{phylo_Vign} -\alias{phylo_Vign} -\title{A phylogenetic reconstuction to run the vignette} -\format{ -Phylogenetic tree in format nexus, rooted, including branch lengths -} -\description{ -An example phylogeny in the right format for secsse -} diff --git a/man/phylo_vignette.Rd b/man/phylo_vignette.Rd new file mode 100644 index 0000000..c4e3382 --- /dev/null +++ b/man/phylo_vignette.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/secsse_data.R +\docType{data} +\name{phylo_vignette} +\alias{phylo_vignette} +\title{A phylogenetic reconstuction to run the vignette} +\format{ +Phylogenetic tree in phy format, rooted, including branch lengths +} +\usage{ +phylo_vignette +} +\description{ +An example phylogeny in the right format for secsse +} +\keyword{datasets} diff --git a/man/plot_state_exact.Rd b/man/plot_state_exact.Rd index e3c095c..41ced77 100644 --- a/man/plot_state_exact.Rd +++ b/man/plot_state_exact.Rd @@ -1,12 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_state_exact.R +% Please edit documentation in R/seccse_plot.R \name{plot_state_exact} \alias{plot_state_exact} -\title{function to plot the local probability along the tree, including the branches} +\title{Plot the local probability along a tree} \usage{ plot_state_exact( parameters, - focal_tree, + phy, traits, num_concealed_states, sampling_fraction, @@ -16,77 +16,93 @@ plot_state_exact( method = "odeint::bulirsch_stoer", atol = 1e-16, rtol = 1e-16, - steps = NULL, + num_steps = 100, prob_func = NULL, verbose = FALSE ) } \arguments{ -\item{parameters}{used parameters for the likelihood calculation} +\item{parameters}{list where first vector represents lambdas, the second mus +and the third transition rates.} -\item{focal_tree}{used phylogeny} +\item{phy}{phylogenetic tree of class \code{phylo}, rooted and with +branch lengths.} -\item{traits}{used traits} +\item{traits}{vector with trait states for each tip in the phylogeny. The +order of the states must be the same as the tree tips. For help, see +\code{vignette("starting_secsse", package = "secsse")}.} -\item{num_concealed_states}{number of concealed states} +\item{num_concealed_states}{number of concealed states, generally equivalent +to the number of examined states in the dataset.} -\item{sampling_fraction}{sampling fraction} +\item{sampling_fraction}{vector that states the sampling proportion per +trait state. It must have as many elements as there are trait states.} -\item{cond}{condition on the existence of a node root: 'maddison_cond', -'proper_cond'(default). For details, see vignette.} +\item{cond}{condition on the existence of a node root: \code{"maddison_cond"}, +\code{"proper_cond"} (default). For details, see vignette.} -\item{root_state_weight}{the method to weigh the states:'maddison_weigh -,'proper_weights'(default) or 'equal_weights'. It can also be specified the -root state:the vector c(1,0,0) indicates state 1 was the root state.} +\item{root_state_weight}{the method to weigh the states: +\code{"maddison_weights"}, \code{"proper_weights"} (default) or \code{"equal_weights"}. +It can also be specified for the root state: the vector \code{c(1, 0, 0)} +indicates state 1 was the root state.} -\item{is_complete_tree}{whether or not a tree with all its extinct species is -provided} +\item{is_complete_tree}{logical specifying whether or not a tree with all its +extinct species is provided. If set to \code{TRUE}, it also assumes that all +\emph{all} extinct lineages are present on the tree. Defaults to \code{FALSE}.} \item{method}{integration method used, available are: -"odeint::runge_kutta_cash_karp54", "odeint::runge_kutta_fehlberg78", -"odeint::runge_kutta_dopri5", "odeint::bulirsch_stoer" and -"odeint::runge_kutta4". Default method is:"odeint::bulirsch_stoer".} +\code{"odeint::runge_kutta_cash_karp54"}, \code{"odeint::runge_kutta_fehlberg78"}, +\code{"odeint::runge_kutta_dopri5"}, \code{"odeint::bulirsch_stoer"} and +\code{"odeint::runge_kutta4"}. Default method is: \code{"odeint::bulirsch_stoer"}.} -\item{atol}{absolute tolerance of integration} +\item{atol}{A numeric specifying the absolute tolerance of integration.} -\item{rtol}{relative tolerance of integration} +\item{rtol}{A numeric specifying the relative tolerance of integration.} -\item{steps}{number of substeps evaluated per branch, see description.} +\item{num_steps}{number of substeps to show intermediate likelihoods +along a branch.} \item{prob_func}{a function to calculate the probability of interest, see -description} +description.} -\item{verbose}{provides intermediate output (progressbars etc) when TRUE.} +\item{verbose}{sets verbose output; default is \code{TRUE} when \code{optimmethod} is +\code{"simplex"}. If \code{optimmethod} is set to \code{"simplex"}, then even if set to +\code{FALSE}, optimizer output will be shown.} } \value{ ggplot2 object } \description{ -this function will evaluate the log likelihood locally along -all branches and plot the result. When steps is left to NULL, all likelihood -evaluations during integration are used for plotting. This may work for not -too large trees, but may become very memory heavy for larger trees. Instead, -the user can indicate a number of steps, which causes the probabilities to be -evaluated at a distinct amount of steps along each branch (and the -probabilities to be properly integrated in between these steps). This -provides an approximation, but generally results look very similar to using -the full evaluation. -The function used for prob_func will be highly dependent on your system. +Plot the local probability along the tree, including the branches +} +\details{ +This function will evaluate the log likelihood locally along +all branches and plot the result. When \code{num_steps} is left to \code{NULL}, all +likelihood evaluations during integration are used for plotting. This may +work for not too large trees, but may become very memory heavy for larger +trees. Instead, the user can indicate a number of steps, which causes the +probabilities to be evaluated at a distinct amount of steps along each branch +(and the probabilities to be properly integrated in between these steps). +This provides an approximation, but generally results look very similar to +using the full evaluation. +The function used for \code{prob_func} will be highly dependent on your system. for instance, for a 3 observed, 2 hidden states model, the probability -of state A is prob[1] + prob[2] + prob[3], normalized by the row sum. -prob_func will be applied to each row of the 'states' matrix (you can thus +of state A is \code{prob[1] + prob[2] + prob[3]}, normalized by the row sum. +\code{prob_func} will be applied to each row of the 'states' matrix (you can thus test your function on the states matrix returned when -'see_ancestral_states = TRUE'). Please note that the first N columns of the -states matrix are the extinction rates, and the (N+1):2N columns belong to -the speciation rates, where N = num_obs_states * num_concealed_states. - A typical probfunc function will look like: -my_prob_func <- function(x) { - return(sum(x[5:8]) / sum(x)) -} +\code{'see_ancestral_states = TRUE'}). Please note that the first N columns of the +states matrix are the extinction rates, and the \verb{(N+1):2N} columns belong to +the speciation rates, where \code{N = num_obs_states * num_concealed_states}. +A typical \code{prob_func} function will look like: + +\if{html}{\out{