From a9c1b2e63895be7438976f6d9df702effa67ae2f Mon Sep 17 00:00:00 2001 From: brycefrank Date: Sat, 21 Sep 2024 13:24:23 -0700 Subject: [PATCH 1/6] initial commit for pub processing --- .github/workflows/models.install.yaml | 6 +- DESCRIPTION | 3 + NAMESPACE | 7 + R/boilerplate.R | 98 ++++++++ R/params.R | 52 +++++ R/publication_processing.R | 212 ++++++++++++++++++ .../vsa_brackett_1977_test.csv | 25 +++ .../test_publications/a_e/barrett_1978.R | 49 ++++ .../test_publications/a_e/brackett_1977.R | 34 +++ tests/testthat.R | 12 + tests/testthat/test-publication-processing.R | 50 +++++ tests/testthat/test-update-models.R | 22 ++ 12 files changed, 568 insertions(+), 2 deletions(-) create mode 100644 NAMESPACE create mode 100644 R/boilerplate.R create mode 100644 R/params.R create mode 100644 R/publication_processing.R create mode 100644 inst/testdata/test_parameters/vsa_brackett_1977_test.csv create mode 100644 inst/testdata/test_publications/a_e/barrett_1978.R create mode 100644 inst/testdata/test_publications/a_e/brackett_1977.R create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-publication-processing.R create mode 100644 tests/testthat/test-update-models.R diff --git a/.github/workflows/models.install.yaml b/.github/workflows/models.install.yaml index 3e55cfd..1ee4fcc 100644 --- a/.github/workflows/models.install.yaml +++ b/.github/workflows/models.install.yaml @@ -2,7 +2,9 @@ name: model-install on: push: - branches: [ main ] + branches: [ migrate_processing ] + pull-request: + branches: migrate_processing jobs: build: @@ -29,7 +31,7 @@ jobs: - name: Clone allometric/allometric. run: | - git clone https://github.com/allometric/allometric.git + git clone --branch migrate_processing https://github.com/allometric/allometric.git - name: Install allometric, generate the models.RDS file. 
run: | diff --git a/DESCRIPTION b/DESCRIPTION index 8ae0418..5cf4900 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,6 +9,8 @@ License: MIT + file LICENSE Encoding: UTF-8 Imports: devtools +Remotes: + allometric/allometric Depends: R (>= 2.10) Suggests: @@ -17,3 +19,4 @@ Config/testthat/edition: 3 LazyData: true BugReports: https://github.com/allometric/models/issues Contact: bfrank70@gmail.com +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..a524ad2 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,7 @@ +# Generated by roxygen2: do not edit by hand + +export(get_params_path) +export(ingest_models) +export(load_parameter_frame) +export(map_publications) +export(set_params_path) diff --git a/R/boilerplate.R b/R/boilerplate.R new file mode 100644 index 0000000..c03c2a1 --- /dev/null +++ b/R/boilerplate.R @@ -0,0 +1,98 @@ +find_pub_dir <- function(pub_id) { + pub_dir <- system.file("publications", package = "allometric") + pub_dir_names <- list.files(pub_dir) + + pub_char <- substr(pub_id, 1, 1) + matched_dir <- "" + + for(dir_name in pub_dir_names) { + first_char <- substr(dir_name, 1, 1) + second_char <- substr(dir_name, 3, 3) + seq <- letters[(letters >= first_char) & (letters <= second_char)] + + if(any(grepl(pub_char, seq, fixed = TRUE))) { + return(dir_name) + } + } + + stop(paste("No matching directory found for pub_id:"), pub_id) +} + + +generate_pub_obj <- function(pub_id, bibtype) { + pub_id_quo <- paste("\"", pub_id, "\"", sep = "") + bibtype_quo <- paste("\"", bibtype, "\"", sep = "") + + pub_obj <- paste(c( + paste(pub_id, "<-", "Publication("), + " citation = RefManageR::BibEntry(", + paste(" key = ", pub_id_quo, ",", sep = ""), + paste(" bibtype = ", bibtype_quo, ",", sep = ""), + paste(" title = ", "", ",", sep = ""), + paste(" author = ", "<author>", ",", sep = ""), + paste(" year = ", "<year>", ",", sep = ""), + " )", + ")" + ), collapse = "\n") + + pub_obj + +} + +generate_fixef_model <- function() { + fixef_model 
<- paste(c( + "<model> <- FixedEffectsModel(", + " response = list(", + " <res> = units::as_units(<res_unit>)", + " )", + " covariates = list(", + " <covt_unit1> = units::as_units(<covt_unit>)", + " )", + " parameters = list(", + " <parameters>", + " )", + " predict_fn = function(<covts>) {", + " <predict_fn>", + " }", + ")" + ), collapse = "\n") + + fixef_model +} + +generate_footer <- function(pub_id, n_fixef_models) { + out <- c( + paste(pub_id, "%>%") + ) + + for(i in 1:n_fixef_models) { + out <- c( + out, + paste(" add_model(<model_", i, ">)", sep="") + ) + } + + out +} + +generate_pub <- function(models_dir, pub_id, bibtype, n_fixef_models = 0) { + pub_obj <- generate_pub_obj(pub_id, bibtype) + text <- c(pub_obj, "") + + if (n_fixef_models > 0) { + for (i in 1:n_fixef_models) { + text <- c(text, generate_fixef_model(), "") + } + + text <- c(text, generate_footer(pub_id, n_fixef_models)) + } + + pub_dir <- find_pub_dir(pub_id) + pub_file_name <- paste(pub_id, ".R", sep = "") + + out_path <- file.path( + models_dir, "/publications/", pub_dir, pub_file_name + ) + + write(text, out_path) +} \ No newline at end of file diff --git a/R/params.R b/R/params.R new file mode 100644 index 0000000..6fecebe --- /dev/null +++ b/R/params.R @@ -0,0 +1,52 @@ +#' Load a parameter frame from the models/parameters directory +#' +#' This is a convenience function that allows a user to easily load parameter +#' files from a directory, and is typically used when creating +#' `FixedEffectsSet`. By default the function will load parameter frames +#' from an existing `allometric` installation. For the purposes of testing +#' publication files locally, refer to `set_params_path`. +#' +#' @param name The name of the file, excluding the extension +#' @return A tibble::tbl_df of the parameter data. 
+#' @export +load_parameter_frame <- function(name) { + csv_name <- paste(name, ".csv", sep = "") + param_search_path <- get_params_path() + + if(param_search_path == "package") { + file_path <- system.file( + "models-main/parameters", csv_name, + package = "allometric" + ) + } else { + file_path <- file.path(param_search_path, csv_name) + } + + table <- utils::read.csv(file_path, na.strings = "") + tibble::as_tibble(table) +} + +#' Set the parameter search path +#' +#' The parameter search path is where `allometric` looks for parameter frames. +#' By default, the package searches the local installation, however it is +#' useful when testing publication files to search a local directory, which +#' can be set here. +#' +#' @param params_path The file path containing parameter files +#' @export +set_params_path <- function(params_path) { + params_path <- list(params_path = params_path) + rds_path <- file.path(system.file("extdata", package = "allometric"), "params_path.RDS") + saveRDS(params_path, rds_path) +} + +#' Get the parameter search path +#' +#' @return A string containing the currently set parameter search path +#' @export +get_params_path <- function() { + rds_path <- file.path(system.file("extdata", package = "allometric"), "params_path.RDS") + rds <- readRDS(rds_path) + rds$params_path +} diff --git a/R/publication_processing.R b/R/publication_processing.R new file mode 100644 index 0000000..a9860b8 --- /dev/null +++ b/R/publication_processing.R @@ -0,0 +1,212 @@ +#' Retrieve all publication file names in the publication subdirectories +#' +#' The publication subdirectories divide the publications into groups sorted +#' by last name. This function returns a list containing the publication paths +#' and the publication names. 
+#' +#' @keywords internal +get_pub_file_specs <- function(pub_path) { + sub_dirs <- list.files(pub_path) + pub_paths <- c() + pub_names <- c() + + for(dir in sub_dirs) { + dir_path <- file.path(pub_path, dir) + + names <- list.files(dir_path) + paths <- file.path(dir_path, names) + + pub_paths <- c(pub_paths, paths) + pub_names <- c(pub_names, names) + } + + list(pub_paths = pub_paths, pub_names = pub_names) +} + +#' Hashes a function string +#' +#' We need some sort of stable data structure that will serve as a unique ID +#' for a model, but will also change in the event that the model changes. This +#' way, models can be "versioned" across time, which may be useful for debugging +#' purposes down the line. This function trims whitespace and lowercases +#' the predict_fn_populated pasted with the descriptors, which serves as a +#' reasonable proxy for the model. +#' +#' @keywords internal +get_model_hash <- function(predict_fn_populated, descriptors) { + descriptors_string <- gsub(" ", "", tolower(paste(descriptors, collapse = ""))) + fn_string <- gsub(" ", "", tolower(paste(deparse(predict_fn_populated), collapse = ""))) + hash_str <- paste(descriptors_string, fn_string, sep = "") + as.character(openssl::md5(hash_str)) +} + +append_search_descriptors <- function(row, model_descriptors) { + row$country <- list(unlist(model_descriptors$country)) + row$region <- list(unlist(model_descriptors$region)) + row$taxa <- model_descriptors$taxa + row +} + +#' Creates a dataframe row from model information +#' +#' @keywords internal +create_model_row <- function(model, pub, model_id) { + model_descriptors <- model@descriptors + + if(!"taxa" %in% colnames(model_descriptors)) { + model_descriptors$taxa <- list(allometric::Taxa()) + } + + model_row <- tibble::as_tibble(list(pub_id = pub@id)) + model_row$id <- model_id + model_row$model <- c(model) + + # Gets rid of column not exist errors. 
+ suppressWarnings( + model_row <- append_search_descriptors( + model_row, + model_descriptors + ) + ) + + family_name <- pub@citation$author$family + model_row$family_name <- list(as.character(family_name)) + + covt_name <- names(model@covariates) + model_row$covt_name <- list(covt_name) + + pub_year <- as.numeric(pub@citation$year) + model_row$pub_year <- pub_year + + response_def <- allometric::get_variable_def(names(model@response)[[1]], return_exact_only = TRUE) + model_row$model_type <- model@model_type + + model_row +} + +#' Aggregates the set of models in a publication into a model_tbl +#' +#' @param pub The publication object +#' @keywords internal +aggregate_pub_models <- function(pub) { + agg_models <- list() + + response_sets <- pub@response_sets + for (i in seq_along(response_sets)) { + response_set <- response_sets[[i]] + for (j in seq_along(response_set)) { + model_set <- response_set[[j]] + for (k in seq_along(model_set@models)) { + model <- model_set@models[[k]] + hash <- get_model_hash(model@predict_fn_populated, model@descriptors) + model_id <- substr(hash, 1, 8) + + agg_models[[model_id]] <- create_model_row(model, pub, model_id) + } + } + } + + dplyr::bind_rows(agg_models) +} + +#' Iteratively process publication files +#' +#' This function allows a user to flexibly extract information as it loops over +#' the publication files. Two main internal use-cases exist for this. First, +#' it is used to install models as is done in `install_models()` and, second, +#' it is used to populate the remote MongoDB. Most users will not be interested +#' in this function, but it is exposed for usage in the `allodata` package. +#' +#' @param verbose Whether or not to print verbose messages to console +#' @param func The publication processing function. It should take a Publication +#' object as its only argument. +#' @param pub_path An optional path to a publication directory, by +#' default the internally stored set of publications is used. 
+#' @param params_path An optional path to a parameters directory, by +#' default the internally stored set of parameter files is used. +#' @export +map_publications <- function(verbose, func, pub_path = NULL, params_path = NULL) { + if(is.null(pub_path)) { + pub_path <- system.file("models-main/publications", package = "allometric") + } + + if(!is.null(params_path)) { + set_params_path(params_path) + } + + pub_specs <- get_pub_file_specs(pub_path) + + n_pubs <- length(pub_specs$pub_paths) + + pb <- progress::progress_bar$new( + format = "Running publication file: :pub_id [:bar] :percent", + total = n_pubs, + width = 75 + ) + + output <- list() + + for (i in seq_len(n_pubs)) { + pub_env <- new.env() + + pub_r_path <- pub_specs$pub_paths[[i]] + pub_r_file <- pub_specs$pub_names[[i]] + pub_name <- tools::file_path_sans_ext(pub_r_file) + + tryCatch({ + source(pub_r_path, local = pub_env) + pub <- get(pub_name, envir = pub_env) + output[[pub_name]] <- func(pub) + }, error = function(e) { + warning( + paste( + "Publication file", + pub_name, + "encountered an error during execution:", + e + ) + ) + }) + + if (verbose) { + pb$tick(tokens = list(pub_id = pub_name)) + } + + # Remove pub_env from memory + rm("pub_env") + } + + # reset the param search path + if(!is.null(params_path)) { + set_params_path("package") + } + + output +} + +#' Ingest a set of models by running the publication files +#' +#' @param verbose If TRUE, print verbose messages +#' @param pub_path A path to a directory containing publication files +#' @param params_path A path to a directory containing parameter files +#' @export +ingest_models <- function(verbose, pub_path = NULL, params_path = NULL) { + out_order <- c( + "id", "model_type", "country", "region", "taxa" + ) + + allometric_models <- map_publications( + verbose, aggregate_pub_models, + pub_path = pub_path, params_path = params_path + ) %>% + dplyr::bind_rows() %>% + dplyr::arrange(.data$pub_id) + + not_in_order <- colnames(allometric_models)[ + 
!colnames(allometric_models) %in% out_order + ] + + order_cols <- c(out_order, not_in_order) + + new_model_tbl(allometric_models[, order_cols]) +} \ No newline at end of file diff --git a/inst/testdata/test_parameters/vsa_brackett_1977_test.csv b/inst/testdata/test_parameters/vsa_brackett_1977_test.csv new file mode 100644 index 0000000..a40798c --- /dev/null +++ b/inst/testdata/test_parameters/vsa_brackett_1977_test.csv @@ -0,0 +1,25 @@ +family,genus,species,geographic_region,age_class,a,b,c +Pinaceae,Pseudotsuga,menziesii,coastal,< 140 years,-2.658025,1.739925,1.133187 +Pinaceae,Pseudotsuga,menziesii,coastal,>= 80 years,-2.712153,1.659012,1.195715 +Pinaceae,Pseudotsuga,menziesii,interior,,-2.734532,1.739418,1.166033 +Pinaceae,Tsuga,heterophylla,coastal,< 140 years,-2.702922,1.84268,1.123661 +Pinaceae,Tsuga,heterophylla,coastal,>= 80 years,-2.663834,1.79023,1.124873 +Pinaceae,Tsuga,heterophylla,interior,,-2.571619,1.96971,0.977003 +Cupressaceae,Thuja,plicata,coastal,< 140 years,-2.441193,1.720761,1.049976 +Cupressaceae,Thuja,plicata,coastal,>= 80 years,-2.379642,1.6823,1.039712 +Cupressaceae,Thuja,plicata,interior,,-2.464614,1.701993,1.067038 +Pinaceae,Abies,balsamea,coastal,,-2.575642,1.806775,1.094665 +Pinaceae,Abies,balsamea,interior,,-2.502332,1.864963,1.004903 +Pinaceae,Picea,stichensis,,< 140 years,-2.550299,1.835678,1.042599 +Pinaceae,Picea,stichensis,,>= 140 years,-2.700574,1.754171,1.164531 +Pinaceae,Picea,stichensis,interior,,-2.539944,1.841226,1.034051 +Pinaceae,Pinus,ponderosa,,,-2.729937,1.909478,1.085681 +Pinaceae,Pinus,contorta,,,-2.615591,1.847504,1.085772 +Pinaceae,Pinus,monticola,,,-2.480145,1.867286,0.994351 +Pinaceae,Larix,occidentalis,,,-2.624325,1.847123,1.044007 +Cupressaceae,Cupressus,nootkatensis,,,-2.454348,1.741044,1.058437 +Betulaceae,Alnus,rubra,,,-2.672775,1.920617,1.074024 +Aceraceae,Acer,,,,-2.770324,1.885813,1.119043 +Salicaceae,Populus,tremuloides,,,-2.63536,1.946034,1.024793 +Betulaceae,Betula,,,,-2.757813,1.911681,1.105403 
+Salicaceae,Populus,,,,-2.945047,2,1.238853 diff --git a/inst/testdata/test_publications/a_e/barrett_1978.R b/inst/testdata/test_publications/a_e/barrett_1978.R new file mode 100644 index 0000000..9677d97 --- /dev/null +++ b/inst/testdata/test_publications/a_e/barrett_1978.R @@ -0,0 +1,49 @@ +barrett_1978 <- allometric::Publication( + citation = RefManageR::BibEntry( + key = "barrett_1978", + bibtype = "techreport", + title = "Height growth and site index curves for managed, even-aged stands of ponderosa pine in the Pacific Northwest", + author = "Barrett, James Willis", + volume = 232, + year = 1978, + institution = "Department of Agriculture, Forest Service, Pacific Northwest Forest and Range Research Station", + doi = "https://doi.org/10.5962/bhl.title.94197" + ), + descriptors = list( + taxa = allometric::Taxa( + allometric::Taxon( + family = "Pinaceae", + genus = "Pinus", + species = "ponderosa" + ) + ), + region = c("US-WA", "US-OR"), + country = "US" + ) +) + +hstix100 <- allometric::FixedEffectsModel( + response = list( + hstix100 = units::as_units("ft") + ), + covariates = list( + hst = units::as_units("ft"), + atb = units::as_units("years") + ), + parameters = list( + a = 100.43, + b = 1.198632, + c = 0.00283073, + d = 8.44441, + e = 128.8952205, + f = 0.016959, + g = 1.23114 + ), + predict_fn = function(hst, atb) { + a - (b - c * atb + d / atb) * (e * (1 - exp(-f * atb)^g)) + + ((b - c * atb + d / atb) * (hst - 4.5)) + 4.5 + } +) + +barrett_1978 <- barrett_1978 %>% + allometric::add_model(hstix100) \ No newline at end of file diff --git a/inst/testdata/test_publications/a_e/brackett_1977.R b/inst/testdata/test_publications/a_e/brackett_1977.R new file mode 100644 index 0000000..f67dadc --- /dev/null +++ b/inst/testdata/test_publications/a_e/brackett_1977.R @@ -0,0 +1,34 @@ +bracket_1977_citation <- RefManageR::BibEntry( + bibtype = "techreport", + key = "brackett_1977", + title = "Notes on tarif tree volume computation", + author = "Brackett, Michael", + 
year = 1977, + institution = "State of Washington, Dept. of Natural Resources" +) + +brackett_1977 <- allometric::Publication( + citation = bracket_1977_citation, + descriptors = list( + country = "US", + region = "US-WA" + ) +) + +model_specifications <- load_parameter_frame("vsa_brackett_1977_test") %>% + allometric::aggregate_taxa() + +brackett_1977 <- allometric::add_set(brackett_1977, allometric::FixedEffectsSet( + response = list( + vsia = units::as_units("ft3") + ), + covariates = list( + dsob = units::as_units("in"), + hst = units::as_units("ft") + ), + parameter_names = c("a", "b", "c"), + predict_fn = function(dsob, hst) { + 10^a * dsob^b * hst^c + }, + model_specifications = model_specifications +)) diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..68de530 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? 
+# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(models) + +test_check("models") diff --git a/tests/testthat/test-publication-processing.R b/tests/testthat/test-publication-processing.R new file mode 100644 index 0000000..9a0897b --- /dev/null +++ b/tests/testthat/test-publication-processing.R @@ -0,0 +1,50 @@ +test_that("Model row is correctly made", { + pub <- allometric::Publication( + citation = RefManageR::BibEntry( + key = "test_2000", + bibtype = "article", + author = "test", + title = "test", + journal = "test", + year = 2000, + volume = 0 + ), + descriptors = list( + region = "US-WA" + ) + ) + + model_row <- create_model_row(allometric::brackett_acer, pub, "test_id") + + expect_s4_class(model_row$model[[1]], "FixedEffectsModel") +}) + +test_that("aggregate_pub_models runs", { + pub <- allometric::Publication( + citation = RefManageR::BibEntry( + key = "test_2000", + bibtype = "article", + author = "test", + title = "test", + journal = "test", + year = 2000, + volume = 0 + ) + ) + + pub <- allometric::add_model(pub, allometric::brackett_acer) + + agg_models <- aggregate_pub_models(pub) + expect_s4_class(agg_models$model[[1]], "FixedEffectsModel") +}) + +test_that("ingest_models runs correctly with locally specified data", { + pub_path <- system.file("testdata/test_publications", package = "models") + params_path <- system.file("testdata/test_parameters", package = "models") + + models_ingested <- ingest_models( + FALSE, pub_path = pub_path, params_path = params_path + ) + + expect_equal(25, nrow(models_ingested)) +}) diff --git a/tests/testthat/test-update-models.R b/tests/testthat/test-update-models.R new file mode 100644 index 0000000..bd4eb9c --- /dev/null +++ b/tests/testthat/test-update-models.R @@ -0,0 +1,22 @@ +test_that("get_model_hash returns an md5 string", { + predict_fn_populated <- 
function(dsob) { + 1.2 * dsob + } + descriptors <- tibble::tibble(country = "US") + + hash <- get_model_hash(predict_fn_populated, descriptors) + expect_equal(hash, "e826bd8a3a66fc5d77e95e812a8ae6c9") +}) + +test_that("append_search_descriptors creates a valid tibble row", { + row <- tibble::tibble(a = c(1)) + descriptors <- tibble::tibble(country = "US") + + suppressWarnings(( + row <- append_search_descriptors(row, descriptors) + )) + + test_row <- tibble::tibble(a = 1, country = list("US"), region = list(NULL)) + + expect_equal(row, test_row) +}) \ No newline at end of file From 0ce3ef5115b00ce658b226996b39718dbbb4f78c Mon Sep 17 00:00:00 2001 From: brycefrank <bfrank70@gmail.com> Date: Sat, 21 Sep 2024 13:25:07 -0700 Subject: [PATCH 2/6] adding man for pub processing --- man/aggregate_pub_models.Rd | 15 +++++++++++++++ man/create_model_row.Rd | 12 ++++++++++++ man/get_model_hash.Rd | 17 +++++++++++++++++ man/get_params_path.Rd | 14 ++++++++++++++ man/get_pub_file_specs.Rd | 14 ++++++++++++++ man/ingest_models.Rd | 18 ++++++++++++++++++ man/load_parameter_frame.Rd | 21 +++++++++++++++++++++ man/map_publications.Rd | 27 +++++++++++++++++++++++++++ man/set_params_path.Rd | 17 +++++++++++++++++ 9 files changed, 155 insertions(+) create mode 100644 man/aggregate_pub_models.Rd create mode 100644 man/create_model_row.Rd create mode 100644 man/get_model_hash.Rd create mode 100644 man/get_params_path.Rd create mode 100644 man/get_pub_file_specs.Rd create mode 100644 man/ingest_models.Rd create mode 100644 man/load_parameter_frame.Rd create mode 100644 man/map_publications.Rd create mode 100644 man/set_params_path.Rd diff --git a/man/aggregate_pub_models.Rd b/man/aggregate_pub_models.Rd new file mode 100644 index 0000000..8dc21ca --- /dev/null +++ b/man/aggregate_pub_models.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{aggregate_pub_models} +\alias{aggregate_pub_models} 
+\title{Aggregates the set of models in a publication into a model_tbl} +\usage{ +aggregate_pub_models(pub) +} +\arguments{ +\item{pub}{The publication object} +} +\description{ +Aggregates the set of models in a publication into a model_tbl +} +\keyword{internal} diff --git a/man/create_model_row.Rd b/man/create_model_row.Rd new file mode 100644 index 0000000..e0a0c37 --- /dev/null +++ b/man/create_model_row.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{create_model_row} +\alias{create_model_row} +\title{Creates a dataframe row from model information} +\usage{ +create_model_row(model, pub, model_id) +} +\description{ +Creates a dataframe row from model information +} +\keyword{internal} diff --git a/man/get_model_hash.Rd b/man/get_model_hash.Rd new file mode 100644 index 0000000..8d5038b --- /dev/null +++ b/man/get_model_hash.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{get_model_hash} +\alias{get_model_hash} +\title{Hashes a function string} +\usage{ +get_model_hash(predict_fn_populated, descriptors) +} +\description{ +We need some sort of stable data structure that will serve as a unique ID +for a model, but will also change in the event that the model changes. This +way, models can be "versioned" across time, which may be useful for debugging +purposes down the line. This function trims whitespace and lowercases +the predict_fn_populated pasted with the descriptors, which serves as a +reasonable proxy for the model. 
+} +\keyword{internal} diff --git a/man/get_params_path.Rd b/man/get_params_path.Rd new file mode 100644 index 0000000..c5164e4 --- /dev/null +++ b/man/get_params_path.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{get_params_path} +\alias{get_params_path} +\title{Get the parameter search path} +\usage{ +get_params_path() +} +\value{ +A string containing the currently set parameter search path +} +\description{ +Get the parameter search path +} diff --git a/man/get_pub_file_specs.Rd b/man/get_pub_file_specs.Rd new file mode 100644 index 0000000..9035234 --- /dev/null +++ b/man/get_pub_file_specs.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{get_pub_file_specs} +\alias{get_pub_file_specs} +\title{Retrieve all publication file names in the publication subdirectories} +\usage{ +get_pub_file_specs(pub_path) +} +\description{ +The publication subdirectories divide the publications into groups sorted +by last name. This function returns a list containing the publication paths +and the publication names. 
+} +\keyword{internal} diff --git a/man/ingest_models.Rd b/man/ingest_models.Rd new file mode 100644 index 0000000..2e24a87 --- /dev/null +++ b/man/ingest_models.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{ingest_models} +\alias{ingest_models} +\title{Ingest a set of models by running the publication files} +\usage{ +ingest_models(verbose, pub_path = NULL, params_path = NULL) +} +\arguments{ +\item{verbose}{If TRUE, print verbose messages} + +\item{pub_path}{A path to a directory containing publication files} + +\item{params_path}{A path to a directory containing parameter files} +} +\description{ +Ingest a set of models by running the publication files +} diff --git a/man/load_parameter_frame.Rd b/man/load_parameter_frame.Rd new file mode 100644 index 0000000..58fc41a --- /dev/null +++ b/man/load_parameter_frame.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{load_parameter_frame} +\alias{load_parameter_frame} +\title{Load a parameter frame from the models/parameters directory} +\usage{ +load_parameter_frame(name) +} +\arguments{ +\item{name}{The name of the file, excluding the extension} +} +\value{ +A tibble::tbl_df of the parameter data. +} +\description{ +This is a convenience function that allows a user to easily load parameter +files from a directory, and is typically used when creating +`FixedEffectsSet`. By default the function will load parameter frames +from an existing `allometric` installation. For the purposes of testing +publication files locally, refer to `set_params_path`. 
+} diff --git a/man/map_publications.Rd b/man/map_publications.Rd new file mode 100644 index 0000000..6182d2b --- /dev/null +++ b/man/map_publications.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/publication_processing.R +\name{map_publications} +\alias{map_publications} +\title{Iteratively process publication files} +\usage{ +map_publications(verbose, func, pub_path = NULL, params_path = NULL) +} +\arguments{ +\item{verbose}{Whether or not to print verbose messages to console} + +\item{func}{The publication processing function. It should take a Publication +object as its only argument.} + +\item{pub_path}{An optional path to a publication directory, by +default the internally stored set of publications is used.} + +\item{params_path}{An optional path to a parameters directory, by +default the internally stored set of parameter files is used.} +} +\description{ +This function allows a user to flexibly extract information as it loops over +the publication files. Two main internal use-cases exist for this. First, +it is used to install models as is done in `install_models()` and, second, +it is used to populate the remote MongoDB. Most users will not be interested +in this function, but it is exposed for usage in the `allodata` package. +} diff --git a/man/set_params_path.Rd b/man/set_params_path.Rd new file mode 100644 index 0000000..6ec6fff --- /dev/null +++ b/man/set_params_path.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/params.R +\name{set_params_path} +\alias{set_params_path} +\title{Set the parameter search path} +\usage{ +set_params_path(params_path) +} +\arguments{ +\item{params_path}{The file path containing parameter files} +} +\description{ +The parameter search path is where `allometric` looks for parameter frames. 
+By default, the package searches the local installation, however it is +useful when testing publication files to search a local directory, which +can be set here. +} From 018e79d3181d7e4822af4aa6243c0ee0837e6bf7 Mon Sep 17 00:00:00 2001 From: brycefrank <bfrank70@gmail.com> Date: Sat, 21 Sep 2024 13:40:42 -0700 Subject: [PATCH 3/6] passing all existing tests --- R/publication_processing.R | 2 +- tests/testthat/test-publication-processing.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/publication_processing.R b/R/publication_processing.R index a9860b8..23379b8 100644 --- a/R/publication_processing.R +++ b/R/publication_processing.R @@ -208,5 +208,5 @@ ingest_models <- function(verbose, pub_path = NULL, params_path = NULL) { order_cols <- c(out_order, not_in_order) - new_model_tbl(allometric_models[, order_cols]) + allometric:::new_model_tbl(allometric_models[, order_cols]) } \ No newline at end of file diff --git a/tests/testthat/test-publication-processing.R b/tests/testthat/test-publication-processing.R index 9a0897b..9961129 100644 --- a/tests/testthat/test-publication-processing.R +++ b/tests/testthat/test-publication-processing.R @@ -41,7 +41,7 @@ test_that("aggregate_pub_models runs", { test_that("ingest_models runs correctly with locally specified data", { pub_path <- system.file("testdata/test_publications", package = "models") params_path <- system.file("testdata/test_parameters", package = "models") - + models_ingested <- ingest_models( FALSE, pub_path = pub_path, params_path = params_path ) From bad9fd79901351c4cf16b133e3013a6576c9f0d8 Mon Sep 17 00:00:00 2001 From: brycefrank <bfrank70@gmail.com> Date: Sun, 22 Sep 2024 08:00:08 -0700 Subject: [PATCH 4/6] triggering install action --- .github/workflows/models.install.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/models.install.yaml b/.github/workflows/models.install.yaml index 1ee4fcc..17940b7 100644 --- a/.github/workflows/models.install.yaml +++ 
b/.github/workflows/models.install.yaml @@ -37,6 +37,7 @@ jobs: run: | devtools::install("./allometric", reload = FALSE); library(allometric) + devtools::load_all(".") models <- ingest_models(FALSE, pub_path = "./publications", params_path = "./parameters"); warnings(); saveRDS(models, "./models.RDS") From 52cd89733c8db997cd919bd7f240e6206921d5f4 Mon Sep 17 00:00:00 2001 From: brycefrank <bfrank70@gmail.com> Date: Sun, 22 Sep 2024 08:02:13 -0700 Subject: [PATCH 5/6] fix: fixed pull request trigger on yaml --- .github/workflows/models.install.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/models.install.yaml b/.github/workflows/models.install.yaml index 17940b7..8e0d3ce 100644 --- a/.github/workflows/models.install.yaml +++ b/.github/workflows/models.install.yaml @@ -3,7 +3,7 @@ name: model-install on: push: branches: [ migrate_processing ] - pull-request: + pull_request: branches: migrate_processing jobs: From 800373a7c6800b0cb90d4e5494742a464dc016c0 Mon Sep 17 00:00:00 2001 From: brycefrank <bfrank70@gmail.com> Date: Sun, 22 Sep 2024 08:10:24 -0700 Subject: [PATCH 6/6] BREAKING CHANGE: migrating publication processing functions into models --- .github/workflows/models.install.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/models.install.yaml b/.github/workflows/models.install.yaml index 8e0d3ce..5fa0a71 100644 --- a/.github/workflows/models.install.yaml +++ b/.github/workflows/models.install.yaml @@ -2,9 +2,9 @@ name: model-install on: push: - branches: [ migrate_processing ] + branches: [ main ] pull_request: - branches: migrate_processing + branches: main jobs: build: @@ -31,7 +31,7 @@ jobs: - name: Clone allometric/allometric. run: | - git clone --branch migrate_processing https://github.com/allometric/allometric.git + git clone https://github.com/allometric/allometric.git - name: Install allometric, generate the models.RDS file. run: |