Skip to content

Commit

Permalink
Merge pull request #48 from Big-Life-Lab/dev
Browse files Browse the repository at this point in the history
Version 0.1.1
  • Loading branch information
yulric authored Dec 16, 2024
2 parents 404576e + 36dc037 commit f95e150
Show file tree
Hide file tree
Showing 162 changed files with 84,051 additions and 10,770 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
^CONTRIBUTING\.md$
^path$
^renv$
^renv\.lock$
^.*\.Rproj$
Expand All @@ -6,3 +8,5 @@
^docs$
^pkgdown$
^\.github$
^data-raw$
^path/to/venv/
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
# Mac
.DS_Store
inst/doc

*.code-workspace
21 changes: 11 additions & 10 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
Package: recodeflow
Type: Package
Title: Contains functions to interface with variable details sheets, including recoding variables and converting them to PMML
Version: 0.1.0
Version: 0.1.1
Authors@R: c(
person(given = "Yulric", family = "Sequeira", role = c("aut"), email = "ysequeira@ohri.ca"),
person(given = "Luke",family = "Bailey", email = "lbailey@toh.ca", role = c("aut")),
person(given = "Rostyslav", family = "Vyuha", role = c("aut","cre"), email = "rvyuha@toh.ca"))
Maintainer: Rostyslav Vyuha <rvyuha@toh.ca>
person("Yulric", "Sequeira", email = "ysequeira@ohri.ca", role = c("aut", "cre")),
person("Luke", "Bailey", role = c("aut")),
person("Rostyslav", "Vyuha", role = c("aut"))
)
Maintainer: Yulric Sequeira <ysequeira@ohri.ca>
Description: Recode and harmonize data using variable and details sheets.
Depends:
R (>= 3.1.0)
R (>= 3.5)
Imports:
sjlabelled,
stringr,
tidyr,
haven,
dplyr,
magrittr,
glue,
survival
License: MIT + file LICENSE
URL: https://github.com/Big-Life-Lab/recodeflow
BugReports: https://github.com/Big-Life-Lab/recodeflow/issues
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Suggests:
DT,
kableExtra,
knitr,
rmarkdown,
testthat (>= 3.0.0)
readr,
testthat (>= 3.0.0),
survival
Config/testthat/edition: 3
VignetteBuilder: knitr
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(example_der_fun)
export(get_table_name)
export(is_equal)
export(is_table_feeder_var)
Expand Down
129 changes: 129 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#' The pbc dataset
#'
#' @format A data frame with 418 observations and 20 variables.
#' \describe{
#' \item{id}{case number}
#' \item{time}{number of days between registration and the earlier of death, transplantation, or study analysis time}
#' \item{status}{status at endpoint, 0/1/2 for censored, transplant, dead}
#' \item{trt}{1/2/NA for D-penicillamine, placebo, or not randomized}
#' \item{age}{age in years}
#' \item{sex}{m/f}
#' \item{ascites}{presence of ascites}
#' \item{hepato}{presence of hepatomegaly or enlarged liver}
#' \item{spiders}{blood vessel malformations in the skin}
#' \item{edema}{0 no edema, 0.5 untreated or successfully treated, 1 edema despite diuretic therapy}
#' \item{bili}{serum bilirubin (mg/dl)}
#' \item{chol}{serum cholesterol (mg/dl)}
#' \item{albumin}{serum albumin (g/dl)}
#' \item{copper}{urine copper (ug/day)}
#' \item{alk.phos}{alkaline phosphatase (U/liter)}
#' \item{ast}{aspartate aminotransferase (U/ml)}
#' \item{trig}{triglycerides (mg/dl)}
#' \item{platelet}{platelet count}
#' \item{protime}{standardised blood clotting time}
#' \item{stage}{histologic stage of disease (1, 2, 3, or 4)}
#' }
#' @source \url{https://cran.r-project.org/web/packages/survival/survival.pdf}
"pbc"

#' Metadata for the pbc dataset using the DCMI standard
#'
#' @format A list containing DCMI metadata:
#' \describe{
#' \item{title}{title}
#' \item{creator}{creator}
#' \item{subject}{subject}
#' \item{description}{description}
#' \item{publisher}{publisher}
#' \item{date}{date}
#' \item{type}{type}
#' \item{format}{format}
#' \item{identifier}{identifier}
#' \item{source}{source}
#' \item{language}{language}
#' \item{rights}{rights}
#' \item{references}{references}
#' }
"pbc_metadata"

#' Variables sheet for the pbc dataset
#'
#' @format A data frame with 24 rows and 11 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{label}{variable label}
#' \item{labelLong}{variable label long}
#' \item{subject}{subject}
#' \item{section}{section}
#' \item{variableType}{variable type}
#' \item{databaseStart}{database start}
#' \item{units}{units}
#' \item{variableStart}{variable start}
#' \item{notes}{logical indicating presence of notes}
#' \item{description}{logical indicating presence of description}
#' }
"pbc_variables"

#' Variable details sheet for the pbc dataset
#'
#' @format A data frame with 69 rows and 16 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{dummyVariable}{dummy variable name}
#' \item{typeEnd}{end type}
#' \item{databaseStart}{database start}
#' \item{variableStart}{variable start}
#' \item{typeStart}{start type}
#' \item{recEnd}{record end}
#' \item{recStart}{record start}
#' \item{catLabel}{category label}
#' \item{catLabelLong}{category long label}
#' \item{numValidCat}{number of valid categories (numeric)}
#' \item{units}{logical indicating presence of units}
#' \item{notes}{logical indicating presence of notes}
#' \item{catStartLabel}{category start label}
#' \item{variableStartShortLabel}{variable start short label}
#' \item{variableStartLabel}{variable start label}
#' }
"pbc_variable_details"

#' Example variables sheet for vignettes
#'
#' @format A data frame with 24 rows and 11 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{label}{variable label}
#' \item{labelLong}{variable label long}
#' \item{subject}{subject}
#' \item{section}{section}
#' \item{variableType}{variable type}
#' \item{databaseStart}{database start}
#' \item{units}{units}
#' \item{variableStart}{variable start}
#' \item{notes}{logical indicating presence of notes}
#' \item{description}{logical indicating presence of description}
#' }
"tester_variables"

#' Example variable details sheet for vignettes
#'
#' @format A data frame with 69 rows and 16 columns:
#' \describe{
#' \item{variable}{variable name}
#' \item{dummyVariable}{dummy variable name}
#' \item{typeEnd}{end type}
#' \item{databaseStart}{database start}
#' \item{variableStart}{variable start}
#' \item{typeStart}{start type}
#' \item{recEnd}{record end}
#' \item{recStart}{record start}
#' \item{catLabel}{category label}
#' \item{catLabelLong}{category long label}
#' \item{numValidCat}{number of valid categories (numeric)}
#' \item{units}{logical indicating presence of units}
#' \item{notes}{logical indicating presence of notes}
#' \item{catStartLabel}{category start label}
#' \item{variableStartShortLabel}{variable start short label}
#' \item{variableStartLabel}{variable start label}
#' }
"tester_variable_details"
2 changes: 1 addition & 1 deletion R/example_der_function.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' example_der_fun calculates chol*bili
#' @param chol the row value for chol
#' @param bili the row value for bili
#' @export
#' @keywords internal
example_der_fun <- function(chol, bili){
# as numeric is used to coerce in case categorical numeric variables are used.
# Warning: if either chol or bili is NA, the result will be NA
Expand Down
9 changes: 7 additions & 2 deletions R/recode-with-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ is_equal <- function(v1, v2) {
#' Creates new variables by recoding variables in a dataset using the rules
#' specified in a variables details sheet
#'
#' The \href{https://github.com/Big-Life-Lab/recodeflow/blob/master/inst/extdata/PBC-variableDetails.csv}{variable_details}
#' The \href{https://github.com/Big-Life-Lab/recodeflow/blob/master/inst/extdata/pbc_variable_details.csv}{variable_details}
#' dataframe needs the following columns:
#' \describe{
#' \item{variable}{Name of the new variable created. The name of the new
Expand Down Expand Up @@ -523,6 +523,7 @@ recode_call <-
#' @param variable_being_checked the name of the recoded variable
#'
#' @return the data equivalent of variable_being_checked
#' @keywords internal
get_data_variable_name <-
function(data_name,
data,
Expand Down Expand Up @@ -586,6 +587,7 @@ get_data_variable_name <-
#' @param tables A list of reference tables
#'
#' @return Returns recoded and labeled data
#' @keywords internal
recode_columns <-
function(data,
variables_details_rows_to_process,
Expand Down Expand Up @@ -979,7 +981,7 @@ recode_non_derived_variables <- function(
)
if (length(else_value) > 0) {
extra_row <- nrow(log_table) + 1
log_table[extra_row , "value_to"] <- else_value
log_table[extra_row , "value_to"] <- as.character(else_value)
log_table[extra_row , "From"] <-
"else"
log_table[extra_row , "rows_recoded"] <-
Expand Down Expand Up @@ -1009,6 +1011,7 @@ recode_non_derived_variables <- function(
#'
#' @return a boolean vector containing true for rows where the
#' comparison is true
#' @keywords internal
compare_value_based_on_interval <-
function(left_boundary,
right_boundary,
Expand Down Expand Up @@ -1101,6 +1104,7 @@ update_variable_details_based_on_variable_sheet <-
#' @param var_type the toType of a variable
#'
#' @return an appropriately coded tagged NA
#' @keywords internal
format_recoded_value <- function(cell_value, var_type) {
recode_value <- NULL
if (grepl("NA", cell_value)) {
Expand Down Expand Up @@ -1370,6 +1374,7 @@ calculate_custom_function_row_value <-
#' @param variable_details_row A data frame with a single row which will be
#' checked
#' @return A boolean
#' @keywords internal
is_derived_var <- function(variable_details_row) {
derived_var_regex <- "DerivedVar::\\[(.+?)\\]|DerivedVar::\\[\\]"
return(length(grep(
Expand Down
Loading

0 comments on commit f95e150

Please sign in to comment.