Skip to content

Commit

Permalink
Merge pull request #23 from Big-Life-Lab/custom-variable
Browse files Browse the repository at this point in the history
Added vignette explaining custom variables
  • Loading branch information
reikookamoto authored Jan 4, 2024
2 parents 04840ad + 2d9d10a commit 404576e
Show file tree
Hide file tree
Showing 8 changed files with 366 additions and 4 deletions.
58 changes: 54 additions & 4 deletions R/recode-with-table.R
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,58 @@ rec_with_table <-
database_name <- deparse(substitute(data))
}

# The next code chunk will create the variable details file that does not
# have any template variables. All variables which implement a template
# variable will have their specifications replaced by those of the template
# variable.
# This is the variable details sheet that should be used for the
# rest of the program.
no_template_variables_variable_details <- variable_details
# If the template variable isn't in the variable details sheet then this
# is a pre template variable sheet. Don't run the code.
if(variable_details_columns$template_variable$name %in% colnames(variable_details)) {
no_template_variables_variable_details <- variable_details[
variable_details[[variable_details_columns$template_variable$name]] == variable_details_columns$template_variable$values$no,
]
template_variable_names <- unique(variable_details[
variable_details[[variable_details_columns$template_variable$name]] == variable_details_columns$template_variable$values$yes,
pkg.env$columns.Variable
])
for(template_variable_name in template_variable_names)
{
template_variable_rows <- variable_details[
variable_details[[pkg.env$columns.Variable]] == template_variable_name,
]
# All variables which implement this template variable
template_variable_variable_names <- variable_details[
variable_details[[variable_details_columns$template_variable$name]] == template_variable_name,
pkg.env$columns.Variable
]
for(template_variable_variable_name in template_variable_variable_names)
{
template_variable_variable_rows <- variable_details[
variable_details[[pkg.env$columns.Variable]] == template_variable_variable_name,
]
updated_template_variable_variable_rows <- data.frame(
template_variable_rows
)
updated_template_variable_variable_rows[[pkg.env$columns.Variable]] <- rep(
template_variable_variable_rows[1, pkg.env$columns.Variable],
nrow(template_variable_rows)
)
updated_template_variable_variable_rows[[pkg.env$columns.VariableStart]] <- rep(
template_variable_variable_rows[1, pkg.env$columns.VariableStart],
nrow(template_variable_rows)
)

no_template_variables_variable_details <- rbind(
no_template_variables_variable_details,
updated_template_variable_variable_rows
)
}
}
}

# If the passed data parameter is a list, then make sure that the
# each data in the list has a database name in the database_name parameter
# by checking their length
Expand All @@ -284,7 +336,7 @@ rec_with_table <-
database_name = database_name,
print_note = notes,
else_value = else_value,
variable_details = variable_details,
variable_details = no_template_variables_variable_details,
append_to_data = append_to_data,
append_non_db_columns = append_non_db_columns,
log = log,
Expand Down Expand Up @@ -313,7 +365,7 @@ rec_with_table <-
database_name = database_name,
print_note = notes,
else_value = else_value,
variable_details = variable_details,
variable_details = no_template_variables_variable_details,
append_to_data = append_to_data,
append_non_db_columns = append_non_db_columns,
log = log,
Expand Down Expand Up @@ -690,8 +742,6 @@ recode_columns <-
list(var_name = id_name, feeder_vars = tmp_feeder_vars)
top_function_frame$id_role_name <-
append(top_function_frame$id_role_name, tmp_list)


}

# Populate data Labels
Expand Down
11 changes: 11 additions & 0 deletions R/strings.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ pkg.env$columns.CatLabel <- "catLabel"
pkg.env$columns.CatLabelLong <- "catLabelLong"
pkg.env$columns.Role <- "role"

pkg.env$variable_details$columns.recFrom.elseValue <- "else"
variable_details_columns <- list(
template_variable = list(
name = "templateVariable",
values = list(
no = "No",
yes = "Yes"
)
)
)

pkg.env$recode.key.id.from <- "id_from::"
pkg.env$recode.key.func <- "Func::"
pkg.env$recode.key.map <- "map::"
Expand Down
6 changes: 6 additions & 0 deletions inst/example-dataset.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,PL,SL
1,English,French
2,English,N/A
3,Mandarin,English
4,French,English
5,Hindi,English
13 changes: 13 additions & 0 deletions inst/no-template-variable-variable-details.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
variable,typeEnd,typeStart,databaseStart,variableStart,variableStartLabel,numValidCat,recEnd,catLabel,catLabelLong,units,recStart,catStartLabel,notes
primary_lang,cat,cat,database_one,[PL],primary language,5,1,English,English,N/A,English,English,
primary_lang,cat,cat,database_one,[PL],primary language,5,2,French,English,N/A,French,French,
primary_lang,cat,cat,database_one,[PL],primary language,5,3,Mandarin,English,N/A,Mandarin,Mandarin,
primary_lang,cat,cat,database_one,[PL],primary language,5,4,Hindi,English,N/A,Hindi,Hindi,
primary_lang,cat,cat,database_one,[PL],primary language,5,NA::a,not applicable,not applicable,N/A,N/A,not applicable,
primary_lang,cat,cat,database_one,[PL],primary language,5,NA::b,missing,English,N/A,else,N/A,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,1,English,English,N/A,English,English,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,2,French,English,N/A,French,French,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,3,Mandarin,English,N/A,Mandarin,Mandarin,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,4,Hindi,English,N/A,Hindi,Hindi,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,NA::a,not applicable,not applicable,N/A,N/A,not applicable,
secondary_lang,cat,cat,database_one,[SL],secondary language,5,NA::b,missing,English,N/A,else,N/A,
6 changes: 6 additions & 0 deletions inst/recoded-dataset.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,primary_lang,secondary_lang
1,1,2
2,1,NA(a)
3,3,1
4,2,1
5,4,1
9 changes: 9 additions & 0 deletions inst/template-variable-variable-details.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
variable,templateVariable,typeEnd,typeStart,databaseStart,variableStart,variableStartLabel,numValidCat,recEnd,catLabel,catLabelLong,units,recStart,catStartLabel,notes
language,Yes,cat,cat,database_one,N/A,N/A,5,1,English,English,N/A,English,English,
language,Yes,cat,cat,database_one,N/A,N/A,5,2,French,English,N/A,French,French,
language,Yes,cat,cat,database_one,N/A,N/A,5,3,Mandarin,English,N/A,Mandarin,Mandarin,
language,Yes,cat,cat,database_one,N/A,N/A,5,4,Hindi,English,N/A,Hindi,Hindi,
language,Yes,cat,cat,database_one,N/A,N/A,5,NA::a,not applicable,not applicable,N/A,N/A,unknown,
language,Yes,cat,cat,database_one,N/A,N/A,5,NA::b,missing,missing,N/A,else,N/A,
primary_language,language,cat,cat,database_one,[PL],Primary Language,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
secondary_language,language,cat,cat,database_one,[SL],Secondary Language,N/A,N/A,N/A,N/A,N/A,N/A,N/A,
187 changes: 187 additions & 0 deletions tests/testthat/test-custom-variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
context("recode_with_table template variables")

test_that("Should correct recode non-derived template variables", {
variables <- data.frame(
variable = c("variable_one"),
label = c(""),
labelLong = c(""),
units = c("N/A"),
variableType = c("Categorical"),
databaseStart = c("database_one"),
variableStart = c("[start_variable_one]")
)
variable_details <- data.frame(
variable = c("template_variable_one", "template_variable_one", "variable_one"),
templateVariable = c("Yes", "Yes", "template_variable_one"),
typeEnd = c("cat", "cat", "cat"),
databaseStart = c("database_one", "database_one", "database_one"),
variableStart = c("N/A", "N/A", "[start_variable_one]"),
typeStart = c("cat", "cat", "cat"),
recEnd = c("1", "2", "N/A"),
numValidCat = c("2", "2", "N/A"),
recStart = c("1", "else", "N/A"),
catLabel = c("", "", ""),
catLabelLong = c("", "", "")
)
database_name <- "database_one"
data <- data.frame(
start_variable_one = c(1,3)
)

expected_data <- data.frame(
variable_one = c(1,2)
)
expected_data$variable_one <- as.factor(expected_data$variable_one)

actual_data <- recodeflow:::rec_with_table(
data = data,
variables = variables,
variable_details = variable_details,
database_name = database_name
)
attributes(expected_data$variable_one) <- attributes(actual_data$variable_one)

expect_equal(actual_data, expected_data)
})

test_that("Should correct recode multiple non-derived template variables", {
variables <- data.frame(
variable = c("variable_one", "variable_two"),
label = c("", ""),
labelLong = c("", ""),
units = c("N/A", "N/A"),
variableType = c("Categorical", "Categorical"),
databaseStart = c("database_one", "database_one"),
variableStart = c("[start_variable_one]", "[start_variable_two]")
)
variable_details <- data.frame(
variable = c("template_variable_one", "template_variable_one", "template_variable_two", "template_variable_two", "variable_one", "variable_two"),
templateVariable = c("Yes", "Yes", "Yes", "Yes", "template_variable_one", "template_variable_two"),
typeEnd = c("cat", "cat", "cat", "cat", "cat", "cat"),
databaseStart = c("database_one", "database_one", "database_one", "database_one", "database_one", "database_one"),
variableStart = c("N/A", "N/A", "N/A", "N/A", "[start_variable_one]", "[start_variable_two]"),
typeStart = c("cat", "cat", "cat", "cat", "cat", "cat"),
recEnd = c("1", "2", "1", "2", "N/A", "N/A"),
numValidCat = c("2", "2", "2", "2", "N/A", "N/A"),
recStart = c("1", "else", "1", "else", "N/A", "N/A"),
catLabel = c("", "", "", "", "", ""),
catLabelLong = c("", "", "", "", "", "")
)
database_name <- "database_one"
data <- data.frame(
start_variable_one = c(1,3),
start_variable_two = c(1,3)
)

expected_data <- data.frame(
variable_one = c(1,2),
variable_two = c(1,2)
)
expected_data$variable_one <- as.factor(expected_data$variable_one)
expected_data$variable_two <- as.factor(expected_data$variable_two)

actual_data <- recodeflow:::rec_with_table(
data = data,
variables = variables,
variable_details = variable_details,
database_name = database_name
)
attributes(expected_data$variable_one) <- attributes(actual_data$variable_one)
attributes(expected_data$variable_two) <- attributes(actual_data$variable_two)

expect_equal(actual_data, expected_data)
})

test_that("Should correctly recode derived template variables", {
variables <- data.frame(
variable = c("variable_one"),
label = c(""),
labelLong = c(""),
units = c("N/A"),
variableType = c("Categorical"),
databaseStart = c("database_one"),
variableStart = c("[start_variable_one]")
)
variable_details <- data.frame(
variable = c("template_variable_one", "template_variable_one", "variable_one"),
templateVariable = c("Yes", "Yes", "template_variable_one"),
typeEnd = c("cat", "cat", "cat"),
databaseStart = c("database_one", "database_one", "database_one"),
variableStart = c("N/A", "N/A", "DerivedVar::[start_variable_one]"),
typeStart = c("cat", "cat", "cat"),
recEnd = c("Func::func1", "1", "N/A"),
numValidCat = c("1", "1", "N/A"),
recStart = c("N/A", "N/A", "N/A"),
catLabel = c("", "", ""),
catLabelLong = c("", "", "")
)
.GlobalEnv[["func1"]] <- function(start_variable_one) {
return(1)
}
database_name <- "database_one"
data <- data.frame(
start_variable_one = c(3)
)

expected_data <- data.frame(
variable_one = c(1)
)
expected_data$variable_one <- as.factor(expected_data$variable_one)

actual_data <- recodeflow::rec_with_table(
data,
variables = variables,
variable_details = variable_details,
database_name = database_name
)

attributes(expected_data$variable_one) <- attributes(actual_data$variable_one)
expect_equal(actual_data, expected_data)
})

test_that("Should work with non-template variables", {
variables <- data.frame(
variable = c("variable_one", "variable_two"),
label = c("", ""),
labelLong = c("", ""),
units = c("N/A", "N/A"),
variableType = c("Categorical", "Continuous"),
databaseStart = c("database_one", "database_one"),
variableStart = c("[start_variable_one]", "[start_variable_two]")
)
variable_details <- data.frame(
variable = c("template_variable_one", "template_variable_one", "variable_one", "variable_two"),
templateVariable = c("Yes", "Yes", "template_variable_one", "No"),
typeEnd = c("cat", "cat", "cat", "cont"),
databaseStart = c("database_one", "database_one", "database_one", "database_one"),
variableStart = c("N/A", "N/A", "[start_variable_one]", "[start_variable_two]"),
typeStart = c("cat", "cat", "cat", "cont"),
recEnd = c("1", "2", "N/A", "copy"),
numValidCat = c("2", "2", "N/A", "N/A"),
recStart = c("1", "else", "N/A", "else"),
catLabel = c("", "", "", ""),
catLabelLong = c("", "", "", "")
)
database_name <- "database_one"
data <- data.frame(
start_variable_one = c(1,3),
start_variable_two = c(1,2)
)

expected_data <- data.frame(
variable_one = c(1,2),
variable_two = c(1,2)
)
expected_data$variable_one <- as.factor(expected_data$variable_one)

actual_data <- recodeflow:::rec_with_table(
data = data,
variables = variables,
variable_details = variable_details,
database_name = database_name
)
attributes(expected_data$variable_one) <- attributes(actual_data$variable_one)
attributes(expected_data$variable_two) <- attributes(actual_data$variable_two)

expect_equal(actual_data, expected_data)
})
Loading

0 comments on commit 404576e

Please sign in to comment.