From 0d28e8515acb9e4ac051a01975c26c13d8c0809c Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 11:59:59 +0100 Subject: [PATCH 1/6] Preserve correct label oder --- DESCRIPTION | 2 +- R/data_reverse.R | 4 ++-- R/to_numeric.R | 6 +++--- R/utils_labels.R | 11 +++++++++-- tests/testthat/test-data_to_numeric.R | 17 +++++++++++++++++ 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7d8051b0e..eedc0aed6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.9.0.6 +Version: 0.9.0.7 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/R/data_reverse.R b/R/data_reverse.R index b9615417e..6e1ef414b 100644 --- a/R/data_reverse.R +++ b/R/data_reverse.R @@ -110,7 +110,7 @@ reverse.numeric <- function(x, out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min) # labelled data? - out <- .set_back_labels(out, x) + out <- .set_back_labels(out, x, reverse_values = TRUE) out } @@ -189,7 +189,7 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels) # labelled data? - x <- .set_back_labels(x, original_x) + x <- .set_back_labels(x, original_x, reverse_values = TRUE) x } diff --git a/R/to_numeric.R b/R/to_numeric.R index 9a35f9130..602891f70 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -148,7 +148,7 @@ to_numeric.data.frame <- function(x, #' @export to_numeric.numeric <- function(x, verbose = TRUE, ...) { - .set_back_labels(as.numeric(x), x) + .set_back_labels(as.numeric(x), x, reverse_values = FALSE) } #' @export @@ -233,9 +233,9 @@ to_numeric.factor <- function(x, } x <- factor(x_inverse) } - out <- .set_back_labels(as.numeric(as.character(x)), x) + out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE) } else { - out <- .set_back_labels(as.numeric(x), x) + out <- .set_back_labels(as.numeric(x), x, , reverse_values = FALSE) } # shift to requested starting value diff --git a/R/utils_labels.R b/R/utils_labels.R index a783e4fda..5bcebc80d 100644 --- a/R/utils_labels.R +++ b/R/utils_labels.R @@ -3,12 +3,19 @@ # to the transformed vector #' @keywords internal -.set_back_labels <- function(new, old, include_values = TRUE) { +.set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) { # labelled data? attr(new, "label") <- attr(old, "label", exact = TRUE) labels <- attr(old, "labels", exact = TRUE) + # "include_values" is used to preserve value labels if (isTRUE(include_values) && !is.null(labels)) { - attr(new, "labels") <- stats::setNames(rev(labels), names(labels)) + if (reverse_values) { + # reverse values? Used for "reverse_scale()" + attr(new, "labels") <- stats::setNames(rev(labels), names(labels)) + } else { + # keep value oder? Used for "to_numeric()" + attr(new, "labels") <- stats::setNames(labels, names(labels)) + } } else if (isFALSE(include_values)) { attr(new, "labels") <- NULL } diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R index 3e0a9d095..a170be60e 100644 --- a/tests/testthat/test-data_to_numeric.R +++ b/tests/testthat/test-data_to_numeric.R @@ -189,3 +189,20 @@ test_that("to_numeric works with haven_labelled, convert many labels correctly", expect_identical(as.vector(table(x)), c(180L, 506L, 156L)) }) }) + + +test_that("to_numeric preserves correct label order", { + x <- factor(c(1, 2, 3, 4)) + x <- assign_labels(x, values = c("one", "two", "three", "four")) + out <- to_numeric(x, dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = "1", two = "2", three = "3", four = "4") + ) + # correctly reverse scale + out <- to_numeric(reverse_scale(x), dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = "4", two = "3", three = "2", four = "1") + ) +}) From 18f1ab06c749ee226ffb9ac8661b8c78b73571a0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 12:03:22 +0100 Subject: [PATCH 2/6] update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9242a0913..7f3987170 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ BUG FIXES * `to_numeric()` now correctly deals with inversed factor levels when `preserve_levels = TRUE`. +* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`. + * `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`. # datawizard 0.9.0 From b9f83ba3ddb95d406951b9d7133da3fb46167811 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 12:17:23 +0100 Subject: [PATCH 3/6] capture more exceptions --- R/to_numeric.R | 2 +- R/utils_labels.R | 12 +++++++++++- tests/testthat/test-data_to_numeric.R | 18 ++++++++++++++++-- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/R/to_numeric.R b/R/to_numeric.R index 602891f70..8bfcac6bc 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -235,7 +235,7 @@ to_numeric.factor <- function(x, } out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE) } else { - out <- .set_back_labels(as.numeric(x), x, , reverse_values = FALSE) + out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE) } # shift to requested starting value diff --git a/R/utils_labels.R b/R/utils_labels.R index 5bcebc80d..4b28ac778 100644 --- a/R/utils_labels.R +++ b/R/utils_labels.R @@ -14,7 +14,17 @@ attr(new, "labels") <- stats::setNames(rev(labels), names(labels)) } else { # keep value oder? Used for "to_numeric()" - attr(new, "labels") <- stats::setNames(labels, names(labels)) + if (is.numeric(new)) { + if (any(grepl("[^0-9]", labels))) { + # if we have any non-numeric characters, convert to numeric + attr(new, "labels") <- stats::setNames(as.numeric(as.factor(labels)), names(labels)) + } else { + # if we have numeric, or "numeric character" (like "1", "2", "3" etc.) + attr(new, "labels") <- stats::setNames(as.numeric(labels), names(labels)) + } + } else { + attr(new, "labels") <- stats::setNames(labels, names(labels)) + } } } else if (isFALSE(include_values)) { attr(new, "labels") <- NULL diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R index a170be60e..464c35e8d 100644 --- a/tests/testthat/test-data_to_numeric.R +++ b/tests/testthat/test-data_to_numeric.R @@ -197,12 +197,26 @@ test_that("to_numeric preserves correct label order", { out <- to_numeric(x, dummy_factors = FALSE) expect_identical( attributes(out)$labels, - c(one = "1", two = "2", three = "3", four = "4") + c(one = 1, two = 2, three = 3, four = 4) ) # correctly reverse scale out <- to_numeric(reverse_scale(x), dummy_factors = FALSE) expect_identical( attributes(out)$labels, - c(one = "4", two = "3", three = "2", four = "1") + c(one = 4, two = 3, three = 2, four = 1) + ) + # factor with alphabetical values + x <- factor(letters[1:4]) + x <- assign_labels(x, values = c("one", "two", "three", "four")) + out <- to_numeric(x, dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 1, two = 2, three = 3, four = 4) + ) + # correctly reverse scale + out <- to_numeric(reverse_scale(x), dummy_factors = FALSE) + expect_identical( + attributes(out)$labels, + c(one = 4, two = 3, three = 2, four = 1) ) }) From aa03f84aa25224fddecbfc84e2670f87856e0c67 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 12:20:15 +0100 Subject: [PATCH 4/6] lintr --- tests/testthat/test-standardize_models.R | 47 +++++++++++++++++------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/tests/testthat/test-standardize_models.R b/tests/testthat/test-standardize_models.R index 4e1706ce5..27b527573 100644 --- a/tests/testthat/test-standardize_models.R +++ b/tests/testthat/test-standardize_models.R @@ -47,24 +47,28 @@ test_that("transformations", { fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt) expect_equal( effectsize::standardize_parameters(fit_exp, method = "refit")[2, 2], - unname(coef(fit_scale1)[2]) + unname(coef(fit_scale1)[2]), + ignore_attr = TRUE ) expect_equal( effectsize::standardize_parameters(fit_exp, method = "basic")[2, 2], - unname(coef(fit_scale2)[2]) + unname(coef(fit_scale2)[2]), + ignore_attr = TRUE ) skip_if_not_installed("insight", minimum_version = "0.10.0") d <- data.frame( time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)), group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2), - sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) + sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) # nolint ) m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d) - expect_message(out <- standardize(m)) + expect_message({ + out <- standardize(m) + }) expect_identical(coef(m), c( `(Intercept)` = -0.4575, `as.numeric(time)` = 0.5492, group = 0.3379, `as.numeric(time):group` = 0.15779 @@ -98,12 +102,14 @@ test_that("weights", { stdREFIT <- effectsize::standardize_parameters(m, method = "refit") expect_equal( stdREFIT[[2]], - effectsize::standardize_parameters(m, method = "posthoc")[[2]] + effectsize::standardize_parameters(m, method = "posthoc")[[2]], + ignore_attr = TRUE ) expect_equal( stdREFIT[[2]], - effectsize::standardize_parameters(m, method = "basic")[[2]] + effectsize::standardize_parameters(m, method = "basic")[[2]], + ignore_attr = TRUE ) }) @@ -230,7 +236,9 @@ test_that("standardize mediation", { ) out1 <- summary(standardize(med1)) - expect_message(out2 <- summary(standardize(med2))) + expect_message({ + out2 <- summary(standardize(med2)) + }) expect_identical(unlist(out1[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]), unlist(out2[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]), tolerance = 0.1 @@ -266,13 +274,22 @@ test_that("offsets", { m <- lm(mpg ~ hp + offset(wt), data = mtcars) - expect_warning(mz1 <- standardize(m)) - expect_warning(mz2 <- standardize(m, two_sd = TRUE)) + expect_warning({ + mz1 <- standardize(m) + }) + expect_warning({ + mz2 <- standardize(m, two_sd = TRUE) + }) expect_identical(c(1, 2) * coef(mz1), coef(mz2)) m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars) - expect_warning(mz <- standardize(m), regexp = NA) + expect_warning( + { + mz <- standardize(m) + }, + regexp = NA + ) par1 <- parameters::model_parameters(mz) par2 <- effectsize::standardize_parameters(m, method = "basic") @@ -288,10 +305,12 @@ test_that("brms", { skip_if_not_installed("brms") invisible( - capture.output(mod <- brms::brm(mpg ~ hp, - data = mtcars, - refresh = 0, chains = 1, silent = 2 - )) + capture.output({ + mod <- brms::brm(mpg ~ hp, + data = mtcars, + refresh = 0, chains = 1, silent = 2 + ) + }) ) expect_warning( From f34e7842c63cd753b0c2b67d11a6f426a742aa30 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 12:40:16 +0100 Subject: [PATCH 5/6] lintr --- R/data_reverse.R | 30 +++++++++++++++--------------- R/utils_labels.R | 14 +++++++------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/R/data_reverse.R b/R/data_reverse.R index 6e1ef414b..2fc9ef493 100644 --- a/R/data_reverse.R +++ b/R/data_reverse.R @@ -93,21 +93,21 @@ reverse.numeric <- function(x, } # old minimum and maximum - min <- min(range) - max <- max(range) + min_value <- min(range) + max_value <- max(range) # check if a valid range (i.e. vector of length 2) is provided if (length(range) > 2) { insight::format_error( "`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.", - sprintf("Did you want to provide `range = c(%g, %g)`?", min, max) + sprintf("Did you want to provide `range = c(%g, %g)`?", min_value, max_value) ) } - new_min <- max - new_max <- min + new_min <- max_value + new_max <- min_value - out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min) + out <- as.vector((new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min) # labelled data? out <- .set_back_labels(out, x, reverse_values = TRUE) @@ -134,7 +134,9 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { # save for later use original_x <- x - if (!is.null(range)) { + if (is.null(range)) { + old_levels <- levels(x) + } else { # no missing values allowed if (anyNA(range)) { insight::format_error("`range` is not allowed to have missing values.") @@ -180,8 +182,6 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) { } old_levels <- range x <- factor(x, levels = range) - } else { - old_levels <- levels(x) } int_x <- as.integer(x) @@ -225,7 +225,7 @@ reverse.grouped_df <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + arguments <- .process_append( x, select, append, @@ -233,8 +233,8 @@ reverse.grouped_df <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- arguments$x + select <- arguments$select } x <- as.data.frame(x) @@ -279,7 +279,7 @@ reverse.data.frame <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + arguments <- .process_append( x, select, append, @@ -287,8 +287,8 @@ reverse.data.frame <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- arguments$x + select <- arguments$select } # Transform the range so that it is a list now diff --git a/R/utils_labels.R b/R/utils_labels.R index 4b28ac778..64b517086 100644 --- a/R/utils_labels.R +++ b/R/utils_labels.R @@ -6,24 +6,24 @@ .set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) { # labelled data? attr(new, "label") <- attr(old, "label", exact = TRUE) - labels <- attr(old, "labels", exact = TRUE) + value_labels <- attr(old, "labels", exact = TRUE) # "include_values" is used to preserve value labels - if (isTRUE(include_values) && !is.null(labels)) { + if (isTRUE(include_values) && !is.null(value_labels)) { if (reverse_values) { # reverse values? Used for "reverse_scale()" - attr(new, "labels") <- stats::setNames(rev(labels), names(labels)) + attr(new, "labels") <- stats::setNames(rev(value_labels), names(value_labels)) } else { # keep value oder? Used for "to_numeric()" if (is.numeric(new)) { - if (any(grepl("[^0-9]", labels))) { + if (any(grepl("[^0-9]", value_labels))) { # if we have any non-numeric characters, convert to numeric - attr(new, "labels") <- stats::setNames(as.numeric(as.factor(labels)), names(labels)) + attr(new, "labels") <- stats::setNames(as.numeric(as.factor(value_labels)), names(value_labels)) } else { # if we have numeric, or "numeric character" (like "1", "2", "3" etc.) - attr(new, "labels") <- stats::setNames(as.numeric(labels), names(labels)) + attr(new, "labels") <- stats::setNames(as.numeric(value_labels), names(value_labels)) } } else { - attr(new, "labels") <- stats::setNames(labels, names(labels)) + attr(new, "labels") <- stats::setNames(value_labels, names(value_labels)) } } } else if (isFALSE(include_values)) { From ba0471fc7f60ecc12b8f3fb0e262dc6027acfd91 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 18 Dec 2023 12:42:22 +0100 Subject: [PATCH 6/6] update lintr --- .lintr | 1 + 1 file changed, 1 insertion(+) diff --git a/.lintr b/.lintr index 7fa66fe1c..8aebdfc14 100644 --- a/.lintr +++ b/.lintr @@ -13,6 +13,7 @@ linters: linters_with_defaults( todo_comment_linter = NULL, undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)), undesirable_operator_linter = NULL, + if_not_else_linter(exceptions = character(0L)), unnecessary_concatenation_linter(allow_single_expression = FALSE), defaults = linters_with_tags(tags = NULL) )