Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve correct label oder #473

Merged
merged 6 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .lintr
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ linters: linters_with_defaults(
todo_comment_linter = NULL,
undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)),
undesirable_operator_linter = NULL,
if_not_else_linter(exceptions = character(0L)),
unnecessary_concatenation_linter(allow_single_expression = FALSE),
defaults = linters_with_tags(tags = NULL)
)
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.9.0.6
Version: 0.9.0.7
Authors@R: c(
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ BUG FIXES
* `to_numeric()` now correctly deals with inversed factor levels when
`preserve_levels = TRUE`.

* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`.

* `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`.

# datawizard 0.9.0
Expand Down
34 changes: 17 additions & 17 deletions R/data_reverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,24 +93,24 @@ reverse.numeric <- function(x,
}

# old minimum and maximum
min <- min(range)
max <- max(range)
min_value <- min(range)
max_value <- max(range)

# check if a valid range (i.e. vector of length 2) is provided
if (length(range) > 2) {
insight::format_error(
"`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.",
sprintf("Did you want to provide `range = c(%g, %g)`?", min, max)
sprintf("Did you want to provide `range = c(%g, %g)`?", min_value, max_value)
)
}

new_min <- max
new_max <- min
new_min <- max_value
new_max <- min_value

out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min)
out <- as.vector((new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min)

# labelled data?
out <- .set_back_labels(out, x)
out <- .set_back_labels(out, x, reverse_values = TRUE)
out
}

Expand All @@ -134,7 +134,9 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
# save for later use
original_x <- x

if (!is.null(range)) {
if (is.null(range)) {
old_levels <- levels(x)
} else {
# no missing values allowed
if (anyNA(range)) {
insight::format_error("`range` is not allowed to have missing values.")
Expand Down Expand Up @@ -180,16 +182,14 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
}
old_levels <- range
x <- factor(x, levels = range)
} else {
old_levels <- levels(x)
}

int_x <- as.integer(x)
rev_x <- reverse(int_x, range = c(1, length(old_levels)))
x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels)

# labelled data?
x <- .set_back_labels(x, original_x)
x <- .set_back_labels(x, original_x, reverse_values = TRUE)

x
}
Expand Down Expand Up @@ -225,16 +225,16 @@ reverse.grouped_df <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
arguments <- .process_append(
x,
select,
append,
append_suffix = "_r",
preserve_value_labels = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- arguments$x
select <- arguments$select
}

x <- as.data.frame(x)
Expand Down Expand Up @@ -279,16 +279,16 @@ reverse.data.frame <- function(x,
# create the new variables and updates "select", so new variables are processed
if (!isFALSE(append)) {
# process arguments
args <- .process_append(
arguments <- .process_append(
x,
select,
append,
append_suffix = "_r",
preserve_value_labels = TRUE
)
# update processed arguments
x <- args$x
select <- args$select
x <- arguments$x
select <- arguments$select
}

# Transform the range so that it is a list now
Expand Down
6 changes: 3 additions & 3 deletions R/to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ to_numeric.data.frame <- function(x,

#' @export
to_numeric.numeric <- function(x, verbose = TRUE, ...) {
.set_back_labels(as.numeric(x), x)
.set_back_labels(as.numeric(x), x, reverse_values = FALSE)
}

#' @export
Expand Down Expand Up @@ -233,9 +233,9 @@ to_numeric.factor <- function(x,
}
x <- factor(x_inverse)
}
out <- .set_back_labels(as.numeric(as.character(x)), x)
out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE)
} else {
out <- .set_back_labels(as.numeric(x), x)
out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE)
}

# shift to requested starting value
Expand Down
25 changes: 21 additions & 4 deletions R/utils_labels.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,29 @@
# to the transformed vector

#' @keywords internal
.set_back_labels <- function(new, old, include_values = TRUE) {
.set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) {
# labelled data?
attr(new, "label") <- attr(old, "label", exact = TRUE)
labels <- attr(old, "labels", exact = TRUE)
if (isTRUE(include_values) && !is.null(labels)) {
attr(new, "labels") <- stats::setNames(rev(labels), names(labels))
value_labels <- attr(old, "labels", exact = TRUE)
# "include_values" is used to preserve value labels
if (isTRUE(include_values) && !is.null(value_labels)) {
if (reverse_values) {
# reverse values? Used for "reverse_scale()"
attr(new, "labels") <- stats::setNames(rev(value_labels), names(value_labels))
} else {
# keep value oder? Used for "to_numeric()"
if (is.numeric(new)) {
if (any(grepl("[^0-9]", value_labels))) {
# if we have any non-numeric characters, convert to numeric
attr(new, "labels") <- stats::setNames(as.numeric(as.factor(value_labels)), names(value_labels))
} else {
# if we have numeric, or "numeric character" (like "1", "2", "3" etc.)
attr(new, "labels") <- stats::setNames(as.numeric(value_labels), names(value_labels))
}
} else {
attr(new, "labels") <- stats::setNames(value_labels, names(value_labels))

Check warning on line 26 in R/utils_labels.R

View check run for this annotation

Codecov / codecov/patch

R/utils_labels.R#L26

Added line #L26 was not covered by tests
}
}
} else if (isFALSE(include_values)) {
attr(new, "labels") <- NULL
}
Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-data_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,34 @@ test_that("to_numeric works with haven_labelled, convert many labels correctly",
expect_identical(as.vector(table(x)), c(180L, 506L, 156L))
})
})


test_that("to_numeric preserves correct label order", {
x <- factor(c(1, 2, 3, 4))
x <- assign_labels(x, values = c("one", "two", "three", "four"))
out <- to_numeric(x, dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 1, two = 2, three = 3, four = 4)
)
# correctly reverse scale
out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 4, two = 3, three = 2, four = 1)
)
# factor with alphabetical values
x <- factor(letters[1:4])
x <- assign_labels(x, values = c("one", "two", "three", "four"))
out <- to_numeric(x, dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 1, two = 2, three = 3, four = 4)
)
# correctly reverse scale
out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
expect_identical(
attributes(out)$labels,
c(one = 4, two = 3, three = 2, four = 1)
)
})
47 changes: 33 additions & 14 deletions tests/testthat/test-standardize_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,28 @@ test_that("transformations", {
fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt)
expect_equal(
effectsize::standardize_parameters(fit_exp, method = "refit")[2, 2],
unname(coef(fit_scale1)[2])
unname(coef(fit_scale1)[2]),
ignore_attr = TRUE
)

expect_equal(
effectsize::standardize_parameters(fit_exp, method = "basic")[2, 2],
unname(coef(fit_scale2)[2])
unname(coef(fit_scale2)[2]),
ignore_attr = TRUE
)

skip_if_not_installed("insight", minimum_version = "0.10.0")
d <- data.frame(
time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)),
group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50)
sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) # nolint
)
m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d)


expect_message(out <- standardize(m))
expect_message({
out <- standardize(m)
})
expect_identical(coef(m), c(
`(Intercept)` = -0.4575, `as.numeric(time)` = 0.5492, group = 0.3379,
`as.numeric(time):group` = 0.15779
Expand Down Expand Up @@ -98,12 +102,14 @@ test_that("weights", {
stdREFIT <- effectsize::standardize_parameters(m, method = "refit")
expect_equal(
stdREFIT[[2]],
effectsize::standardize_parameters(m, method = "posthoc")[[2]]
effectsize::standardize_parameters(m, method = "posthoc")[[2]],
ignore_attr = TRUE
)

expect_equal(
stdREFIT[[2]],
effectsize::standardize_parameters(m, method = "basic")[[2]]
effectsize::standardize_parameters(m, method = "basic")[[2]],
ignore_attr = TRUE
)
})

Expand Down Expand Up @@ -230,7 +236,9 @@ test_that("standardize mediation", {
)

out1 <- summary(standardize(med1))
expect_message(out2 <- summary(standardize(med2)))
expect_message({
out2 <- summary(standardize(med2))
})
expect_identical(unlist(out1[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
unlist(out2[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
tolerance = 0.1
Expand Down Expand Up @@ -266,13 +274,22 @@ test_that("offsets", {

m <- lm(mpg ~ hp + offset(wt), data = mtcars)

expect_warning(mz1 <- standardize(m))
expect_warning(mz2 <- standardize(m, two_sd = TRUE))
expect_warning({
mz1 <- standardize(m)
})
expect_warning({
mz2 <- standardize(m, two_sd = TRUE)
})
expect_identical(c(1, 2) * coef(mz1), coef(mz2))


m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars)
expect_warning(mz <- standardize(m), regexp = NA)
expect_warning(
{
mz <- standardize(m)
},
regexp = NA
)

par1 <- parameters::model_parameters(mz)
par2 <- effectsize::standardize_parameters(m, method = "basic")
Expand All @@ -288,10 +305,12 @@ test_that("brms", {
skip_if_not_installed("brms")

invisible(
capture.output(mod <- brms::brm(mpg ~ hp,
data = mtcars,
refresh = 0, chains = 1, silent = 2
))
capture.output({
mod <- brms::brm(mpg ~ hp,
data = mtcars,
refresh = 0, chains = 1, silent = 2
)
})
)

expect_warning(
Expand Down
Loading