From fe4aceed4977dd9cd00b4df119d1dc75e0fb7b4d Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 7 Jun 2024 13:17:22 -0400 Subject: [PATCH 01/20] Increment version number to 0.2.0.9005 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0dc6f114..2cf2a406 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9004 +Version: 0.2.0.9005 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", From f1555bb20b499a1e2b7125cd2a69668daad1107f Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 7 Jun 2024 13:18:22 -0400 Subject: [PATCH 02/20] update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 21533b70..e6c812e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,8 @@ * Add information about duplicated obs in `by` variable when match type is `1` rather than `m`. +* Internal: improve storing of joyn messages. + ## breaking changes * Now, by default, `joyn` will not sort the data. This is to avoid unnecessary From 37c9a59063bd86b05bcc08d997457510788de33e Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 7 Jun 2024 16:20:15 -0400 Subject: [PATCH 03/20] add attr argument to collapse::join --- R/joyn_workhorse.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index d19f6fa7..8a2a9052 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -57,13 +57,13 @@ joyn_workhorse <- function( start_time <- Sys.time() # Do a full join ------------------------------------------------------------- + source_pkg <- "collapse::join" # if not 1:1 => use merge.data.table # not m:m => use collapse::join() dt_result <- tryCatch( expr = { - source_pkg <- "collapse::join" collapse::join(x = x, y = y, @@ -75,7 +75,8 @@ joyn_workhorse <- function( keep.col.order = TRUE, sort = sort, verbose = 0, - column = NULL) + column = NULL, + attr = TRUE) }, # end of expr section error = function(e) { From 592f0ba8a6ec09fc3f5ef3f7bb506050d8723138 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Fri, 7 Jun 2024 21:31:03 -0400 Subject: [PATCH 04/20] update. not working --- R/joyn-merge.R | 71 +++++++++++++++++++++++++--------------------- R/joyn_workhorse.R | 67 +++++++++++++++++++++---------------------- 2 files changed, 71 insertions(+), 67 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 134ce630..a6f966f1 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -322,40 +322,19 @@ joyn <- function(x, # include report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars - x <- x |> - ftransform(.xreport = 1) - y <- y |> - ftransform(.yreport = 2) - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Actual merge --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # simple merge - i.yvars <- paste0("i.", yvars_w) - - # keep relevant variables in y - y <- y |> fselect( - by, yvars_w - ) - - # Perform workhorse join - x <- joyn_workhorse( - x = x, - y = y, - by = by, - match_type = match_type, - suffixes = suffixes, - sort = sort - ) + yvars_w <- y_vars_to_keep # working yvars + # yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars + # x <- x |> + # ftransform(.xreport = 1) + # y <- y |> + # ftransform(.yreport = 2) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # replace NAs in report vars - setnafill(x, fill = 0, cols = c(".xreport", ".yreport")) + # setnafill(x, fill = 0, cols = c(".xreport", ".yreport")) # report variable dropreport <- FALSE @@ -375,17 +354,43 @@ joyn <- function(x, check_names <- make.names(check_names, unique = TRUE) nrv <- setdiff(check_names, xnames) - store_joyn_msg(info = "reportvar {.strongVar {reportvar}} is already part of the resulting table. It will be changed to {.strongVar {nrv}}") + store_joyn_msg(info = "reportvar {.strongVar {reportvar}} is + already part of the resulting table. It will be + changed to {.strongVar {nrv}}") reportvar <- nrv } } - # report variable - collapse::settransform(x, use_util_report = .xreport + .yreport) - # Can this be done more efficiently with collapse? - data.table::setnames(x, "use_util_report", reportvar) + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Actual merge --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # simple merge + i.yvars <- paste0("i.", yvars_w) + + # keep relevant variables in y + y <- y |> + fselect(by, yvars_w) + + # Perform workhorse join + x <- joyn_workhorse( + x = x, + y = y, + by = by, + suffixes = suffixes, + sort = sort, + reportvar = reportvar + ) + + + + + # # report variable + # collapse::settransform(x, use_util_report = .xreport + .yreport) + # # Can this be done more efficiently with collapse? + # data.table::setnames(x, "use_util_report", reportvar) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index 8a2a9052..54951a88 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -6,8 +6,6 @@ #' @param x data object, "left" or "master" #' @param y data object, "right" or "using" #' @param by atomic character vector: key specifying join -#' @param match_type atomic character vector of length 1: either "1:1" (default) -#' "1:m", "m:1", or "m:m". Relies on `collapse::join()` #' @param suffixes atomic character vector: give suffixes to columns common to both #' @param sort logical: sort the result by the columns in `by` #' `x` and `y` @@ -29,28 +27,19 @@ joyn_workhorse <- function( x, y, by = intersect(names(x), names(y)), - match_type = c("1:1"), sort = FALSE, - suffixes = getOption("joyn.suffixes") # data.table suffixes + suffixes = getOption("joyn.suffixes"), # data.table suffixes + reportvar = getOption("joyn.reportvar") ) { # Argument checks ------------------------------------------------------------ - match_type <- match.arg( - match_type, - choices = c( - "1:1", - "1:m", - "m:1", - "m:m" - ) - ) - if ( - length(by) == 0 - ) { + + if (length(by) == 0) { store_joyn_msg(err = "In joyn_workhorse {.strongArg by} argument has length of 0") - store_joyn_msg(info = "Either specify by to identify columns to join on in x and y, or x and y should have common column names") + store_joyn_msg(info = "Either specify by to identify columns to join on in x + and y, or x and y should have common column names") } # Measure time @@ -75,7 +64,7 @@ joyn_workhorse <- function( keep.col.order = TRUE, sort = sort, verbose = 0, - column = NULL, + column = ".joyn1", attr = TRUE) }, # end of expr section @@ -89,41 +78,51 @@ joyn_workhorse <- function( warning = function(w) { if (grepl("[Oo]veridentified", w$message)) { - store_joyn_msg(warn = "Your data is overidentified. Below the original message from {.strong {source_pkg}}: \n{w$message}") + store_joyn_msg(warn = "Your data is overidentified. Below the original + message from {.strong {source_pkg}}: \n{w$message}") } else { - store_joyn_msg(warn = "{.strong {source_pkg}} returned the following warning: \n{w$message}") + store_joyn_msg(warn = "{.strong {source_pkg}} returned the following + warning: \n{w$message}") } - collapse::join( x = x, - y = y, - how = "full", - on = by, - multiple = TRUE, # matches row in x with m in y - validate = "m:m", # no checks performed - suffix = suffixes, # data.table suffixes - keep.col.order = TRUE, - sort = sort, - verbose = 0, - column = NULL) |> + collapse::join(x = x, + y = y, + how = "full", + on = by, + multiple = TRUE, # matches row in x with m in y + validate = "m:m", # no checks performed + suffix = suffixes, # data.table suffixes + keep.col.order = TRUE, + sort = sort, + verbose = 0, + column = ".joyn1", + attr = TRUE) |> suppressWarnings() } ) # End of trycatch + # change values of .joyn1 to numeric to make it consistent with joyn + mapping <- c('1' = 3, '2' = 1, '3' = 2) + dt_result <- dt_result |> + ftransform(.joyn1 = as.numeric(.joyn1)) |> + ftransform(.joyn1 = mapping[as.character(.joyn1)]) |> + frename(.joyn1 = reportvar, .nse = FALSE) + + # Calculate the time taken end_time <- Sys.time() time_taken <- end_time - start_time - store_joyn_msg(timing = paste("The full joyn is executed in", round(time_taken, 6))) + store_joyn_msg(timing = paste("The full joyn is executed in", + round(time_taken, 6))) # Return ---- - return( dt_result - ) } From e5be1d31f0a6bb7c3efdbbdf7284e5bdc8ad38a0 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 15:43:35 -0400 Subject: [PATCH 05/20] fix some bugs --- R/joyn-merge.R | 108 ++++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index a6f966f1..769b7ea9 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -163,7 +163,7 @@ joyn <- function(x, update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = FALSE, @@ -375,7 +375,7 @@ joyn <- function(x, fselect(by, yvars_w) # Perform workhorse join - x <- joyn_workhorse( + jn <- joyn_workhorse( x = x, y = y, by = by, @@ -383,16 +383,37 @@ joyn <- function(x, sort = sort, reportvar = reportvar ) - + m <- attr(res, "join.match")$match # # report variable - # collapse::settransform(x, use_util_report = .xreport + .yreport) + # collapse::settransform(jn, use_util_report = .xreport + .yreport) # # Can this be done more efficiently with collapse? - # data.table::setnames(x, "use_util_report", reportvar) + # data.table::setnames(jn, "use_util_report", reportvar) + + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Update jn --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + var_use <- NULL + if (isTRUE(update_NAs) || isTRUE(update_values)) { + var_use <- common_vars + } + + if (isTRUE(update_NAs || update_values) & length(var_use) > 0 ) { + + jn <- update_na_values(dt = jn, + var = var_use, + reportvar = reportvar, + suffixes = suffixes, + rep_NAs = update_NAs, + rep_values = update_values + ) + + } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Filter rows - `keep` --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -400,43 +421,23 @@ joyn <- function(x, ## rows to keep ----- if (keep %in% c("master", "left") ) { - x <- x |> + jn <- jn |> fsubset(get(reportvar) != 2) } else if (keep %in% c("using", "right") ) { - x <- x |> + jn <- jn |> fsubset(get(reportvar) != 1) } else if (keep == "inner") { - x <- x |> + jn <- jn |> fsubset(get(reportvar) >= 3) } else if (keep == "anti") { - x <- x |> + jn <- jn |> fsubset(get(reportvar) == 1) } - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Update x --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - var_use <- NULL - if (isTRUE(update_NAs) || isTRUE(update_values)) { - var_use <- common_vars - } - - if (isTRUE(update_NAs || update_values) & length(var_use) > 0 ) { - - x <- update_na_values(dt = x, - var = var_use, - reportvar = reportvar, - suffixes = suffixes, - rep_NAs = update_NAs, - rep_values = update_values - ) - - } - ### common vars ---------- @@ -446,15 +447,15 @@ joyn <- function(x, gsub("\\.", "\\\\.", x = _) |> paste0("$") - varx <- grep(patterns[1], names(x), value = TRUE) - vary <- grep(patterns[2], names(x), value = TRUE) + varx <- grep(patterns[1], names(jn), value = TRUE) + vary <- grep(patterns[2], names(jn), value = TRUE) # delete Y vars with suffix - collapse::get_vars(x, vary) <- NULL + collapse::get_vars(jn, vary) <- NULL # remove suffixes nsvar <- gsub(patterns[1], "", varx) - data.table::setnames(x, varx, nsvar) + data.table::setnames(jn, varx, nsvar) } @@ -463,20 +464,32 @@ joyn <- function(x, #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## cleaning temporary report variables ---- - collapse::settransform(x, - .xreport = NULL, - .yreport = NULL) + # collapse::settransform(jn, + # .xreport = NULL, + # .yreport = NULL) # Rename by variables ----- ## in output - if (any(grepl(pattern = "keyby", x = names(x)))) { - data.table::setnames(x, - old = names(x)[grepl(pattern = "keyby", - x = names(x))], + if (any(grepl("keyby",names(jn)))) { + data.table::setnames(jn, + old = names(jn)[grepl("keyby", names(jn))], new = xbynames) } ## convert to characters if chosen ------- + + if (reporttype == "factor") { + + get_vars(jn, reportvar) <- factor(jn[[reportvar]], + levels = 1:6, + labels = c("x", + "y", + "x & y", + "NA updated", + "value updated", + "not updated")) + } + if (reporttype == "character") { rvar_to_chr <- \(x) { @@ -489,13 +502,13 @@ joyn <- function(x, default = "conflict") } - settransformv(x, reportvar, rvar_to_chr) + settransformv(jn, reportvar, rvar_to_chr) } # no matching obs - if ((all(x[[reportvar]] %in% c("x", "y")) || - all(x[[reportvar]] %in% c(1, 2))) && + if ((all(jn[[reportvar]] %in% c("x", "y")) || + all(jn[[reportvar]] %in% c(1, 2))) && !keep == "anti") { store_joyn_msg(warn = " you have no matching obs. Make sure argument @@ -506,12 +519,12 @@ joyn <- function(x, ## Display results------ # freq table - d <- freq_table(x, reportvar) + d <- freq_table(jn, reportvar) rlang::env_poke(.joynenv, "freq_joyn", d) # Report var if (dropreport) { - get_vars(x, reportvar) <- NULL + get_vars(jn, reportvar) <- NULL } # store timing @@ -529,8 +542,9 @@ joyn <- function(x, joyn_msg(msg_type) } - setattr(x, "class", class_x) + setattr(jn, "class", class_x) + setattr(jn, "join.match", NULL) - x + jn } From 33025d6372bafe9b0676ef5190901f7c668ecab9 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 16:14:01 -0400 Subject: [PATCH 06/20] fix tests --- tests/testthat/test-joyn.R | 6 +++- tests/testthat/test-joyn_workhorse.R | 43 ++++++++++------------------ 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/tests/testthat/test-joyn.R b/tests/testthat/test-joyn.R index c28d8e8f..1a799a19 100644 --- a/tests/testthat/test-joyn.R +++ b/tests/testthat/test-joyn.R @@ -680,7 +680,11 @@ test_that("error when there is not natural join", { test_that("different names in key vars are working fine", { - df <- joyn(x4, y4, by = c("id1 = id", "id2"), match_type = "m:1", y_vars_to_keep = c("y"), sort = TRUE) + df <- joyn(x4, y4, by = c("id1 = id", "id2"), + match_type = "m:1", + y_vars_to_keep = c("y"), + sort = TRUE, + reporttype = "character") dd <- data.table(id1 = c(1, 1, 2, 2, 3, 3, 5, 6), id2 = c(1, 1, 2, 1, 3, 4, 2, 3), diff --git a/tests/testthat/test-joyn_workhorse.R b/tests/testthat/test-joyn_workhorse.R index 6d2c1eaa..1973c755 100644 --- a/tests/testthat/test-joyn_workhorse.R +++ b/tests/testthat/test-joyn_workhorse.R @@ -73,6 +73,7 @@ test_that( # Checking output with match type m:m and 1:1 ------------------------------------------------- test_that("m:m and 1:1 gives the same output if data is correct", { + skip("This test is not needed anymore because the match is always m:m in collapse::join") expect_equal( joyn_workhorse( x = x2, @@ -133,8 +134,7 @@ test_that("full joyn is correct", { x <- joyn_workhorse( x = x1, y = y1, - by = "id", - match_type = "1:1" + by = "id" ) expect_equal( nrow(x), @@ -153,7 +153,9 @@ test_that("FULL- Compare with base::merge", { x = x1, y = y1, by = "id" - ) + ) |> + fselect(-.joyn) |> + setattr('join.match', NULL) br <- base::merge( x = x1, @@ -162,37 +164,21 @@ test_that("FULL- Compare with base::merge", { all = TRUE ) - setorderv( - br, - "id", - na.last = TRUE - ) - setorderv( - jn, - "id", - na.last = TRUE - ) - setattr( - br, - 'sorted', - "id" - ) - setattr( - jn, - 'sorted', - "id" - ) # ZP: check this + setorderv(br,"id", na.last = TRUE) + setorderv(jn, "id", na.last = TRUE) + setattr(br, 'sorted', "id") + setattr(jn, 'sorted', "id") # ZP: check this - expect_equal( - jn, - br - ) + expect_equal(jn, br) jn <- joyn_workhorse( x = x2, y = y2, by = "id" - ) + ) |> + fselect(-.joyn) |> + setattr('join.match', NULL) + br <- base::merge( x = x2, @@ -213,6 +199,7 @@ test_that("FULL- Compare with base::merge", { # Checking match types work ------------------------------------------------------------------------ test_that("match types work", { + skip("joyn_workhorse does not check match type") # note: `joyn_workhorse` does not # check whether match_type From 6f1403d7459bfd47a1de261486f15a0ba8d4b156 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 18:34:08 -0400 Subject: [PATCH 07/20] pass all test with new report var from collapse::join --- R/dplyr-joins.R | 10 +++++----- R/info_display.R | 9 +++++++-- R/joyn-merge.R | 2 +- R/joyn_workhorse.R | 3 +-- man/joyn.Rd | 2 +- man/joyn_workhorse.Rd | 7 ++----- man/left_join.Rd | 2 +- tests/testthat/test-dplyr-joins.R | 2 +- 8 files changed, 19 insertions(+), 18 deletions(-) diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index 22257d69..5c06b869 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -48,7 +48,7 @@ left_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -207,7 +207,7 @@ right_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -369,7 +369,7 @@ full_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -527,7 +527,7 @@ inner_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -687,7 +687,7 @@ anti_join <- function( relationship = "many-to-many", y_vars_to_keep = FALSE, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/R/info_display.R b/R/info_display.R index 8b609050..8a33ac7f 100644 --- a/R/info_display.R +++ b/R/info_display.R @@ -148,12 +148,17 @@ store_joyn_msg <- function(err = NULL, timing = NULL, info = NULL) { - # Check that only one among err, warn, timing and info is not null, otherwise stop + # Check that only one among err, warn, timing and info is not null, + # otherwise stop + # + # Formals + frm <- formals() |> + names() cn <- c(err, warn, timing, info) if (length(cn) != 1) { - cli::cli_abort(c("only one of err, warn, timing, info can be not null", + cli::cli_abort(c("only one of {.or {.arg {frm}}} can be not null", "i" = "check the arguments")) } diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 769b7ea9..78c373cc 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -383,7 +383,7 @@ joyn <- function(x, sort = sort, reportvar = reportvar ) - m <- attr(res, "join.match")$match + m <- attr(jn, "join.match")$match diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index 54951a88..9d084c30 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -71,8 +71,7 @@ joyn_workhorse <- function( error = function(e) { - joyn_msg("err", c("{.pkg {source_pkg}} returned the following:", - x = e$message)) + store_joyn_msg(err = "{.pkg {source_pkg}} returned the following: {e$message}") }, # end of error section warning = function(w) { diff --git a/man/joyn.Rd b/man/joyn.Rd index e370129e..0f4bfb20 100644 --- a/man/joyn.Rd +++ b/man/joyn.Rd @@ -14,7 +14,7 @@ joyn( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = FALSE, diff --git a/man/joyn_workhorse.Rd b/man/joyn_workhorse.Rd index fed075be..807bf5d5 100644 --- a/man/joyn_workhorse.Rd +++ b/man/joyn_workhorse.Rd @@ -8,9 +8,9 @@ joyn_workhorse( x, y, by = intersect(names(x), names(y)), - match_type = c("1:1"), sort = FALSE, - suffixes = getOption("joyn.suffixes") + suffixes = getOption("joyn.suffixes"), + reportvar = getOption("joyn.reportvar") ) } \arguments{ @@ -20,9 +20,6 @@ joyn_workhorse( \item{by}{atomic character vector: key specifying join} -\item{match_type}{atomic character vector of length 1: either "1:1" (default) -"1:m", "m:1", or "m:m". Relies on \code{collapse::join()}} - \item{sort}{logical: sort the result by the columns in \code{by} \code{x} and \code{y}} diff --git a/man/left_join.Rd b/man/left_join.Rd index 871496c9..ac29a894 100644 --- a/man/left_join.Rd +++ b/man/left_join.Rd @@ -19,7 +19,7 @@ left_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index 9d478d9b..1bc5dd15 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -1800,7 +1800,7 @@ test_that("ANTI JOIN - Conducts ANTI join", { "sorted") <- "id1" expect_equal( jn |> fselect(-get(reportvar)) |> dim(), - c(0, 6) + c(0, 4) ) }) From f7dd67e6e1914632f06a35456284eb4b6ef829dc Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:20:48 -0400 Subject: [PATCH 08/20] increase speed of freq table using collapse --- R/freq_table.R | 44 +++++++++++++------------------------------- R/info_display.R | 2 ++ R/joyn-merge.R | 41 +++++++++++++++-------------------------- R/utils.R | 1 - 4 files changed, 30 insertions(+), 58 deletions(-) diff --git a/R/freq_table.R b/R/freq_table.R index 66e709dc..db5893f1 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -11,7 +11,6 @@ if (getRversion() >= '2.15.1') #' @param x data frame #' @param byvar character: name of variable to tabulate. Use Standard evaluation. #' @param digits numeric: number of decimal places to display. Default is 1. -#' @param na.rm logical: if TRUE remove NAs from calculations. Default is TRUE #' #' @return data.table with frequencies. #' @export @@ -25,37 +24,20 @@ if (getRversion() >= '2.15.1') #' freq_table(x4, "id1") freq_table <- function(x, byvar, - digits = 1, - na.rm = TRUE) { + digits = 1) { - if (!(is.data.table(x))) { - x <- as.data.table(x) - } else { - x <- data.table::copy(x) - } + fq <- qtab(x[[byvar]]) + ft <- data.frame(joyn = names(fq), + n = as.numeric(fq)) + ft <- ft |> + ftransform(percent = paste0(round(n / N * 100, digits), "%")) + # add row with totals + ft <- rowbind(ft, data.table(joyn = "total", + n = fsum(ft$n), + percent = "100%")) |> + # filter zeros + fsubset(n > 0) - # Frequencies and format - d <- x[, .(n = .N), by = byvar - ][, percent := - { - total = sum(n, na.rm = na.rm) - d <- round((n/ total)*100, digits = digits) - d <- as.character(d) - d <- paste0(d, "%") - } - ] - - # Total row just for completeness - setorderv(d, byvar) - totd <- data.table::data.table( - tempname = "total", - n = d[, sum(n, na.rm = na.rm)], - percent = "100%" - ) - - setnames(totd, "tempname", byvar) - d <- data.table::rbindlist(list(d, totd), - use.names = TRUE) - return(d) + setrename(ft, joyn = byvar, .nse = FALSE) } diff --git a/R/info_display.R b/R/info_display.R index 8a33ac7f..2ed4db44 100644 --- a/R/info_display.R +++ b/R/info_display.R @@ -423,7 +423,9 @@ joyn_report <- function(verbose = getOption("joyn.verbose")) { freq <- rlang::env_get(.joynenv, "freq_joyn") if (verbose) { + cli::cli_h2("JOYn Report") print(freq) + cli::cli_rule(right = "End of {.field JOYn} report") } return(invisible(freq)) } diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 78c373cc..3b692758 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -383,15 +383,6 @@ joyn <- function(x, sort = sort, reportvar = reportvar ) - m <- attr(jn, "join.match")$match - - - - # # report variable - # collapse::settransform(jn, use_util_report = .xreport + .yreport) - # # Can this be done more efficiently with collapse? - # data.table::setnames(jn, "use_util_report", reportvar) - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -476,6 +467,17 @@ joyn <- function(x, new = xbynames) } + + # Report variable ---------- + # no matching obs + if (all(jn[[reportvar]] %in% c(1, 2)) && !keep == "anti") { + + store_joyn_msg(warn = " you have no matching obs. Make sure argument + `by` is correct. Right now, `joyn` is joining by + {.strongVar {by}}") + + } + ## convert to characters if chosen ------- if (reporttype == "factor") { @@ -506,19 +508,10 @@ joyn <- function(x, } - # no matching obs - if ((all(jn[[reportvar]] %in% c("x", "y")) || - all(jn[[reportvar]] %in% c(1, 2))) && - !keep == "anti") { - - store_joyn_msg(warn = " you have no matching obs. Make sure argument - `by` is correct. Right now, `joyn` is joining by - {.strongVar {by}}") - - } ## Display results------ # freq table + d <- freq_table(jn, reportvar) rlang::env_poke(.joynenv, "freq_joyn", d) @@ -535,15 +528,11 @@ joyn <- function(x, is executed in ", round(time_taken_joyn, 6))) # return messages - if (verbose == TRUE) { - cli::cli_h2("JOYn Report") - joyn_report() - cli::cli_rule(right = "End of {.field JOYn} report") - joyn_msg(msg_type) - } + joyn_report(verbose = verbose) + if (verbose == TRUE) joyn_msg(msg_type) setattr(jn, "class", class_x) - setattr(jn, "join.match", NULL) + # setattr(jn, "join.match", NULL) jn diff --git a/R/utils.R b/R/utils.R index be4a5b24..d97d2046 100644 --- a/R/utils.R +++ b/R/utils.R @@ -342,4 +342,3 @@ correct_names <- function(by, x, y, order = TRUE) { ybynames = ybynames) out } - From 020905f6524129ceb1404f1b24173ce009ebf555 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:23:19 -0400 Subject: [PATCH 09/20] remove collapse::join attributes --- R/joyn-merge.R | 1 - R/joyn_workhorse.R | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 3b692758..8d0ea0d8 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -532,7 +532,6 @@ joyn <- function(x, if (verbose == TRUE) joyn_msg(msg_type) setattr(jn, "class", class_x) - # setattr(jn, "join.match", NULL) jn diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index 9d084c30..d70e896e 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -64,8 +64,7 @@ joyn_workhorse <- function( keep.col.order = TRUE, sort = sort, verbose = 0, - column = ".joyn1", - attr = TRUE) + column = ".joyn1") }, # end of expr section error = function(e) { From b26edd9649060649c76e731003287df4ae5e8d23 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:39:50 -0400 Subject: [PATCH 10/20] pass all tests --- DESCRIPTION | 2 +- R/freq_table.R | 9 ++++++++- tests/testthat/test-freq_table.R | 26 +++++++++++++++----------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2cf2a406..d8fa5b6b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Imports: data.table, cli, utils, - collapse (>= 2.0.13), + collapse (>= 2.0.15), lifecycle Depends: R (>= 2.10) diff --git a/R/freq_table.R b/R/freq_table.R index db5893f1..40dcadc2 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -26,15 +26,22 @@ freq_table <- function(x, byvar, digits = 1) { + x_name <- as.character(substitute(x)) + if (!is.data.frame(x)) { + cli::cli_abort("Argument {.arg x} ({.field {x_name}}) must be a data frame") + } + fq <- qtab(x[[byvar]]) ft <- data.frame(joyn = names(fq), n = as.numeric(fq)) + + N <- fsum(ft$n) ft <- ft |> ftransform(percent = paste0(round(n / N * 100, digits), "%")) # add row with totals ft <- rowbind(ft, data.table(joyn = "total", - n = fsum(ft$n), + n = N, percent = "100%")) |> # filter zeros fsubset(n > 0) diff --git a/tests/testthat/test-freq_table.R b/tests/testthat/test-freq_table.R index ca016659..f579699b 100644 --- a/tests/testthat/test-freq_table.R +++ b/tests/testthat/test-freq_table.R @@ -43,22 +43,23 @@ y4 = data.table(id = c(1, 2, 5, 6, 3), # internal = TRUE) test_that("correct inputs", { - x <- "not a data.table/data.frame" + dd <- "not a data.table/data.frame" - freq_table(x) |> + freq_table(dd) |> expect_error() }) test_that("correct frequencies", { - b <- base::table(y4$id2) - b <- as.numeric(b) + b <- base::table(y4$id2) |> + as.numeric() j <- freq_table(y4, "id2") - j <- j[ id2 != "total" - ][, n] - - expect_equal(b, j) + j |> + fsubset(id2 != "total") |> + fselect(n) |> + reg_elem() |> + expect_equal(b) }) @@ -67,10 +68,13 @@ test_that("correct totals", { tr <- nrow(y4) j <- freq_table(y4, "id2") - j <- j[ id2 == "total" - ][, n] + j <- freq_table(y4, "id2") + j |> + fsubset(id2 == "total") |> + fselect(n) |> + reg_elem() |> + expect_equal(tr) - expect_equal(tr, j) }) From cd205e130ed05477daf95c237aa2d254b81cec3b Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:43:12 -0400 Subject: [PATCH 11/20] update news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index e6c812e6..e52b886a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,8 @@ * Internal: improve storing of joyn messages. +* Improve creation of reporting variable. Now, it is created in [collapse::join] rather than in `joyn` function. In addition, the reporting variable is created as factor to improve performance. Thanks to @SebKrantz for the suggestion (#58) + ## breaking changes * Now, by default, `joyn` will not sort the data. This is to avoid unnecessary @@ -22,6 +24,8 @@ computational time that most of the time is not needed. If the user wants to sort the data, they can use the `sort` argument, which triggers the sorting mechanism of `collapse` package. +* report variable (named ".join" by default) is now a factor instead of character. Yet, users can still use character if they want with the `reporttype = "character"`. + # joyn 0.2.0 * `joyn` has gained two new authors: Zander Prinsloo and Rossana Tatulli. From e35be63d591f1a3d74d1a065c7c9ed2329403e20 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:43:59 -0400 Subject: [PATCH 12/20] document --- NEWS.md | 2 +- man/anti_join.Rd | 2 +- man/freq_table.Rd | 4 +--- man/full_join.Rd | 2 +- man/inner_join.Rd | 2 +- man/right_join.Rd | 2 +- 6 files changed, 6 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index e52b886a..ac5a73c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ * Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`. -* improve ineffciencies in deep copies with `m:m` joins +* improve inefficiencies in deep copies with `m:m` joins * Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58). diff --git a/man/anti_join.Rd b/man/anti_join.Rd index f5cc1c66..e0a22001 100644 --- a/man/anti_join.Rd +++ b/man/anti_join.Rd @@ -16,7 +16,7 @@ anti_join( relationship = "many-to-many", y_vars_to_keep = FALSE, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/freq_table.Rd b/man/freq_table.Rd index 2eba5c63..40b9ca3d 100644 --- a/man/freq_table.Rd +++ b/man/freq_table.Rd @@ -4,7 +4,7 @@ \alias{freq_table} \title{Tabulate simple frequencies} \usage{ -freq_table(x, byvar, digits = 1, na.rm = TRUE) +freq_table(x, byvar, digits = 1) } \arguments{ \item{x}{data frame} @@ -12,8 +12,6 @@ freq_table(x, byvar, digits = 1, na.rm = TRUE) \item{byvar}{character: name of variable to tabulate. Use Standard evaluation.} \item{digits}{numeric: number of decimal places to display. Default is 1.} - -\item{na.rm}{logical: if TRUE remove NAs from calculations. Default is TRUE} } \value{ data.table with frequencies. diff --git a/man/full_join.Rd b/man/full_join.Rd index 4639ae23..d5be2aea 100644 --- a/man/full_join.Rd +++ b/man/full_join.Rd @@ -19,7 +19,7 @@ full_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/inner_join.Rd b/man/inner_join.Rd index f70b8196..162d6b94 100644 --- a/man/inner_join.Rd +++ b/man/inner_join.Rd @@ -19,7 +19,7 @@ inner_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/right_join.Rd b/man/right_join.Rd index c885e721..7934ae77 100644 --- a/man/right_join.Rd +++ b/man/right_join.Rd @@ -19,7 +19,7 @@ right_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, From a4e949e59465be20eb1221c92f70293fa42a4a5c Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Mon, 10 Jun 2024 20:47:06 -0400 Subject: [PATCH 13/20] Increment version number to 0.2.0.9006 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d8fa5b6b..c3b527f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9005 +Version: 0.2.0.9006 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", From 5446ae97caa4732b61b6d902242d77c0fbe9e29f Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 11 Jun 2024 09:20:34 -0400 Subject: [PATCH 14/20] add collapse as dev package --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index c3b527f3..f64132fd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,3 +44,5 @@ Depends: RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr +Remotes: + SebKrantz/collapse From 02cc0b70dd011cfd41f868d8e307ddbbda918c8f Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 11 Jun 2024 11:36:45 -0400 Subject: [PATCH 15/20] fix issue with NA in freq_table --- .gitignore | 2 ++ R/freq_table.R | 5 +++-- vignettes/aux-functions.Rmd | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3ef59ac1..061b0855 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ doc Meta docs +/doc/ +/Meta/ diff --git a/R/freq_table.R b/R/freq_table.R index 40dcadc2..2f03d7d4 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -24,14 +24,15 @@ if (getRversion() >= '2.15.1') #' freq_table(x4, "id1") freq_table <- function(x, byvar, - digits = 1) { + digits = 1, + na.rm = FALSE) { x_name <- as.character(substitute(x)) if (!is.data.frame(x)) { cli::cli_abort("Argument {.arg x} ({.field {x_name}}) must be a data frame") } - fq <- qtab(x[[byvar]]) + fq <- qtab(x[[byvar]], na.exclude = na.rm) ft <- data.frame(joyn = names(fq), n = as.numeric(fq)) diff --git a/vignettes/aux-functions.Rmd b/vignettes/aux-functions.Rmd index dcf425d1..2eab35de 100644 --- a/vignettes/aux-functions.Rmd +++ b/vignettes/aux-functions.Rmd @@ -103,13 +103,13 @@ Furthermore, `joyn` provides a function that generates simple frequency tables, # Tabulating frequencies of var `id` freq_table(x = x1, - byvar = "id") + byvar = "id")[] # Removing NAs from the calculation freq_table(x = x1, byvar = "id", - na.rm = TRUE) + na.rm = TRUE)[] ``` From 28d17f6ea4dfdd2f694ca7fd2e5a73ecf785f48e Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 11 Jun 2024 14:10:42 -0400 Subject: [PATCH 16/20] fix examples in store_joyn_msg doc --- R/info_display.R | 26 +++++++++++++++++++------- man/freq_table.Rd | 2 +- man/store_joyn_msg.Rd | 26 +++++++++++++++++++------- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/R/info_display.R b/R/info_display.R index 2ed4db44..428d3f64 100644 --- a/R/info_display.R +++ b/R/info_display.R @@ -127,21 +127,33 @@ store_msg <- function(type, ...) { #' @param info A character string representing an info message to be stored. Default value is NULL #' #' @section Hot to pass the message string: -#' The function allows for the customization of the message string using {cli} classes to emphasize specific components of the message +#' The function allows for the customization of the message string using \{cli\} classes to emphasize specific components of the message #' Here's how to format the message string: -#' *For variables: .strongVar --example: "{.strongVar {reportvar}}" -#' *For function arguments: .strongArg --example: "{.strongArg {y_vars_to_keep}}" -#' *For dt/df: .strongTable --example: "{.strongTable x}" -#' *For text/anything else: .strong --example: "reportvar is {.strong NOT} returned" +#' *For variables: .strongVar +#' *For function arguments: .strongArg +#' *For dt/df: .strongTable +#' *For text/anything else: .strong #' *NOTE: By default, the number of seconds specified in timing messages is #' automatically emphasized using a custom formatting approach. #' You do not need to apply {cli} classes nor to specify that the number is in seconds. -#' --example usage: store_joyn_msg(timing = -#' paste("The full joyn is executed in", round(time_taken, 6))) +#' #' #' #' @return invisible TRUE #' +#' @examples +#' \donotrun{ +#' # Timing msg +#' store_joyn_msg(timing = paste(" The entire joyn function, including checks, +#' is executed in ", round(time_taken_joyn, 6))) +#' +#' # Error msg +#' store_joyn_msg(err = " Input table {.strongTable x} has no columns.") +#' +#' # Info msg +#' store_joyn_msg(info = "Joyn's report available in variable {.strongVar {reportvar}}") +#' } +#' #' @keywords internal store_joyn_msg <- function(err = NULL, warn = NULL, diff --git a/man/freq_table.Rd b/man/freq_table.Rd index 40b9ca3d..da054c8a 100644 --- a/man/freq_table.Rd +++ b/man/freq_table.Rd @@ -4,7 +4,7 @@ \alias{freq_table} \title{Tabulate simple frequencies} \usage{ -freq_table(x, byvar, digits = 1) +freq_table(x, byvar, digits = 1, na.rm = FALSE) } \arguments{ \item{x}{data frame} diff --git a/man/store_joyn_msg.Rd b/man/store_joyn_msg.Rd index 6f002a88..71e42ada 100644 --- a/man/store_joyn_msg.Rd +++ b/man/store_joyn_msg.Rd @@ -27,17 +27,29 @@ This function serves as a wrapper for the store_msg function, which is used to s } \section{Hot to pass the message string}{ -The function allows for the customization of the message string using {cli} classes to emphasize specific components of the message +The function allows for the customization of the message string using \{cli\} classes to emphasize specific components of the message Here's how to format the message string: -*For variables: .strongVar --example: "{.strongVar {reportvar}}" -*For function arguments: .strongArg --example: "{.strongArg {y_vars_to_keep}}" -*For dt/df: .strongTable --example: "{.strongTable x}" -*For text/anything else: .strong --example: "reportvar is {.strong NOT} returned" +*For variables: .strongVar +*For function arguments: .strongArg +*For dt/df: .strongTable +*For text/anything else: .strong *NOTE: By default, the number of seconds specified in timing messages is automatically emphasized using a custom formatting approach. You do not need to apply {cli} classes nor to specify that the number is in seconds. ---example usage: store_joyn_msg(timing = -paste("The full joyn is executed in", round(time_taken, 6))) } +\examples{ +\donotrun{ +# Timing msg +store_joyn_msg(timing = paste(" The entire joyn function, including checks, + is executed in ", round(time_taken_joyn, 6))) + +# Error msg +store_joyn_msg(err = " Input table {.strongTable x} has no columns.") + +# Info msg +store_joyn_msg(info = "Joyn's report available in variable {.strongVar {reportvar}}") +} + +} \keyword{internal} From 5f1ed7e88374e3443c0d43ecc5f00f08a074c5c6 Mon Sep 17 00:00:00 2001 From: RossanaTat Date: Tue, 11 Jun 2024 14:23:01 -0400 Subject: [PATCH 17/20] fix again --- R/info_display.R | 15 +++++++-------- man/store_joyn_msg.Rd | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/R/info_display.R b/R/info_display.R index 428d3f64..2072ee00 100644 --- a/R/info_display.R +++ b/R/info_display.R @@ -127,7 +127,7 @@ store_msg <- function(type, ...) { #' @param info A character string representing an info message to be stored. Default value is NULL #' #' @section Hot to pass the message string: -#' The function allows for the customization of the message string using \{cli\} classes to emphasize specific components of the message +#' The function allows for the customization of the message string using cli classes to emphasize specific components of the message #' Here's how to format the message string: #' *For variables: .strongVar #' *For function arguments: .strongArg @@ -135,24 +135,23 @@ store_msg <- function(type, ...) { #' *For text/anything else: .strong #' *NOTE: By default, the number of seconds specified in timing messages is #' automatically emphasized using a custom formatting approach. -#' You do not need to apply {cli} classes nor to specify that the number is in seconds. +#' You do not need to apply cli classes nor to specify that the number is in seconds. #' #' #' #' @return invisible TRUE #' #' @examples -#' \donotrun{ #' # Timing msg -#' store_joyn_msg(timing = paste(" The entire joyn function, including checks, -#' is executed in ", round(time_taken_joyn, 6))) +#' joyn:::store_joyn_msg(timing = paste(" The entire joyn function, including checks, +#' is executed in ", round(1.8423467, 6))) #' #' # Error msg -#' store_joyn_msg(err = " Input table {.strongTable x} has no columns.") +#' joyn:::store_joyn_msg(err = " Input table {.strongTable x} has no columns.") #' #' # Info msg -#' store_joyn_msg(info = "Joyn's report available in variable {.strongVar {reportvar}}") -#' } +#' joyn:::store_joyn_msg(info = "Joyn's report available in variable {.strongVar .joyn}") +#' #' #' @keywords internal store_joyn_msg <- function(err = NULL, diff --git a/man/store_joyn_msg.Rd b/man/store_joyn_msg.Rd index 71e42ada..d0c7881b 100644 --- a/man/store_joyn_msg.Rd +++ b/man/store_joyn_msg.Rd @@ -27,7 +27,7 @@ This function serves as a wrapper for the store_msg function, which is used to s } \section{Hot to pass the message string}{ -The function allows for the customization of the message string using \{cli\} classes to emphasize specific components of the message +The function allows for the customization of the message string using cli classes to emphasize specific components of the message Here's how to format the message string: *For variables: .strongVar *For function arguments: .strongArg @@ -35,21 +35,20 @@ Here's how to format the message string: *For text/anything else: .strong *NOTE: By default, the number of seconds specified in timing messages is automatically emphasized using a custom formatting approach. -You do not need to apply {cli} classes nor to specify that the number is in seconds. +You do not need to apply cli classes nor to specify that the number is in seconds. } \examples{ -\donotrun{ # Timing msg -store_joyn_msg(timing = paste(" The entire joyn function, including checks, - is executed in ", round(time_taken_joyn, 6))) +joyn:::store_joyn_msg(timing = paste(" The entire joyn function, including checks, + is executed in ", round(1.8423467, 6))) # Error msg -store_joyn_msg(err = " Input table {.strongTable x} has no columns.") +joyn:::store_joyn_msg(err = " Input table {.strongTable x} has no columns.") # Info msg -store_joyn_msg(info = "Joyn's report available in variable {.strongVar {reportvar}}") -} +joyn:::store_joyn_msg(info = "Joyn's report available in variable {.strongVar .joyn}") + } \keyword{internal} From 3e83fba815bf6fa79702e1dc2542bba542e8b574 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 11 Jun 2024 15:36:52 -0400 Subject: [PATCH 18/20] include reporting from attributes --- R/freq_table.R | 31 +++++++++++++++++++++++++++++++ R/joyn-merge.R | 9 ++++++++- R/joyn-package.R | 3 ++- R/joyn_workhorse.R | 3 ++- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/R/freq_table.R b/R/freq_table.R index 2f03d7d4..0cd9f8fd 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -49,3 +49,34 @@ freq_table <- function(x, setrename(ft, joyn = byvar, .nse = FALSE) } + + + +#' Report frequencies from attributes in report var +#' +#' @param x dataframe from [joyn_workhorse] +#' @param y dataframe from original merge ("right" or "using") +#' +#' @return dataframe with frequencies of report var +#' @keywords internal +report_from_attr <- function(x,y, reportvar) { + # from suggestion by @SebKrantz in #58 + # https://github.com/randrescastaneda/joyn/issues/58 + m <- attr(x, "join.match")$match + + N <- fnrow(x) + nm_x <- attr(m, "N.nomatch") # Number of non-matched x values + nm_y <- fnrow(y) - attr(m, "N.distinct") # Number of non-matched y values. If multiple = FALSE attr(m, "N.distinct") = number of unique matches. + + + counts <- c(nm_x, nm_y, N-nm_x-nm_y, N) + report <- data.frame( + .joyn1 = c("x", "y", "x & y", "total"), + n = counts, + percent = paste0(round(counts / N * 100, 1), "%") + ) |> + fsubset(n > 0) + + setrename(report, .joyn1 = reportvar, .nse = FALSE) + +} diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 8d0ea0d8..4eb18490 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -511,8 +511,15 @@ joyn <- function(x, ## Display results------ # freq table + # + if (update_NAs || update_values) { + d <- freq_table(jn, reportvar) + } else { + d <- report_from_attr(jn, y, reportvar) + } + # remove collapse::join attributes + attr(jn, "join.match") <- NULL - d <- freq_table(jn, reportvar) rlang::env_poke(.joynenv, "freq_joyn", d) # Report var diff --git a/R/joyn-package.R b/R/joyn-package.R index 98d66da3..ace5a944 100644 --- a/R/joyn-package.R +++ b/R/joyn-package.R @@ -26,7 +26,8 @@ if (getRversion() >= "2.15.1") { ".yreport", 'use_util_reportvar', 'varx_na', - 'type' + 'type', + "joyn1" ), package = utils::packageName() ) diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index d70e896e..9d084c30 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -64,7 +64,8 @@ joyn_workhorse <- function( keep.col.order = TRUE, sort = sort, verbose = 0, - column = ".joyn1") + column = ".joyn1", + attr = TRUE) }, # end of expr section error = function(e) { From 29b623ff346eca3b44cab0c272a5cde440d0d753 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 11 Jun 2024 15:38:21 -0400 Subject: [PATCH 19/20] document --- R/freq_table.R | 1 + man/freq_table.Rd | 2 ++ man/report_from_attr.Rd | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 man/report_from_attr.Rd diff --git a/R/freq_table.R b/R/freq_table.R index 0cd9f8fd..5bf93150 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -11,6 +11,7 @@ if (getRversion() >= '2.15.1') #' @param x data frame #' @param byvar character: name of variable to tabulate. Use Standard evaluation. #' @param digits numeric: number of decimal places to display. Default is 1. +#' @param na.rm logical: report NA values in frequencies. Default is FALSE. #' #' @return data.table with frequencies. #' @export diff --git a/man/freq_table.Rd b/man/freq_table.Rd index da054c8a..6be47f0c 100644 --- a/man/freq_table.Rd +++ b/man/freq_table.Rd @@ -12,6 +12,8 @@ freq_table(x, byvar, digits = 1, na.rm = FALSE) \item{byvar}{character: name of variable to tabulate. Use Standard evaluation.} \item{digits}{numeric: number of decimal places to display. Default is 1.} + +\item{na.rm}{logical: report NA values in frequencies. Default is FALSE.} } \value{ data.table with frequencies. diff --git a/man/report_from_attr.Rd b/man/report_from_attr.Rd new file mode 100644 index 00000000..82698c0b --- /dev/null +++ b/man/report_from_attr.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/freq_table.R +\name{report_from_attr} +\alias{report_from_attr} +\title{Report frequencies from attributes in report var} +\usage{ +report_from_attr(x, y, reportvar) +} +\arguments{ +\item{x}{dataframe from \link{joyn_workhorse}} + +\item{y}{dataframe from original merge ("right" or "using")} +} +\value{ +dataframe with frequencies of report var +} +\description{ +Report frequencies from attributes in report var +} +\keyword{internal} From e4dcd5356f149101a1ecd5c3f4ff0a450fba1979 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda Aguilar" Date: Tue, 11 Jun 2024 15:38:35 -0400 Subject: [PATCH 20/20] Increment version number to 0.2.0.9007 --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f64132fd..f0dc7f43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9006 +Version: 0.2.0.9007 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", @@ -44,5 +44,5 @@ Depends: RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr -Remotes: +Remotes: SebKrantz/collapse