diff --git a/.gitignore b/.gitignore index 3ef59ac1..061b0855 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ doc Meta docs +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index 0dc6f114..f0dc7f43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: joyn Type: Package Title: Tool for Diagnosis of Tables Joins and Complementary Join Features -Version: 0.2.0.9004 +Version: 0.2.0.9007 Authors@R: c(person(given = "R.Andres", family = "Castaneda", email = "acastanedaa@worldbank.org", @@ -37,10 +37,12 @@ Imports: data.table, cli, utils, - collapse (>= 2.0.13), + collapse (>= 2.0.15), lifecycle Depends: R (>= 2.10) RoxygenNote: 7.3.1 Roxygen: list(markdown = TRUE) VignetteBuilder: knitr +Remotes: + SebKrantz/collapse diff --git a/NEWS.md b/NEWS.md index 21533b70..ac5a73c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,12 +7,16 @@ * Add information about duplicated obs in `by` variable when match type is `1` rathern than `m`. -* improve ineffciencies in deep copies with `m:m` joins +* improve inefficiencies in deep copies with `m:m` joins * Replace `m:m` joins from `data.table::merge.data.table` to `collapse::join`. Thanks to @SebKrantz for the suggestion (#58). * Add information about duplicated obs in `by` variable when match type is `1` rather than `m`. +* Internal: improve storing of joyn messages. + +* Improve creation of reporting variable. Now, it is created in [collapse::join] rather than in `joyn` function. In addition, the reporting variable is created as factor to improve performance. Thanks to @SebKrantz for the suggestion (#58) + ## breaking changes * Now, by default, `joyn` will not sort the data. This is to avoid unnecessary @@ -20,6 +24,8 @@ computational time that most of the time is not needed. If the user wants to sort the data, they can use the `sort` argument, which triggers the sorting mechanism of `collapse` package. +* report variable (named ".join" by default) is now a factor instead of character. Yet, users can still use character if they want with the `reporttype = "character"`. + # joyn 0.2.0 * `joyn` has gained two new authors: Zander Prinsloo and Rossana Tatulli. diff --git a/R/dplyr-joins.R b/R/dplyr-joins.R index 22257d69..5c06b869 100644 --- a/R/dplyr-joins.R +++ b/R/dplyr-joins.R @@ -48,7 +48,7 @@ left_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -207,7 +207,7 @@ right_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -369,7 +369,7 @@ full_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -527,7 +527,7 @@ inner_join <- function( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, @@ -687,7 +687,7 @@ anti_join <- function( relationship = "many-to-many", y_vars_to_keep = FALSE, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/R/freq_table.R b/R/freq_table.R index 66e709dc..5bf93150 100644 --- a/R/freq_table.R +++ b/R/freq_table.R @@ -11,7 +11,7 @@ if (getRversion() >= '2.15.1') #' @param x data frame #' @param byvar character: name of variable to tabulate. Use Standard evaluation. #' @param digits numeric: number of decimal places to display. Default is 1. -#' @param na.rm logical: if TRUE remove NAs from calculations. Default is TRUE +#' @param na.rm logical: report NA values in frequencies. Default is FALSE. #' #' @return data.table with frequencies. #' @export @@ -26,36 +26,58 @@ if (getRversion() >= '2.15.1') freq_table <- function(x, byvar, digits = 1, - na.rm = TRUE) { + na.rm = FALSE) { - if (!(is.data.table(x))) { - x <- as.data.table(x) - } else { - x <- data.table::copy(x) + x_name <- as.character(substitute(x)) + if (!is.data.frame(x)) { + cli::cli_abort("Argument {.arg x} ({.field {x_name}}) must be a data frame") } + fq <- qtab(x[[byvar]], na.exclude = na.rm) + ft <- data.frame(joyn = names(fq), + n = as.numeric(fq)) - # Frequencies and format - d <- x[, .(n = .N), by = byvar - ][, percent := - { - total = sum(n, na.rm = na.rm) - d <- round((n/ total)*100, digits = digits) - d <- as.character(d) - d <- paste0(d, "%") - } - ] - - # Total row just for completeness - setorderv(d, byvar) - totd <- data.table::data.table( - tempname = "total", - n = d[, sum(n, na.rm = na.rm)], - percent = "100%" - ) + N <- fsum(ft$n) + ft <- ft |> + ftransform(percent = paste0(round(n / N * 100, digits), "%")) + + # add row with totals + ft <- rowbind(ft, data.table(joyn = "total", + n = N, + percent = "100%")) |> + # filter zeros + fsubset(n > 0) + + setrename(ft, joyn = byvar, .nse = FALSE) +} + + + +#' Report frequencies from attributes in report var +#' +#' @param x dataframe from [joyn_workhorse] +#' @param y dataframe from original merge ("right" or "using") +#' +#' @return dataframe with frequencies of report var +#' @keywords internal +report_from_attr <- function(x,y, reportvar) { + # from suggestion by @SebKrantz in #58 + # https://github.com/randrescastaneda/joyn/issues/58 + m <- attr(x, "join.match")$match + + N <- fnrow(x) + nm_x <- attr(m, "N.nomatch") # Number of non-matched x values + nm_y <- fnrow(y) - attr(m, "N.distinct") # Number of non-matched y values. If multiple = FALSE attr(m, "N.distinct") = number of unique matches. + + + counts <- c(nm_x, nm_y, N-nm_x-nm_y, N) + report <- data.frame( + .joyn1 = c("x", "y", "x & y", "total"), + n = counts, + percent = paste0(round(counts / N * 100, 1), "%") + ) |> + fsubset(n > 0) + + setrename(report, .joyn1 = reportvar, .nse = FALSE) - setnames(totd, "tempname", byvar) - d <- data.table::rbindlist(list(d, totd), - use.names = TRUE) - return(d) } diff --git a/R/info_display.R b/R/info_display.R index 8b609050..2072ee00 100644 --- a/R/info_display.R +++ b/R/info_display.R @@ -127,33 +127,49 @@ store_msg <- function(type, ...) { #' @param info A character string representing an info message to be stored. Default value is NULL #' #' @section Hot to pass the message string: -#' The function allows for the customization of the message string using {cli} classes to emphasize specific components of the message +#' The function allows for the customization of the message string using cli classes to emphasize specific components of the message #' Here's how to format the message string: -#' *For variables: .strongVar --example: "{.strongVar {reportvar}}" -#' *For function arguments: .strongArg --example: "{.strongArg {y_vars_to_keep}}" -#' *For dt/df: .strongTable --example: "{.strongTable x}" -#' *For text/anything else: .strong --example: "reportvar is {.strong NOT} returned" +#' *For variables: .strongVar +#' *For function arguments: .strongArg +#' *For dt/df: .strongTable +#' *For text/anything else: .strong #' *NOTE: By default, the number of seconds specified in timing messages is #' automatically emphasized using a custom formatting approach. -#' You do not need to apply {cli} classes nor to specify that the number is in seconds. -#' --example usage: store_joyn_msg(timing = -#' paste("The full joyn is executed in", round(time_taken, 6))) +#' You do not need to apply cli classes nor to specify that the number is in seconds. +#' #' #' #' @return invisible TRUE #' +#' @examples +#' # Timing msg +#' joyn:::store_joyn_msg(timing = paste(" The entire joyn function, including checks, +#' is executed in ", round(1.8423467, 6))) +#' +#' # Error msg +#' joyn:::store_joyn_msg(err = " Input table {.strongTable x} has no columns.") +#' +#' # Info msg +#' joyn:::store_joyn_msg(info = "Joyn's report available in variable {.strongVar .joyn}") +#' +#' #' @keywords internal store_joyn_msg <- function(err = NULL, warn = NULL, timing = NULL, info = NULL) { - # Check that only one among err, warn, timing and info is not null, otherwise stop + # Check that only one among err, warn, timing and info is not null, + # otherwise stop + # + # Formals + frm <- formals() |> + names() cn <- c(err, warn, timing, info) if (length(cn) != 1) { - cli::cli_abort(c("only one of err, warn, timing, info can be not null", + cli::cli_abort(c("only one of {.or {.arg {frm}}} can be not null", "i" = "check the arguments")) } @@ -418,7 +434,9 @@ joyn_report <- function(verbose = getOption("joyn.verbose")) { freq <- rlang::env_get(.joynenv, "freq_joyn") if (verbose) { + cli::cli_h2("JOYn Report") print(freq) + cli::cli_rule(right = "End of {.field JOYn} report") } return(invisible(freq)) } diff --git a/R/joyn-merge.R b/R/joyn-merge.R index 134ce630..4eb18490 100644 --- a/R/joyn-merge.R +++ b/R/joyn-merge.R @@ -163,7 +163,7 @@ joyn <- function(x, update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = FALSE, @@ -322,40 +322,19 @@ joyn <- function(x, # include report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars - x <- x |> - ftransform(.xreport = 1) - y <- y |> - ftransform(.yreport = 2) - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Actual merge --------- - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - # simple merge - i.yvars <- paste0("i.", yvars_w) - - # keep relevant variables in y - y <- y |> fselect( - by, yvars_w - ) - - # Perform workhorse join - x <- joyn_workhorse( - x = x, - y = y, - by = by, - match_type = match_type, - suffixes = suffixes, - sort = sort - ) + yvars_w <- y_vars_to_keep # working yvars + # yvars_w <- c(y_vars_to_keep, ".yreport") # working yvars + # x <- x |> + # ftransform(.xreport = 1) + # y <- y |> + # ftransform(.yreport = 2) #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Report variable --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # replace NAs in report vars - setnafill(x, fill = 0, cols = c(".xreport", ".yreport")) + # setnafill(x, fill = 0, cols = c(".xreport", ".yreport")) # report variable dropreport <- FALSE @@ -375,45 +354,39 @@ joyn <- function(x, check_names <- make.names(check_names, unique = TRUE) nrv <- setdiff(check_names, xnames) - store_joyn_msg(info = "reportvar {.strongVar {reportvar}} is already part of the resulting table. It will be changed to {.strongVar {nrv}}") + store_joyn_msg(info = "reportvar {.strongVar {reportvar}} is + already part of the resulting table. It will be + changed to {.strongVar {nrv}}") reportvar <- nrv } } - # report variable - collapse::settransform(x, use_util_report = .xreport + .yreport) - # Can this be done more efficiently with collapse? - data.table::setnames(x, "use_util_report", reportvar) - - #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Filter rows - `keep` --------- + # Actual merge --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ## rows to keep ----- - if (keep %in% c("master", "left") ) { - - x <- x |> - fsubset(get(reportvar) != 2) - - } else if (keep %in% c("using", "right") ) { + # simple merge + i.yvars <- paste0("i.", yvars_w) - x <- x |> - fsubset(get(reportvar) != 1) + # keep relevant variables in y + y <- y |> + fselect(by, yvars_w) - } else if (keep == "inner") { + # Perform workhorse join + jn <- joyn_workhorse( + x = x, + y = y, + by = by, + suffixes = suffixes, + sort = sort, + reportvar = reportvar + ) - x <- x |> - fsubset(get(reportvar) >= 3) - } else if (keep == "anti") { - x <- x |> - fsubset(get(reportvar) == 1) - } #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Update x --------- + # Update jn --------- #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ var_use <- NULL if (isTRUE(update_NAs) || isTRUE(update_values)) { @@ -422,7 +395,7 @@ joyn <- function(x, if (isTRUE(update_NAs || update_values) & length(var_use) > 0 ) { - x <- update_na_values(dt = x, + jn <- update_na_values(dt = jn, var = var_use, reportvar = reportvar, suffixes = suffixes, @@ -432,6 +405,30 @@ joyn <- function(x, } + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # Filter rows - `keep` --------- + #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + ## rows to keep ----- + if (keep %in% c("master", "left") ) { + + jn <- jn |> + fsubset(get(reportvar) != 2) + + } else if (keep %in% c("using", "right") ) { + + jn <- jn |> + fsubset(get(reportvar) != 1) + + } else if (keep == "inner") { + + jn <- jn |> + fsubset(get(reportvar) >= 3) + } else if (keep == "anti") { + jn <- jn |> + fsubset(get(reportvar) == 1) + } + ### common vars ---------- @@ -441,15 +438,15 @@ joyn <- function(x, gsub("\\.", "\\\\.", x = _) |> paste0("$") - varx <- grep(patterns[1], names(x), value = TRUE) - vary <- grep(patterns[2], names(x), value = TRUE) + varx <- grep(patterns[1], names(jn), value = TRUE) + vary <- grep(patterns[2], names(jn), value = TRUE) # delete Y vars with suffix - collapse::get_vars(x, vary) <- NULL + collapse::get_vars(jn, vary) <- NULL # remove suffixes nsvar <- gsub(patterns[1], "", varx) - data.table::setnames(x, varx, nsvar) + data.table::setnames(jn, varx, nsvar) } @@ -458,20 +455,43 @@ joyn <- function(x, #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## cleaning temporary report variables ---- - collapse::settransform(x, - .xreport = NULL, - .yreport = NULL) + # collapse::settransform(jn, + # .xreport = NULL, + # .yreport = NULL) # Rename by variables ----- ## in output - if (any(grepl(pattern = "keyby", x = names(x)))) { - data.table::setnames(x, - old = names(x)[grepl(pattern = "keyby", - x = names(x))], + if (any(grepl("keyby",names(jn)))) { + data.table::setnames(jn, + old = names(jn)[grepl("keyby", names(jn))], new = xbynames) } + + # Report variable ---------- + # no matching obs + if (all(jn[[reportvar]] %in% c(1, 2)) && !keep == "anti") { + + store_joyn_msg(warn = " you have no matching obs. Make sure argument + `by` is correct. Right now, `joyn` is joining by + {.strongVar {by}}") + + } + ## convert to characters if chosen ------- + + if (reporttype == "factor") { + + get_vars(jn, reportvar) <- factor(jn[[reportvar]], + levels = 1:6, + labels = c("x", + "y", + "x & y", + "NA updated", + "value updated", + "not updated")) + } + if (reporttype == "character") { rvar_to_chr <- \(x) { @@ -484,29 +504,27 @@ joyn <- function(x, default = "conflict") } - settransformv(x, reportvar, rvar_to_chr) + settransformv(jn, reportvar, rvar_to_chr) } - # no matching obs - if ((all(x[[reportvar]] %in% c("x", "y")) || - all(x[[reportvar]] %in% c(1, 2))) && - !keep == "anti") { - - store_joyn_msg(warn = " you have no matching obs. Make sure argument - `by` is correct. Right now, `joyn` is joining by - {.strongVar {by}}") - - } ## Display results------ # freq table - d <- freq_table(x, reportvar) + # + if (update_NAs || update_values) { + d <- freq_table(jn, reportvar) + } else { + d <- report_from_attr(jn, y, reportvar) + } + # remove collapse::join attributes + attr(jn, "join.match") <- NULL + rlang::env_poke(.joynenv, "freq_joyn", d) # Report var if (dropreport) { - get_vars(x, reportvar) <- NULL + get_vars(jn, reportvar) <- NULL } # store timing @@ -517,15 +535,11 @@ joyn <- function(x, is executed in ", round(time_taken_joyn, 6))) # return messages - if (verbose == TRUE) { - cli::cli_h2("JOYn Report") - joyn_report() - cli::cli_rule(right = "End of {.field JOYn} report") - joyn_msg(msg_type) - } + joyn_report(verbose = verbose) + if (verbose == TRUE) joyn_msg(msg_type) - setattr(x, "class", class_x) + setattr(jn, "class", class_x) - x + jn } diff --git a/R/joyn-package.R b/R/joyn-package.R index 98d66da3..ace5a944 100644 --- a/R/joyn-package.R +++ b/R/joyn-package.R @@ -26,7 +26,8 @@ if (getRversion() >= "2.15.1") { ".yreport", 'use_util_reportvar', 'varx_na', - 'type' + 'type', + "joyn1" ), package = utils::packageName() ) diff --git a/R/joyn_workhorse.R b/R/joyn_workhorse.R index d19f6fa7..9d084c30 100644 --- a/R/joyn_workhorse.R +++ b/R/joyn_workhorse.R @@ -6,8 +6,6 @@ #' @param x data object, "left" or "master" #' @param y data object, "right" or "using" #' @param by atomic character vector: key specifying join -#' @param match_type atomic character vector of length 1: either "1:1" (default) -#' "1:m", "m:1", or "m:m". Relies on `collapse::join()` #' @param suffixes atomic character vector: give suffixes to columns common to both #' @param sort logical: sort the result by the columns in `by` #' `x` and `y` @@ -29,41 +27,32 @@ joyn_workhorse <- function( x, y, by = intersect(names(x), names(y)), - match_type = c("1:1"), sort = FALSE, - suffixes = getOption("joyn.suffixes") # data.table suffixes + suffixes = getOption("joyn.suffixes"), # data.table suffixes + reportvar = getOption("joyn.reportvar") ) { # Argument checks ------------------------------------------------------------ - match_type <- match.arg( - match_type, - choices = c( - "1:1", - "1:m", - "m:1", - "m:m" - ) - ) - if ( - length(by) == 0 - ) { + + if (length(by) == 0) { store_joyn_msg(err = "In joyn_workhorse {.strongArg by} argument has length of 0") - store_joyn_msg(info = "Either specify by to identify columns to join on in x and y, or x and y should have common column names") + store_joyn_msg(info = "Either specify by to identify columns to join on in x + and y, or x and y should have common column names") } # Measure time start_time <- Sys.time() # Do a full join ------------------------------------------------------------- + source_pkg <- "collapse::join" # if not 1:1 => use merge.data.table # not m:m => use collapse::join() dt_result <- tryCatch( expr = { - source_pkg <- "collapse::join" collapse::join(x = x, y = y, @@ -75,54 +64,64 @@ joyn_workhorse <- function( keep.col.order = TRUE, sort = sort, verbose = 0, - column = NULL) + column = ".joyn1", + attr = TRUE) }, # end of expr section error = function(e) { - joyn_msg("err", c("{.pkg {source_pkg}} returned the following:", - x = e$message)) + store_joyn_msg(err = "{.pkg {source_pkg}} returned the following: {e$message}") }, # end of error section warning = function(w) { if (grepl("[Oo]veridentified", w$message)) { - store_joyn_msg(warn = "Your data is overidentified. Below the original message from {.strong {source_pkg}}: \n{w$message}") + store_joyn_msg(warn = "Your data is overidentified. Below the original + message from {.strong {source_pkg}}: \n{w$message}") } else { - store_joyn_msg(warn = "{.strong {source_pkg}} returned the following warning: \n{w$message}") + store_joyn_msg(warn = "{.strong {source_pkg}} returned the following + warning: \n{w$message}") } - collapse::join( x = x, - y = y, - how = "full", - on = by, - multiple = TRUE, # matches row in x with m in y - validate = "m:m", # no checks performed - suffix = suffixes, # data.table suffixes - keep.col.order = TRUE, - sort = sort, - verbose = 0, - column = NULL) |> + collapse::join(x = x, + y = y, + how = "full", + on = by, + multiple = TRUE, # matches row in x with m in y + validate = "m:m", # no checks performed + suffix = suffixes, # data.table suffixes + keep.col.order = TRUE, + sort = sort, + verbose = 0, + column = ".joyn1", + attr = TRUE) |> suppressWarnings() } ) # End of trycatch + # change values of .joyn1 to numeric to make it consistent with joyn + mapping <- c('1' = 3, '2' = 1, '3' = 2) + dt_result <- dt_result |> + ftransform(.joyn1 = as.numeric(.joyn1)) |> + ftransform(.joyn1 = mapping[as.character(.joyn1)]) |> + frename(.joyn1 = reportvar, .nse = FALSE) + + # Calculate the time taken end_time <- Sys.time() time_taken <- end_time - start_time - store_joyn_msg(timing = paste("The full joyn is executed in", round(time_taken, 6))) + store_joyn_msg(timing = paste("The full joyn is executed in", + round(time_taken, 6))) # Return ---- - return( dt_result - ) } diff --git a/R/utils.R b/R/utils.R index be4a5b24..d97d2046 100644 --- a/R/utils.R +++ b/R/utils.R @@ -342,4 +342,3 @@ correct_names <- function(by, x, y, order = TRUE) { ybynames = ybynames) out } - diff --git a/man/anti_join.Rd b/man/anti_join.Rd index f5cc1c66..e0a22001 100644 --- a/man/anti_join.Rd +++ b/man/anti_join.Rd @@ -16,7 +16,7 @@ anti_join( relationship = "many-to-many", y_vars_to_keep = FALSE, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/freq_table.Rd b/man/freq_table.Rd index 2eba5c63..6be47f0c 100644 --- a/man/freq_table.Rd +++ b/man/freq_table.Rd @@ -4,7 +4,7 @@ \alias{freq_table} \title{Tabulate simple frequencies} \usage{ -freq_table(x, byvar, digits = 1, na.rm = TRUE) +freq_table(x, byvar, digits = 1, na.rm = FALSE) } \arguments{ \item{x}{data frame} @@ -13,7 +13,7 @@ freq_table(x, byvar, digits = 1, na.rm = TRUE) \item{digits}{numeric: number of decimal places to display. Default is 1.} -\item{na.rm}{logical: if TRUE remove NAs from calculations. Default is TRUE} +\item{na.rm}{logical: report NA values in frequencies. Default is FALSE.} } \value{ data.table with frequencies. diff --git a/man/full_join.Rd b/man/full_join.Rd index 4639ae23..d5be2aea 100644 --- a/man/full_join.Rd +++ b/man/full_join.Rd @@ -19,7 +19,7 @@ full_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/inner_join.Rd b/man/inner_join.Rd index f70b8196..162d6b94 100644 --- a/man/inner_join.Rd +++ b/man/inner_join.Rd @@ -19,7 +19,7 @@ inner_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/joyn.Rd b/man/joyn.Rd index e370129e..0f4bfb20 100644 --- a/man/joyn.Rd +++ b/man/joyn.Rd @@ -14,7 +14,7 @@ joyn( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = FALSE, diff --git a/man/joyn_workhorse.Rd b/man/joyn_workhorse.Rd index fed075be..807bf5d5 100644 --- a/man/joyn_workhorse.Rd +++ b/man/joyn_workhorse.Rd @@ -8,9 +8,9 @@ joyn_workhorse( x, y, by = intersect(names(x), names(y)), - match_type = c("1:1"), sort = FALSE, - suffixes = getOption("joyn.suffixes") + suffixes = getOption("joyn.suffixes"), + reportvar = getOption("joyn.reportvar") ) } \arguments{ @@ -20,9 +20,6 @@ joyn_workhorse( \item{by}{atomic character vector: key specifying join} -\item{match_type}{atomic character vector of length 1: either "1:1" (default) -"1:m", "m:1", or "m:m". Relies on \code{collapse::join()}} - \item{sort}{logical: sort the result by the columns in \code{by} \code{x} and \code{y}} diff --git a/man/left_join.Rd b/man/left_join.Rd index 871496c9..ac29a894 100644 --- a/man/left_join.Rd +++ b/man/left_join.Rd @@ -19,7 +19,7 @@ left_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/report_from_attr.Rd b/man/report_from_attr.Rd new file mode 100644 index 00000000..82698c0b --- /dev/null +++ b/man/report_from_attr.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/freq_table.R +\name{report_from_attr} +\alias{report_from_attr} +\title{Report frequencies from attributes in report var} +\usage{ +report_from_attr(x, y, reportvar) +} +\arguments{ +\item{x}{dataframe from \link{joyn_workhorse}} + +\item{y}{dataframe from original merge ("right" or "using")} +} +\value{ +dataframe with frequencies of report var +} +\description{ +Report frequencies from attributes in report var +} +\keyword{internal} diff --git a/man/right_join.Rd b/man/right_join.Rd index c885e721..7934ae77 100644 --- a/man/right_join.Rd +++ b/man/right_join.Rd @@ -19,7 +19,7 @@ right_join( update_values = FALSE, update_NAs = update_values, reportvar = getOption("joyn.reportvar"), - reporttype = c("character", "numeric"), + reporttype = c("factor", "character", "numeric"), roll = NULL, keep_common_vars = FALSE, sort = TRUE, diff --git a/man/store_joyn_msg.Rd b/man/store_joyn_msg.Rd index 6f002a88..d0c7881b 100644 --- a/man/store_joyn_msg.Rd +++ b/man/store_joyn_msg.Rd @@ -27,17 +27,28 @@ This function serves as a wrapper for the store_msg function, which is used to s } \section{Hot to pass the message string}{ -The function allows for the customization of the message string using {cli} classes to emphasize specific components of the message +The function allows for the customization of the message string using cli classes to emphasize specific components of the message Here's how to format the message string: -*For variables: .strongVar --example: "{.strongVar {reportvar}}" -*For function arguments: .strongArg --example: "{.strongArg {y_vars_to_keep}}" -*For dt/df: .strongTable --example: "{.strongTable x}" -*For text/anything else: .strong --example: "reportvar is {.strong NOT} returned" +*For variables: .strongVar +*For function arguments: .strongArg +*For dt/df: .strongTable +*For text/anything else: .strong *NOTE: By default, the number of seconds specified in timing messages is automatically emphasized using a custom formatting approach. -You do not need to apply {cli} classes nor to specify that the number is in seconds. ---example usage: store_joyn_msg(timing = -paste("The full joyn is executed in", round(time_taken, 6))) +You do not need to apply cli classes nor to specify that the number is in seconds. } +\examples{ +# Timing msg +joyn:::store_joyn_msg(timing = paste(" The entire joyn function, including checks, + is executed in ", round(1.8423467, 6))) + +# Error msg +joyn:::store_joyn_msg(err = " Input table {.strongTable x} has no columns.") + +# Info msg +joyn:::store_joyn_msg(info = "Joyn's report available in variable {.strongVar .joyn}") + + +} \keyword{internal} diff --git a/tests/testthat/test-dplyr-joins.R b/tests/testthat/test-dplyr-joins.R index 9d478d9b..1bc5dd15 100644 --- a/tests/testthat/test-dplyr-joins.R +++ b/tests/testthat/test-dplyr-joins.R @@ -1800,7 +1800,7 @@ test_that("ANTI JOIN - Conducts ANTI join", { "sorted") <- "id1" expect_equal( jn |> fselect(-get(reportvar)) |> dim(), - c(0, 6) + c(0, 4) ) }) diff --git a/tests/testthat/test-freq_table.R b/tests/testthat/test-freq_table.R index ca016659..f579699b 100644 --- a/tests/testthat/test-freq_table.R +++ b/tests/testthat/test-freq_table.R @@ -43,22 +43,23 @@ y4 = data.table(id = c(1, 2, 5, 6, 3), # internal = TRUE) test_that("correct inputs", { - x <- "not a data.table/data.frame" + dd <- "not a data.table/data.frame" - freq_table(x) |> + freq_table(dd) |> expect_error() }) test_that("correct frequencies", { - b <- base::table(y4$id2) - b <- as.numeric(b) + b <- base::table(y4$id2) |> + as.numeric() j <- freq_table(y4, "id2") - j <- j[ id2 != "total" - ][, n] - - expect_equal(b, j) + j |> + fsubset(id2 != "total") |> + fselect(n) |> + reg_elem() |> + expect_equal(b) }) @@ -67,10 +68,13 @@ test_that("correct totals", { tr <- nrow(y4) j <- freq_table(y4, "id2") - j <- j[ id2 == "total" - ][, n] + j <- freq_table(y4, "id2") + j |> + fsubset(id2 == "total") |> + fselect(n) |> + reg_elem() |> + expect_equal(tr) - expect_equal(tr, j) }) diff --git a/tests/testthat/test-joyn.R b/tests/testthat/test-joyn.R index c28d8e8f..1a799a19 100644 --- a/tests/testthat/test-joyn.R +++ b/tests/testthat/test-joyn.R @@ -680,7 +680,11 @@ test_that("error when there is not natural join", { test_that("different names in key vars are working fine", { - df <- joyn(x4, y4, by = c("id1 = id", "id2"), match_type = "m:1", y_vars_to_keep = c("y"), sort = TRUE) + df <- joyn(x4, y4, by = c("id1 = id", "id2"), + match_type = "m:1", + y_vars_to_keep = c("y"), + sort = TRUE, + reporttype = "character") dd <- data.table(id1 = c(1, 1, 2, 2, 3, 3, 5, 6), id2 = c(1, 1, 2, 1, 3, 4, 2, 3), diff --git a/tests/testthat/test-joyn_workhorse.R b/tests/testthat/test-joyn_workhorse.R index 6d2c1eaa..1973c755 100644 --- a/tests/testthat/test-joyn_workhorse.R +++ b/tests/testthat/test-joyn_workhorse.R @@ -73,6 +73,7 @@ test_that( # Checking output with match type m:m and 1:1 ------------------------------------------------- test_that("m:m and 1:1 gives the same output if data is correct", { + skip("This test is not needed anymore because the match is always m:m in collapse::join") expect_equal( joyn_workhorse( x = x2, @@ -133,8 +134,7 @@ test_that("full joyn is correct", { x <- joyn_workhorse( x = x1, y = y1, - by = "id", - match_type = "1:1" + by = "id" ) expect_equal( nrow(x), @@ -153,7 +153,9 @@ test_that("FULL- Compare with base::merge", { x = x1, y = y1, by = "id" - ) + ) |> + fselect(-.joyn) |> + setattr('join.match', NULL) br <- base::merge( x = x1, @@ -162,37 +164,21 @@ test_that("FULL- Compare with base::merge", { all = TRUE ) - setorderv( - br, - "id", - na.last = TRUE - ) - setorderv( - jn, - "id", - na.last = TRUE - ) - setattr( - br, - 'sorted', - "id" - ) - setattr( - jn, - 'sorted', - "id" - ) # ZP: check this + setorderv(br,"id", na.last = TRUE) + setorderv(jn, "id", na.last = TRUE) + setattr(br, 'sorted', "id") + setattr(jn, 'sorted', "id") # ZP: check this - expect_equal( - jn, - br - ) + expect_equal(jn, br) jn <- joyn_workhorse( x = x2, y = y2, by = "id" - ) + ) |> + fselect(-.joyn) |> + setattr('join.match', NULL) + br <- base::merge( x = x2, @@ -213,6 +199,7 @@ test_that("FULL- Compare with base::merge", { # Checking match types work ------------------------------------------------------------------------ test_that("match types work", { + skip("joyn_workhorse does not check match type") # note: `joyn_workhorse` does not # check whether match_type diff --git a/vignettes/aux-functions.Rmd b/vignettes/aux-functions.Rmd index dcf425d1..2eab35de 100644 --- a/vignettes/aux-functions.Rmd +++ b/vignettes/aux-functions.Rmd @@ -103,13 +103,13 @@ Furthermore, `joyn` provides a function that generates simple frequency tables, # Tabulating frequencies of var `id` freq_table(x = x1, - byvar = "id") + byvar = "id")[] # Removing NAs from the calculation freq_table(x = x1, byvar = "id", - na.rm = TRUE) + na.rm = TRUE)[] ```