Skip to content

Commit

Permalink
Remove logical condition recycling from fcase() (#6363)
Browse files Browse the repository at this point in the history
* Remove ability to recycle conditions in fcase()

* cleaner body on R side

* extra tests

* TODO for using ...length()

* not error
  • Loading branch information
MichaelChirico authored Aug 21, 2024
1 parent 4351399 commit 26c2c08
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 18 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

b. `fread()` now supports automatic detection of `dec` (as either `.` or `,`, the latter being [common in many places in Europe, Africa, and South America](https://en.wikipedia.org/wiki/Decimal_separator)); this behavior is now the default, i.e. `dec='auto'`, [#2431](https://github.com/Rdatatable/data.table/issues/2431). Thanks @mattdowle for the original issue, 50 or more others for expressing support, and @MichaelChirico for the fix.

c. `fcase()` supports scalars in conditions (e.g. supplying just `TRUE`), vectors in `default=` (so the default can vary by row), and `default=` is now lazily evaluated, [#4258](https://github.com/Rdatatable/data.table/issues/4258). Thanks @sindribaldur for the feature request, @shrektan for doing most of the implementation, and @MichaelChirico for sewing things up.
c. `fcase()` supports vectors in `default=` (so the default can vary by row) and `default=` is now lazily evaluated, [#4258](https://github.com/Rdatatable/data.table/issues/4258). Thanks @sindribaldur for the feature request, @shrektan for doing most of the implementation, and @MichaelChirico for sewing things up. Thanks also to @DavisVaughan for some design guidance before release to remove an extraneous feature, [#6352](https://github.com/Rdatatable/data.table/issues/6352).

d. `[.data.table` gains argument `showProgress`, allowing users to toggle progress printing for slow "group by" operations, [#3060](https://github.com/Rdatatable/data.table/issues/3060). The progress bar reports information such as the number of groups processed, total groups, total time elapsed and estimated time until completion. This feature doesn't apply to `GForce`-optimized operations. Thanks to @eatonya and @zachmayer for filing FRs, and to everyone else that up-voted/chimed in on the issue. Thanks to @joshhwuu for the PR.

Expand Down
7 changes: 6 additions & 1 deletion R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@ fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE)
setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)

fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) .Call(CfcaseR, parent.frame(), as.list(substitute(list(..., TRUE, default)))[-1L])
fcase = function(..., default=NA) {
# TODO(R>=3.5.0): Use ...length() to avoid the need for suppressWarnings() here
default_condition <- suppressWarnings(rep(TRUE, length(switch(1, ...)))) # better than ..1/..elt(1): won't fail for empty fcase()
arg_list <- as.list(substitute(list(..., default_condition, default)))[-1L]
.Call(CfcaseR, parent.frame(), arg_list)
}

colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent)

Expand Down
21 changes: 14 additions & 7 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -16653,7 +16653,7 @@ test(2127.65, fcase(test_vec1, list(1)), list(1,1,1,1,1, NULL, NULL, NULL, NULL,
test(2127.66, fcase(test_vec1, as.Date("2019-10-11")), c(rep(as.Date("2019-10-11"),5),rep(NA,6)))
test(2127.67, fcase(test_vec1, factor("a", levels=letters[1:3])), factor(c(rep("a",5),rep("NA",6)), levels=letters[1:3]))
test(2127.68, fcase(test_vec1, 1L, default = 1:2), error = "Length of 'default' must be 1.")
test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has length 12 which differs from that of argument #1 (11). Please make sure all logical conditions have the same length or length 1.")
test(2127.69, fcase(test_vec1, 1L, test_vec_na1, 2L), error = "Argument #3 has length 12 which differs from that of argument #1 (11). Please make sure all logical conditions have the same length.")
test(2127.70, fcase(test_vec1, as.Date("2019-10-11"), test_vec2, 2), error = "Argument #4 has different class than argument #2, Please make sure all output values have the same class.")
test(2127.71, fcase(test_vec1, 1L, test_vec2, 2:3), error = "Length of output value #4 (2) must either be 1 or match the length of the logical condition (11).")
test(2127.72, fcase(TRUE, 1L, FALSE, stop("bang!")), 1L)
Expand Down Expand Up @@ -19002,18 +19002,25 @@ test(2276.04, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, default=(11:14)+1i), c(1L,
test(2276.05, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), default=as.character(11:14)), as.character(c(1L, 12:14)))
test(2276.06, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), default=as.list(11:14)), as.list(c(1L, 12:14)))
## for scalar condition
test(2276.07, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:13), error="Length of output value #4 (3) must either be 1 or match the length of the logical condition (4).")
test(2276.08, fcase(c(TRUE, FALSE, NA, NA), 1:4, TRUE, 11:14), c(1L, 12:14))
test(2276.09, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, TRUE, 11:14 + 0.1), c(1L, 12:14) + 0.1)
test(2276.10, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, TRUE, (11:14)+1i), c(1L, 12:14)+1i)
test(2276.11, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), TRUE, as.character(11:14)), as.character(c(1L, 12:14)))
test(2276.12, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), TRUE, as.list(11:14)), as.list(c(1L, 12:14)))
true4 = rep(TRUE, 4L)
test(2276.07, fcase(c(TRUE, FALSE, NA, NA), 1:4, true4, 11:13), error="Length of output value #4 (3) must either be 1 or match the length of the logical condition (4).")
test(2276.08, fcase(c(TRUE, FALSE, NA, NA), 1:4, true4, 11:14), c(1L, 12:14))
test(2276.09, fcase(c(TRUE, FALSE, NA, NA), 1:4 + 0.1, true4, 11:14 + 0.1), c(1L, 12:14) + 0.1)
test(2276.10, fcase(c(TRUE, FALSE, NA, NA), (1:4)+1i, true4, (11:14)+1i), c(1L, 12:14)+1i)
test(2276.11, fcase(c(TRUE, FALSE, NA, NA), as.character(1:4), true4, as.character(11:14)), as.character(c(1L, 12:14)))
test(2276.12, fcase(c(TRUE, FALSE, NA, NA), as.list(1:4), true4, as.list(11:14)), as.list(c(1L, 12:14)))
test(2276.13, fcase(TRUE, 1L, default=stop("lazy eval")), 1L) # default is lazy eval'ed
test(2276.14, fcase(c(TRUE, FALSE), 1L, c(TRUE, TRUE), NA), c(1L, NA_integer_)) # scalar NA will be converted

# output is missing
test(2276.15, fcase(c(TRUE, FALSE), NA_integer_, c(TRUE, TRUE), 2L), c(NA_integer_, 2L))

# length-1 condition not allowed
test(2276.16, fcase(c(TRUE, FALSE), 1:2, TRUE, 2L), error="Please make sure all logical conditions have the same length.")

# empty fcase() works without warning or error
test(2276.17, fcase(), logical())

# passing a function in env= doesn't trip up processing 'j', #6026
DT=data.table(a=1:2, b=3:4)
test(2277.1, DT[, builtin(b), env=list(builtin=sum)], 7L)
Expand Down
18 changes: 9 additions & 9 deletions src/fifelse.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,10 @@ SEXP fcaseR(SEXP rho, SEXP args) {
} else {
imask = false;
naout = xlength(thens) == 1 && TYPEOF(thens) == LGLSXP && LOGICAL(thens)[0]==NA_LOGICAL;
if (xlength(whens) != len0 && xlength(whens) != 1) {
if (xlength(whens) != len0) {
// no need to check `idefault` here because the con for default is always `TRUE`
error(_("Argument #%d has length %lld which differs from that of argument #1 (%lld). "
"Please make sure all logical conditions have the same length or length 1."),
"Please make sure all logical conditions have the same length."),
i*2+1, (long long)xlength(whens), (long long)len0);
}
if (!naout && TYPEOF(thens) != type0) {
Expand Down Expand Up @@ -290,7 +290,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
error(_("Length of output value #%d (%lld) must either be 1 or match the length of the logical condition (%lld)."), i*2+2, (long long)len1, (long long)len0);
}
}
int64_t thenMask = len1>1 ? INT64_MAX : 0, whenMask = xlength(whens)>1 ? INT64_MAX : 0;
int64_t thenMask = len1>1 ? INT64_MAX : 0;
switch(TYPEOF(ans)) {
case LGLSXP: {
const int *restrict pthens;
Expand All @@ -299,7 +299,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
const int pna = NA_LOGICAL;
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
pans[idx] = naout ? pna : pthens[idx & thenMask];
} else {
if (imask) {
Expand All @@ -316,7 +316,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
const int pna = NA_INTEGER;
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
pans[idx] = naout ? pna : pthens[idx & thenMask];
} else {
if (imask) {
Expand All @@ -334,7 +334,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
const double pna = na_double;
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
pans[idx] = naout ? pna : pthens[idx & thenMask];
} else {
if (imask) {
Expand All @@ -351,7 +351,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
const Rcomplex pna = NA_CPLX;
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
pans[idx] = naout ? pna : pthens[idx & thenMask];
} else {
if (imask) {
Expand All @@ -367,7 +367,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
const SEXP pna = NA_STRING;
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
SET_STRING_ELT(ans, idx, naout ? pna : pthens[idx & thenMask]);
} else {
if (imask) {
Expand All @@ -384,7 +384,7 @@ SEXP fcaseR(SEXP rho, SEXP args) {
if (!naout) pthens = SEXPPTR_RO(thens); // the content is not useful if out is NA_LOGICAL scalar
for (int64_t j=0; j<len2; ++j) {
const int64_t idx = imask ? j : p[j];
if (pwhens[idx & whenMask]==1) {
if (pwhens[idx]==1) {
if (!naout) SET_VECTOR_ELT(ans, idx, pthens[idx & thenMask]);
} else {
p[l++] = idx;
Expand Down

0 comments on commit 26c2c08

Please sign in to comment.