Skip to content

Commit

Permalink
Handle mean(a,b) under R's startsWith() (#6291)
Browse files Browse the repository at this point in the history
* R's startsWith() doesn't accept non-character input -> regression

* Share logic with .gforce_ok
  • Loading branch information
MichaelChirico authored Jul 17, 2024
1 parent 5170452 commit ff808ae
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
16 changes: 10 additions & 6 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1987,11 +1987,11 @@ DT = function(x, ...) { #4872

.optmean = function(expr) { # called by optimization of j inside [.data.table only. Outside for a small speed advantage.
if (length(expr)==2L) # no parameters passed to mean, so defaults of trim=0 and na.rm=FALSE
return(call(".External",quote(Cfastmean),expr[[2L]], FALSE))
return(call(".External", quote(Cfastmean), expr[[2L]], FALSE))
# return(call(".Internal",expr)) # slightly faster than .External, but R now blocks .Internal in coerce.c from apx Sep 2012
if (length(expr)==3L && startsWith(names(expr)[3L], "na")) # one parameter passed to mean()
return(call(".External",quote(Cfastmean),expr[[2L]], expr[[3L]])) # faster than .Call
assign("nomeanopt",TRUE,parent.frame())
if (length(expr)==3L && .arg_is_narm(expr))
return(call(".External", quote(Cfastmean), expr[[2L]], expr[[3L]])) # faster than .Call
assign("nomeanopt", TRUE, parent.frame())
expr # e.g. trim is not optimized, just na.rm
}

Expand Down Expand Up @@ -3072,13 +3072,17 @@ is_constantish = function(q, check_singleton=FALSE) {
if (q1[[3L]] %chin% gdtfuns) return(q1[[3L]])
NULL
}

# Check for na.rm= in expr in the expected slot; allows partial matching and
# is robust to unnamed expr. Note that NA names are not possible here.
.arg_is_narm <- function(expr, which=3L) !is.null(nm <- names(expr)[which]) && startsWith(nm, "na")

.gforce_ok = function(q, x) {
if (is.N(q)) return(TRUE) # For #334
q1 = .get_gcall(q)
if (is.null(q1)) return(FALSE)
if (!(q2 <- q[[2L]]) %chin% names(x) && q2 != ".I") return(FALSE) # 875
if (length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na") && is_constantish(q[[3L]]))) return(TRUE)
# ^^ base::startWith errors on NULL unfortunately
if (length(q)==2L || (.arg_is_narm(q) && is_constantish(q[[3L]]))) return(TRUE)
switch(as.character(q1),
"shift" = .gshift_ok(q),
"weighted.mean" = .gweighted.mean_ok(q, x),
Expand Down
4 changes: 4 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18739,3 +18739,7 @@ test(2268, rbindlist(y, fill=TRUE), rbindlist(x, fill=TRUE)[rep(1:5, N)])
dt = data.table(x=as.POSIXct(c(NA, NA)))
test(2269.1, fread("x\n \n \n", colClasses="POSIXct"), dt)
test(2269.2, fread("x\n?\n \n", colClasses="POSIXct", na.strings="?"), dt)

# Error found by revdep in #6284: mean(a,b) is valid, expr names() can be NULL
DT = data.table(a = 1, b = 2)
test(2270, options=c(datatable.optimize=1L), DT[, mean(b, 1), by=a], data.table(a=1, V1=2), warning="Unable to optimize call to mean()")

0 comments on commit ff808ae

Please sign in to comment.