Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various fixes for R 3.3.0 #6383

Merged
merged 14 commits into from
Aug 20, 2024
2 changes: 1 addition & 1 deletion R/as.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ as.data.table.list = function(x,
# not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED
# again in future, for #617.
}
if (identical(x, list())) vector("list", nrow) else rep_len(x, nrow) # new objects don't need copy
if (identical(x, list())) vector("list", nrow) else safe_rep_len(x, nrow) # new objects don't need copy
}
vnames = character(ncol)
k = 1L
Expand Down
1 change: 1 addition & 0 deletions R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) {
nchar_chars = nchar(x, 'char')
is_full_width = nchar_width > nchar_chars
idx = pmin(nchar_width, nchar_chars) > trunc.char
if (!any(idx)) return(x) # strtrim() errors for width=integer() on R 3.3.0
x[idx] = paste0(strtrim(x[idx], trunc.char * fifelse(is_full_width[idx], 2L, 1L)), "...")
x
}
Expand Down
11 changes: 11 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ nan_is_na = function(x) {
stopf("Argument 'nan' must be NA or NaN")
}

# TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need.
# Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper.
if (inherits(rep_len(Sys.Date(), 1L), "Date")) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked and this seems still a problem at 3.6.0 so we might need to hold on to this for quite some time, but it would be best to check at which version this is fixed in R and add a comment

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for checking. I suspected that might be the case -- I checked the R NEWS and NEWS.3 files and didn't see anything relevant here (searching 'attrib', 'rep_len'), and glanced at the rep_len() blame to see if anything about attributes popped out, no luck.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually looking again, I think it's this:

r-devel/r-svn@70e8a3f

Then rep_len() on Date dispatches to rep.Date().

That puts us in R 4.0.0:

https://github.com/r-devel/r-svn/blob/874fa1008e6cb090c7c898c422d703ca0026d600/doc/NEWS.Rd#L4885-L4887

# NB: safe_rep_len=rep_len throws an R CMD check error because it _appears_ to the AST
# walker that we've used .Internal ourselves (which is not true, but codetools can't tell:
# safe_rep_len = rep_len; body(safe_rep_len)[[1]] # .Internal)
safe_rep_len = function(x, n) rep_len(x, n)
} else {
safe_rep_len = function(x, n) rep(x, length.out = n) # nolint: rep_len_linter.
}

# endsWith no longer used from #5097 so no need to backport; prevent usage to avoid dev delay until GLCI's R 3.1.0 test
endsWith = function(...) stop("Internal error: use endsWithAny instead of base::endsWith", call.=FALSE)

Expand Down
80 changes: 42 additions & 38 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14788,48 +14788,52 @@ test(2029.3, fread(txt, quote="", fill=TRUE), data.table(A=1:3, B=4:6, C=c(7:8,N

# .Last.updated #1885
d = data.table(a=1:4, b=2:5)
d[, z:=5L]
test(2030.01, .Last.updated, 4L) # new column
d[, z:=6L]
test(2030.02, .Last.updated, 4L) # update existing column
d[2:3, z:=7L]
test(2030.03, .Last.updated, 2L) # sub assign
d[integer(), z:=8L]
test(2030.04, .Last.updated, 0L) # empty sub-assign
d[-1L, z:=9L]
test(2030.05, .Last.updated, 3L) # inverse sub-assign
d[-(1:4), z:=10L]
test(2030.06, .Last.updated, 0L) # inverse empty sub-assign
# NB: On R 3.3.0, .Last.updated gets touched by test() itself and this cascades even to re-assignments
# of .Last.updated like n_updated=.Last.updated, so really force a deep copy by round-tripping from string.
# Seen on R 3.3.0, unsure when this behavior changes.
force_copy = function(n) as.integer(sprintf("%d", n))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again thanks for the clear explanation

d[, z:=5L]; n_updated = force_copy(.Last.updated)
test(2030.01, n_updated, 4L) # new column
d[, z:=6L]; n_updated = force_copy(.Last.updated)
test(2030.02, n_updated, 4L) # update existing column
d[2:3, z:=7L]; n_updated = force_copy(.Last.updated)
test(2030.03, n_updated, 2L) # sub assign
d[integer(), z:=8L]; n_updated = force_copy(.Last.updated)
test(2030.04, n_updated, 0L) # empty sub-assign
d[-1L, z:=9L]; n_updated = force_copy(.Last.updated)
test(2030.05, n_updated, 3L) # inverse sub-assign
d[-(1:4), z:=10L]; n_updated = force_copy(.Last.updated)
test(2030.06, n_updated, 0L) # inverse empty sub-assign
d[, z:=NULL]; n_updated = force_copy(.Last.updated)
test(2030.07, n_updated, 4L) # delete column
d[2:3, z:=11L]; n_updated = force_copy(.Last.updated)
test(2030.08, n_updated, 2L) # new column during sub-assign
d[, z:=NULL]
test(2030.07, .Last.updated, 4L) # delete column
d[2:3, z:=11L]
test(2030.08, .Last.updated, 2L) # new column during sub-assign
d[integer(), z:=12L]; n_updated = force_copy(.Last.updated)
test(2030.09, n_updated, 0L) # new columns from empty sub-assign
d[, z:=NULL]
d[integer(), z:=12L]
test(2030.09, .Last.updated, 0L) # new columns from empty sub-assign
d[, z:=NULL]
d[-(1:4), z:=13L]
test(2030.10, .Last.updated, 0L) # new columns from empty inverse sub-assign
d[, z:=NULL][, z:=14L]
test(2030.11, .Last.updated, 4L) # new column from chaining
d[, z:=NULL][2:3, z:=14L]
test(2030.12, .Last.updated, 2L) # sub-assign from chaining
d[2:3, z:=14L][, z:=NULL]
test(2030.13, .Last.updated, 4L) # delete column from chaining
set(d, 1:2, "z", 15L)
test(2030.14, .Last.updated, 2L) # set() updates .Last.updated too
d[-(1:4), z:=13L]; n_updated = force_copy(.Last.updated)
test(2030.10, n_updated, 0L) # new columns from empty inverse sub-assign
d[, z:=NULL][, z:=14L]; n_updated = force_copy(.Last.updated)
test(2030.11, n_updated, 4L) # new column from chaining
d[, z:=NULL][2:3, z:=14L]; n_updated = force_copy(.Last.updated)
test(2030.12, n_updated, 2L) # sub-assign from chaining
d[2:3, z:=14L][, z:=NULL]; n_updated = force_copy(.Last.updated)
test(2030.13, n_updated, 4L) # delete column from chaining
set(d, 1:2, "z", 15L); n_updated = force_copy(.Last.updated)
test(2030.14, n_updated, 2L) # set() updates .Last.updated too
g = data.table(a=1:4, z=15L) # join
d[g, on="a", z:=i.z]
test(2030.15, .Last.updated, 4L) # all match of all rows
d[g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated)
test(2030.15, n_updated, 4L) # all match of all rows
g = data.table(a=2:4, z=16L) # join
d[, z:=NULL][g, on="a", z:=i.z]
test(2030.16, .Last.updated, 3L) # all match
d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated)
test(2030.16, n_updated, 3L) # all match
g = data.table(a=c(2L,4L,6L), z=17L)
d[, z:=NULL][g, on="a", z:=i.z]
test(2030.17, .Last.updated, 2L) # partial match
d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated)
test(2030.17, n_updated, 2L) # partial match
g = data.table(a=5:6, z=18L)
d[, z:=NULL][g, on="a", z:=i.z]
test(2030.18, .Last.updated, 0L) # zero match
d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated)
test(2030.18, n_updated, 0L) # zero match

# rbind vec with list regression dev 1.12.3; #3528
test(2031.01, rbind(data.table(A=1:3, B=7:9), data.table(A=4:6, B=as.list(10:12))), ans<-data.table(A=1:6, B=as.list(7:12)))
Expand Down Expand Up @@ -18453,7 +18457,7 @@ rm(.datatable.aware)
local({
lc_ctype = Sys.getlocale('LC_CTYPE')
Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8
on.exit({Sys.setlocale('LC_CTYPE', lc_ctype)})
on.exit(Sys.setlocale('LC_CTYPE', lc_ctype))
accented_a = "\u0061\u0301"
ja_ichi = "\u4E00"
ja_ni = "\u4E8C"
Expand Down Expand Up @@ -18718,7 +18722,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans)
# integer64 columns print even when bit64 isn't loaded
if (test_bit64) local({
DT = data.table(a = 'abc', b = as.integer64(1))
invisible(unloadNamespace("bit64"))
suppressPackageStartupMessages(unloadNamespace("bit64"))
on.exit(suppressPackageStartupMessages(library(bit64)))
test(2265, DT, output="abc\\s*1$")
})
Expand Down
5 changes: 4 additions & 1 deletion src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,10 @@ SEXP setdt_nrows(SEXP x)
}
SEXP dim_xi = getAttrib(xi, R_DimSymbol);
R_len_t len_xi;
R_len_t n_dim = LENGTH(dim_xi);
// NB: LENGTH() produces an undefined large number here on R 3.3.0.
// There's also a note in NEWS for R 3.1.0 saying length() should always be used by packages,
// but with some overhead for being a function/not macro...
R_len_t n_dim = length(dim_xi);
if (n_dim) {
if (test_matrix_cols && n_dim > 1) {
warn_matrix_column(i+1);
Expand Down
Loading