diff --git a/R/as.data.table.R b/R/as.data.table.R index 1addc58aa..3137f7532 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -169,7 +169,7 @@ as.data.table.list = function(x, # not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED # again in future, for #617. } - if (identical(x, list())) vector("list", nrow) else rep_len(x, nrow) # new objects don't need copy + if (identical(x, list())) vector("list", nrow) else safe_rep_len(x, nrow) # new objects don't need copy } vnames = character(ncol) k = 1L diff --git a/R/print.data.table.R b/R/print.data.table.R index 2e3dfc69f..528138279 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -248,6 +248,7 @@ char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) { nchar_chars = nchar(x, 'char') is_full_width = nchar_width > nchar_chars idx = pmin(nchar_width, nchar_chars) > trunc.char + if (!any(idx)) return(x) # strtrim() errors for width=integer() on R 3.3.0 x[idx] = paste0(strtrim(x[idx], trunc.char * fifelse(is_full_width[idx], 2L, 1L)), "...") x } diff --git a/R/utils.R b/R/utils.R index cf23609ee..960b17ad1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,6 +21,17 @@ nan_is_na = function(x) { stopf("Argument 'nan' must be NA or NaN") } +# TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need. +# Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper. +if (inherits(rep_len(Sys.Date(), 1L), "Date")) { + # NB: safe_rep_len=rep_len throws an R CMD check error because it _appears_ to the AST + # walker that we've used .Internal ourselves (which is not true, but codetools can't tell: + # safe_rep_len = rep_len; body(safe_rep_len)[[1]] # .Internal) + safe_rep_len = function(x, n) rep_len(x, n) +} else { + safe_rep_len = function(x, n) rep(x, length.out = n) # nolint: rep_len_linter. +} + # endsWith no longer used from #5097 so no need to backport; prevent usage to avoid dev delay until GLCI's R 3.1.0 test endsWith = function(...) stop("Internal error: use endsWithAny instead of base::endsWith", call.=FALSE) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f86ec3ee4..e948166f1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14788,48 +14788,52 @@ test(2029.3, fread(txt, quote="", fill=TRUE), data.table(A=1:3, B=4:6, C=c(7:8,N # .Last.updated #1885 d = data.table(a=1:4, b=2:5) -d[, z:=5L] -test(2030.01, .Last.updated, 4L) # new column -d[, z:=6L] -test(2030.02, .Last.updated, 4L) # update existing column -d[2:3, z:=7L] -test(2030.03, .Last.updated, 2L) # sub assign -d[integer(), z:=8L] -test(2030.04, .Last.updated, 0L) # empty sub-assign -d[-1L, z:=9L] -test(2030.05, .Last.updated, 3L) # inverse sub-assign -d[-(1:4), z:=10L] -test(2030.06, .Last.updated, 0L) # inverse empty sub-assign +# NB: On R 3.3.0, .Last.updated gets touched by test() itself and this cascades even to re-assignments +# of .Last.updated like n_updated=.Last.updated, so really force a deep copy by round-tripping from string. +# Seen on R 3.3.0, unsure when this behavior changes. +force_copy = function(n) as.integer(sprintf("%d", n)) +d[, z:=5L]; n_updated = force_copy(.Last.updated) +test(2030.01, n_updated, 4L) # new column +d[, z:=6L]; n_updated = force_copy(.Last.updated) +test(2030.02, n_updated, 4L) # update existing column +d[2:3, z:=7L]; n_updated = force_copy(.Last.updated) +test(2030.03, n_updated, 2L) # sub assign +d[integer(), z:=8L]; n_updated = force_copy(.Last.updated) +test(2030.04, n_updated, 0L) # empty sub-assign +d[-1L, z:=9L]; n_updated = force_copy(.Last.updated) +test(2030.05, n_updated, 3L) # inverse sub-assign +d[-(1:4), z:=10L]; n_updated = force_copy(.Last.updated) +test(2030.06, n_updated, 0L) # inverse empty sub-assign +d[, z:=NULL]; n_updated = force_copy(.Last.updated) +test(2030.07, n_updated, 4L) # delete column +d[2:3, z:=11L]; n_updated = force_copy(.Last.updated) +test(2030.08, n_updated, 2L) # new column during sub-assign d[, z:=NULL] -test(2030.07, .Last.updated, 4L) # delete column -d[2:3, z:=11L] -test(2030.08, .Last.updated, 2L) # new column during sub-assign +d[integer(), z:=12L]; n_updated = force_copy(.Last.updated) +test(2030.09, n_updated, 0L) # new columns from empty sub-assign d[, z:=NULL] -d[integer(), z:=12L] -test(2030.09, .Last.updated, 0L) # new columns from empty sub-assign -d[, z:=NULL] -d[-(1:4), z:=13L] -test(2030.10, .Last.updated, 0L) # new columns from empty inverse sub-assign -d[, z:=NULL][, z:=14L] -test(2030.11, .Last.updated, 4L) # new column from chaining -d[, z:=NULL][2:3, z:=14L] -test(2030.12, .Last.updated, 2L) # sub-assign from chaining -d[2:3, z:=14L][, z:=NULL] -test(2030.13, .Last.updated, 4L) # delete column from chaining -set(d, 1:2, "z", 15L) -test(2030.14, .Last.updated, 2L) # set() updates .Last.updated too +d[-(1:4), z:=13L]; n_updated = force_copy(.Last.updated) +test(2030.10, n_updated, 0L) # new columns from empty inverse sub-assign +d[, z:=NULL][, z:=14L]; n_updated = force_copy(.Last.updated) +test(2030.11, n_updated, 4L) # new column from chaining +d[, z:=NULL][2:3, z:=14L]; n_updated = force_copy(.Last.updated) +test(2030.12, n_updated, 2L) # sub-assign from chaining +d[2:3, z:=14L][, z:=NULL]; n_updated = force_copy(.Last.updated) +test(2030.13, n_updated, 4L) # delete column from chaining +set(d, 1:2, "z", 15L); n_updated = force_copy(.Last.updated) +test(2030.14, n_updated, 2L) # set() updates .Last.updated too g = data.table(a=1:4, z=15L) # join -d[g, on="a", z:=i.z] -test(2030.15, .Last.updated, 4L) # all match of all rows +d[g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated) +test(2030.15, n_updated, 4L) # all match of all rows g = data.table(a=2:4, z=16L) # join -d[, z:=NULL][g, on="a", z:=i.z] -test(2030.16, .Last.updated, 3L) # all match +d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated) +test(2030.16, n_updated, 3L) # all match g = data.table(a=c(2L,4L,6L), z=17L) -d[, z:=NULL][g, on="a", z:=i.z] -test(2030.17, .Last.updated, 2L) # partial match +d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated) +test(2030.17, n_updated, 2L) # partial match g = data.table(a=5:6, z=18L) -d[, z:=NULL][g, on="a", z:=i.z] -test(2030.18, .Last.updated, 0L) # zero match +d[, z:=NULL][g, on="a", z:=i.z]; n_updated = force_copy(.Last.updated) +test(2030.18, n_updated, 0L) # zero match # rbind vec with list regression dev 1.12.3; #3528 test(2031.01, rbind(data.table(A=1:3, B=7:9), data.table(A=4:6, B=as.list(10:12))), ans<-data.table(A=1:6, B=as.list(7:12))) @@ -18453,7 +18457,7 @@ rm(.datatable.aware) local({ lc_ctype = Sys.getlocale('LC_CTYPE') Sys.setlocale('LC_CTYPE', "en_US.UTF-8") # Japanese multibyte characters require utf8 - on.exit({Sys.setlocale('LC_CTYPE', lc_ctype)}) + on.exit(Sys.setlocale('LC_CTYPE', lc_ctype)) accented_a = "\u0061\u0301" ja_ichi = "\u4E00" ja_ni = "\u4E8C" @@ -18718,7 +18722,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans) # integer64 columns print even when bit64 isn't loaded if (test_bit64) local({ DT = data.table(a = 'abc', b = as.integer64(1)) - invisible(unloadNamespace("bit64")) + suppressPackageStartupMessages(unloadNamespace("bit64")) on.exit(suppressPackageStartupMessages(library(bit64))) test(2265, DT, output="abc\\s*1$") }) diff --git a/src/assign.c b/src/assign.c index a78b9452c..4041bf2a9 100644 --- a/src/assign.c +++ b/src/assign.c @@ -220,7 +220,10 @@ SEXP setdt_nrows(SEXP x) } SEXP dim_xi = getAttrib(xi, R_DimSymbol); R_len_t len_xi; - R_len_t n_dim = LENGTH(dim_xi); + // NB: LENGTH() produces an undefined large number here on R 3.3.0. + // There's also a note in NEWS for R 3.1.0 saying length() should always be used by packages, + // but with some overhead for being a function/not macro... + R_len_t n_dim = length(dim_xi); if (n_dim) { if (test_matrix_cols && n_dim > 1) { warn_matrix_column(i+1);