diff --git a/NEWS.md b/NEWS.md index 8f3f7e028..262aace17 100644 --- a/NEWS.md +++ b/NEWS.md @@ -66,6 +66,8 @@ 12. data.table's `all.equal()` method now dispatches to each column's own `all.equal()` method as appropriate, [#4543](https://github.com/Rdatatable/data.table/issues/4543). Thanks @MichaelChirico for the report and fix. Note that this had two noteworthy changes to data.table's own test suite that might affect you: (1) comparisons of POSIXct columns compare absolute, not relative differences, meaning that millisecond-scale differences might trigger a "not equal" report that was hidden before; and (2) comparisons of integer64 columns could be totally wrong since they were being compared on the basis of their representation as doubles, not long integers. The former might be a matter of preference requiring you to specify a different `tolerance=`, while the latter was clearly a bug. +12. `rbindlist` could lead to a protection stack overflow when applied to a list containing many nested lists exceeding the pointer protection stack size, [#4536](https://github.com/Rdatatable/data.table/issues/4536). Thanks to @ProfFancyPants for reporting, and Benjamin Schwendinger for the fix. + ## NOTES 1. `transform` method for data.table sped up substantially when creating new columns on large tables. Thanks to @OfekShilon for the report and PR. The implemented solution was proposed by @ColeMiller1. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 26ab04a5e..e84151138 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18719,3 +18719,16 @@ test(2266, eval(parse(text="DT[ , .N, a\U00F1o]$N[1L]")), 2L) DT1 = data.table(t = .POSIXct(1590973200, tz='UTC')) DT2 = data.table(t = .POSIXct(1590974413, tz='UTC')) test(2267, all.equal(DT1, DT2), "Column 't': Mean absolute difference: 1213") + +# rbindlist with many nested lists leads to protection stack overflow #4536 +# test case needs to bind list columns with atomic columns to trigger +x = list( + data.table(V1="a"), + data.table(V1=""), + data.table(V2=list(FALSE)), + data.table(V2=list(list(V2A=FALSE, V2B="", V2C="" ))), + data.table(V1=list(list(V1A="", V1B="", V1C="", V1D="", V1E="")), V3 = list(list(V3A=TRUE, V3B="", V3C=1L, V3D=0L, V3F=1L))) +) +N = 5e4 +y = x[rep(1:5, N)] +test(2267, rbindlist(y, fill=TRUE), rbindlist(x, fill=TRUE)[rep(1:5, N)]) diff --git a/src/rbindlist.c b/src/rbindlist.c index 2d4666e3e..5fd4cf700 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -515,12 +515,14 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow, false); // writeNA is integer64 aware and writes INT64_MIN } else { + bool listprotect = false; if ((TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target)) { // do an as.list() on the atomic column; #3528 - thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); nprotect++; + thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); listprotect = true; } // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); + if (listprotect) UNPROTECT(1); // earlier unprotect rbindlist calls with lots of lists #4536 if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn