diff --git a/CODEOWNERS b/CODEOWNERS index af741a6a4..918f4bee2 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -35,6 +35,10 @@ /src/shift.c @ben-schwen @michaelchirico /man/shift.Rd @ben-schwen @michaelchirico +# rbind +/src/rbindlist.c @ben-schwen +/man/rbindlist.Rd @ben-schwen + # translations /inst/po/ @michaelchirico /po/ @michaelchirico diff --git a/NEWS.md b/NEWS.md index de5b5cb65..493465546 100644 --- a/NEWS.md +++ b/NEWS.md @@ -61,6 +61,8 @@ rowwiseDT( 8. `dt[, col]` now returns a copy of `col` also when it is a list column, as in any other case, [#4877](https://github.com/Rdatatable/data.table/issues/4877). Thanks to @tlapak for reporting and the PR. +9. `rbindlist` and `rbind` binding `bit64::integer64` columns with `character`/`complex`/`list` columns now works, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR. + ## NOTES 1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e1a372f53..0c541e8a4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18736,7 +18736,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans) if (test_bit64) local({ DT = data.table(a = 'abc', b = as.integer64(1)) suppressPackageStartupMessages(unloadNamespace("bit64")) - on.exit(suppressPackageStartupMessages(library(bit64))) + on.exit(suppressPackageStartupMessages(library(bit64, pos="package:base"))) test(2265, DT, output="abc\\s*1$") }) @@ -19208,3 +19208,17 @@ test(2287.1, address(dt[, A]) != address(dt[, A])) l = dt[, A] dt[1L, A := .(list(1L))] test(2287.2, !identical(DT$A[[1L]], l[[1L]])) + +# rbind do not copy class/attributes for integer64 and CPLXSXP/STRSXP/VECSXP #5504 +if (test_bit64) { + a = data.table(as.integer64(c(1,2))) + b = data.table(c("a","b")) + c = data.table(complex(real = 3:4, imaginary = 5:6)) + d = data.table(list(3.5, 4L)) + test(2288.1, rbind(a,b), data.table(c("1","2","a","b"))) + test(2288.2, rbind(b,a), data.table(c("a","b","1","2"))) + test(2288.3, rbind(a,c), data.table(complex(real = 1:4, imaginary = c(0, 0, 5:6)))) + test(2288.4, rbind(c,a), data.table(complex(real = c(3:4, 1:2), imaginary = c(5:6, 0, 0)))) + test(2288.5, rbind(a,d), data.table(list(as.integer64(1), as.integer64(2), 3.5, 4L))) + test(2288.6, rbind(d,a), data.table(list(3.5, 4L, as.integer64(1), as.integer64(2)))) +} diff --git a/src/rbindlist.c b/src/rbindlist.c index 35e5377c9..e53e98c5b 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -338,12 +338,13 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) - if (int64 && maxType!=REALSXP) + if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP)) internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309 SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. + // #5504 do not copy class for mixing int64 and higher maxTypes CPLXSXP/STRSXP/VECSXP + if (!factor && !(int64 && (maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1); @@ -539,7 +540,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor bool listprotect = (TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target); // do an as.list() on the atomic column; #3528 if (listprotect) { - thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); + // coerceAs for int64 to copy attributes (coerceVector does not copy atts) + thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target))); // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); UNPROTECT(1); // earlier unprotect rbindlist calls with lots of lists #4536