Skip to content

Commit

Permalink
allow rbind for integer64 and character/complex/vector (#5874)
Browse files Browse the repository at this point in the history
* allow rbind for integer64 and character/complex

* update comments

* delete carried over line

* add vector support

* add coverage

* add CODEOWNER

* update NEWS

* restore merge cut

* internal error

* trailing ws

* more trailing

* refine comments

* internal error

* fix re-attach bit64

* test imaginary components in complex case

---------

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
  • Loading branch information
ben-schwen and MichaelChirico authored Sep 8, 2024
1 parent 713c701 commit f0aaaf9
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
/src/shift.c @ben-schwen @michaelchirico
/man/shift.Rd @ben-schwen @michaelchirico

# rbind
/src/rbindlist.c @ben-schwen
/man/rbindlist.Rd @ben-schwen

# translations
/inst/po/ @michaelchirico
/po/ @michaelchirico
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ rowwiseDT(

8. `dt[, col]` now returns a copy of `col` also when it is a list column, as in any other case, [#4877](https://github.com/Rdatatable/data.table/issues/4877). Thanks to @tlapak for reporting and the PR.

9. `rbindlist` and `rbind` binding `bit64::integer64` columns with `character`/`complex`/`list` columns now works, [#5504](https://github.com/Rdatatable/data.table/issues/5504). Thanks to @MichaelChirico for the request and @ben-schwen for the PR.

## NOTES

1. Tests run again when some Suggests packages are missing, [#6411](https://github.com/Rdatatable/data.table/issues/6411). Thanks @aadler for the note and @MichaelChirico for the fix.
Expand Down
16 changes: 15 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18736,7 +18736,7 @@ test(2264.8, print(DT, show.indices=TRUE), output=ans)
if (test_bit64) local({
DT = data.table(a = 'abc', b = as.integer64(1))
suppressPackageStartupMessages(unloadNamespace("bit64"))
on.exit(suppressPackageStartupMessages(library(bit64)))
on.exit(suppressPackageStartupMessages(library(bit64, pos="package:base")))
test(2265, DT, output="abc\\s*1$")
})

Expand Down Expand Up @@ -19208,3 +19208,17 @@ test(2287.1, address(dt[, A]) != address(dt[, A]))
l = dt[, A]
dt[1L, A := .(list(1L))]
test(2287.2, !identical(DT$A[[1L]], l[[1L]]))

# rbind do not copy class/attributes for integer64 and CPLXSXP/STRSXP/VECSXP #5504
if (test_bit64) {
a = data.table(as.integer64(c(1,2)))
b = data.table(c("a","b"))
c = data.table(complex(real = 3:4, imaginary = 5:6))
d = data.table(list(3.5, 4L))
test(2288.1, rbind(a,b), data.table(c("1","2","a","b")))
test(2288.2, rbind(b,a), data.table(c("a","b","1","2")))
test(2288.3, rbind(a,c), data.table(complex(real = 1:4, imaginary = c(0, 0, 5:6))))
test(2288.4, rbind(c,a), data.table(complex(real = c(3:4, 1:2), imaginary = c(5:6, 0, 0))))
test(2288.5, rbind(a,d), data.table(list(as.integer64(1), as.integer64(2), 3.5, 4L)))
test(2288.6, rbind(d,a), data.table(list(3.5, 4L, as.integer64(1), as.integer64(2))))
}
8 changes: 5 additions & 3 deletions src/rbindlist.c
Original file line number Diff line number Diff line change
Expand Up @@ -338,12 +338,13 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor

if (!foundName) { static char buff[12]; snprintf(buff,12,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; }
if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option)
if (int64 && maxType!=REALSXP)
if (int64 && !(maxType==REALSXP || maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))
internal_error(__func__, "column %d of result is determined to be integer64 but maxType=='%s' != REALSXP", j+1, type2char(maxType)); // # nocov
if (date && INHERITS(firstCol, char_IDate)) maxType=INTSXP; // first encountered Date determines class and type #5309
SEXP target;
SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list
if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB.
// #5504 do not copy class for mixing int64 and higher maxTypes CPLXSXP/STRSXP/VECSXP
if (!factor && !(int64 && (maxType==STRSXP || maxType==VECSXP || maxType==CPLXSXP))) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB.

if (factor && anyNotStringOrFactor) {
// in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1);
Expand Down Expand Up @@ -539,7 +540,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor
bool listprotect = (TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target);
// do an as.list() on the atomic column; #3528
if (listprotect) {
thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target)));
// coerceAs for int64 to copy attributes (coerceVector does not copy atts)
thisCol = PROTECT(INHERITS(thisCol, char_integer64) ? coerceAs(thisCol, target, ScalarLogical(TRUE)) : coerceVector(thisCol, TYPEOF(target)));
// else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909)
const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName);
UNPROTECT(1); // earlier unprotect rbindlist calls with lots of lists #4536
Expand Down

0 comments on commit f0aaaf9

Please sign in to comment.