diff --git a/NEWS.md b/NEWS.md index 06f1b8771..32097b4ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -69,6 +69,8 @@ rowwiseDT( 5. Some grouping operations run much faster under `verbose=TRUE`, [#6286](https://github.com/Rdatatable/data.table/issues/6286). Thanks @joshhwuu for the report and fix. This overhead was not present on Windows. As a rule, users should expect `verbose=TRUE` operations to run more slowly, as extra statistics might be calculated as part of the report; here was a case where the overhead was particularly high and the fix was particularly easy. +6. `set()` and `:=` now provide some extra guidance for common incorrect approaches to assigning `NULL` to some rows of a list column. The correct way is to put `list(list(NULL))` on the RHS of `:=` (or `.(.(NULL))` for short). Thanks to @MichaelChirico for the suggestion and @Nj221102 for the implementation. + # data.table [v1.16.0](https://github.com/Rdatatable/data.table/milestone/30) (25 August 2024) ## BREAKING CHANGES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c6ef5509c..a6bbdbf86 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5580,9 +5580,14 @@ test(1352.7, DT[,sum(b),by=.(Grp=a%%2)], DT[,sum(b),by=list(Grp=a%%2)]) test(1352.8, DT[,sum(b),by=.(a%%2,c)], DT[,sum(b),by=list(a%%2,c)]) # that :=NULL together with i is now an error -DT = data.table(a=1:3, b=1:6) +DT = data.table(a=1:3, b=1:6, c=list(7, 8, 9, 8, 7, 6)) test(1353.1, DT[2, b:=NULL], error="When deleting columns, i should not be provided") test(1353.2, DT[2, c("a","b"):=list(42, NULL)], error="When deleting columns, i should not be provided") +# #5526: friendlier error nudging to the correct way to sub-assign NULL to list columns +test(1353.3, DT[2, c := NULL], error="Invalid attempt to delete a list column.*did you intend to add NULL") +test(1353.4, DT[2, c := .(NULL)], error="Invalid attempt to delete a list column.*did you intend to add NULL") +test(1353.5, DT[2, `:=`(b=2, c=NULL)], error="Invalid attempt to delete a list column.*did you intend to add NULL") +test(1353.6, DT[2, d := NULL], error="Doubly-invalid attempt to delete a non-existent column while also providing i") # order optimisation caused trouble due to chaining because of 'substitute(x)' usage in [.data.table. set.seed(1L) @@ -7246,7 +7251,7 @@ if (test_bit64) { # fix for #1082 dt1 = data.table(x=rep(c("a","b","c"),each=3), y=c(1,3,6), v=1:9, key=c("x", "y")) dt2 = copy(dt1) -test(1502.1, dt1["a", z := NULL], error="When deleting columns, i should not be provided") +test(1502.1, dt1["a", z := NULL], error="Doubly-invalid attempt to delete a non-existent column while also providing i") # this shouldn't segfault on 'dt1[...]' test(1502.2, dt1["a", z := 42L], dt2["a", z := 42L]) diff --git a/src/assign.c b/src/assign.c index 85798082a..e12d6f449 100644 --- a/src/assign.c +++ b/src/assign.c @@ -467,11 +467,17 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } coln--; SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values; - vlen = length(thisvalue); - if (isNull(thisvalue) && !isNull(rows)) error(_("When deleting columns, i should not be provided")); // #1082, #3089 + if (isNull(thisvalue) && !isNull(rows)) { + if (coln >= oldncol) + error(_("Doubly-invalid attempt to delete a non-existent column while also providing i")); + if (TYPEOF(VECTOR_ELT(dt, coln)) == VECSXP) + error(_("Invalid attempt to delete a list column while also providing i; did you intend to add NULL to those rows instead? If so, use list_col := list(list(NULL)).")); // #5526 + error(_("When deleting columns, i should not be provided")); // #1082, #3089 + } if (coln+1 <= oldncol) colnam = STRING_ELT(names,coln); else colnam = STRING_ELT(newcolnames,coln-length(names)); if (coln+1 <= oldncol && isNull(thisvalue)) continue; // delete existing column(s) afterwards, near end of this function + vlen = length(thisvalue); //if (vlen<1 && nrow>0) { if (coln+1 <= oldncol && nrow>0 && vlen<1 && numToDo>0) { // numToDo > 0 fixes #2829, see test 1911 error(_("RHS of assignment to existing column '%s' is zero length but not NULL. If you intend to delete the column use NULL. Otherwise, the RHS must have length > 0; e.g., NA_integer_. If you are trying to change the column type to be an empty list column then, as with all column type changes, provide a full length RHS vector such as vector('list',nrow(DT)); i.e., 'plonk' in the new column."), CHAR(STRING_ELT(names,coln)));