From 960ff28b76387ee152d48b4bfc27198f53f070d3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 24 Sep 2024 13:52:41 -0400 Subject: [PATCH 1/8] Refactor internal helpers which generate fragmented messages (#6534) * Refactor internal helpers which generate fragmented messages * Update test error strings * deprecate internal group.desc argument --- R/fmelt.R | 35 ++++++++++++++--------------------- R/utils.R | 10 ++++++++-- inst/tests/tests.Rraw | 10 +++++----- man/measure.Rd | 4 +--- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/R/fmelt.R b/R/fmelt.R index 3f924c2e4..55a6fb0ad 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -64,14 +64,11 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") { } fun.list[[fun.i]] = fun } - measurev.args = c( - list(fun.list), - L[formal.i.vec], - list(group.desc="... arguments to measure")) + measurev.args = c(list(fun.list), L[formal.i.vec]) do.call(measurev, measurev.args) } -measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name", group.desc="elements of fun.list"){ +measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name"){ # 1. basic error checking. if (!missing(sep) && !missing(pattern)) { stopf("both sep and pattern arguments used; must use either sep or pattern (not both)") @@ -88,21 +85,11 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na which(!nzchar(names(fun.list))) } if (length(prob.i)) { - stopf("in measurev, %s must be named, problems: %s", group.desc, brackify(prob.i)) + stopf("in measurev, elements of fun.list must be named, problems: %s", brackify(prob.i)) } - err.names.unique = function(err.what, name.vec) { - name.tab = table(name.vec) - bad.counts = name.tab[1 < name.tab] - if (length(bad.counts)) { - stopf("%s should be uniquely named, problems: %s", err.what, brackify(names(bad.counts))) - } - } - err.args.groups = function(type, N){ - if (N != length(fun.list)) { - stopf("number of %s =%d must be same as %s =%d", group.desc, length(fun.list), type, N) - } + if (length(dup.funs <- duplicated_values(names(fun.list)))) { + stopf("elements of fun.list should be uniquely named, problems: %s", brackify(dup.funs)) } - err.names.unique(group.desc, names(fun.list)) # 2. compute initial group data table, used as variable_table attribute.
group.mat = if (!missing(pattern)) { if (!is.character(pattern)) { @@ -117,7 +104,9 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na if (is.null(start)) { stopf("pattern must contain at least one capture group (parenthesized sub-pattern)") } - err.args.groups("number of capture groups in pattern", ncol(start)) + if (ncol(start) != length(fun.list)) { + stopf("number of elements of fun.list (%d) must be the same as the number of capture groups in pattern (%d)", length(fun.list), ncol(start)) + } end = attr(match.vec, "capture.length")[measure.vec.i,]+start-1L measure.vec <- cols[measure.vec.i] names.mat = matrix(measure.vec, nrow(start), ncol(start)) @@ -132,12 +121,16 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na if (n.groups == 1) { stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") } - err.args.groups("max number of items after splitting column names", n.groups) + if (n.groups != length(fun.list)) { + stopf("number of elements of fun.list (%d) must be the same as the max number of items after splitting column names (%d)", length(fun.list), n.groups) + } measure.vec.i = which(vector.lengths==n.groups) measure.vec = cols[measure.vec.i] do.call(rbind, list.of.vectors[measure.vec.i]) } - err.names.unique("measured columns", measure.vec) + if (length(dup.measures <- duplicated_values(measure.vec))) { + stopf("measured columns should be uniquely named, problems: %s", brackify(dup.measures)) + } uniq.mat = unique(group.mat) if (nrow(uniq.mat) < nrow(group.mat)) { stopf("number of unique column IDs =%d is less than number of melted columns =%d; fix by changing pattern/sep", nrow(uniq.mat), nrow(group.mat)) diff --git a/R/utils.R b/R/utils.R index 2f91491be..4a1aadd43 100644 --- a/R/utils.R +++ b/R/utils.R @@ -34,11 +34,17 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) { if (!anyDuplicated(nm <- names(x))) return(invisible()) duplicate_names = unique(nm[duplicated(nm)]) stopf(ngettext(length(duplicate_names), - "%s has duplicated column name %s. Please remove or rename the duplicate and try again.", - "%s has duplicated column names %s. Please remove or rename the duplicates and try again."), + "%s has duplicated column name %s. Please remove or rename the duplicate and try again.", + "%s has duplicated column names %s. Please remove or rename the duplicates and try again."), table_name, brackify(duplicate_names), domain=NA) } +duplicated_values = function(x) { + # fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case + if (!anyDuplicated(x)) return(vector(typeof(x))) + unique(x[duplicated(x)]) +} + # TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need. # Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper. # This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len(). 
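For readers skimming the diff, here is a minimal sketch of how the new internal duplicated_values() helper (added to R/utils.R above) behaves. The function body is copied from the hunk; the example inputs are illustrative only and not part of the patch:

    duplicated_values = function(x) {
      # fast path: anyDuplicated() returns 0 when there are no duplicates,
      # so the full duplicated() pass only runs on the (usually error) path
      if (!anyDuplicated(x)) return(vector(typeof(x)))  # zero-length vector of x's type
      unique(x[duplicated(x)])                          # each duplicated value reported once
    }
    duplicated_values(c("value.name", "dim", "value.name"))  # "value.name"
    duplicated_values(1:5)                                   # integer(0)
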
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2b3958acd..322530139 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17301,11 +17301,11 @@ test(2183.00020, melt(iris.dt, measure.vars=measurev(value.name, dim, sep=".", p test(2183.000201, melt(iris.dt, measure.vars=measurev(list(NULL, dim=NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1]") test(2183.000202, melt(iris.dt, measure.vars=measurev(list(NULL, NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1, 2]") test(2183.00027, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim="bar"), sep=".")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: dim") -test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list =3 must be same as max number of items after splitting column names =2") +test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)") test(2183.00042, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function()1), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr") test(2183.00043, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=interactive), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr") test(2183.00044, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function(x)1), pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") -test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list =3 must be same as number of capture groups in pattern =2") +test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)") test(2183.00048, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, value.name=NULL), sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]") # measure with factor conversion. myfac = function(x)factor(x)#user-defined conversion function. @@ -17348,7 +17348,7 @@ test(2183.24, names(melt(iris.dt, measure.vars=measure(value.name, dim, sep=".") test(2183.25, names(melt(iris.dt, measure.vars=measure(part, value.name, sep="."))), c("Species", "part", "Length", "Width")) test(2183.26, names(melt(iris.dt, measure.vars=measure(part, dim, sep="."))), c("Species", "part", "dim", "value")) test(2183.27, melt(iris.dt, measure.vars=measure(value.name, dim="bar", sep=".")), error="each ... argument to measure must be a function with at least one argument, problem: dim") -test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of ... 
arguments to measure =3 must be same as max number of items after splitting column names =2") +test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)") test(2183.29, melt(iris.dt, measure.vars=measure()), error="each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") # patterns with iris data. test(2183.40, names(melt(iris.dt, measure.vars=patterns("[.]"))), c("Species", "variable", "value")) @@ -17357,10 +17357,10 @@ test(2183.41, melt(DTid, measure.vars=measure(value.name, istr="bar", pattern="( test(2183.42, melt(DTid, measure.vars=measure(value.name, istr=function()1, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") test(2183.43, melt(DTid, measure.vars=measure(value.name, istr=interactive, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") test(2183.44, melt(DTid, measure.vars=measure(value.name, istr=function(x)1, pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") -test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of ... arguments to measure =3 must be same as number of capture groups in pattern =2") +test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)") test(2183.46, melt(iris.dt, measure.vars=measure(function(x)factor(x), dim, pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]") test(2183.47, melt(iris.dt, measure.vars=measure(function(x)factor(x), pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]") -test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="... arguments to measure should be uniquely named, problems: [value.name]") +test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]") # measure with factor conversion. myfac = function(x)factor(x)#user-defined conversion function. 
test(2183.60, melt(DTid, measure.vars=measure(letter=myfac, value.name, pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1))) diff --git a/man/measure.Rd b/man/measure.Rd index 73a315e00..b7933e17e 100644 --- a/man/measure.Rd +++ b/man/measure.Rd @@ -18,8 +18,7 @@ } \usage{ measure(\dots, sep, pattern, cols, multiple.keyword="value.name") -measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name", - group.desc="elements of fun.list") +measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name") } \arguments{ \item{\dots}{One or more (1) symbols (without argument name; symbol @@ -44,7 +43,6 @@ measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name", value columns (with names defined by the unique values in that group). Otherwise if the string not used as a group name, then measure returns a vector and melt returns a single value column.} - \item{group.desc}{Internal, used in error messages.} } \seealso{ \code{\link{melt}}, From 2a70e2f63cc94c97d35bc493b57321f884539755 Mon Sep 17 00:00:00 2001 From: rikivillalba <32423469+rikivillalba@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:33:56 -0300 Subject: [PATCH 2/8] column *indices* should be *positions* (#6537) Here column *indices* might be *positions* since *indices* better refers to `indices()`. Can be also *numbers* though (while re-reading for translations...;) --- vignettes/datatable-reshape.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index c84c1558d..e5efb660f 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -80,7 +80,7 @@ str(DT.m1) * `measure.vars` specify the set of columns we would like to collapse (or combine) together. -* We can also specify column *indices* instead of *names*. +* We can also specify column *positions* instead of *names*. * By default, `variable` column is of type `factor`. Set `variable.factor` argument to `FALSE` if you'd like to return a *`character`* vector instead. From 92a29f8baa4425a9cd71c261c9715a7df903133f Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 24 Sep 2024 21:08:43 -0400 Subject: [PATCH 3/8] Add support for patterns(cols=user_provided) (#6510) * grep(value=TRUE) in patterns() * test user-defined cols subset of names(DT) * vector(s) * comment ref issue * adjust NEWS --------- Co-authored-by: Toby Dylan Hocking Co-authored-by: Michael Chirico --- NEWS.md | 2 ++ R/fmelt.R | 2 +- inst/tests/tests.Rraw | 3 +++ man/patterns.Rd | 4 ++-- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 99e4c8598..4bf066f41 100644 --- a/NEWS.md +++ b/NEWS.md @@ -63,6 +63,8 @@ rowwiseDT( # 15: All values Total Total 999 999 NaN 10 ``` +4. `patterns()` in `melt()` combines correctly with user-defined `cols=`, which can be useful to specify a subset of columns to reshape without having to use a regex, for example `patterns("2", cols=c("y1", "y2"))` will only give `y2` even if there are other columns in the input matching `2`, [#6498](https://github.com/Rdatatable/data.table/issues/6498). Thanks to @hongyuanjia for the report, and to @tdhock for the PR. + ## BUG FIXES 1. Using `print.data.table()` with character truncation using `datatable.prettyprint.char` no longer errors with `NA` entries, [#6441](https://github.com/Rdatatable/data.table/issues/6441). Thanks to @r2evans for the bug report, and @joshhwuu for the fix. 
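To make NEWS item 4 concrete, a small usage sketch assuming a data.table build with this patch applied; it mirrors new test 1866.7 added to tests.Rraw below, and the example table is illustrative:

    library(data.table)
    DT = data.table(x1 = 1, x2 = 2, y1 = 3, y2 = 4)
    # cols= limits the search space, so "2" now matches only y2 (not x2):
    melt(DT, measure.vars = patterns("2", cols = c("y1", "y2")))
    # melts only y2: x1, x2, y1 remain id columns; variable == "y2", value == 4
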
diff --git a/R/fmelt.R b/R/fmelt.R index 55a6fb0ad..c0cc47e9c 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -29,7 +29,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe p = unlist(L, use.names = any(nzchar(names(L)))) if (!is.character(p)) stopf("Input patterns must be of type character.") - matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes) + matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes, value=TRUE) if (length(idx <- which(lengths(matched) == 0L))) stopf(ngettext(length(idx), 'Pattern not found: [%s]', 'Patterns not found: [%s]'), brackify(p[idx]), domain=NA) if (length(matched) == 1L) return(matched[[1L]]) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 322530139..cb70d4314 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12427,6 +12427,9 @@ DTout = data.table( value2 = c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j") ) test(1866.6, melt(DT, measure.vars = patterns("^x", "^y", cols=names(DT))), DTout) +# patterns supports cols arg, #6498 +test(1866.7, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) +test(1866.8, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) # informative errors for bad user-provided cols arg to patterns DT = data.table(x1=1,x2=2,y1=3,y2=4) diff --git a/man/patterns.Rd b/man/patterns.Rd index cd3d3fd8b..d48bb9724 100644 --- a/man/patterns.Rd +++ b/man/patterns.Rd @@ -2,8 +2,8 @@ \alias{patterns} \title{Obtain matching indices corresponding to patterns} \description{ -\code{patterns} returns the matching indices in the argument \code{cols} -corresponding to the regular expression patterns provided. The patterns must be +\code{patterns} returns the elements of \code{cols} +that match the regular expression patterns, which must be supported by \code{\link[base]{grep}}. 
From \code{v1.9.6}, \code{\link{melt.data.table}} has an enhanced functionality From e22f7742aee78651fe09aed1434625947dda600a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 24 Sep 2024 21:47:45 -0400 Subject: [PATCH 4/8] make test numbers strictly increasing (#6538) --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cb70d4314..f12bea83d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12428,8 +12428,8 @@ DTout = data.table( ) test(1866.6, melt(DT, measure.vars = patterns("^x", "^y", cols=names(DT))), DTout) # patterns supports cols arg, #6498 -test(1866.7, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) -test(1866.8, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) +test(1866.71, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) +test(1866.72, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) # informative errors for bad user-provided cols arg to patterns DT = data.table(x1=1,x2=2,y1=3,y2=4) From 34ec8e9b94d34bab94408693de010588d80459f9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:09:50 -0700 Subject: [PATCH 5/8] Refactor CHECK_RANGE to be i18n friendly (#6530) * Refactor CHECK_RANGE to be i18n friendly * Move column_desc buffer inside columnDesc routine * make nominative case clearer --- src/assign.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4402608ff..4fc05d992 100644 --- a/src/assign.c +++ b/src/assign.c @@ -744,6 +744,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects +const char *columnDesc(int colnum, const char *colname) { + static char column_desc[MSGSIZE+1]; // can contain column names, hence relatively large allocation. + snprintf(column_desc, MSGSIZE, _("(column %d named '%s')"), colnum, colname); + return column_desc; +} + const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() @@ -932,18 +938,17 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. 
// The idea is to do these range checks without calling coerceVector() (which allocates) - #define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \ + #define CHECK_RANGE(STYPE, RFUN, COND, FMTVAL, FMT) {{ \ const STYPE *sd = (const STYPE *)RFUN(source); \ for (int i=0; i255, "d", "taken as 0", val) + case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, val, _("%d (type '%s' at RHS position %d taken as 0 when assigning to type '%s' %s")) case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0", val) - else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0", val) + CHECK_RANGE(int64_t, REAL, val<0 || val>255, val, _("%"PRId64" (type '%s') at RHS position %d taken as 0 when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, val, _("%f (type '%s') at RHS position %d either truncated (precision lost) or taken as 0 when assigning to type '%s' %s")) } break; case INTSXP: switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), val, _("%"PRId64" (type '%s') at RHS position %d out-of-range (NA) when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) } break; case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) - else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), "f", "imaginary part discarded", val.i) + (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) + else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), val.r, _("%f (type '%s') at RHS position %d imaginary part discarded when assigning 
to type '%s' %s")) } } } From 623bee9ec292a3f178946be05836ffc0e11666ee Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:10:27 -0700 Subject: [PATCH 6/8] Add notranslate for untranslateable fragments (#6539) --- src/assign.c | 3 ++- src/bmerge.c | 3 ++- src/dogroups.c | 12 ++++++++---- src/forder.c | 7 +++---- src/fread.c | 14 +++++++------- src/freadR.c | 5 +++-- src/fsort.c | 6 ++++-- src/fwrite.c | 11 ++++++----- src/gsumm.c | 3 ++- src/openmp-utils.c | 20 +++++++++++--------- src/reorder.c | 6 ++++-- src/utils.c | 2 +- 12 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4fc05d992..dc6b9a6de 100644 --- a/src/assign.c +++ b/src/assign.c @@ -437,7 +437,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) values = PROTECT(eval(PROTECT(lang2(sym_as_posixct, values)), R_GlobalEnv)); protecti+=2; } bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below - if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false"); + if (verbose) + Rprintf("RHS_list_of_columns == %s\n", RHS_list_of_columns ? "true" : "false"); // # notranslate if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) { SEXP item = VECTOR_ELT(values,0); if (isNull(item) || length(item)==1 || length(item)==targetlen) { diff --git a/src/bmerge.c b/src/bmerge.c index d290d70c6..cb71f9c20 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -66,7 +66,8 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r if (xcols[col]>LENGTH(xdt) || xcols[col]<1) error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt)); int it = TYPEOF(VECTOR_ELT(idt, icols[col]-1)); int xt = TYPEOF(VECTOR_ELT(xdt, xcols[col]-1)); - if (iN && it!=xt) error(_("typeof x.%s (%s) != typeof i.%s (%s)"), CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); + if (iN && it!=xt) + error("typeof x.%s (%s) != typeof i.%s (%s)", CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); // # notranslate if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP) error(_("Type '%s' is not supported for joining/merging"), type2char(it)); } diff --git a/src/dogroups.c b/src/dogroups.c index d55439332..0717ce0c1 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -106,7 +106,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } setAttrib(BY, R_NamesSymbol, bynames); // Fix for #42 - BY doesn't retain names anymore R_LockBinding(sym_BY, env); - if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) error(_("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]"),length(bynames),length(groups),length(grpcols)); + if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) + error("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]", length(bynames), length(groups), length(grpcols)); // # notranslate // TO DO: check this check above. N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk @@ -135,7 +136,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // fetch names of .SD and prepare symbols. 
In case they are copied-on-write by user assigning to those variables // using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group. SEXP names = PROTECT(getAttrib(SDall, R_NamesSymbol)); nprotect++; - if (length(names) != length(SDall)) error(_("length(names)!=length(SD)")); + if (length(names) != length(SDall)) + error("length(names)!=length(SD)"); // # notranslate SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP)); for(int i=0; i=17&&i<=19)?"(*)":" ", tblock[i], nblock[i]); } - for (int i=0; i<=256; i++) { - if (stat[i]) Rprintf(_("stat[%03d]==%20"PRIu64"\n"), i, (uint64_t)stat[i]); - } + for (int i=0; i<=256; i++) if (stat[i]) + Rprintf("stat[%03d]==%20"PRIu64"\n", i, (uint64_t)stat[i]); // # notranslate } #endif return ans; diff --git a/src/fread.c b/src/fread.c index 746d38863..1169f645d 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1340,10 +1340,10 @@ int freadMain(freadMainArgs _args) { if (*NAstrings == NULL) { DTPRINT(_(" No NAstrings provided.\n")); } else { - DTPRINT(_(" NAstrings = [")); + DTPRINT(" NAstrings = ["); // # notranslate const char * const* s = NAstrings; while (*s++) DTPRINT(*s? "<<%s>>, " : "<<%s>>", s[-1]); - DTPRINT(_("]\n")); + DTPRINT("]\n"); // # notranslate if (any_number_like_NAstrings) DTPRINT(_(" One or more of the NAstrings looks like a number.\n")); else @@ -2085,14 +2085,14 @@ int freadMain(freadMainArgs _args) { // sd can be very close to 0.0 sometimes, so apply a +10% minimum // blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed. if (verbose) { - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate DTPRINT(_(" Sampled %"PRIu64" rows (handled \\n inside quoted fields) at %d jump points\n"), (uint64_t)sampleLines, nJumps); DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %"PRIu64"\n"), row1line, (uint64_t)bytesRead); DTPRINT(_(" Line length: mean=%.2f sd=%.2f min=%d max=%d\n"), meanLineLen, sd, minLen, maxLen); DTPRINT(_(" Estimated number of rows: %"PRIu64" / %.2f = %"PRIu64"\n"), (uint64_t)bytesRead, meanLineLen, (uint64_t)estnrow); DTPRINT(_(" Initial alloc = %"PRIu64" rows (%"PRIu64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"), (uint64_t)allocnrow, (uint64_t)estnrow, (int)(100.0*allocnrow/estnrow-100.0)); - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate } else { if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRIu64") > allocnrow(%"PRIu64")", (uint64_t)sampleLines, (uint64_t)allocnrow); // # nocov } @@ -2266,8 +2266,8 @@ int freadMain(freadMainArgs _args) { if (verbose) DTPRINT(_("[11] Read the data\n")); read: // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps restartTeam = false; - if (verbose) DTPRINT(_(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n"), - jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); + if (verbose) + DTPRINT(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n", jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); // # notranslate ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRIu64") <= nrowLimit(%"PRIu64")", (uint64_t)allocnrow, (uint64_t)nrowLimit); #pragma omp parallel num_threads(nth) { @@ -2744,7 +2744,7 @@ int freadMain(freadMainArgs _args) { } if (verbose) { - DTPRINT(_("=============================\n")); + DTPRINT("=============================\n"); // # notranslate if (tTot<0.000001) tTot=0.000001; // to avoid nan% output in some trivially small 
tests where tot==0.000s DTPRINT(_("%8.3fs (%3.0f%%) Memory map %.3fGB file\n"), tMap-t0, 100.0*(tMap-t0)/tTot, 1.0*fileSize/(1024*1024*1024)); DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0*(tLayout-tMap)/tTot); diff --git a/src/freadR.c b/src/freadR.c index 8063ca707..05b8cd00e 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -155,7 +155,8 @@ SEXP freadR( args.skipEmptyLines = LOGICAL(skipEmptyLinesArg)[0]; args.fill = INTEGER(fillArg)[0]; args.showProgress = LOGICAL(showProgressArg)[0]; - if (INTEGER(nThreadArg)[0]<1) error(_("nThread(%d)<1"), INTEGER(nThreadArg)[0]); + if (INTEGER(nThreadArg)[0]<1) + error("nThread(%d)<1", INTEGER(nThreadArg)[0]); // # notranslate args.nth = (uint32_t)INTEGER(nThreadArg)[0]; args.verbose = verbose; args.warningsAreErrors = warningsAreErrors; @@ -716,7 +717,7 @@ void __halt(bool warn, const char *format, ...) { // if (warn) warning(_("%s"), msg); // this warning() call doesn't seem to honor warn=2 straight away in R 3.6, so now always call error() directly to be sure // we were going via warning() before to get the (converted from warning) prefix in the message (which we could mimic in future) - error(_("%s"), msg); // include "%s" because data in msg might include '%' + error("%s", msg); // # notranslate. include "%s" because data in msg might include '%' } void prepareThreadContext(ThreadLocalFreadParsingContext *ctx) {} diff --git a/src/fsort.c b/src/fsort.c index 5501cdbaa..a3bc38a2b 100644 --- a/src/fsort.c +++ b/src/fsort.c @@ -129,7 +129,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int nth = getDTthreads(xlength(x), true); int nBatch=nth*2; // at least nth; more to reduce last-man-home; but not too large to keep counts small in cache - if (verbose) Rprintf(_("nth=%d, nBatch=%d\n"),nth,nBatch); + if (verbose) + Rprintf("nth=%d, nBatch=%d\n", nth, nBatch); // # notranslate size_t batchSize = (xlength(x)-1)/nBatch + 1; if (batchSize < 1024) batchSize = 1024; // simple attempt to work reasonably for short vector. 1024*8 = 2 4kb pages @@ -185,7 +186,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int MSBNbits = maxBit > 15 ? 16 : maxBit+1; // how many bits make up the MSB int shift = maxBit + 1 - MSBNbits; // the right shift to leave the MSB bits remaining size_t MSBsize = 1LL< 65,536) - if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n"), maxBit, MSBNbits, shift, MSBsize); + if (verbose) + Rprintf("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n", maxBit, MSBNbits, shift, MSBsize); // # notranslate uint64_t *counts = (uint64_t *)R_alloc(nBatch*MSBsize, sizeof(uint64_t)); memset(counts, 0, nBatch*MSBsize*sizeof(uint64_t)); diff --git a/src/fwrite.c b/src/fwrite.c index 06255b052..757da1b0b 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -628,15 +628,16 @@ void fwriteMain(fwriteMainArgs args) if (verbose) { DTPRINT(_("Column writers: ")); + // # notranslate start if (args.ncol<=50) { - for (int j=0; j Date: Thu, 26 Sep 2024 06:14:47 -0700 Subject: [PATCH 7/8] Restore covr (#6540) * Revert "Disable covr (#6122)" This reverts commit 3130f11bc05d1609ee5373613b675f8890befba2. * pin R 4.3 with TODO * Try a pinned covr version * go further back * Use cribbed updated covr action? 
* Not crashing, but very slow; try pinned version again * we don't have secrets set up * Added a repo secret to data.table (I think) --- .github/workflows/test-coverage.yaml | 55 ++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/test-coverage.yaml diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 000000000..79fcd6fdc --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,55 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [master] + pull_request: + branches: [master] + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + r-version: '4.3' # TODO(r-lib/covr#567): Go back to using r-devel + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package From 0c957c70d019ebf36c44c988d5b2a4def9c05c92 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 18:33:34 -0700 Subject: [PATCH 8/8] # nocov for recent missed lines (#6541) * # nocov for recent missed lines * Need '__func__' in internal_error() * nocov new internal_error --- R/data.table.R | 6 +++--- R/fread.R | 4 +++- R/fwrite.R | 2 ++ R/last.R | 2 ++ R/xts.R | 2 ++ src/assign.c | 8 ++++---- src/between.c | 2 +- src/bmerge.c | 4 ++-- src/chmatch.c | 2 +- src/dogroups.c | 6 +++--- src/fastmean.c | 4 ++-- src/fmelt.c | 2 +- src/forder.c | 41 ++++++++++++++++++++--------------------- src/fread.c | 24 ++++++++++++++---------- src/fsort.c | 4 ++-- src/fwrite.c | 8 +++++--- src/gsumm.c | 10 +++++----- src/init.c | 2 +- src/rbindlist.c | 6 +++--- src/subset.c | 2 +- src/uniqlist.c | 8 ++++---- 21 files changed, 81 insertions(+), 68 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index fe41ed2d3..7b48704a1 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2256,8 +2256,8 @@ tail.data.table = function(x, n=6L, ...) { "$<-.data.table" = function(x, name, value) { if (!cedta()) { - ans = `$<-.data.frame`(x, name, value) - return(setalloccol(ans)) # over-allocate (again) + ans = `$<-.data.frame`(x, name, value) # nocov + return(setalloccol(ans)) # nocov. 
over-allocate (again) } x = copy(x) set(x,j=name,value=value) # important i is missing here @@ -2430,7 +2430,7 @@ which_ = function(x, bool = TRUE) { } is.na.data.table = function(x) { - if (!cedta()) return(is.na.data.frame(x)) + if (!cedta()) return(is.na.data.frame(x)) # nocov do.call(cbind, lapply(x, is.na)) } diff --git a/R/fread.R b/R/fread.R index 883d445c1..e4970749d 100644 --- a/R/fread.R +++ b/R/fread.R @@ -158,6 +158,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } # whitespace at the beginning or end of na.strings is checked at C level and is an error there; test 1804 } + # nocov start. Tested in other.Rraw tests 16, not in the main suite. if (yaml) { if (!requireNamespace('yaml', quietly = TRUE)) stopf("'data.table' relies on the package 'yaml' to parse the file header; please add this to your library with install.packages('yaml') and try again.") # nocov @@ -267,6 +268,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } if (is.integer(skip)) skip = skip + n_read } + # nocov end warnings2errors = getOption("warn") >= 2 stopifnot(identical(tz,"UTC") || identical(tz,"")) if (tz=="") { @@ -341,7 +343,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") key = cols_from_csv(key) setkeyv(ans, key) } - if (yaml) setattr(ans, 'yaml_metadata', yaml_header) + if (yaml) setattr(ans, 'yaml_metadata', yaml_header) # nocov if (!is.null(index) && data.table) { if (!all(vapply_1b(index, is.character))) stopf("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)") diff --git a/R/fwrite.R b/R/fwrite.R index ad92859f3..d67c886d1 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -91,6 +91,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", return(invisible()) } } + # nocov start. See test 17 in other.Rraw, not tested in the main suite. yaml = if (!yaml) "" else { if (!requireNamespace('yaml', quietly=TRUE)) stopf("'data.table' relies on the package 'yaml' to write the file header; please add this to your library with install.packages('yaml') and try again.") # nocov @@ -114,6 +115,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", ) paste0('---', eol, yaml::as.yaml(yaml_header, line.sep=eol), '---', eol) # NB: as.yaml adds trailing newline } + # nocov end file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078. .Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread, diff --git a/R/last.R b/R/last.R index 0a889e622..a93ec3173 100644 --- a/R/last.R +++ b/R/last.R @@ -1,6 +1,7 @@ # data.table defined last(x) with no arguments, just for last. If you need the last 10 then use tail(x,10). # for xts class objects it will dispatch to xts::last # reworked to avoid loading xts namespace (#3857) then again to fix dispatching of xts class (#4053) +# nocov start. Tests 19.* in other.Rraw, not in the main suite. last = function(x, n=1L, ...) { verbose = isTRUE(getOption("datatable.verbose", FALSE)) if (!inherits(x, "xts")) { @@ -82,3 +83,4 @@ first = function(x, n=1L, ...) { xts::first(x, n=n, ...) } } +# nocov end diff --git a/R/xts.R b/R/xts.R index 234f36cac..bd4cb75b3 100644 --- a/R/xts.R +++ b/R/xts.R @@ -1,3 +1,4 @@ +# nocov start. See tests 18.* in other.Rraw, not in the main suite. as.data.table.xts = function(x, keep.rownames = TRUE, key=NULL, ...) 
{ stopifnot(requireNamespace("xts"), !missing(x), xts::is.xts(x)) if (length(keep.rownames) != 1L) stopf("keep.rownames must be length 1") @@ -26,3 +27,4 @@ as.xts.data.table = function(x, numeric.only = TRUE, ...) { } return(xts::xts(as.matrix(r), order.by = if (inherits(x[[1L]], "IDate")) as.Date(x[[1L]]) else x[[1L]])) } +# nocov end diff --git a/src/assign.c b/src/assign.c index dc6b9a6de..b280c2259 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1190,7 +1190,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } } } break; - default : + default : // # nocov error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target))); // # nocov } UNPROTECT(protecti); @@ -1244,7 +1244,7 @@ void writeNA(SEXP v, const int from, const int n, const bool listNA) case EXPRSXP : for (int i=from; i<=to; ++i) SET_VECTOR_ELT(v, i, R_NilValue); break; - default : + default : // # nocov internal_error(__func__, "Unsupported type '%s' for v", type2char(TYPEOF(v))); // # nocov } } @@ -1283,8 +1283,8 @@ void savetl_init(void) { saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); if (!saveds || !savedtl) { - free(saveds); free(savedtl); - savetl_end(); // # nocov + free(saveds); free(savedtl); // # nocov + savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov } } diff --git a/src/between.c b/src/between.c index 25d85b57c..808bf2d35 100644 --- a/src/between.c +++ b/src/between.c @@ -192,7 +192,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAboundsArg, S } if (verbose) Rprintf(_("between non-parallel processing of character took %8.3fs\n"), omp_get_wtime()-tic); } break; - default: + default: // # nocov internal_error(__func__, "unsupported type '%s' should have been caught at R level", type2char(TYPEOF(x))); // # nocov } UNPROTECT(nprotect); diff --git a/src/bmerge.c b/src/bmerge.c index cb71f9c20..f6f640e71 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -381,8 +381,8 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg } break; // supported types were checked up front to avoid handling an error here in (future) parallel region - default: - error(_("Type '%s' is not supported for joining/merging"), type2char(TYPEOF(xc))); + default: // # nocov + internal_error("Invalid join/merge type '%s' should have been caught earlier", type2char(TYPEOF(xc))); // # nocov } if (xlow= 0; } } break; - default : + default : // # nocov STOP(_("type '%s' is not yet supported"), type2char(TYPEOF(x))); // # nocov } if (!ok) return ScalarLogical(FALSE); // not sorted so return early diff --git a/src/fread.c b/src/fread.c index 1169f645d..7b89b3294 100644 --- a/src/fread.c +++ b/src/fread.c @@ -442,7 +442,7 @@ double copyFile(size_t fileSize) // only called in very very rare cases double tt = wallclock(); mmp_copy = (char *)malloc((size_t)fileSize + 1 /* extra \0 */); if (!mmp_copy) - return -1.0; + return -1.0; // # nocov memcpy(mmp_copy, mmp, fileSize); sof = mmp_copy; eof = (char *)mmp_copy + fileSize; @@ -563,7 +563,7 @@ static void Field(FieldParseContext *ctx) if (ch!=ch2) fieldStart--; // field ending is this sep|eol; neither (*1) or (*2) happened; opening quote wasn't really an opening quote } break; - default: + default: // # nocov return; // # nocov Internal error: undefined quote rule } target->len = (int32_t)(ch - fieldStart); @@ -1540,14 +1540,16 @@ int freadMain(freadMainArgs _args) { // In future, we may 
discover a way to mmap fileSize+1 on all OS when fileSize%4096==0, reliably. If and when, this clause can be updated with no code impact elsewhere. double time_taken = copyFile(fileSize); if (time_taken == -1.0) { + // # nocov start if (!verbose) DTPRINT("%s. Attempt to copy file in RAM failed.", msg); STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + // # nocov end } if (verbose) DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken); else if (time_taken > 0.5) - DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded + DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded } } *_const_cast(eof) = '\0'; // cow page @@ -1822,14 +1824,16 @@ int freadMain(freadMainArgs _args) { ASSERT(mmp_copy==NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov double time_taken = copyFile(fileSize); if (time_taken == -1.0) { + // # nocov start if (!verbose) DTPRINT("%s. Attempt to copy file in RAM failed.", msg); STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + // # nocov end } if (verbose) DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken); - else if (tt>0.5) - DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded + else if (tt>0.5) // # nocov + DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded pos = sof + (pos-(const char *)mmp); firstJumpEnd = sof + (firstJumpEnd-(const char *)mmp); } else { @@ -1858,8 +1862,8 @@ int freadMain(freadMainArgs _args) { type = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); tmpType = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); // used i) in sampling to not stop on errors when bad jump point and ii) when accepting user overrides if (!type || !tmpType) { - free(type); free(tmpType); - STOP(_("Failed to allocate 2 x %d bytes for type and tmpType: %s"), ncol, strerror(errno)); + free(type); free(tmpType); // # nocov + STOP(_("Failed to allocate 2 x %d bytes for type and tmpType: %s"), ncol, strerror(errno)); // # nocov } if (sep == ',' && dec == '\0') { // if sep=',' detected, don't attempt to detect dec [NB: . 
is not par of seps] @@ -2119,7 +2123,7 @@ int freadMain(freadMainArgs _args) { } else { colNames = (lenOff*) calloc((size_t)ncol, sizeof(lenOff)); if (!colNames) - STOP(_("Unable to allocate %d*%d bytes for column name pointers: %s"), ncol, sizeof(lenOff), strerror(errno)); + STOP(_("Unable to allocate %d*%d bytes for column name pointers: %s"), ncol, sizeof(lenOff), strerror(errno)); // # nocov if (sep==' ') while (*ch==' ') ch++; void *targets[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, colNames + autoFirstColName}; FieldParseContext fctx = { @@ -2174,7 +2178,7 @@ int freadMain(freadMainArgs _args) { rowSize8 = 0; size = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); // TODO: remove size[] when we implement Pasha's idea to += size inside processor if (!size) - STOP(_("Failed to allocate %d bytes for '%s': %s"), (int)(ncol * sizeof(int8_t)), "size", strerror(errno)); + STOP(_("Failed to allocate %d bytes for '%s': %s"), (int)(ncol * sizeof(int8_t)), "size", strerror(errno)); // # nocov nStringCols = 0; nNonStringCols = 0; for (int j=0; j= ((int64_t *)xd)[previ] : dtwiddle(xd[thisi]) >= dtwiddle(xd[previ]); } break; - default: + default: // # nocov error(_("Type '%s' is not supported"), type2char(TYPEOF(v))); // # nocov } }
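
As a closing illustration of the message refactor in patch 5, a hedged sketch of the kind of assignment that reaches CHECK_RANGE/columnDesc in assign.c. The wording below is assembled from the format strings in that patch and may differ slightly by data.table version or locale:

    library(data.table)
    DT = data.table(x = 1:3)   # integer column
    DT[1, x := 2.5]            # double RHS assigned into an integer target
    # Warning: 2.500000 (type 'double') at RHS position 1 out-of-range(NA) or
    # truncated (precision lost) when assigning to type 'integer' (column 1 named 'x')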