From 960ff28b76387ee152d48b4bfc27198f53f070d3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 24 Sep 2024 13:52:41 -0400 Subject: [PATCH 1/8] Refactor internal helpers which generate fragmented messages (#6534) * Refactor internal helpers which generate fragmented messages * Update test error strings * deprecate internal group.desc argument --- R/fmelt.R | 35 ++++++++++++++--------------------- R/utils.R | 10 ++++++++-- inst/tests/tests.Rraw | 10 +++++----- man/measure.Rd | 4 +--- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/R/fmelt.R b/R/fmelt.R index 3f924c2e4..55a6fb0ad 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -64,14 +64,11 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") { } fun.list[[fun.i]] = fun } - measurev.args = c( - list(fun.list), - L[formal.i.vec], - list(group.desc="... arguments to measure")) + measurev.args = c(list(fun.list), L[formal.i.vec]) do.call(measurev, measurev.args) } -measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name", group.desc="elements of fun.list"){ +measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name"){ # 1. basic error checking. if (!missing(sep) && !missing(pattern)) { stopf("both sep and pattern arguments used; must use either sep or pattern (not both)") @@ -88,21 +85,11 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na which(!nzchar(names(fun.list))) } if (length(prob.i)) { - stopf("in measurev, %s must be named, problems: %s", group.desc, brackify(prob.i)) + stopf("in measurev, elements of fun.list must be named, problems: %s", brackify(prob.i)) } - err.names.unique = function(err.what, name.vec) { - name.tab = table(name.vec) - bad.counts = name.tab[1 < name.tab] - if (length(bad.counts)) { - stopf("%s should be uniquely named, problems: %s", err.what, brackify(names(bad.counts))) - } - } - err.args.groups = function(type, N){ - if (N != length(fun.list)) { - stopf("number of %s =%d must be same as %s =%d", group.desc, length(fun.list), type, N) - } + if (length(dup.funs <- duplicated_values(names(fun.list)))) { + stopf("elements of fun.list should be uniquely named, problems: %s", brackify(dup.funs)) } - err.names.unique(group.desc, names(fun.list)) # 2. compute initial group data table, used as variable_table attribute.
group.mat = if (!missing(pattern)) { if (!is.character(pattern)) { @@ -117,7 +104,9 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na if (is.null(start)) { stopf("pattern must contain at least one capture group (parenthesized sub-pattern)") } - err.args.groups("number of capture groups in pattern", ncol(start)) + if (ncol(start) != length(fun.list)) { + stopf("number of elements of fun.list (%d) must be the same as the number of capture groups in pattern (%d)", length(fun.list), ncol(start)) + } end = attr(match.vec, "capture.length")[measure.vec.i,]+start-1L measure.vec <- cols[measure.vec.i] names.mat = matrix(measure.vec, nrow(start), ncol(start)) @@ -132,12 +121,16 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na if (n.groups == 1) { stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") } - err.args.groups("max number of items after splitting column names", n.groups) + if (n.groups != length(fun.list)) { + stopf("number of elements of fun.list (%d) must be the same as the max number of items after splitting column names (%d)", length(fun.list), n.groups) + } measure.vec.i = which(vector.lengths==n.groups) measure.vec = cols[measure.vec.i] do.call(rbind, list.of.vectors[measure.vec.i]) } - err.names.unique("measured columns", measure.vec) + if (length(dup.measures <- duplicated_values(measure.vec))) { + stopf("measured columns should be uniquely named, problems: %s", brackify(dup.measures)) + } uniq.mat = unique(group.mat) if (nrow(uniq.mat) < nrow(group.mat)) { stopf("number of unique column IDs =%d is less than number of melted columns =%d; fix by changing pattern/sep", nrow(uniq.mat), nrow(group.mat)) diff --git a/R/utils.R b/R/utils.R index 2f91491be..4a1aadd43 100644 --- a/R/utils.R +++ b/R/utils.R @@ -34,11 +34,17 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) { if (!anyDuplicated(nm <- names(x))) return(invisible()) duplicate_names = unique(nm[duplicated(nm)]) stopf(ngettext(length(duplicate_names), - "%s has duplicated column name %s. Please remove or rename the duplicate and try again.", - "%s has duplicated column names %s. Please remove or rename the duplicates and try again."), + "%s has duplicated column name %s. Please remove or rename the duplicate and try again.", + "%s has duplicated column names %s. Please remove or rename the duplicates and try again."), table_name, brackify(duplicate_names), domain=NA) } +duplicated_values = function(x) { + # fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case + if (!anyDuplicated(x)) return(vector(typeof(x))) + unique(x[duplicated(x)]) +} + # TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need. # Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper. # This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len(). 
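For readers skimming the diff, here is a minimal sketch of how the new internal duplicated_values() helper (added to R/utils.R above) behaves. The function body is copied from the hunk; the example inputs are illustrative only and not part of the patch:

    duplicated_values = function(x) {
      # fast path: anyDuplicated() returns 0 when there are no duplicates,
      # so the full duplicated() pass only runs on the (usually error) path
      if (!anyDuplicated(x)) return(vector(typeof(x)))  # zero-length vector of x's type
      unique(x[duplicated(x)])                          # each duplicated value reported once
    }
    duplicated_values(c("value.name", "dim", "value.name"))  # "value.name"
    duplicated_values(1:5)                                   # integer(0)
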
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2b3958acd..322530139 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17301,11 +17301,11 @@ test(2183.00020, melt(iris.dt, measure.vars=measurev(value.name, dim, sep=".", p test(2183.000201, melt(iris.dt, measure.vars=measurev(list(NULL, dim=NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1]") test(2183.000202, melt(iris.dt, measure.vars=measurev(list(NULL, NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: [1, 2]") test(2183.00027, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim="bar"), sep=".")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: dim") -test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list =3 must be same as max number of items after splitting column names =2") +test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)") test(2183.00042, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function()1), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr") test(2183.00043, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=interactive), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr") test(2183.00044, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function(x)1), pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") -test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list =3 must be same as number of capture groups in pattern =2") +test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)") test(2183.00048, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, value.name=NULL), sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]") # measure with factor conversion. myfac = function(x)factor(x)#user-defined conversion function. @@ -17348,7 +17348,7 @@ test(2183.24, names(melt(iris.dt, measure.vars=measure(value.name, dim, sep=".") test(2183.25, names(melt(iris.dt, measure.vars=measure(part, value.name, sep="."))), c("Species", "part", "Length", "Width")) test(2183.26, names(melt(iris.dt, measure.vars=measure(part, dim, sep="."))), c("Species", "part", "dim", "value")) test(2183.27, melt(iris.dt, measure.vars=measure(value.name, dim="bar", sep=".")), error="each ... argument to measure must be a function with at least one argument, problem: dim") -test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of ... 
arguments to measure =3 must be same as max number of items after splitting column names =2") +test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of elements of fun.list (3) must be the same as the max number of items after splitting column names (2)") test(2183.29, melt(iris.dt, measure.vars=measure()), error="each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") # patterns with iris data. test(2183.40, names(melt(iris.dt, measure.vars=patterns("[.]"))), c("Species", "variable", "value")) @@ -17357,10 +17357,10 @@ test(2183.41, melt(DTid, measure.vars=measure(value.name, istr="bar", pattern="( test(2183.42, melt(DTid, measure.vars=measure(value.name, istr=function()1, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") test(2183.43, melt(DTid, measure.vars=measure(value.name, istr=interactive, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") test(2183.44, melt(DTid, measure.vars=measure(value.name, istr=function(x)1, pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") -test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of ... arguments to measure =3 must be same as number of capture groups in pattern =2") +test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of elements of fun.list (3) must be the same as the number of capture groups in pattern (2)") test(2183.46, melt(iris.dt, measure.vars=measure(function(x)factor(x), dim, pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]") test(2183.47, melt(iris.dt, measure.vars=measure(function(x)factor(x), pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: [1]") -test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="... arguments to measure should be uniquely named, problems: [value.name]") +test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="elements of fun.list should be uniquely named, problems: [value.name]") # measure with factor conversion. myfac = function(x)factor(x)#user-defined conversion function. 
test(2183.60, melt(DTid, measure.vars=measure(letter=myfac, value.name, pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1))) diff --git a/man/measure.Rd b/man/measure.Rd index 73a315e00..b7933e17e 100644 --- a/man/measure.Rd +++ b/man/measure.Rd @@ -18,8 +18,7 @@ } \usage{ measure(\dots, sep, pattern, cols, multiple.keyword="value.name") -measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name", - group.desc="elements of fun.list") +measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name") } \arguments{ \item{\dots}{One or more (1) symbols (without argument name; symbol @@ -44,7 +43,6 @@ measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name", value columns (with names defined by the unique values in that group). Otherwise if the string not used as a group name, then measure returns a vector and melt returns a single value column.} - \item{group.desc}{Internal, used in error messages.} } \seealso{ \code{\link{melt}}, From 2a70e2f63cc94c97d35bc493b57321f884539755 Mon Sep 17 00:00:00 2001 From: rikivillalba <32423469+rikivillalba@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:33:56 -0300 Subject: [PATCH 2/8] column *indices* should be *positions* (#6537) Here column *indices* might be *positions* since *indices* better refers to `indices()`. Can be also *numbers* though (while re-reading for translations...;) --- vignettes/datatable-reshape.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index c84c1558d..e5efb660f 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -80,7 +80,7 @@ str(DT.m1) * `measure.vars` specify the set of columns we would like to collapse (or combine) together. -* We can also specify column *indices* instead of *names*. +* We can also specify column *positions* instead of *names*. * By default, `variable` column is of type `factor`. Set `variable.factor` argument to `FALSE` if you'd like to return a *`character`* vector instead. From 92a29f8baa4425a9cd71c261c9715a7df903133f Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 24 Sep 2024 21:08:43 -0400 Subject: [PATCH 3/8] Add support for patterns(cols=user_provided) (#6510) * grep(value=TRUE) in patterns() * test user-defined cols subset of names(DT) * vector(s) * comment ref issue * adjust NEWS --------- Co-authored-by: Toby Dylan Hocking Co-authored-by: Michael Chirico --- NEWS.md | 2 ++ R/fmelt.R | 2 +- inst/tests/tests.Rraw | 3 +++ man/patterns.Rd | 4 ++-- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 99e4c8598..4bf066f41 100644 --- a/NEWS.md +++ b/NEWS.md @@ -63,6 +63,8 @@ rowwiseDT( # 15: All values Total Total 999 999 NaN 10 ``` +4. `patterns()` in `melt()` combines correctly with user-defined `cols=`, which can be useful to specify a subset of columns to reshape without having to use a regex, for example `patterns("2", cols=c("y1", "y2"))` will only give `y2` even if there are other columns in the input matching `2`, [#6498](https://github.com/Rdatatable/data.table/issues/6498). Thanks to @hongyuanjia for the report, and to @tdhock for the PR. + ## BUG FIXES 1. Using `print.data.table()` with character truncation using `datatable.prettyprint.char` no longer errors with `NA` entries, [#6441](https://github.com/Rdatatable/data.table/issues/6441). Thanks to @r2evans for the bug report, and @joshhwuu for the fix. 
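To make NEWS item 4 concrete, a small usage sketch assuming a data.table build with this patch applied; it mirrors new test 1866.7 added to tests.Rraw below, and the example table is illustrative:

    library(data.table)
    DT = data.table(x1 = 1, x2 = 2, y1 = 3, y2 = 4)
    # cols= limits the search space, so "2" now matches only y2 (not x2):
    melt(DT, measure.vars = patterns("2", cols = c("y1", "y2")))
    # melts only y2: x1, x2, y1 remain id columns; variable == "y2", value == 4
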
diff --git a/R/fmelt.R b/R/fmelt.R index 55a6fb0ad..c0cc47e9c 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -29,7 +29,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe p = unlist(L, use.names = any(nzchar(names(L)))) if (!is.character(p)) stopf("Input patterns must be of type character.") - matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes) + matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes, value=TRUE) if (length(idx <- which(lengths(matched) == 0L))) stopf(ngettext(length(idx), 'Pattern not found: [%s]', 'Patterns not found: [%s]'), brackify(p[idx]), domain=NA) if (length(matched) == 1L) return(matched[[1L]]) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 322530139..cb70d4314 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12427,6 +12427,9 @@ DTout = data.table( value2 = c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j") ) test(1866.6, melt(DT, measure.vars = patterns("^x", "^y", cols=names(DT))), DTout) +# patterns supports cols arg, #6498 +test(1866.7, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) +test(1866.8, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) # informative errors for bad user-provided cols arg to patterns DT = data.table(x1=1,x2=2,y1=3,y2=4) diff --git a/man/patterns.Rd b/man/patterns.Rd index cd3d3fd8b..d48bb9724 100644 --- a/man/patterns.Rd +++ b/man/patterns.Rd @@ -2,8 +2,8 @@ \alias{patterns} \title{Obtain matching indices corresponding to patterns} \description{ -\code{patterns} returns the matching indices in the argument \code{cols} -corresponding to the regular expression patterns provided. The patterns must be +\code{patterns} returns the elements of \code{cols} +that match the regular expression patterns, which must be supported by \code{\link[base]{grep}}. 
From \code{v1.9.6}, \code{\link{melt.data.table}} has an enhanced functionality From e22f7742aee78651fe09aed1434625947dda600a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 24 Sep 2024 21:47:45 -0400 Subject: [PATCH 4/8] make test numbers strictly increasing (#6538) --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index cb70d4314..f12bea83d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12428,8 +12428,8 @@ DTout = data.table( ) test(1866.6, melt(DT, measure.vars = patterns("^x", "^y", cols=names(DT))), DTout) # patterns supports cols arg, #6498 -test(1866.7, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) -test(1866.8, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) +test(1866.71, melt(data.table(x1=1,x2=2,y1=3,y2=4),measure.vars=patterns("2",cols=c("y1","y2"))), data.table(x1=1,x2=2,y1=3,variable=factor("y2"),value=4)) +test(1866.72, DT[, lapply(.SD, sum), .SDcols=patterns("2",cols=c("x1","x2"))], data.table(x2=40L)) # informative errors for bad user-provided cols arg to patterns DT = data.table(x1=1,x2=2,y1=3,y2=4) From 34ec8e9b94d34bab94408693de010588d80459f9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:09:50 -0700 Subject: [PATCH 5/8] Refactor CHECK_RANGE to be i18n friendly (#6530) * Refactor CHECK_RANGE to be i18n friendly * Move column_desc buffer inside columnDesc routine * make nominative case clearer --- src/assign.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4402608ff..4fc05d992 100644 --- a/src/assign.c +++ b/src/assign.c @@ -744,6 +744,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects +const char *columnDesc(int colnum, const char *colname) { + static char column_desc[MSGSIZE+1]; // can contain column names, hence relatively large allocation. + snprintf(column_desc, MSGSIZE, _("(column %d named '%s')"), colnum, colname); + return column_desc; +} + const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() @@ -932,18 +938,17 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. 
// The idea is to do these range checks without calling coerceVector() (which allocates) - #define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \ + #define CHECK_RANGE(STYPE, RFUN, COND, FMTVAL, FMT) {{ \ const STYPE *sd = (const STYPE *)RFUN(source); \ for (int i=0; i255, "d", "taken as 0", val) + case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, val, _("%d (type '%s' at RHS position %d taken as 0 when assigning to type '%s' %s")) case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0", val) - else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0", val) + CHECK_RANGE(int64_t, REAL, val<0 || val>255, val, _("%"PRId64" (type '%s') at RHS position %d taken as 0 when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, val, _("%f (type '%s') at RHS position %d either truncated (precision lost) or taken as 0 when assigning to type '%s' %s")) } break; case INTSXP: switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), val, _("%"PRId64" (type '%s') at RHS position %d out-of-range (NA) when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) } break; case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) - else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), "f", "imaginary part discarded", val.i) + (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) + else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), val.r, _("%f (type '%s') at RHS position %d imaginary part discarded when assigning 
to type '%s' %s")) } } } From 623bee9ec292a3f178946be05836ffc0e11666ee Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:10:27 -0700 Subject: [PATCH 6/8] Add notranslate for untranslateable fragments (#6539) --- src/assign.c | 3 ++- src/bmerge.c | 3 ++- src/dogroups.c | 12 ++++++++---- src/forder.c | 7 +++---- src/fread.c | 14 +++++++------- src/freadR.c | 5 +++-- src/fsort.c | 6 ++++-- src/fwrite.c | 11 ++++++----- src/gsumm.c | 3 ++- src/openmp-utils.c | 20 +++++++++++--------- src/reorder.c | 6 ++++-- src/utils.c | 2 +- 12 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4fc05d992..dc6b9a6de 100644 --- a/src/assign.c +++ b/src/assign.c @@ -437,7 +437,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) values = PROTECT(eval(PROTECT(lang2(sym_as_posixct, values)), R_GlobalEnv)); protecti+=2; } bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below - if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false"); + if (verbose) + Rprintf("RHS_list_of_columns == %s\n", RHS_list_of_columns ? "true" : "false"); // # notranslate if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) { SEXP item = VECTOR_ELT(values,0); if (isNull(item) || length(item)==1 || length(item)==targetlen) { diff --git a/src/bmerge.c b/src/bmerge.c index d290d70c6..cb71f9c20 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -66,7 +66,8 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r if (xcols[col]>LENGTH(xdt) || xcols[col]<1) error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt)); int it = TYPEOF(VECTOR_ELT(idt, icols[col]-1)); int xt = TYPEOF(VECTOR_ELT(xdt, xcols[col]-1)); - if (iN && it!=xt) error(_("typeof x.%s (%s) != typeof i.%s (%s)"), CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); + if (iN && it!=xt) + error("typeof x.%s (%s) != typeof i.%s (%s)", CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); // # notranslate if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP) error(_("Type '%s' is not supported for joining/merging"), type2char(it)); } diff --git a/src/dogroups.c b/src/dogroups.c index d55439332..0717ce0c1 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -106,7 +106,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } setAttrib(BY, R_NamesSymbol, bynames); // Fix for #42 - BY doesn't retain names anymore R_LockBinding(sym_BY, env); - if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) error(_("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]"),length(bynames),length(groups),length(grpcols)); + if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) + error("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]", length(bynames), length(groups), length(grpcols)); // # notranslate // TO DO: check this check above. N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk @@ -135,7 +136,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // fetch names of .SD and prepare symbols. 
In case they are copied-on-write by user assigning to those variables // using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group. SEXP names = PROTECT(getAttrib(SDall, R_NamesSymbol)); nprotect++; - if (length(names) != length(SDall)) error(_("length(names)!=length(SD)")); + if (length(names) != length(SDall)) + error("length(names)!=length(SD)"); // # notranslate SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP)); for(int i=0; i=17&&i<=19)?"(*)":" ", tblock[i], nblock[i]); } - for (int i=0; i<=256; i++) { - if (stat[i]) Rprintf(_("stat[%03d]==%20"PRIu64"\n"), i, (uint64_t)stat[i]); - } + for (int i=0; i<=256; i++) if (stat[i]) + Rprintf("stat[%03d]==%20"PRIu64"\n", i, (uint64_t)stat[i]); // # notranslate } #endif return ans; diff --git a/src/fread.c b/src/fread.c index 746d38863..1169f645d 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1340,10 +1340,10 @@ int freadMain(freadMainArgs _args) { if (*NAstrings == NULL) { DTPRINT(_(" No NAstrings provided.\n")); } else { - DTPRINT(_(" NAstrings = [")); + DTPRINT(" NAstrings = ["); // # notranslate const char * const* s = NAstrings; while (*s++) DTPRINT(*s? "<<%s>>, " : "<<%s>>", s[-1]); - DTPRINT(_("]\n")); + DTPRINT("]\n"); // # notranslate if (any_number_like_NAstrings) DTPRINT(_(" One or more of the NAstrings looks like a number.\n")); else @@ -2085,14 +2085,14 @@ int freadMain(freadMainArgs _args) { // sd can be very close to 0.0 sometimes, so apply a +10% minimum // blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed. if (verbose) { - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate DTPRINT(_(" Sampled %"PRIu64" rows (handled \\n inside quoted fields) at %d jump points\n"), (uint64_t)sampleLines, nJumps); DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %"PRIu64"\n"), row1line, (uint64_t)bytesRead); DTPRINT(_(" Line length: mean=%.2f sd=%.2f min=%d max=%d\n"), meanLineLen, sd, minLen, maxLen); DTPRINT(_(" Estimated number of rows: %"PRIu64" / %.2f = %"PRIu64"\n"), (uint64_t)bytesRead, meanLineLen, (uint64_t)estnrow); DTPRINT(_(" Initial alloc = %"PRIu64" rows (%"PRIu64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"), (uint64_t)allocnrow, (uint64_t)estnrow, (int)(100.0*allocnrow/estnrow-100.0)); - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate } else { if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRIu64") > allocnrow(%"PRIu64")", (uint64_t)sampleLines, (uint64_t)allocnrow); // # nocov } @@ -2266,8 +2266,8 @@ int freadMain(freadMainArgs _args) { if (verbose) DTPRINT(_("[11] Read the data\n")); read: // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps restartTeam = false; - if (verbose) DTPRINT(_(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n"), - jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); + if (verbose) + DTPRINT(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n", jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); // # notranslate ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRIu64") <= nrowLimit(%"PRIu64")", (uint64_t)allocnrow, (uint64_t)nrowLimit); #pragma omp parallel num_threads(nth) { @@ -2744,7 +2744,7 @@ int freadMain(freadMainArgs _args) { } if (verbose) { - DTPRINT(_("=============================\n")); + DTPRINT("=============================\n"); // # notranslate if (tTot<0.000001) tTot=0.000001; // to avoid nan% output in some trivially small 
tests where tot==0.000s DTPRINT(_("%8.3fs (%3.0f%%) Memory map %.3fGB file\n"), tMap-t0, 100.0*(tMap-t0)/tTot, 1.0*fileSize/(1024*1024*1024)); DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0*(tLayout-tMap)/tTot); diff --git a/src/freadR.c b/src/freadR.c index 8063ca707..05b8cd00e 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -155,7 +155,8 @@ SEXP freadR( args.skipEmptyLines = LOGICAL(skipEmptyLinesArg)[0]; args.fill = INTEGER(fillArg)[0]; args.showProgress = LOGICAL(showProgressArg)[0]; - if (INTEGER(nThreadArg)[0]<1) error(_("nThread(%d)<1"), INTEGER(nThreadArg)[0]); + if (INTEGER(nThreadArg)[0]<1) + error("nThread(%d)<1", INTEGER(nThreadArg)[0]); // # notranslate args.nth = (uint32_t)INTEGER(nThreadArg)[0]; args.verbose = verbose; args.warningsAreErrors = warningsAreErrors; @@ -716,7 +717,7 @@ void __halt(bool warn, const char *format, ...) { // if (warn) warning(_("%s"), msg); // this warning() call doesn't seem to honor warn=2 straight away in R 3.6, so now always call error() directly to be sure // we were going via warning() before to get the (converted from warning) prefix in the message (which we could mimic in future) - error(_("%s"), msg); // include "%s" because data in msg might include '%' + error("%s", msg); // # notranslate. include "%s" because data in msg might include '%' } void prepareThreadContext(ThreadLocalFreadParsingContext *ctx) {} diff --git a/src/fsort.c b/src/fsort.c index 5501cdbaa..a3bc38a2b 100644 --- a/src/fsort.c +++ b/src/fsort.c @@ -129,7 +129,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int nth = getDTthreads(xlength(x), true); int nBatch=nth*2; // at least nth; more to reduce last-man-home; but not too large to keep counts small in cache - if (verbose) Rprintf(_("nth=%d, nBatch=%d\n"),nth,nBatch); + if (verbose) + Rprintf("nth=%d, nBatch=%d\n", nth, nBatch); // # notranslate size_t batchSize = (xlength(x)-1)/nBatch + 1; if (batchSize < 1024) batchSize = 1024; // simple attempt to work reasonably for short vector. 1024*8 = 2 4kb pages @@ -185,7 +186,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int MSBNbits = maxBit > 15 ? 16 : maxBit+1; // how many bits make up the MSB int shift = maxBit + 1 - MSBNbits; // the right shift to leave the MSB bits remaining size_t MSBsize = 1LL< 65,536) - if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n"), maxBit, MSBNbits, shift, MSBsize); + if (verbose) + Rprintf("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n", maxBit, MSBNbits, shift, MSBsize); // # notranslate uint64_t *counts = (uint64_t *)R_alloc(nBatch*MSBsize, sizeof(uint64_t)); memset(counts, 0, nBatch*MSBsize*sizeof(uint64_t)); diff --git a/src/fwrite.c b/src/fwrite.c index 06255b052..757da1b0b 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -628,15 +628,16 @@ void fwriteMain(fwriteMainArgs args) if (verbose) { DTPRINT(_("Column writers: ")); + // # notranslate start if (args.ncol<=50) { - for (int j=0; j Date: Thu, 26 Sep 2024 06:14:47 -0700 Subject: [PATCH 7/8] Restore covr (#6540) * Revert "Disable covr (#6122)" This reverts commit 3130f11bc05d1609ee5373613b675f8890befba2. * pin R 4.3 with TODO * Try a pinned covr version * go further back * Use cribbed updated covr action? 
* Not crashing, but very slow; try pinned version again * we don't have secrets set up * Added a repo secret to data.table (I think) --- .github/workflows/test-coverage.yaml | 55 ++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 .github/workflows/test-coverage.yaml diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 000000000..79fcd6fdc --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,55 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [master] + pull_request: + branches: [master] + +name: test-coverage.yaml + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + r-version: '4.3' # TODO(r-lib/covr#567): Go back to using r-devel + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - uses: codecov/codecov-action@v4 + with: + fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} + file: ./cobertura.xml + plugin: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package From 0c957c70d019ebf36c44c988d5b2a4def9c05c92 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 18:33:34 -0700 Subject: [PATCH 8/8] # nocov for recent missed lines (#6541) * # nocov for recent missed lines * Need '__func__' in internal_error() * nocov new internal_error --- R/data.table.R | 6 +++--- R/fread.R | 4 +++- R/fwrite.R | 2 ++ R/last.R | 2 ++ R/xts.R | 2 ++ src/assign.c | 8 ++++---- src/between.c | 2 +- src/bmerge.c | 4 ++-- src/chmatch.c | 2 +- src/dogroups.c | 6 +++--- src/fastmean.c | 4 ++-- src/fmelt.c | 2 +- src/forder.c | 41 ++++++++++++++++++++--------------------- src/fread.c | 24 ++++++++++++++---------- src/fsort.c | 4 ++-- src/fwrite.c | 8 +++++--- src/gsumm.c | 10 +++++----- src/init.c | 2 +- src/rbindlist.c | 6 +++--- src/subset.c | 2 +- src/uniqlist.c | 8 ++++---- 21 files changed, 81 insertions(+), 68 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index fe41ed2d3..7b48704a1 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2256,8 +2256,8 @@ tail.data.table = function(x, n=6L, ...) { "$<-.data.table" = function(x, name, value) { if (!cedta()) { - ans = `$<-.data.frame`(x, name, value) - return(setalloccol(ans)) # over-allocate (again) + ans = `$<-.data.frame`(x, name, value) # nocov + return(setalloccol(ans)) # nocov. 
over-allocate (again) } x = copy(x) set(x,j=name,value=value) # important i is missing here @@ -2430,7 +2430,7 @@ which_ = function(x, bool = TRUE) { } is.na.data.table = function(x) { - if (!cedta()) return(is.na.data.frame(x)) + if (!cedta()) return(is.na.data.frame(x)) # nocov do.call(cbind, lapply(x, is.na)) } diff --git a/R/fread.R b/R/fread.R index 883d445c1..e4970749d 100644 --- a/R/fread.R +++ b/R/fread.R @@ -158,6 +158,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } # whitespace at the beginning or end of na.strings is checked at C level and is an error there; test 1804 } + # nocov start. Tested in other.Rraw tests 16, not in the main suite. if (yaml) { if (!requireNamespace('yaml', quietly = TRUE)) stopf("'data.table' relies on the package 'yaml' to parse the file header; please add this to your library with install.packages('yaml') and try again.") # nocov @@ -267,6 +268,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } if (is.integer(skip)) skip = skip + n_read } + # nocov end warnings2errors = getOption("warn") >= 2 stopifnot(identical(tz,"UTC") || identical(tz,"")) if (tz=="") { @@ -341,7 +343,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") key = cols_from_csv(key) setkeyv(ans, key) } - if (yaml) setattr(ans, 'yaml_metadata', yaml_header) + if (yaml) setattr(ans, 'yaml_metadata', yaml_header) # nocov if (!is.null(index) && data.table) { if (!all(vapply_1b(index, is.character))) stopf("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)") diff --git a/R/fwrite.R b/R/fwrite.R index ad92859f3..d67c886d1 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -91,6 +91,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", return(invisible()) } } + # nocov start. See test 17 in other.Rraw, not tested in the main suite. yaml = if (!yaml) "" else { if (!requireNamespace('yaml', quietly=TRUE)) stopf("'data.table' relies on the package 'yaml' to write the file header; please add this to your library with install.packages('yaml') and try again.") # nocov @@ -114,6 +115,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", ) paste0('---', eol, yaml::as.yaml(yaml_header, line.sep=eol), '---', eol) # NB: as.yaml adds trailing newline } + # nocov end file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078. .Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread, diff --git a/R/last.R b/R/last.R index 0a889e622..a93ec3173 100644 --- a/R/last.R +++ b/R/last.R @@ -1,6 +1,7 @@ # data.table defined last(x) with no arguments, just for last. If you need the last 10 then use tail(x,10). # for xts class objects it will dispatch to xts::last # reworked to avoid loading xts namespace (#3857) then again to fix dispatching of xts class (#4053) +# nocov start. Tests 19.* in other.Rraw, not in the main suite. last = function(x, n=1L, ...) { verbose = isTRUE(getOption("datatable.verbose", FALSE)) if (!inherits(x, "xts")) { @@ -82,3 +83,4 @@ first = function(x, n=1L, ...) { xts::first(x, n=n, ...) } } +# nocov end diff --git a/R/xts.R b/R/xts.R index 234f36cac..bd4cb75b3 100644 --- a/R/xts.R +++ b/R/xts.R @@ -1,3 +1,4 @@ +# nocov start. See tests 18.* in other.Rraw, not in the main suite. as.data.table.xts = function(x, keep.rownames = TRUE, key=NULL, ...) 
{ stopifnot(requireNamespace("xts"), !missing(x), xts::is.xts(x)) if (length(keep.rownames) != 1L) stopf("keep.rownames must be length 1") @@ -26,3 +27,4 @@ as.xts.data.table = function(x, numeric.only = TRUE, ...) { } return(xts::xts(as.matrix(r), order.by = if (inherits(x[[1L]], "IDate")) as.Date(x[[1L]]) else x[[1L]])) } +# nocov end diff --git a/src/assign.c b/src/assign.c index dc6b9a6de..b280c2259 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1190,7 +1190,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } } } break; - default : + default : // # nocov error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target))); // # nocov } UNPROTECT(protecti); @@ -1244,7 +1244,7 @@ void writeNA(SEXP v, const int from, const int n, const bool listNA) case EXPRSXP : for (int i=from; i<=to; ++i) SET_VECTOR_ELT(v, i, R_NilValue); break; - default : + default : // # nocov internal_error(__func__, "Unsupported type '%s' for v", type2char(TYPEOF(v))); // # nocov } } @@ -1283,8 +1283,8 @@ void savetl_init(void) { saveds = (SEXP *)malloc(nalloc * sizeof(SEXP)); savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t)); if (!saveds || !savedtl) { - free(saveds); free(savedtl); - savetl_end(); // # nocov + free(saveds); free(savedtl); // # nocov + savetl_end(); // # nocov error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov } } diff --git a/src/between.c b/src/between.c index 25d85b57c..808bf2d35 100644 --- a/src/between.c +++ b/src/between.c @@ -192,7 +192,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAboundsArg, S } if (verbose) Rprintf(_("between non-parallel processing of character took %8.3fs\n"), omp_get_wtime()-tic); } break; - default: + default: // # nocov internal_error(__func__, "unsupported type '%s' should have been caught at R level", type2char(TYPEOF(x))); // # nocov } UNPROTECT(nprotect); diff --git a/src/bmerge.c b/src/bmerge.c index cb71f9c20..f6f640e71 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -381,8 +381,8 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg } break; // supported types were checked up front to avoid handling an error here in (future) parallel region - default: - error(_("Type '%s' is not supported for joining/merging"), type2char(TYPEOF(xc))); + default: // # nocov + internal_error("Invalid join/merge type '%s' should have been caught earlier", type2char(TYPEOF(xc))); // # nocov } if (xlow= 0; } } break; - default : + default : // # nocov STOP(_("type '%s' is not yet supported"), type2char(TYPEOF(x))); // # nocov } if (!ok) return ScalarLogical(FALSE); // not sorted so return early diff --git a/src/fread.c b/src/fread.c index 1169f645d..7b89b3294 100644 --- a/src/fread.c +++ b/src/fread.c @@ -442,7 +442,7 @@ double copyFile(size_t fileSize) // only called in very very rare cases double tt = wallclock(); mmp_copy = (char *)malloc((size_t)fileSize + 1 /* extra \0 */); if (!mmp_copy) - return -1.0; + return -1.0; // # nocov memcpy(mmp_copy, mmp, fileSize); sof = mmp_copy; eof = (char *)mmp_copy + fileSize; @@ -563,7 +563,7 @@ static void Field(FieldParseContext *ctx) if (ch!=ch2) fieldStart--; // field ending is this sep|eol; neither (*1) or (*2) happened; opening quote wasn't really an opening quote } break; - default: + default: // # nocov return; // # nocov Internal error: undefined quote rule } target->len = (int32_t)(ch - fieldStart); @@ -1540,14 +1540,16 @@ int freadMain(freadMainArgs _args) { // In future, we may 
discover a way to mmap fileSize+1 on all OS when fileSize%4096==0, reliably. If and when, this clause can be updated with no code impact elsewhere. double time_taken = copyFile(fileSize); if (time_taken == -1.0) { + // # nocov start if (!verbose) DTPRINT("%s. Attempt to copy file in RAM failed.", msg); STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + // # nocov end } if (verbose) DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken); else if (time_taken > 0.5) - DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded + DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded } } *_const_cast(eof) = '\0'; // cow page @@ -1822,14 +1824,16 @@ int freadMain(freadMainArgs _args) { ASSERT(mmp_copy==NULL, "mmp has already been copied due to abrupt non-eol ending, so it does not end with 2 or more eol.%s", ""/*dummy arg for macro*/); // #nocov double time_taken = copyFile(fileSize); if (time_taken == -1.0) { + // # nocov start if (!verbose) DTPRINT("%s. Attempt to copy file in RAM failed.", msg); STOP(_("Unable to allocate %s of contiguous virtual RAM."), filesize_to_str(fileSize)); + // # nocov end } if (verbose) DTPRINT(_(" File copy in RAM took %.3f seconds.\n"), time_taken); - else if (tt>0.5) - DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded + else if (tt>0.5) // # nocov + DTPRINT(_("Avoidable file copy in RAM took %.3f seconds. %s.\n"), time_taken, msg); // # nocov. not warning as that could feasibly cause CRAN tests to fail, say, if test machine is heavily loaded pos = sof + (pos-(const char *)mmp); firstJumpEnd = sof + (firstJumpEnd-(const char *)mmp); } else { @@ -1858,8 +1862,8 @@ int freadMain(freadMainArgs _args) { type = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); tmpType = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); // used i) in sampling to not stop on errors when bad jump point and ii) when accepting user overrides if (!type || !tmpType) { - free(type); free(tmpType); - STOP(_("Failed to allocate 2 x %d bytes for type and tmpType: %s"), ncol, strerror(errno)); + free(type); free(tmpType); // # nocov + STOP(_("Failed to allocate 2 x %d bytes for type and tmpType: %s"), ncol, strerror(errno)); // # nocov } if (sep == ',' && dec == '\0') { // if sep=',' detected, don't attempt to detect dec [NB: . 
is not par of seps] @@ -2119,7 +2123,7 @@ int freadMain(freadMainArgs _args) { } else { colNames = (lenOff*) calloc((size_t)ncol, sizeof(lenOff)); if (!colNames) - STOP(_("Unable to allocate %d*%d bytes for column name pointers: %s"), ncol, sizeof(lenOff), strerror(errno)); + STOP(_("Unable to allocate %d*%d bytes for column name pointers: %s"), ncol, sizeof(lenOff), strerror(errno)); // # nocov if (sep==' ') while (*ch==' ') ch++; void *targets[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, colNames + autoFirstColName}; FieldParseContext fctx = { @@ -2174,7 +2178,7 @@ int freadMain(freadMainArgs _args) { rowSize8 = 0; size = (int8_t *)malloc((size_t)ncol * sizeof(int8_t)); // TODO: remove size[] when we implement Pasha's idea to += size inside processor if (!size) - STOP(_("Failed to allocate %d bytes for '%s': %s"), (int)(ncol * sizeof(int8_t)), "size", strerror(errno)); + STOP(_("Failed to allocate %d bytes for '%s': %s"), (int)(ncol * sizeof(int8_t)), "size", strerror(errno)); // # nocov nStringCols = 0; nNonStringCols = 0; for (int j=0; j= ((int64_t *)xd)[previ] : dtwiddle(xd[thisi]) >= dtwiddle(xd[previ]); } break; - default: + default: // # nocov error(_("Type '%s' is not supported"), type2char(TYPEOF(v))); // # nocov } }
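
As a closing illustration of the message refactor in patch 5, a hedged sketch of the kind of assignment that reaches CHECK_RANGE/columnDesc in assign.c. The wording below is assembled from the format strings in that patch and may differ slightly by data.table version or locale:

    library(data.table)
    DT = data.table(x = 1:3)   # integer column
    DT[1, x := 2.5]            # double RHS assigned into an integer target
    # Warning: 2.500000 (type 'double') at RHS position 1 out-of-range(NA) or
    # truncated (precision lost) when assigning to type 'integer' (column 1 named 'x')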