From 34ec8e9b94d34bab94408693de010588d80459f9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:09:50 -0700 Subject: [PATCH 1/2] Refactor CHECK_RANGE to be i18n friendly (#6530) * Refactor CHECK_RANGE to be i18n friendly * Move column_desc buffer inside columnDesc routine * make nominative case clearer --- src/assign.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4402608ff..4fc05d992 100644 --- a/src/assign.c +++ b/src/assign.c @@ -744,6 +744,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects +const char *columnDesc(int colnum, const char *colname) { + static char column_desc[MSGSIZE+1]; // can contain column names, hence relatively large allocation. + snprintf(column_desc, MSGSIZE, _("(column %d named '%s')"), colnum, colname); + return column_desc; +} + const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() @@ -932,18 +938,17 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. // The idea is to do these range checks without calling coerceVector() (which allocates) - #define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \ + #define CHECK_RANGE(STYPE, RFUN, COND, FMTVAL, FMT) {{ \ const STYPE *sd = (const STYPE *)RFUN(source); \ for (int i=0; i255, "d", "taken as 0", val) + case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, val, _("%d (type '%s' at RHS position %d taken as 0 when assigning to type '%s' %s")) case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0", val) - else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0", val) + CHECK_RANGE(int64_t, REAL, val<0 || val>255, val, _("%"PRId64" (type '%s') at RHS position %d taken as 0 when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, val, _("%f (type '%s') at RHS position %d either truncated (precision lost) or taken as 0 when assigning to type '%s' %s")) } break; case INTSXP: switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) - CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), val, _("%"PRId64" (type '%s') at RHS position %d out-of-range (NA) when assigning to type '%s' %s")) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) } break; case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s")) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) - else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), "f", "imaginary part discarded", val.i) + (ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s")) + else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), val.r, _("%f (type '%s') at RHS position %d imaginary part discarded when assigning to type '%s' %s")) } } } From 623bee9ec292a3f178946be05836ffc0e11666ee Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 26 Sep 2024 06:10:27 -0700 Subject: [PATCH 2/2] Add notranslate for untranslateable fragments (#6539) --- src/assign.c | 3 ++- src/bmerge.c | 3 ++- src/dogroups.c | 12 ++++++++---- src/forder.c | 7 +++---- src/fread.c | 14 +++++++------- src/freadR.c | 5 +++-- src/fsort.c | 6 ++++-- src/fwrite.c | 11 ++++++----- src/gsumm.c | 3 ++- src/openmp-utils.c | 20 +++++++++++--------- src/reorder.c | 6 ++++-- src/utils.c | 2 +- 12 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/assign.c b/src/assign.c index 4fc05d992..dc6b9a6de 100644 --- a/src/assign.c +++ b/src/assign.c @@ -437,7 +437,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) values = PROTECT(eval(PROTECT(lang2(sym_as_posixct, values)), R_GlobalEnv)); protecti+=2; } bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below - if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false"); + if (verbose) + Rprintf("RHS_list_of_columns == %s\n", RHS_list_of_columns ? "true" : "false"); // # notranslate if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) { SEXP item = VECTOR_ELT(values,0); if (isNull(item) || length(item)==1 || length(item)==targetlen) { diff --git a/src/bmerge.c b/src/bmerge.c index d290d70c6..cb71f9c20 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -66,7 +66,8 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r if (xcols[col]>LENGTH(xdt) || xcols[col]<1) error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt)); int it = TYPEOF(VECTOR_ELT(idt, icols[col]-1)); int xt = TYPEOF(VECTOR_ELT(xdt, xcols[col]-1)); - if (iN && it!=xt) error(_("typeof x.%s (%s) != typeof i.%s (%s)"), CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); + if (iN && it!=xt) + error("typeof x.%s (%s) != typeof i.%s (%s)", CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); // # notranslate if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP) error(_("Type '%s' is not supported for joining/merging"), type2char(it)); } diff --git a/src/dogroups.c b/src/dogroups.c index d55439332..0717ce0c1 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -106,7 +106,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } setAttrib(BY, R_NamesSymbol, bynames); // Fix for #42 - BY doesn't retain names anymore R_LockBinding(sym_BY, env); - if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) error(_("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]"),length(bynames),length(groups),length(grpcols)); + if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) + error("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]", length(bynames), length(groups), length(grpcols)); // # notranslate // TO DO: check this check above. N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk @@ -135,7 +136,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // fetch names of .SD and prepare symbols. In case they are copied-on-write by user assigning to those variables // using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group. SEXP names = PROTECT(getAttrib(SDall, R_NamesSymbol)); nprotect++; - if (length(names) != length(SDall)) error(_("length(names)!=length(SD)")); + if (length(names) != length(SDall)) + error("length(names)!=length(SD)"); // # notranslate SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP)); for(int i=0; i=17&&i<=19)?"(*)":" ", tblock[i], nblock[i]); } - for (int i=0; i<=256; i++) { - if (stat[i]) Rprintf(_("stat[%03d]==%20"PRIu64"\n"), i, (uint64_t)stat[i]); - } + for (int i=0; i<=256; i++) if (stat[i]) + Rprintf("stat[%03d]==%20"PRIu64"\n", i, (uint64_t)stat[i]); // # notranslate } #endif return ans; diff --git a/src/fread.c b/src/fread.c index 746d38863..1169f645d 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1340,10 +1340,10 @@ int freadMain(freadMainArgs _args) { if (*NAstrings == NULL) { DTPRINT(_(" No NAstrings provided.\n")); } else { - DTPRINT(_(" NAstrings = [")); + DTPRINT(" NAstrings = ["); // # notranslate const char * const* s = NAstrings; while (*s++) DTPRINT(*s? "<<%s>>, " : "<<%s>>", s[-1]); - DTPRINT(_("]\n")); + DTPRINT("]\n"); // # notranslate if (any_number_like_NAstrings) DTPRINT(_(" One or more of the NAstrings looks like a number.\n")); else @@ -2085,14 +2085,14 @@ int freadMain(freadMainArgs _args) { // sd can be very close to 0.0 sometimes, so apply a +10% minimum // blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed. if (verbose) { - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate DTPRINT(_(" Sampled %"PRIu64" rows (handled \\n inside quoted fields) at %d jump points\n"), (uint64_t)sampleLines, nJumps); DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %"PRIu64"\n"), row1line, (uint64_t)bytesRead); DTPRINT(_(" Line length: mean=%.2f sd=%.2f min=%d max=%d\n"), meanLineLen, sd, minLen, maxLen); DTPRINT(_(" Estimated number of rows: %"PRIu64" / %.2f = %"PRIu64"\n"), (uint64_t)bytesRead, meanLineLen, (uint64_t)estnrow); DTPRINT(_(" Initial alloc = %"PRIu64" rows (%"PRIu64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"), (uint64_t)allocnrow, (uint64_t)estnrow, (int)(100.0*allocnrow/estnrow-100.0)); - DTPRINT(_(" =====\n")); + DTPRINT(" =====\n"); // # notranslate } else { if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRIu64") > allocnrow(%"PRIu64")", (uint64_t)sampleLines, (uint64_t)allocnrow); // # nocov } @@ -2266,8 +2266,8 @@ int freadMain(freadMainArgs _args) { if (verbose) DTPRINT(_("[11] Read the data\n")); read: // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps restartTeam = false; - if (verbose) DTPRINT(_(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n"), - jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); + if (verbose) + DTPRINT(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n", jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); // # notranslate ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRIu64") <= nrowLimit(%"PRIu64")", (uint64_t)allocnrow, (uint64_t)nrowLimit); #pragma omp parallel num_threads(nth) { @@ -2744,7 +2744,7 @@ int freadMain(freadMainArgs _args) { } if (verbose) { - DTPRINT(_("=============================\n")); + DTPRINT("=============================\n"); // # notranslate if (tTot<0.000001) tTot=0.000001; // to avoid nan% output in some trivially small tests where tot==0.000s DTPRINT(_("%8.3fs (%3.0f%%) Memory map %.3fGB file\n"), tMap-t0, 100.0*(tMap-t0)/tTot, 1.0*fileSize/(1024*1024*1024)); DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0*(tLayout-tMap)/tTot); diff --git a/src/freadR.c b/src/freadR.c index 8063ca707..05b8cd00e 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -155,7 +155,8 @@ SEXP freadR( args.skipEmptyLines = LOGICAL(skipEmptyLinesArg)[0]; args.fill = INTEGER(fillArg)[0]; args.showProgress = LOGICAL(showProgressArg)[0]; - if (INTEGER(nThreadArg)[0]<1) error(_("nThread(%d)<1"), INTEGER(nThreadArg)[0]); + if (INTEGER(nThreadArg)[0]<1) + error("nThread(%d)<1", INTEGER(nThreadArg)[0]); // # notranslate args.nth = (uint32_t)INTEGER(nThreadArg)[0]; args.verbose = verbose; args.warningsAreErrors = warningsAreErrors; @@ -716,7 +717,7 @@ void __halt(bool warn, const char *format, ...) { // if (warn) warning(_("%s"), msg); // this warning() call doesn't seem to honor warn=2 straight away in R 3.6, so now always call error() directly to be sure // we were going via warning() before to get the (converted from warning) prefix in the message (which we could mimic in future) - error(_("%s"), msg); // include "%s" because data in msg might include '%' + error("%s", msg); // # notranslate. include "%s" because data in msg might include '%' } void prepareThreadContext(ThreadLocalFreadParsingContext *ctx) {} diff --git a/src/fsort.c b/src/fsort.c index 5501cdbaa..a3bc38a2b 100644 --- a/src/fsort.c +++ b/src/fsort.c @@ -129,7 +129,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int nth = getDTthreads(xlength(x), true); int nBatch=nth*2; // at least nth; more to reduce last-man-home; but not too large to keep counts small in cache - if (verbose) Rprintf(_("nth=%d, nBatch=%d\n"),nth,nBatch); + if (verbose) + Rprintf("nth=%d, nBatch=%d\n", nth, nBatch); // # notranslate size_t batchSize = (xlength(x)-1)/nBatch + 1; if (batchSize < 1024) batchSize = 1024; // simple attempt to work reasonably for short vector. 1024*8 = 2 4kb pages @@ -185,7 +186,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int MSBNbits = maxBit > 15 ? 16 : maxBit+1; // how many bits make up the MSB int shift = maxBit + 1 - MSBNbits; // the right shift to leave the MSB bits remaining size_t MSBsize = 1LL< 65,536) - if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n"), maxBit, MSBNbits, shift, MSBsize); + if (verbose) + Rprintf("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n", maxBit, MSBNbits, shift, MSBsize); // # notranslate uint64_t *counts = (uint64_t *)R_alloc(nBatch*MSBsize, sizeof(uint64_t)); memset(counts, 0, nBatch*MSBsize*sizeof(uint64_t)); diff --git a/src/fwrite.c b/src/fwrite.c index 06255b052..757da1b0b 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -628,15 +628,16 @@ void fwriteMain(fwriteMainArgs args) if (verbose) { DTPRINT(_("Column writers: ")); + // # notranslate start if (args.ncol<=50) { - for (int j=0; j