Skip to content

Commit

Permalink
Message fixes for the C code (#6504)
Browse files Browse the repository at this point in the history
* Expand the fragmented sentence

Otherwise it's very hard to translate without pgettext() in a language
where "there is no <x>" is translated as "<x> is absent"

* Combine the fragmented sentence

This will avoid potential word order issues.

* Correct internal error message

.Last.value is a variable in the global environment. .Last.updated belongs to the data.table package.

* data.table-help -> data.table issue tracker

* Avoid templating on the known value of a variable

* When '9'+1 != ':', say it's ASCII-incompatible

* Don't check for length(.) < 0

* Use R capitalization for logical constants

* newline->error for codecov

* Use internal_error() instead of manual error handling

Co-authored-by: Michael Chirico <chiricom@google.com>

* internal_error needs __func__

* Branch a previously templated translation

* Combine a fragmented sentence

* Recombine "gather took %.3fs\n"

Co-authored-by: rikivillalba <32423469+rikivillalba@users.noreply.github.com>

* Split off the shrinkMSB helper

Hopefully the compiler will be smart enough to inline it back.

Co-authored-by: Michael Chirico <chiricom@google.com>

* Branch a fragmented verbose sentence in fread

* Don't say we reduced MSBsize if it didn't change

Forgot to take #6504 (comment) into account.

* declaration style for pointer spacing

* line bump for codecov + readability

---------

Co-authored-by: Michael Chirico <chiricom@google.com>
Co-authored-by: rikivillalba <32423469+rikivillalba@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 20, 2024
1 parent 1c771e5 commit 0e257a8
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 21 deletions.
6 changes: 4 additions & 2 deletions src/fmelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ SEXP whichwrapper(SEXP x, SEXP val) {

static const char *concat(SEXP vec, SEXP idx) {
if (!isString(vec)) error(_("concat: 'vec' must be a character vector"));
if (!isInteger(idx) || length(idx) < 0) error(_("concat: 'idx' must be an integer vector of length >= 0"));
if (!isInteger(idx))
error(_("concat: 'idx' must be an integer vector"));

static char ans[1024]; // so only one call to concat() per calling warning/error
int nidx=length(idx), nvec=length(vec);
Expand Down Expand Up @@ -802,7 +803,8 @@ SEXP fmelt(SEXP DT, SEXP id, SEXP measure, SEXP varfactor, SEXP valfactor, SEXP
}
int protecti=0;
dtnames = PROTECT(getAttrib(DT, R_NamesSymbol)); protecti++;
if (isNull(dtnames)) error(_("names(data) is NULL. Please report to data.table-help"));
if (isNull(dtnames))
internal_error(__func__, "names(data) is NULL");
if (LOGICAL(narmArg)[0] == TRUE) narm = TRUE;
if (LOGICAL(verboseArg)[0] == TRUE) verbose = TRUE;
struct processData data;
Expand Down
26 changes: 17 additions & 9 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1888,11 +1888,14 @@ int freadMain(freadMainArgs _args) {
// *2 to get a good spacing. We don't want overlaps resulting in double counting.
}
if (verbose) {
DTPRINT(_(" Number of sampling jump points = %d because "), nJumps);
if (nrowLimit<INT64_MAX) DTPRINT(_("nrow limit (%"PRIu64") supplied\n"), (uint64_t)nrowLimit);
else if (jump0size==0) DTPRINT(_("jump0size==0\n"));
else DTPRINT(_("(%"PRIu64" bytes from row 1 to eof) / (2 * %"PRIu64" jump0size) == %"PRIu64"\n"),
(uint64_t)sz, (uint64_t)jump0size, (uint64_t)(sz/(2*jump0size)));
if (nrowLimit<INT64_MAX) {
DTPRINT(_(" Number of sampling jump points = %d because nrow limit (%"PRIu64") supplied\n"), nJumps, (uint64_t)nrowLimit);
} else if (jump0size==0) {
DTPRINT(_(" Number of sampling jump points = %d because jump0size==0\n"), nJumps);
} else {
DTPRINT(_(" Number of sampling jump points = %d because (%"PRIu64" bytes from row 1 to eof) / (2 * %"PRIu64" jump0size) == %"PRIu64"\n"),
nJumps, (uint64_t)sz, (uint64_t)jump0size, (uint64_t)(sz/(2*jump0size)));
}
}
nJumps++; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
if (nrowLimit<INT64_MAX && nrowLimit>0) nJumps=1; // when nrows>0 supplied by user, no jumps (not even at the end) and single threaded
Expand Down Expand Up @@ -1930,8 +1933,10 @@ int freadMain(freadMainArgs _args) {
}
if ( (thisNcol<ncol && ncol>1 && !fill) ||
(!eol(&ch) && ch!=eof) ) {
if (verbose) DTPRINT(_(" A line with too-%s fields (%d/%d) was found on line %d of sample jump %d. %s\n"),
thisNcol<ncol ? _("few") : _("many"), thisNcol, ncol, jumpLine, jump, jump>0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
if (verbose)
DTPRINT(thisNcol<ncol ? _(" A line with too-few fields (%d/%d) was found on line %d of sample jump %d. %s\n")
: _(" A line with too-many fields (%d/%d) was found on line %d of sample jump %d. %s\n"),
thisNcol, ncol, jumpLine, jump, jump>0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
bumped = false;
if (jump==0) lastRowEnd=eof; // to prevent the end from being tested; e.g. a short file with blank line within first 100 like test 976
break;
Expand Down Expand Up @@ -2035,7 +2040,10 @@ int freadMain(freadMainArgs _args) {
}
if (verbose) {
if (sampleLines==0) {
DTPRINT(_(" 'header' determined to be %s because there are%s number fields in the first and only row\n"), args.header?"TRUE":"FALSE", args.header?_(" no"):"");
if (args.header)
DTPRINT(_(" 'header' determined to be TRUE because there are no number fields in the first and only row\n"));
else
DTPRINT(_(" 'header' determined to be FALSE because there are number fields in the first and only row\n"));
} else {
if (args.header)
DTPRINT(_(" 'header' determined to be true because all columns are type string and a better guess is not possible\n"));
Expand Down Expand Up @@ -2621,7 +2629,7 @@ int freadMain(freadMainArgs _args) {

if (stopTeam) {
if (internalErr[0]!='\0') {
STOP("%s %s: %s. %s", _("Internal error in"), __func__, internalErr, _("Please report to the data.table issues tracker")); // # nocov
STOP(_("Internal error in %s: %s. Please report to the data.table issues tracker"), __func__, internalErr); // # nocov
}
stopTeam = false;

Expand Down
16 changes: 11 additions & 5 deletions src/fsort.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ int qsort_cmp(const void *a, const void *b) {
return (x<y)-(x>y); // largest first in a safe branchless way casting long to int
}

static size_t shrinkMSB(size_t MSBsize, uint64_t *msbCounts, int *order, Rboolean verbose) {
size_t oldMSBsize = MSBsize;
while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2)
MSBsize--;
if (verbose && oldMSBsize != MSBsize) {
Rprintf(_("Reduced MSBsize from %zu to %zu by excluding 0 and 1 counts\n"), oldMSBsize, MSBsize);
}
return MSBsize;
}

/*
OpenMP is used here to find the range and distribution of data for efficient
grouping and sorting.
Expand Down Expand Up @@ -252,12 +262,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) {

if (verbose) {
Rprintf(_("Top 20 MSB counts: ")); for(int i=0; i<MIN(MSBsize,20); i++) Rprintf(_("%"PRId64" "), (int64_t)msbCounts[order[i]]); Rprintf(_("\n"));
Rprintf(_("Reduced MSBsize from %zu to "), MSBsize);
}
while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;
if (verbose) {
Rprintf(_("%zu by excluding 0 and 1 counts\n"), MSBsize);
}
MSBsize = shrinkMSB(MSBsize, msbCounts, order, verbose);

bool failed=false, alloc_fail=false, non_monotonic=false; // shared bools only ever assigned true; no need for atomic or critical assign
t[6] = wallclock();
Expand Down
3 changes: 1 addition & 2 deletions src/gsumm.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ void *gather(SEXP x, bool *anyNA)
{
double started=wallclock();
const bool verbose = GetVerbose();
if (verbose) Rprintf(_("gather took ... "));
switch (TYPEOF(x)) {
case LGLSXP: case INTSXP: {
const int *restrict thisx = INTEGER(x);
Expand Down Expand Up @@ -341,7 +340,7 @@ void *gather(SEXP x, bool *anyNA)
default :
error(_("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'"), type2char(TYPEOF(x))); // # nocov
}
if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
if (verbose) { Rprintf(_("gather took %.3fs\n"), wallclock()-started); }
return gx;
}

Expand Down
6 changes: 3 additions & 3 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ void attribute_visible R_init_data_table(DllInfo *info)
if (ld != 0.0) error(_("Checking memset(&ld, 0, sizeof(long double)); ld == (long double)0.0 %s"), msg);

// Check unsigned cast used in fread.c. This isn't overflow/underflow, just cast.
if ((uint_fast8_t)('0'-'/') != 1) error(_("The ascii character '/' is not just before '0'"));
if ((uint_fast8_t)('0'-'/') != 1) error(_("Unlike the very common case, e.g. ASCII, the character '/' is not just before '0'."));
if ((uint_fast8_t)('/'-'0') < 10) error(_("The C expression (uint_fast8_t)('/'-'0')<10 is true. Should be false."));
if ((uint_fast8_t)(':'-'9') != 1) error(_("The ascii character ':' is not just after '9'"));
if ((uint_fast8_t)(':'-'9') != 1) error(_("Unlike the very common case, e.g. ASCII, the character ':' is not just after '9'."));
if ((uint_fast8_t)('9'-':') < 10) error(_("The C expression (uint_fast8_t)('9'-':')<10 is true. Should be false."));

// Variables rather than #define for NA_INT64 to ensure correct usage; i.e. not casted
Expand Down Expand Up @@ -364,7 +364,7 @@ SEXP beforeR340(void) {
extern int *_Last_updated; // assign.c

SEXP initLastUpdated(SEXP var) {
if (!isInteger(var) || LENGTH(var)!=1) error(_(".Last.value in namespace is not a length 1 integer"));
if (!isInteger(var) || LENGTH(var)!=1) error(_(".Last.updated in namespace is not a length 1 integer"));
_Last_updated = INTEGER(var);
return R_NilValue;
}
Expand Down

0 comments on commit 0e257a8

Please sign in to comment.