Skip to content

Commit

Permalink
Refactor colClasses handling for readability (#6106)
Browse files Browse the repository at this point in the history
Towards #6105. This PR is a simple refactor to reduce code nesting / improve readability. It makes the actual implementation of #6105, #6107, much simpler.
  • Loading branch information
MichaelChirico authored Jun 1, 2024
1 parent 35f2979 commit 6a4bd54
Showing 1 changed file with 28 additions and 24 deletions.
52 changes: 28 additions & 24 deletions src/freadR.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
else type[i]=CT_DROP;
}
}
colClassesAs = NULL;
colClassesAs = NULL; // any coercions we can't handle here in C are deferred to R (to handle with methods::as) via this attribute
if (length(colClassesSxp)) {
SEXP typeRName_sxp = PROTECT(allocVector(STRSXP, NUT));
for (int i=0; i<NUT; i++) SET_STRING_ELT(typeRName_sxp, i, mkChar(typeRName[i]));
Expand All @@ -316,7 +316,7 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
SET_STRING_ELT(typeRName_sxp, CT_ISO8601_DATE, R_BlankString);
SET_STRING_ELT(typeRName_sxp, CT_ISO8601_TIME, R_BlankString);
}
SET_VECTOR_ELT(RCHK, 2, colClassesAs=allocVector(STRSXP, ncol)); // if any, this attached to the DT for R level to call as_ methods on
SET_VECTOR_ELT(RCHK, 2, colClassesAs=allocVector(STRSXP, ncol));
if (isString(colClassesSxp)) {
SEXP typeEnum_idx = PROTECT(chmatch(colClassesSxp, typeRName_sxp, NUT));
if (selectColClasses==false) {
Expand Down Expand Up @@ -375,45 +375,49 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
}

for (int i=0; i<LENGTH(colClassesSxp); i++) {
const int w = INTEGER(typeEnum_idx)[i];
signed char thisType = typeEnum[w-1];
if (thisType==CT_DROP) continue; // was dealt with earlier above
const int colClassTypeIdx = INTEGER(typeEnum_idx)[i];
signed char colClassType = typeEnum[colClassTypeIdx-1];
if (colClassType == CT_DROP) continue; // was dealt with earlier above
SEXP items = VECTOR_ELT(colClassesSxp,i);
SEXP itemsInt;
if (isString(items)) itemsInt = PROTECT(chmatch(items, colNamesSxp, NA_INTEGER));
else itemsInt = PROTECT(coerceVector(items, INTSXP));
// UNPROTECTed directly just after this for loop. No nprotect++ here is correct.
for (int j=0; j<LENGTH(items); j++) {
int k = INTEGER(itemsInt)[j];
if (k==NA_INTEGER) {
int colIdx = INTEGER(itemsInt)[j]; // NB: 1-based
if (colIdx == NA_INTEGER) {
if (isString(items))
DTWARN(_("Column name '%s' (colClasses[[%d]][%d]) not found"), CHAR(STRING_ELT(items, j)), i+1, j+1);
else
DTWARN(_("colClasses[[%d]][%d] is NA"), i+1, j+1);
continue;
}
if (colIdx<1 || colIdx>ncol) {
DTWARN(_("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]"), colIdx, i+1, j+1, ncol);
continue;
}
if (type[colIdx-1]<0) {
DTWARN(_("Column %d ('%s') appears more than once in colClasses. The second time is colClasses[[%d]][%d]."), colIdx, CHAR(STRING_ELT(colNamesSxp,colIdx-1)), i+1, j+1);
continue;
}
if (type[colIdx-1] == CT_DROP) {
continue;
}
if (selectRankD) selectRankD[colIdx-1] = rank++;
// NB: mark as negative to indicate 'seen'
if (colClassType == CT_ISO8601_TIME && type[colIdx-1]!=CT_ISO8601_TIME) {
type[colIdx-1] = -CT_STRING; // don't use in-built UTC parser, defer to character and as.POSIXct afterwards which reads in local time
SET_STRING_ELT(colClassesAs, colIdx-1, STRING_ELT(listNames, i));
} else {
if (k>=1 && k<=ncol) {
if (type[k-1]<0)
DTWARN(_("Column %d ('%s') appears more than once in colClasses. The second time is colClasses[[%d]][%d]."), k, CHAR(STRING_ELT(colNamesSxp,k-1)), i+1, j+1);
else if (type[k-1]!=CT_DROP) {
if (thisType==CT_ISO8601_TIME && type[k-1]!=CT_ISO8601_TIME) {
type[k-1] = -CT_STRING; // don't use in-built UTC parser, defer to character and as.POSIXct afterwards which reads in local time
SET_STRING_ELT(colClassesAs, k-1, STRING_ELT(listNames,i));
} else {
type[k-1] = -thisType; // freadMain checks bump up only not down. Deliberately don't catch here to test freadMain; e.g. test 959
if (w==NUT) SET_STRING_ELT(colClassesAs, k-1, STRING_ELT(listNames,i));
}
if (selectRankD) selectRankD[k-1] = rank++;
}
} else {
DTWARN(_("Column number %d (colClasses[[%d]][%d]) is out of range [1,ncol=%d]"), k, i+1, j+1, ncol);
}
type[colIdx-1] = -colClassType; // freadMain checks bump up only not down. Deliberately don't catch here to test freadMain; e.g. test 959
if (colClassTypeIdx == NUT) SET_STRING_ELT(colClassesAs, colIdx-1, STRING_ELT(listNames, i)); // unknown type --> defer to R
}
}
UNPROTECT(1); // UNPROTECTing itemsInt inside loop to save protection stack
}
for (int i=0; i<ncol; i++) {
if (type[i]<0) type[i] *= -1; // undo sign; was used to detect duplicates
else if (selectColClasses) type[i] = CT_DROP; // reading will proceed in order of columns in file; reorder happens afterwards at R level
else if (selectColClasses) type[i] = CT_DROP; // not seen --> drop; reading will proceed in order of columns in file; reorder happens afterwards at R level
}
UNPROTECT(2); // listNames and typeEnum_idx
}
Expand Down

0 comments on commit 6a4bd54

Please sign in to comment.