From 4ce298cde921ab3bb8967e176ba5d5691a9de64f Mon Sep 17 00:00:00 2001
From: Michael Chirico <chiricom@google.com>
Date: Mon, 23 Sep 2024 12:04:12 -0400
Subject: [PATCH] Add class1/classes1 helpers for common operation class(x)[1]
 (#6525)

* Add class1/classes1 helpers for common operation class(x)[1]

* nolint
---
 .ci/linters/r/class1_linter.R | 10 ++++++++++
 R/data.table.R                | 10 +++++-----
 R/fcast.R                     |  4 ++--
 R/fmelt.R                     |  6 +++---
 R/groupingsets.R              |  7 +++----
 R/onLoad.R                    |  4 ++--
 R/print.data.table.R          |  4 ++--
 R/setops.R                    |  4 ++--
 R/test.data.table.R           | 12 ++++++------
 R/utils.R                     |  3 +++
 10 files changed, 38 insertions(+), 26 deletions(-)
 create mode 100644 .ci/linters/r/class1_linter.R

diff --git a/.ci/linters/r/class1_linter.R b/.ci/linters/r/class1_linter.R
new file mode 100644
index 000000000..276f94672
--- /dev/null
+++ b/.ci/linters/r/class1_linter.R
@@ -0,0 +1,10 @@
+class1_linter = lintr::make_linter_from_xpath(
+  "
+    //OP-LEFT-BRACKET[
+      preceding-sibling::expr/expr/SYMBOL_FUNCTION_CALL[text() = 'class']
+      and following-sibling::expr/NUM_CONST[text() = '1' or text() = '1L']
+    ]
+      /parent::expr
+  ",
+  "Use class1(x) to get class(x)[1L], or classes1(x) to do so for a full list/data.table"
+)
diff --git a/R/data.table.R b/R/data.table.R
index daf700a58..a4473cb94 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -180,12 +180,12 @@ replace_dot_alias = function(e) {
   naturaljoin = FALSE
   names_x = names(x)
   if (missing(i) && !missing(on)) {
-    tt = eval.parent(.massagei(substitute(on)))
-    if (!is.list(tt) || !length(names(tt))) {
-      warningf("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join (i.e. join on common names) is invoked. Ignoring on= which is '%s'.", class(tt)[1L])
+    on_tmp = eval.parent(.massagei(substitute(on)))
+    if (!is.list(on_tmp) || !length(names(on_tmp))) {
+      warningf("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join (i.e. join on common names) is invoked. Ignoring on= which is '%s'.", class1(on_tmp))
       on = NULL
     } else {
-      i = tt
+      i = on_tmp
       naturaljoin = TRUE
     }
   }
@@ -2798,7 +2798,7 @@ cbind.data.table = data.table
 
 rbindlist = function(l, use.names="check", fill=FALSE, idcol=NULL, ignore.attr=FALSE) {
   if (is.null(l)) return(null.data.table())
-  if (!is.list(l) || is.data.frame(l)) stopf("Input is %s but should be a plain list of items to be stacked", class(l)[1L])
+  if (!is.list(l) || is.data.frame(l)) stopf("Input is %s but should be a plain list of items to be stacked", class1(l))
   if (isFALSE(idcol)) { idcol = NULL }
   else if (!is.null(idcol)) {
     if (isTRUE(idcol)) idcol = ".id"
diff --git a/R/fcast.R b/R/fcast.R
index 67940f481..ac7b5f218 100644
--- a/R/fcast.R
+++ b/R/fcast.R
@@ -17,8 +17,8 @@ dcast <- function(
   else {
     data_name = deparse(substitute(data))
     ns = tryCatch(getNamespace("reshape2"), error=function(e)
-      stopf("The %1$s generic in data.table has been passed a %2$s, but data.table::%1$s currently only has a method for data.tables. Please confirm your input is a data.table, with setDT(%3$s) or as.data.table(%3$s). If you intend to use a method from reshape2, try installing that package first, but do note that reshape2 is superseded and is no longer actively developed.", "dcast", class(data)[1L], data_name))
-    warningf("The %1$s generic in data.table has been passed a %2$s and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. Please do this redirection yourself like reshape2::%1$s(%3$s). In the next version, this warning will become an error.", "dcast", class(data)[1L], data_name)
+      stopf("The %1$s generic in data.table has been passed a %2$s, but data.table::%1$s currently only has a method for data.tables. Please confirm your input is a data.table, with setDT(%3$s) or as.data.table(%3$s). If you intend to use a method from reshape2, try installing that package first, but do note that reshape2 is superseded and is no longer actively developed.", "dcast", class1(data), data_name))
+    warningf("The %1$s generic in data.table has been passed a %2$s and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. Please do this redirection yourself like reshape2::%1$s(%3$s). In the next version, this warning will become an error.", "dcast", class1(data), data_name)
     ns$dcast(data, formula, fun.aggregate = fun.aggregate, ..., margins = margins,
              subset = subset, fill = fill, value.var = value.var)
   }
diff --git a/R/fmelt.R b/R/fmelt.R
index f8cb42b3e..7c6b991a5 100644
--- a/R/fmelt.R
+++ b/R/fmelt.R
@@ -13,8 +13,8 @@ melt.default = function(data, ..., na.rm = FALSE, value.name = "value") {
   # nocov start
   data_name = deparse(substitute(data))
   ns = tryCatch(getNamespace("reshape2"), error=function(e)
-    stopf("The %1$s generic in data.table has been passed a %2$s, but data.table::%1$s currently only has a method for data.tables. Please confirm your input is a data.table, with setDT(%3$s) or as.data.table(%3$s). If you intend to use a method from reshape2, try installing that package first, but do note that reshape2 is superseded and is no longer actively developed.", "melt", class(data)[1L], data_name))
-  warningf("The %1$s generic in data.table has been passed a %2$s and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::%1$s(%3$s). In the next version, this warning will become an error.", "melt", class(data)[1L], data_name)
+    stopf("The %1$s generic in data.table has been passed a %2$s, but data.table::%1$s currently only has a method for data.tables. Please confirm your input is a data.table, with setDT(%3$s) or as.data.table(%3$s). If you intend to use a method from reshape2, try installing that package first, but do note that reshape2 is superseded and is no longer actively developed.", "melt", class1(data), data_name))
+  warningf("The %1$s generic in data.table has been passed a %2$s and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::%1$s(%3$s). In the next version, this warning will become an error.", "melt", class1(data), data_name)
   ns$melt(data, ..., na.rm=na.rm, value.name=value.name)
   # nocov end
 }
@@ -165,7 +165,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
   # 4. compute measure.vars list or vector.
   if (multiple.keyword %in% names(fun.list)) {# multiple output columns.
     if (!is.character(group.dt[[multiple.keyword]])) {
-      stopf("%s column class=%s after applying conversion function, but must be character", multiple.keyword, class(group.dt[[multiple.keyword]])[1L])
+      stopf("%s column class=%s after applying conversion function, but must be character", multiple.keyword, class1(group.dt[[multiple.keyword]]))
     }
     is.other = names(group.dt) != multiple.keyword
     if (!any(is.other)) {
diff --git a/R/groupingsets.R b/R/groupingsets.R
index 2997e34b5..e3972140b 100644
--- a/R/groupingsets.R
+++ b/R/groupingsets.R
@@ -76,8 +76,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
     warningf("'sets' contains a duplicate (i.e., equivalent up to sorting) element at index %d; as such, there will be duplicate rows in the output -- note that grouping by A,B and B,A will produce the same aggregations. Use `sets=unique(lapply(sets, sort))` to eliminate duplicates.", idx)
   if (is.list(label)) {
     other.allowed.names = c("character", "integer", "numeric", "factor", "Date", "IDate")
-    allowed.label.list.names = c(by, vapply_1c(.shallow(x, by), function(u) class(u)[1]),
-                                 other.allowed.names)
+    allowed.label.list.names = c(by, classes1(.shallow(x, by)), other.allowed.names)
     label.names = names(label)
     if (!all(label.names %in% allowed.label.list.names))
       stopf("When argument 'label' is a list, all element names must be (1) in 'by', or (2) the first element of the class in the data.table 'x' of a variable in 'by', or (3) one of %s. Element names not satisfying this condition: %s",
@@ -128,13 +127,13 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, labe
     total.vars = intersect(by, unlist(lapply(sets, function(u) setdiff(by, u))))
     if (is.list(label)) {
       by.vars.not.in.label = setdiff(by, names(label))
-      by.vars.not.in.label.class1 = vapply_1c(x, function(u) class(u)[1L])[by.vars.not.in.label]
+      by.vars.not.in.label.class1 = classes1(x, use.names=TRUE)[by.vars.not.in.label]
       labels.by.vars.not.in.label = label[by.vars.not.in.label.class1[by.vars.not.in.label.class1 %in% label.names.not.in.by]]
       names(labels.by.vars.not.in.label) <- by.vars.not.in.label[by.vars.not.in.label.class1 %in% label.names.not.in.by]
       label.expanded = c(label[label.names.in.by], labels.by.vars.not.in.label)
       label.expanded = label.expanded[intersect(by, names(label.expanded))] # reorder
     } else {
-      by.vars.matching.scalar.class1 = by[vapply_1c(x, function(u) class(u)[1L])[by] == class(label)[1L]]
+      by.vars.matching.scalar.class1 = by[classes1(x, use.names=TRUE)[by] == class1(label)]
       label.expanded = as.list(rep(label, length(by.vars.matching.scalar.class1)))
       names(label.expanded) <- by.vars.matching.scalar.class1
     }
diff --git a/R/onLoad.R b/R/onLoad.R
index 01b159f77..396d66e05 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -43,7 +43,7 @@
     # If R 3.6.2 (not yet released) includes the c|rbind S3 dispatch fix, then this workaround still works.
     tt = base::cbind.data.frame
     ss = body(tt)
-    if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss))
+    if (class1(ss) != "{") ss = as.call(c(as.name("{"), ss))
     prefix = if (!missing(pkgname)) "data.table::" else ""  # R provides the arguments when it calls .onLoad, I don't in dev/test
     if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) {
       ss = ss[c(1L, NA, 2L:length(ss))]
@@ -55,7 +55,7 @@
     }
     tt = base::rbind.data.frame
     ss = body(tt)
-    if (class(ss)[1L]!="{") ss = as.call(c(as.name("{"), ss))
+    if (class1(ss) != "{") ss = as.call(c(as.name("{"), ss))
     if (!length(grep("data.table", ss[[2L]], fixed = TRUE))) {
       ss = ss[c(1L, NA, 2L:length(ss))]
       ss[[2L]] = parse(text=paste0("for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,".rbind.data.table(...)) }"))[[1L]] # fix for #89
diff --git a/R/print.data.table.R b/R/print.data.table.R
index 27bcb3e90..0bebfce9a 100644
--- a/R/print.data.table.R
+++ b/R/print.data.table.R
@@ -103,7 +103,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
       factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
       IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
       expression = "<expr>", ordered = "<ord>")
-    classes = vapply_1c(x, function(col) class(col)[1L], use.names=FALSE)
+    classes = classes1(x)
     abbs = unname(class_abb[classes])
     if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">")
     toprint = rbind(abbs, toprint)
@@ -233,7 +233,7 @@ format_list_item.default = function(x, ...) {
     # format_list_item would not be reached) but this particular list item does have a format method so use it
     formatted
   } else {
-    paste0("<", class(x)[1L], paste_dims(x), ">")
+    paste0("<", class1(x), paste_dims(x), ">")
   }
 }
 
diff --git a/R/setops.R b/R/setops.R
index f91e0aaac..c431c944d 100644
--- a/R/setops.R
+++ b/R/setops.R
@@ -47,12 +47,12 @@ funique = function(x) {
           brackify(bad_types[found]), domain=NA)
   super = function(x) {
     # allow character->factor and integer->numeric because from v1.12.4 i's type is retained by joins, #3820
-    ans = class(x)[1L]
+    ans = class1(x)
     switch(ans, factor="character", integer="numeric", ans)
   }
   if (!identical(sx<-sapply(x, super), sy<-sapply(y, super))) {
     w = which.first(sx!=sy)
-    stopf("Item %d of x is '%s' but the corresponding item of y is '%s'.", w, class(x[[w]])[1L], class(y[[w]])[1L])
+    stopf("Item %d of x is '%s' but the corresponding item of y is '%s'.", w, class1(x[[w]]), class1(y[[w]]))
   }
   if (.seqn && ".seqn" %chin% names(x)) stopf("None of the datasets should contain a column named '.seqn'")
 }
diff --git a/R/test.data.table.R b/R/test.data.table.R
index ff88888f5..6d2a78afa 100644
--- a/R/test.data.table.R
+++ b/R/test.data.table.R
@@ -290,13 +290,13 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
 
 # nocov start
 compactprint = function(DT, topn=2L) {
-  tt = vapply_1c(DT,function(x)class(x)[1L])
-  tt[tt=="integer64"] = "i64"
-  tt = substr(tt, 1L, 3L)
+  classes = classes1(DT)
+  classes[classes == "integer64"] = "i64"
+  classes = substr(classes, 1L, 3L)
   makeString = function(x) paste(x, collapse = ",")  # essentially toString.default
-  cn = paste0(" [Key=",makeString(key(DT)),
-             " Types=", makeString(substr(sapply(DT, typeof), 1L, 3L)),
-             " Classes=", makeString(tt), "]")
+  cn = paste0(" [Key=", makeString(key(DT)),
+             " Types=", makeString(substr(vapply_1c(DT, typeof), 1L, 3L)),
+             " Classes=", makeString(classes), "]")
   if (nrow(DT)) {
     print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE)
   } else {
diff --git a/R/utils.R b/R/utils.R
index 32178ba23..2f91491be 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -104,6 +104,9 @@ vapply_1i = function(x, fun, ..., use.names = TRUE) {
   vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names)
 }
 
+class1 = function(x) class(x)[1L] # nolint: class1_linter.
+classes1 = function(x, ..., use.names=FALSE) vapply_1c(x, class1, ..., use.names=use.names)
+
 # base::xor(), but with scalar operators
 XOR = function(x, y) (x || y) && !(x && y)