Activate + apply some linters (#6124)

* Activate + apply some linters * Updated paste_linter in dev version of lintr * trailing whitespace missed last time * Different error message from rep_len() * one more * Also knock out '<<-' usage
Rdatatable · May 6, 2024 · 014c82f · 014c82f
1 parent ca815ba
commit 014c82f
Show file tree

Hide file tree

Showing 10 changed files with 25 additions and 28 deletions.
diff --git a/.ci/.lintr.R b/.ci/.lintr.R
@@ -25,10 +25,7 @@ linters = c(dt_linters, all_linters(
   #   par = NULL,
   #   setwd = NULL
   # )),
-  undesirable_operator_linter(modify_defaults(
-    default_undesirable_operators,
-    `<<-` = NULL
-  )),
+  undesirable_operator_linter(),
   # TODO(lintr#2441): Use upstream implementation.
   assignment_linter = NULL,
   absolute_path_linter = NULL, # too many false positives
@@ -57,7 +54,7 @@ linters = c(dt_linters, all_linters(
   strings_as_factors_linter = NULL,
   # TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate.
   todo_comment_linter = NULL,
-  # TODO(michaelchirico): Enforce these and re-activate them one-by-one. Also stop using '<<-'.
+  # TODO(michaelchirico): Enforce these and re-activate them one-by-one.
   brace_linter = NULL,
   condition_call_linter = NULL,
   conjunct_test_linter = NULL,
@@ -74,12 +71,9 @@ linters = c(dt_linters, all_linters(
   object_overwrite_linter = NULL,
   paren_body_linter = NULL,
   redundant_equals_linter = NULL,
-  rep_len_linter = NULL,
-  repeat_linter = NULL,
   return_linter = NULL,
   sample_int_linter = NULL,
   scalar_in_linter = NULL,
-  seq_linter = NULL,
   undesirable_function_linter = NULL,
   unnecessary_concatenation_linter = NULL,
   unnecessary_lambda_linter = NULL,
@@ -116,7 +110,9 @@ exclusions = c(local({
       comparison_negation_linter = Inf,
       duplicate_argument_linter = Inf,
       equals_na_linter = Inf,
-      paste_linter = Inf
+      paste_linter = Inf,
+      rep_len_linter = Inf,
+      seq_linter = Inf
     ))
   )
 }),

diff --git a/R/as.data.table.R b/R/as.data.table.R
@@ -169,7 +169,7 @@ as.data.table.list = function(x,
       #       not worse than before, and gets us in a better centralized place to port as.data.table.list to C and use MAYBE_REFERENCED
       #       again in future, for #617.
     }
-    if (identical(x,list())) vector("list", nrow) else rep(x, length.out=nrow)   # new objects don't need copy
+    if (identical(x, list())) vector("list", nrow) else rep_len(x, nrow)   # new objects don't need copy
   }
   vnames = character(ncol)
   k = 1L

diff --git a/R/cedta.R b/R/cedta.R
@@ -31,7 +31,7 @@ cedta.pkgEvalsUserCode = c("gWidgetsWWW","statET","FastRWeb","slidify","rmarkdow
 .any_eval_calls_in_stack <- function() {
   calls = sys.calls()
   # likelier to be close to the end of the call stack, right?
-  for (ii in length(calls):1) { # rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406.
+  for (ii in length(calls):1) { # nolint: seq_linter. rev(seq_len(length(calls)))? See https://bugs.r-project.org/show_bug.cgi?id=18406.
     the_call <- calls[[ii]][[1L]]
     if (is.name(the_call) && (the_call %chin% c("eval", "evalq"))) return(TRUE)
   }

diff --git a/R/data.table.R b/R/data.table.R
@@ -825,7 +825,7 @@ replace_dot_alias = function(e) {
           if (!bysameorder && keyby && !length(irows) && isTRUE(getOption("datatable.use.index"))) {
             # TODO: could be allowed if length(irows)>1 but then the index would need to be squashed for use by uniqlist, #3062
             # find if allbyvars is leading subset of any of the indices; add a trailing "__" to fix #3498 where a longer column name starts with a shorter column name
-            tt = paste0(c(allbyvars,""), collapse="__")
+            tt = paste(c(allbyvars,""), collapse="__")
             w = which.first(startsWith(paste0(indices(x), "__"), tt))
             if (!is.na(w)) {
               byindex = indices(x)[w]
@@ -960,6 +960,7 @@ replace_dot_alias = function(e) {
               else if (any(idx <- nm != jvnames))
                 warningf('Different branches of j expression produced different auto-named columns: %s; using the most "last" names. If this was intentional (e.g., you know only one branch will ever be used in a given query because the branch is controlled by a function argument), please (1) pull this branch out of the call; (2) explicitly provide missing defaults for each branch in all cases; or (3) use the same name for each branch and re-name it in a follow-up call.',  brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])))
             }
+            # nolint next: undesirable_operator_linter. Workaround is clunkier, though a bigger refactor could be considered.
             jvnames <<- nm # TODO: handle if() list(a, b) else list(b, a) better
             setattr(q, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group; reinstated at the end on the result.
           }
@@ -3216,18 +3217,18 @@ is_constantish = function(q, check_singleton=FALSE) {
       }
     }
     if (!is.null(idx)){
-      if (verbose) {catf("Optimized subsetting with index '%s'\n", paste0( idxCols, collapse = "__"));flush.console()}
+      if (verbose) {catf("Optimized subsetting with index '%s'\n", paste(idxCols, collapse = "__"));flush.console()}
     }
   }
   if (is.null(idx)){
     ## if nothing else helped, auto create a new index that can be used
     if (!getOption("datatable.auto.index")) return(NULL)
-    if (verbose) {catf("Creating new index '%s'\n", paste0(names(i), collapse = "__"));flush.console()}
-    if (verbose) {last.started.at=proc.time();catf("Creating index %s done in ...", paste0(names(i), collapse = "__"));flush.console()}
+    if (verbose) {catf("Creating new index '%s'\n", paste(names(i), collapse = "__"));flush.console()}
+    if (verbose) {last.started.at=proc.time();catf("Creating index %s done in ...", paste(names(i), collapse = "__"));flush.console()}
     setindexv(x, names(i))
     if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()}
-    if (verbose) {catf("Optimized subsetting with index '%s'\n", paste0(names(i), collapse = "__"));flush.console()}
-    idx = attr(attr(x, "index", exact=TRUE), paste0("__", names(i), collapse = ""), exact=TRUE)
+    if (verbose) {catf("Optimized subsetting with index '%s'\n", paste(names(i), collapse = "__"));flush.console()}
+    idx = attr(attr(x, "index", exact=TRUE), paste("__", names(i), collapse = ""), exact=TRUE)
     idxCols = names(i)
   }
   if(!is.null(idxCols)){

diff --git a/R/fcast.R b/R/fcast.R
@@ -130,7 +130,7 @@ aggregate_funs = function(funs, vals, sep="_", ...) {
 
 dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., margins = NULL, subset = NULL, fill = NULL, drop = TRUE, value.var = guess(data), verbose = getOption("datatable.verbose"), value.var.in.dots = FALSE, value.var.in.LHSdots = value.var.in.dots, value.var.in.RHSdots = value.var.in.dots) {
   if (!is.data.table(data)) stopf("'data' must be a data.table.")
-  drop = as.logical(rep(drop, length.out=2L))
+  drop = as.logical(rep_len(drop, 2L))
   if (anyNA(drop)) stopf("'drop' must be logical TRUE/FALSE")
   if (!isTRUEorFALSE(value.var.in.dots))
     stopf("Argument 'value.var.in.dots' should be logical TRUE/FALSE")

diff --git a/R/fread.R b/R/fread.R
@@ -187,7 +187,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
 
     yaml_comment_re = '^#'
     yaml_string = character(0L)
-    while (TRUE) {
+    repeat {
       this_line = readLines(f, n=1L)
       n_read = n_read + 1L
       if (!length(this_line)){

diff --git a/R/setkey.R b/R/setkey.R
@@ -77,7 +77,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   }
   if (!is.character(cols) || length(cols)<1L) stopf("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
 
-  newkey = paste0(cols, collapse="__")
+  newkey = paste(cols, collapse="__")
   if (!any(indices(x) == newkey)) {
     if (verbose) {
       tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)))  # system.time does a gc, so we don't want this always on, until refcnt is on by default in R

diff --git a/R/test.data.table.R b/R/test.data.table.R
@@ -234,7 +234,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     ", Sys.timezone()=='", suppressWarnings(Sys.timezone()), "'",
     ", Sys.getlocale()=='", Sys.getlocale(), "'",
     ", l10n_info()=='", paste0(names(l10n_info()), "=", l10n_info(), collapse="; "), "'",
-    ", getDTthreads()=='", paste0(gsub("[ ][ ]+","==",gsub("^[ ]+","",capture.output(invisible(getDTthreads(verbose=TRUE))))), collapse="; "), "'",
+    ", getDTthreads()=='", paste(gsub("[ ][ ]+","==",gsub("^[ ]+","",capture.output(invisible(getDTthreads(verbose=TRUE))))), collapse="; "), "'",
     ", ", .Call(Cdt_zlib_version),
     "\n", sep="")
 
@@ -403,18 +403,18 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
   xsub = substitute(x)
   ysub = substitute(y)
 
-  actual = list("warning"=NULL, "error"=NULL, "message"=NULL)
+  actual = list2env(list(warning=NULL, error=NULL, message=NULL))
   wHandler = function(w) {
     # Thanks to: https://stackoverflow.com/a/4947528/403310
-    actual$warning <<- c(actual$warning, conditionMessage(w))
+    actual$warning <- c(actual$warning, conditionMessage(w))
     invokeRestart("muffleWarning")
   }
   eHandler = function(e) {
-    actual$error <<- conditionMessage(e)
+    actual$error <- conditionMessage(e)
     e
   }
   mHandler = function(m) {
-    actual$message <<- c(actual$message, conditionMessage(m))
+    actual$message <- c(actual$message, conditionMessage(m))
     m
   }
   if (is.null(output) && is.null(notOutput)) {

diff --git a/inst/tests/S4.Rraw b/inst/tests/S4.Rraw
@@ -61,7 +61,7 @@ removeClass("S4Composition")
 # miscellaneous missing tests uncovered by CodeCov difference in the process of PR #2573 [S4 portion, c.f. 1872.* in tests.Rraw]
 ## data.table cannot recycle complicated types
 short_s4_col = getClass("MethodDefinition")
-test(2, data.table(a = 1:4, short_s4_col), error="attempt to replicate an object of type 'S4'")
+test(2, data.table(a = 1:4, short_s4_col), error="attempt to replicate non-vector")
 
 # print dims in list-columns, #3671, c.f. 2130.* in tests.Rraw
 s4class = setClass("ex_class", slots = list(x="integer", y="character", z="numeric"))

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -3678,12 +3678,12 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2,
   set.seed(1)
   DT = data.table(index = c("a","b"), x = 1:2, y = rnorm(2))
   test(1102.186, names(dcast(DT, ... ~ ..., value.var = "y", value.var.in.LHSdots = TRUE)), c("index", "x", "y", "a_1", "b_2"))
-  
+
   DT = data.table(year = c(rep(1986,4), rep(1987,3), rep(1988,2), 1989:1991), continent = rep(c("Europe","Asia"), each = 6), country = rep(c("Sweden","Germany","France","India","Japan","China"), each = 2))
   DT_dcasted_LHSddd = dcast(DT, ... ~ ..., fun.aggregate = length, value.var.in.RHSdots = TRUE, value.var = "country")
   test(1102.187, dim(DT_dcasted_LHSddd), c(7L,11L))
   test(1102.188, names(DT_dcasted_LHSddd), c("year", "continent", "1986_Europe_Germany", "1986_Europe_Sweden", "1987_Asia_India", "1987_Europe_France", "1988_Asia_India", "1988_Asia_Japan", "1989_Asia_Japan", "1990_Asia_China", "1991_Asia_China"))
-  
+
   DT_dcasted_LHSd = dcast(DT, . ~ ..., fun.aggregate = length, value.var.in.RHSdots = TRUE, value.var = "country")
   DT_dcasted_LHSi = dcast(DT, 1 ~ ..., fun.aggregate = length, value.var.in.RHSdots = TRUE, value.var = "country")