diff --git a/.ci/.lintr.R b/.ci/.lintr.R index bfc454768..a62745b15 100644 --- a/.ci/.lintr.R +++ b/.ci/.lintr.R @@ -38,8 +38,6 @@ linters = c(dt_linters, all_linters( function_argument_linter = NULL, indentation_linter = NULL, infix_spaces_linter = NULL, - # TODO(R>3.2.0): Activate this, extending to recognize vapply_1i(x, length). - lengths_linter = NULL, line_length_linter = NULL, missing_package_linter = NULL, namespace_linter = NULL, @@ -57,26 +55,20 @@ linters = c(dt_linters, all_linters( # TODO(michaelchirico): Enforce these and re-activate them one-by-one. brace_linter = NULL, condition_call_linter = NULL, - conjunct_test_linter = NULL, fixed_regex_linter = NULL, - function_left_parentheses_linter = NULL, if_not_else_linter = NULL, implicit_assignment_linter = NULL, implicit_integer_linter = NULL, keyword_quote_linter = NULL, - length_levels_linter = NULL, - matrix_apply_linter = NULL, missing_argument_linter = NULL, nzchar_linter = NULL, object_overwrite_linter = NULL, paren_body_linter = NULL, redundant_equals_linter = NULL, return_linter = NULL, - sample_int_linter = NULL, scalar_in_linter = NULL, undesirable_function_linter = NULL, unnecessary_concatenation_linter = NULL, - unnecessary_lambda_linter = NULL, unnecessary_nesting_linter = NULL, unreachable_code_linter = NULL, unused_import_linter = NULL @@ -98,7 +90,8 @@ exclusions = c(local({ undesirable_function_linter = Inf )), exclusion_for_dir("vignettes", list( - quotes_linter = Inf + quotes_linter = Inf, + sample_int_linter = Inf # strings_as_factors_linter = Inf # system_time_linter = Inf )), @@ -112,7 +105,9 @@ exclusions = c(local({ equals_na_linter = Inf, paste_linter = Inf, rep_len_linter = Inf, - seq_linter = Inf + sample_int_linter = Inf, + seq_linter = Inf, + unnecessary_lambda_linter = Inf )) ) }), diff --git a/R/IDateTime.R b/R/IDateTime.R index 185952fe7..712fff1d8 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -83,7 +83,7 @@ as.list.IDate = function(x, ...) NextMethod() ## round.IDate = function (x, digits, units=digits, ...) { ## if (missing(digits)) digits = units # workaround to provide a units argument to match the round generic and round.POSIXt ## units = match.arg(digits, c("weeks", "months", "quarters", "years")) -round.IDate = function (x, digits=c("weeks", "months", "quarters", "years"), ...) { +round.IDate = function(x, digits=c("weeks", "months", "quarters", "years"), ...) { units = match.arg(digits) as.IDate(switch(units, weeks = round(x, "year") + 7L * (yday(x) %/% 7L), @@ -93,7 +93,7 @@ round.IDate = function (x, digits=c("weeks", "months", "quarters", "years"), ... } #Adapted from `+.Date` -`+.IDate` = function (e1, e2) { +`+.IDate` = function(e1, e2) { if (nargs() == 1L) return(e1) # TODO: investigate Ops.IDate method a la Ops.difftime @@ -108,7 +108,7 @@ round.IDate = function (x, digits=c("weeks", "months", "quarters", "years"), ... (setattr(as.integer(unclass(e1) + unclass(e2)), "class", c("IDate", "Date"))) # () wrap to return visibly } -`-.IDate` = function (e1, e2) { +`-.IDate` = function(e1, e2) { if (!inherits(e1, "IDate")) { if (inherits(e1, 'Date')) return(base::`-.Date`(e1, e2)) stopf("can only subtract from \"IDate\" objects") @@ -228,7 +228,7 @@ print.ITime = function(x, ...) { print(format(x)) } -rep.ITime = function (x, ...) +rep.ITime = function(x, ...) { y = rep(unclass(x), ...) class(y) = "ITime" # unlass and rep could feasibly not copy, hence use class<- not setattr() diff --git a/R/data.table.R b/R/data.table.R index aba3dacd3..ce5da744a 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -129,7 +129,7 @@ replace_dot_alias = function(e) { } } -"[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL) +"[.data.table" = function(x, i, j, by, keyby, with=TRUE, nomatch=NA, mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could # test explicitly if the caller is [.data.table (even stronger test. TO DO.) @@ -1920,7 +1920,7 @@ replace_dot_alias = function(e) { attrs = attr(x, 'index', exact=TRUE) skeys = names(attributes(attrs)) if (!is.null(skeys)) { - hits = unlist(lapply(paste0("__", names_x[cols]), function(x) grep(x, skeys, fixed = TRUE))) + hits = unlist(lapply(paste0("__", names_x[cols]), grep, skeys, fixed=TRUE)) hits = skeys[unique(hits)] for (i in seq_along(hits)) setattr(attrs, hits[i], NULL) # does by reference } @@ -2074,19 +2074,17 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) { if (is.ff(X[[j]])) X[[j]] = X[[j]][] # nocov to bring the ff into memory, since we need to create a matrix in memory xj = X[[j]] if (length(dj <- dim(xj)) == 2L && dj[2L] > 1L) { - if (inherits(xj, "data.table")) + if (is.data.table(xj)) xj = X[[j]] = as.matrix(X[[j]]) dnj = dimnames(xj)[[2L]] - collabs[[j]] = paste(collabs[[j]], if (length(dnj) > - 0L) - dnj - else seq_len(dj[2L]), sep = ".") + collabs[[j]] = paste( + collabs[[j]], + if (length(dnj) > 0L) dnj else seq_len(dj[2L]), sep = ".") } if (!is.logical(xj)) all.logical = FALSE - if (length(levels(xj)) > 0L || !(is.numeric(xj) || is.complex(xj) || is.logical(xj)) || - (!is.null(cl <- attr(xj, "class", exact=TRUE)) && any(cl %chin% - c("Date", "POSIXct", "POSIXlt")))) + if (nlevels(xj) > 0L || !(is.numeric(xj) || is.complex(xj) || is.logical(xj)) || + (!is.null(cl <- attr(xj, "class", exact=TRUE)) && any(cl %chin% c("Date", "POSIXct", "POSIXlt")))) non.numeric = TRUE if (!is.atomic(xj)) non.atomic = TRUE @@ -2104,7 +2102,7 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) { if (is.character(X[[j]])) next xj = X[[j]] miss = is.na(xj) - xj = if (length(levels(xj))) as.vector(xj) else format(xj) + xj = if (nlevels(xj)) as.vector(xj) else format(xj) is.na(xj) = miss X[[j]] = xj } @@ -2136,7 +2134,7 @@ tail.data.table = function(x, n=6L, ...) { x[i] } -"[<-.data.table" = function (x, i, j, value) { +"[<-.data.table" = function(x, i, j, value) { # [<- is provided for consistency, but := is preferred as it allows by group and by reference to subsets of columns # with no copy of the (very large, say 10GB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change. if (!cedta()) { @@ -2240,7 +2238,7 @@ dimnames.data.table = function(x) { list(NULL, names(x)) } -"dimnames<-.data.table" = function (x, value) # so that can do colnames(dt)=<..> as well as names(dt)=<..> +"dimnames<-.data.table" = function(x, value) # so that can do colnames(dt)=<..> as well as names(dt)=<..> { if (!cedta()) return(`dimnames<-.data.frame`(x,value)) # nocov ; will drop key but names<-.data.table (below) is more common usage and does retain the key if (!is.list(value) || length(value) != 2L) stopf("attempting to assign invalid object to dimnames of a data.table") @@ -2264,7 +2262,7 @@ dimnames.data.table = function(x) { x # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change. } -within.data.table = function (data, expr, ...) +within.data.table = function(data, expr, ...) # basically within.list but retains key (if any) # will be slower than using := or a regular query (see ?within for further info). { @@ -2289,7 +2287,7 @@ within.data.table = function (data, expr, ...) ans } -transform.data.table = function (`_data`, ...) +transform.data.table = function(`_data`, ...) # basically transform.data.frame with data.table instead of data.frame, and retains key { if (!cedta()) return(NextMethod()) # nocov @@ -2299,7 +2297,7 @@ transform.data.table = function (`_data`, ...) `_data` } -subset.data.table = function (x, subset, select, ...) +subset.data.table = function(x, subset, select, ...) { key.cols = key(x) @@ -2348,7 +2346,7 @@ subset.data.table = function (x, subset, select, ...) is_na = function(x, by=seq_along(x)) .Call(Cdt_na, x, by) any_na = function(x, by=seq_along(x)) .Call(CanyNA, x, by) -na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, ...) { +na.omit.data.table = function(object, cols = seq_along(object), invert = FALSE, ...) { # compare to stats:::na.omit.data.frame if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) @@ -2374,7 +2372,7 @@ which_ = function(x, bool = TRUE) { .Call(Cwhichwrapper, x, bool) } -is.na.data.table = function (x) { +is.na.data.table = function(x) { if (!cedta()) return(`is.na.data.frame`(x)) do.call("cbind", lapply(x, "is.na")) } diff --git a/R/fcast.R b/R/fcast.R index 014bd76d4..d2423cd31 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -251,5 +251,5 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ..., setattr(ans, 'names', c(lhsnames, allcols)) setDT(ans); setattr(ans, 'sorted', lhsnames) } else stopf("Internal error -- empty rhsnames in dcast; please report") # nocov - return (ans) + return(ans) } diff --git a/R/fdroplevels.R b/R/fdroplevels.R index c7025dda0..69f23cb61 100644 --- a/R/fdroplevels.R +++ b/R/fdroplevels.R @@ -1,7 +1,7 @@ # 647 fast droplevels.data.table method fdroplevels = function(x, exclude = if (anyNA(levels(x))) NULL else NA, ...) { stopifnot(inherits(x, "factor")) - lev = which(tabulate(x, length(levels(x))) & (!match(levels(x), exclude, 0L))) + lev = which(tabulate(x, nlevels(x)) & (!match(levels(x), exclude, 0L))) ans = match(as.integer(x), lev) setattr(ans, 'levels', levels(x)[lev]) setattr(ans, 'class', class(x)) diff --git a/R/foverlaps.R b/R/foverlaps.R index 54dc61f93..a7d207428 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -179,10 +179,10 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k # CsubsetDT bug has been fixed by Matt. So back to using it! Should improve subset substantially. if (which) { if (mult %chin% c("first", "last")) - return (olaps$yid) + return(olaps$yid) else if (!is.na(nomatch)) - return (.Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps))) - else return (olaps) + return(.Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps))) + else return(olaps) } else { if (!is.na(nomatch)) olaps = .Call(CsubsetDT, olaps, which(olaps$yid > 0L), seq_along(olaps)) @@ -195,7 +195,7 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k xcols2 = setdiff(names(ans), xcols1) ans[, (ycols) := .Call(CsubsetDT, origy, olaps$yid, chmatch(ycols, names(origy)))] setcolorder(ans, c(xcols1, ycols, xcols2)) - return (ans[]) + return(ans[]) } } diff --git a/R/fread.R b/R/fread.R index a7cab35eb..da44d6be0 100644 --- a/R/fread.R +++ b/R/fread.R @@ -33,8 +33,8 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (is.na(nrows) || nrows<0) nrows=Inf # accept -1 to mean Inf, as read.table does if (identical(header,"auto")) header=NA stopifnot( - is.logical(header) && length(header)==1L, # TRUE, FALSE or NA - is.numeric(nThread) && length(nThread)==1L + is.logical(header), length(header)==1L, # TRUE, FALSE or NA + is.numeric(nThread), length(nThread)==1L ) nThread=as.integer(nThread) stopifnot(nThread>=1L) diff --git a/R/print.data.table.R b/R/print.data.table.R index f80a5833c..f16a39625 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -136,7 +136,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), invisible(x) } -format.data.table = function (x, ..., justify="none") { +format.data.table = function(x, ..., justify="none") { if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x)) stopf("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") diff --git a/R/tables.R b/R/tables.R index 6a0209c86..7662598d8 100644 --- a/R/tables.R +++ b/R/tables.R @@ -12,7 +12,7 @@ type_size = function(DT) { tt = lookup[storage.mode(col)] if (is.na(tt)) tt = .Machine$sizeof.pointer tt = tt*nrow(DT) - if (is.factor(col)) tt = tt + length(levels(col))*.Machine$sizeof.pointer + if (is.factor(col)) tt = tt + nlevels(col)*.Machine$sizeof.pointer ans = ans + tt } ans + ncol(DT)*.Machine$sizeof.pointer # column name pointers diff --git a/R/test.data.table.R b/R/test.data.table.R index 1a6b725be..574bcb4f3 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -311,7 +311,7 @@ INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw gc_mem = function() { # nocov start # gc reports memory in MB - m = apply(gc()[, c(2L, 4L, 6L)], 2L, sum) + m = colSums(gc()[, c(2L, 4L, 6L)]) names(m) = c("GC_used", "GC_gc_trigger", "GC_max_used") m # nocov end diff --git a/R/uniqlist.R b/R/uniqlist.R index 4f3600f83..9bd64efc6 100644 --- a/R/uniqlist.R +++ b/R/uniqlist.R @@ -1,5 +1,4 @@ - -uniqlist = function (l, order = -1L) +uniqlist = function(l, order = -1L) { # Assumes input list is ordered by each list item (or by 'order' if supplied), and that all list elements are the same length # Finds the non-duplicate rows. Was called duplist but now grows vector - doesn't over-allocate result vector and diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 83d672bea..0a5de42c8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3282,7 +3282,7 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) test(1036.01, dim(DT), INT(1,327)) test(1036.02, dim(ans<-melt(DT, 1:2)), INT(325,4), warning="'measure.vars' [[]Geography, Estimate; SEX AND AGE - Total population, Margin of Error; SEX AND AGE - Total population, Percent; SEX AND AGE - Total population, [.][.][.][]] are not all of the same type.*the molten data value column will be of type 'character'.*not of type 'character' will be coerced too") - test(1036.03, length(levels(ans$variable)), 317L) + test(1036.03, nlevels(ans$variable), 317L) test(1036.04, levels(ans$variable)[c(1,2,316,317)], tt <- c("Geography", "Estimate; SEX AND AGE - Total population", @@ -3702,7 +3702,7 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2, # issues/713 - dcast and fun.aggregate DT = data.table(id=rep(1:2, c(3,4)), k=c(rep(letters[1:3], 2), 'c'), v=1:7) - foo = function (tbl, fun.aggregate) { + foo = function(tbl, fun.aggregate) { dcast(tbl, id ~ k, value.var='v', fun.aggregate=fun.aggregate, fill=NA_integer_) } test(1102.22, foo(DT, last), dcast(DT, id ~ k, value.var='v', fun.aggregate=last, fill=NA_integer_)) @@ -8314,7 +8314,7 @@ test(1581.18, DT[, v:=l[[f1]][f2], by=c("f1","f2")], # need eval to have enclos=parent.frame(), #4612 DT = data.table(id = c(1, 1, 2), value = c("a", "b", "c")) DT0 = copy(DT) -fun = function (DT, tag = c("A", "B")) DT[, var := tag[[.GRP]], by = "id"] +fun = function(DT, tag = c("A", "B")) DT[, var := tag[[.GRP]], by = "id"] fun(DT) test(1581.19, DT, DT0[ , var := c('A', 'A', 'B')]) @@ -8708,7 +8708,9 @@ test(1613.35, all.equal(dt1, dt2)) test(1613.36, !isTRUE(all.equal(dt1, dt2, trim.levels = FALSE))) test(1613.37, !isTRUE(all.equal(dt1, dt2, trim.levels = FALSE, check.attributes = FALSE))) test(1613.38, all.equal(dt1, dt2, trim.levels = FALSE, ignore.row.order = TRUE)) -test(1613.39, length(levels(dt1$A)) == 10L && length(levels(dt2$A)) == 5L, TRUE) # dt1 and dt2 not updated by reference +# dt1 and dt2 not updated by reference +test(1613.391, nlevels(dt1$A), 10L) +test(1613.392, nlevels(dt2$A), 5L) # unsupported column types: list dt = data.table(V1 = 1:4, V2 = letters[1:4], V3 = lapply(1:4, function(x) new.env())) test(1613.40, all.equal(dt, dt), TRUE)