Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use R 3.2.0 features #5838

Merged
merged 5 commits into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/as.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {
setnames(ans, 'rn', keep.rownames[1L])
return(ans)
}
if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # not is.atomic because is.atomic(matrix) is true
if (any(cols_with_dims(x))) {
# a data.frame with a column that is data.frame needs to be expanded; test 2013.4
# x may be a class with [[ method that behaves differently, so as.list first for default [[, #4526
return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, ...))
Expand All @@ -245,7 +245,7 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) {

as.data.table.data.table = function(x, ...) {
# as.data.table always returns a copy, automatically takes care of #473
if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # for test 2089.2
if (any(cols_with_dims(x))) { # for test 2089.2
return(as.data.table.list(x, ...))
}
x = copy(x) # #1681
Expand Down
29 changes: 11 additions & 18 deletions R/data.table.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
if (!exists("trimws", "package:base")) {
# trimws was new in R 3.2.0. Backport it for internal data.table use in R 3.1.0
trimws = function(x) {
mysub = function(re, x) sub(re, "", x, perl = TRUE)
mysub("[ \t\r\n]+$", mysub("^[ \t\r\n]+", x))
}
}

dim.data.table = function(x)
{
.Call(Cdim, x)
Expand Down Expand Up @@ -356,8 +348,8 @@ replace_dot_alias = function(e) {
# Fixes 4994: a case where quoted expression with a "!", ex: expr = quote(!dt1); dt[eval(expr)] requires
# the "eval" to be checked before `as.name("!")`. Therefore interchanged.
restore.N = remove.N = FALSE
if (exists(".N", envir=parent.frame(), inherits=FALSE)) {
old.N = get(".N", envir=parent.frame(), inherits=FALSE)
old.N = get0(".N", envir=parent.frame(), inherits=FALSE)
if (!is.null(old.N)) {
locked.N = bindingIsLocked(".N", parent.frame())
if (locked.N) eval(call("unlockBinding", ".N", parent.frame())) # eval call to pass R CMD check NOTE until we find cleaner way
assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE)
Expand Down Expand Up @@ -899,12 +891,12 @@ replace_dot_alias = function(e) {
}
if (!is.list(byval)) stopf("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)()
if (!bynull) for (jj in seq_len(length(byval))) {
if (!bynull) for (jj in seq_along(byval)) {
if (!(this_type <- typeof(byval[[jj]])) %chin% ORDERING_TYPES) {
stopf("Column or expression %d of 'by' or 'keyby' is type '%s' which is not currently supported. If you have a compelling use case, please add it to https://github.com/Rdatatable/data.table/issues/1597. As a workaround, consider converting the column to a supported type, e.g. by=sapply(list_col, toString), whilst taking care to maintain distinctness in the process.", jj, this_type)
}
}
tt = vapply_1i(byval,length)
tt = lengths(byval)
if (any(tt!=xnrow)) stopf("The items in the 'by' or 'keyby' list are length(s) %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", brackify(tt), xnrow)
if (is.null(bynames)) bynames = rep.int("",length(byval))
if (length(idx <- which(!nzchar(bynames))) && !bynull) {
Expand Down Expand Up @@ -1034,7 +1026,7 @@ replace_dot_alias = function(e) {
# allow filtering via function in .SDcols, #3950
if (is.function(.SDcols)) {
.SDcols = lapply(x, .SDcols)
if (any(idx <- vapply_1i(.SDcols, length) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
if (any(idx <- lengths(.SDcols) > 1L | vapply_1c(.SDcols, typeof) != 'logical' | vapply_1b(.SDcols, anyNA)))
stopf("When .SDcols is a function, it is applied to each column; the output of this function must be a non-missing boolean scalar signalling inclusion/exclusion of the column. However, these conditions were not met for: %s", brackify(names(x)[idx]))
.SDcols = unlist(.SDcols, use.names = FALSE)
}
Expand Down Expand Up @@ -1290,11 +1282,12 @@ replace_dot_alias = function(e) {
# warningf(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. Column names should not start with ..")
}
getName = substr(sym, 3L, nchar(sym))
if (!exists(getName, parent.frame())) {
getNameVal <- get0(getName, parent.frame())
if (is.null(getNameVal)) {
if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix
stopf("Variable '%s' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.", getName)
}
assign(sym, get(getName, parent.frame()), SDenv)
assign(sym, getNameVal, SDenv)
}
# hash=TRUE (the default) does seem better as expected using e.g. test 645. TO DO experiment with 'size' argument
if (missingby || bynull || (!byjoin && !length(byval))) {
Expand Down Expand Up @@ -1460,7 +1453,7 @@ replace_dot_alias = function(e) {
# is a more general issue but the former can be fixed by forcing units='secs'
SDenv$`-.POSIXt` = function(e1, e2) {
if (inherits(e2, 'POSIXt')) {
if (verbose && !exists('done_units_report', parent.frame())) {
if (verbose && !get0('done_units_report', parent.frame(), ifnotfound = FALSE)) {
catf('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units\n')
assign('done_units_report', TRUE, parent.frame())
}
Expand Down Expand Up @@ -2804,7 +2797,7 @@ setDF = function(x, rownames=NULL) {
}
x
} else {
n = vapply_1i(x, length)
n = lengths(x)
mn = max(n)
if (any(n<mn))
stopf("All elements in argument 'x' to 'setDF' must be of same length")
Expand Down Expand Up @@ -3184,7 +3177,7 @@ is_constantish = function(q, check_singleton=FALSE) {
}
if (length(i) == 0L) stopf("Internal error in .isFastSubsettable. Please report to data.table developers") # nocov
## convert i to data.table with all combinations in rows.
if(length(i) > 1L && prod(vapply_1i(i, length)) > 1e4){
if(length(i) > 1L && prod(lengths(i)) > 1e4){
## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635
if (verbose) {catf("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()}
return(NULL)
Expand Down
2 changes: 1 addition & 1 deletion R/fcast.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
.Call(Csetlistelt, mapunique, 2L, seq_len(nrow(rhs_)))
lhs = lhs_; rhs = rhs_
}
maplen = vapply_1i(mapunique, length)
maplen = lengths(mapunique)
idx = do.call("CJ", mapunique)[map, 'I' := .I][["I"]] # TO DO: move this to C and avoid materialising the Cross Join.
some_fill = anyNA(idx)
fill.default = if (run_agg_funs && is.null(fill) && some_fill) dat_for_default_fill[, maybe_err(eval(fun.call))]
Expand Down
5 changes: 2 additions & 3 deletions R/fmelt.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe
if (!is.character(p))
stopf("Input patterns must be of type character.")
matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes)
# replace with lengths when R 3.2.0 dependency arrives
if (length(idx <- which(sapply(matched, length) == 0L)))
if (length(idx <- which(lengths(matched) == 0L)))
stopf('Pattern(s) not found: [%s]', brackify(p[idx]))
if (length(matched) == 1L) return(matched[[1L]])
matched
Expand Down Expand Up @@ -125,7 +124,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na
stopf("sep must be character string")
}
list.of.vectors = strsplit(cols, sep, fixed=TRUE)
vector.lengths = sapply(list.of.vectors, length)
vector.lengths = lengths(list.of.vectors)
n.groups = max(vector.lengths)
if (n.groups == 1) {
stopf("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification")
Expand Down
4 changes: 2 additions & 2 deletions R/fread.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
}
file_info = file.info(file)
if (is.na(file_info$size)) stopf("File '%s' does not exist or is non-readable. getwd()=='%s'", file, getwd())
if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # dir.exists() requires R v3.2+, #989
if (isTRUE(file_info$isdir)) stopf("File '%s' is a directory. Not yet implemented.", file) # Could use dir.exists(), but we already ran file.info().
if (!file_info$size) {
warningf("File '%s' has size 0. Returning a NULL %s.", file, if (data.table) 'data.table' else 'data.frame')
return(if (data.table) data.table(NULL) else data.frame(NULL))
Expand Down Expand Up @@ -350,7 +350,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
if (!all(vapply_1b(index, is.character)))
stopf("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)")
if (is.list(index)) {
to_split = vapply_1i(index, length) == 1L
to_split = lengths(index) == 1L
if (any(to_split))
index[to_split] = sapply(index[to_split], strsplit, split = ",", fixed = TRUE)
} else {
Expand Down
2 changes: 1 addition & 1 deletion R/programming.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ list2lang = function(x) {
char = vapply(x, is.character, FALSE)
to.name = !asis & char
if (any(to.name)) { ## turns "my_name" character scalar into `my_name` symbol, for convenience
if (any(non.scalar.char <- vapply(x[to.name], length, 0L)!=1L)) {
if (any(non.scalar.char <- lengths(x[to.name])!=1L)) {
stopf("Character objects provided in the input are not scalar objects, if you need them as character vector rather than a name, then wrap each into 'I' call: %s", brackify(names(non.scalar.char)[non.scalar.char]))
}
x[to.name] = lapply(x[to.name], as.name)
Expand Down
2 changes: 1 addition & 1 deletion R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ CJ = function(..., sorted = TRUE, unique = FALSE)
if (unique) l[[i]] = unique(y)
}
}
nrow = prod( vapply_1i(l, length) ) # lengths(l) will work from R 3.2.0
nrow = prod(lengths(l))
if (nrow > .Machine$integer.max) stopf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max)
l = .Call(Ccj, l)
setDT(l)
Expand Down
4 changes: 2 additions & 2 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -349,11 +349,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
# from all.equal and different to identical related to row.names and unused factor levels
# 3) each test has a unique id which we refer to in commit messages, emails etc.
# 4) test that a query generates exactly 2 warnings, that they are both the correct warning messages, and that the result is the one expected
.test.data.table = exists("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here
nfail = get0("nfail", parent.frame()) # test() can be used inside functions defined in tests.Rraw, so inherits=TRUE (default) here
.test.data.table = !is.null(nfail)
numStr = sprintf("%.8g", num)
if (.test.data.table) {
prevtest = get("prevtest", parent.frame())
nfail = get("nfail", parent.frame()) # to cater for both test.data.table() and stepping through tests in dev
whichfail = get("whichfail", parent.frame())
assign("ntest", get("ntest", parent.frame()) + if (num>0) 1L else 0L, parent.frame(), inherits=TRUE) # bump number of tests run
lasttime = get("lasttime", parent.frame())
Expand Down
13 changes: 6 additions & 7 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ nan_is_na = function(x) {
stopf("Argument 'nan' must be NA or NaN")
}

if (base::getRversion() < "3.2.0") { # Apr 2015
isNamespaceLoaded = function(x) x %chin% loadedNamespaces()
}

if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016
startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub
}
Expand Down Expand Up @@ -67,20 +63,23 @@ require_bit64_if_needed = function(DT) {
}

# vapply for return value character(1)
vapply_1c = function (x, fun, ..., use.names = TRUE) {
vapply_1c = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_character_, USE.NAMES = use.names)
}

# vapply for return value logical(1)
vapply_1b = function (x, fun, ..., use.names = TRUE) {
vapply_1b = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA, USE.NAMES = use.names)
}

# vapply for return value integer(1)
vapply_1i = function (x, fun, ..., use.names = TRUE) {
vapply_1i = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names)
}

# not is.atomic because is.atomic(matrix) is true
cols_with_dims = function(x) vapply_1i(x, function(j) length(dim(j))) > 0L

more = function(f) system(paste("more",f)) # nocov (just a dev helper)

# helper used to auto-name columns in data.table(x,y) as c("x","y"), CJ(x,y) and similar
Expand Down
2 changes: 1 addition & 1 deletion inst/tests/other.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ if (loaded[["xts"]]) { # was 1465 in tests.Rraw, #5516

# was 2108 in tests.Rraw, #5516
# first and last should no longer load xts namespace, #3857, below commented test for interactive validation when xts present but not loaded or attached
# stopifnot("xts"%in%installed.packages(), !"xts"%in%loadedNamespaces()); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!"xts" %in% loadedNamespaces())
# stopifnot("xts"%in%installed.packages(), !isNamespaceLoaded("xts")); library(data.table); x=as.POSIXct("2019-01-01"); last(x); stopifnot(!isNamespaceLoaded("xts"))
x = as.POSIXct("2019-09-09")+0:1
old = options(datatable.verbose=TRUE)
test(19.01, last(x), x[length(x)], output="!is.xts(x)")
Expand Down
24 changes: 12 additions & 12 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
shallow = data.table:::shallow # until exported
.shallow = data.table:::.shallow
split.data.table = data.table:::split.data.table
if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith
if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith # R 3.3.0
stopf = data.table:::stopf
test = data.table:::test
uniqlengths = data.table:::uniqlengths
Expand Down Expand Up @@ -1472,13 +1472,13 @@ test(462, DT[,foo:=10L], data.table(a=1:3,v=4:9,foo=10L,key="a"))
unlink(f)

# Test CJ problems with v1.7.4, #1689
test(463, all(sapply(CJ(1:2,1:3),length)==6L))
test(463, all(lengths(CJ(1:2,1:3)) == 6L))
DT = data.table(x=1:4,y=1:2,cnt=1L,key=c('x', 'y'))
test(464, DT[CJ(1:4,1:4)]$cnt, INT(1,rep(NA,4),1,NA,NA,1,rep(NA,4),1,NA,NA))
test(465, DT[CJ(1:4,1:4), sum(cnt>0), by=.EACHI]$y, rep(1:4,4))
f1 = factor(c("READING","MATHEMATICS"))
f2 = factor(c("2010_2011","2009_2010","2008_2009"), levels=paste(2006:2010,2007:2011,sep="_"))
test(466, all(sapply(CJ(f1, f2),length)==6L))
test(466, all(lengths(CJ(f1, f2))==6L))

# Test list(.SD,newcol=..) gives error with guidance
DT = data.table(a=1:2,v=3:6)
Expand Down Expand Up @@ -2882,7 +2882,7 @@ test(966, fread(input, colClasses=list(character=2:4)), data.table(A=1:2, B=c("f
warning="Column number 4 (colClasses[[1]][3]) is out of range [1,ncol=3]")

# Character input more than 4096 bytes (used to be passed through path.expand which imposed the limit), #2649
test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L)
test(967, nrow(fread( strrep('a\tb\n', 10000L), header=FALSE)), 10000L)

# Test fread warns about removal of any footer (and autostart skips up over it)
test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2")
Expand Down Expand Up @@ -7005,7 +7005,7 @@ test(1477.22, transpose(la, list.cols=NA), error="list.cols should be logical TR
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())
ll <- lapply(ll, setDT)
test(1478.1, sapply(ll, truelength), c(1025L, 1026L, 1024L, 1024L))
test(1478.2, sapply(ll, length), INT(1,2,0,0))
test(1478.2, lengths(ll), INT(1,2,0,0))

# rbindlist stack imbalance issue, #980.
test(1479, rbindlist(replicate(4,rbindlist(replicate(47, NULL),
Expand Down Expand Up @@ -11630,23 +11630,23 @@ set.seed(1L)
ar.dimnames = list(color = sort(c("green","yellow","red")),
year = as.character(2011:2015),
status = sort(c("active","inactive","archived","removed")))
ar.dim = sapply(ar.dimnames, length)
ar.dim = lengths(ar.dimnames)
ar = array(sample(c(rep(NA, 4), 4:7/2), prod(ar.dim), TRUE),
unname(ar.dim), # array() having length(dims) < 3 will be created as matrix in R so will not be dispatched here but as.data.table.matrix
ar.dimnames)
dt = as.data.table(ar, na.rm=FALSE)
dimcols = head(names(dt), -1L)
test(1774.01, TRUE, all(
nrow(dt) == 60L,
prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
dt[is.na(value), .N] == 30L,
dt[, .N==1L, c(dimcols)]$V1
))
dt = as.data.table(ar)
dimcols = head(names(dt), -1L)
test(1774.02, TRUE, all(
nrow(dt) == 30L,
prod(sapply(ar.dimnames, length)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
prod(lengths(ar.dimnames)) == dt[, prod(sapply(.SD, uniqueN)), .SDcols = dimcols],
dt[is.na(value), .N] == 0L,
dt[, .N==1L, c(dimcols)]$V1
))
Expand Down Expand Up @@ -11856,7 +11856,7 @@ test(1800.2, fread("A\n1e55555555\n-1e+234056\n2e-59745"), data.table(A=c("1e555
# Test files with "round" sizes (different multiples of 2, from 512B to 64KB)
for (mul in c(16, 128, 512, 1024, 2048)) {
ff = file(f<-tempfile(), open="wb")
cat(paste(rep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), collapse=""), file=ff)
cat(strrep("1234,5678,9012,3456,7890,abcd,4\x0A", mul), file=ff)
close(ff)
DT = data.table(V1=rep(1234L, mul), V2=5678L, V3=9012L, V4=3456L, V5=7890L, V6="abcd", V7=4L)
test(1801 + log2(mul)/100 + 0.001, file.info(f)$size, mul*32)
Expand Down Expand Up @@ -12067,8 +12067,8 @@ if (test_longdouble) { #3258

# Test that integers just above 128 or 256 characters in length parse as strings, not as integers/floats
# This guards against potential overflows in the count of digits
src1 = paste0(rep("1234567890", 13), collapse="") # length = 130, slightly above 128
src2 = paste0(rep("12345678900987654321", 13), collapse="") # length = 260, slightly above 256
src1 = strrep("1234567890", 13L) # length = 130, slightly above 128
src2 = strrep("12345678900987654321", 13L) # length = 260, slightly above 256
test(1831.1, fread(paste0("A\n", src1)), data.table(A=src1))
test(1831.2, fread(paste0("A\n", src2)), data.table(A=src2))
test(1831.3, fread(paste0("A\n", src2, ".33")), data.table(A=1.2345678900987655e+259))
Expand Down Expand Up @@ -16996,7 +16996,7 @@ test(2156.1, DT[,list(list({attr(value,"class")<-"newclass";value})),by=series]$
DT[1,value])
test(2156.2, truelength(DT[,list(list(value)),by=series]$V1[[1L]])>=0L) # not -64 carried over by duplicate() of the .SD column
# cover NULL case in copyAsPlain by putting a NULL alongside a dogroups .SD column. The 'if(.GRP==1L)' is just for fun.
test(2156.3, sapply(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1, length),
test(2156.3, lengths(DT[, list(if (.GRP==1L) list(value,NULL) else list(NULL,value)), by=series]$V1),
INT(64,0,0,64))

# CornerstoneR usage revealed copySharedColumns needed work afer PR#4655
Expand Down
Loading