Skip to content

Commit

Permalink
Merge branch 'master' into rchk-gha
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico authored Jul 21, 2024
2 parents 84c2011 + b303cd4 commit 1fea14e
Show file tree
Hide file tree
Showing 32 changed files with 9,171 additions and 104 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
^\.devcontainer$
^\.graphics$
^\.github$
^\.zed$

^\.gitlab-ci\.yml$

Expand Down
2 changes: 1 addition & 1 deletion .ci/.lintr.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dt_linters = new.env()
for (f in list.files('.ci/linters', full.names=TRUE)) sys.source(f, dt_linters)
for (f in list.files('.ci/linters/r', full.names=TRUE)) sys.source(f, dt_linters)
rm(f)

# NB: Could do this inside the linter definition, this separation makes those files more standardized
Expand Down
36 changes: 36 additions & 0 deletions .ci/linters/c/alloc_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Ensure that we check the result of malloc()/calloc() for success
# More specifically, given that this is an AST-ignorant checker,
# 1. Find groups of malloc()/calloc() calls
# 2. Check the next line for a check like 'if (!x || !y)'
alloc_linter = function(c_file) {
lines <- readLines(c_file)
# Be a bit more precise to avoid mentions in comments
alloc_lines <- grep(R"{=\s*([(]\w+\s*[*][)])?[mc]alloc[(]}", lines)
if (!length(alloc_lines)) return()
# int *tmp=(int*)malloc(...); or just int tmp=malloc(...);
alloc_keys <- lines[alloc_lines] |>
strsplit(R"(\s*=\s*)") |>
vapply(head, 1L, FUN.VALUE="") |>
trimws() |>
# just normalize the more exotic assignments, namely 'type *restrict key = ...'
gsub(pattern = "[*]\\s*(restrict\\s*)?", replacement = "*") |>
strsplit("*", fixed=TRUE) |>
vapply(tail, 1L, FUN.VALUE="")
alloc_grp_id <- cumsum(c(TRUE, diff(alloc_lines) != 1L))

# execute by group
tapply(seq_along(alloc_lines), alloc_grp_id, function(grp_idx) {
keys_regex <- paste0("\\s*!\\s*", alloc_keys[grp_idx], "\\s*", collapse = "[|][|]")
check_regex <- paste0("if\\s*\\(", keys_regex)
check_line <- lines[alloc_lines[tail(grp_idx, 1L)] + 1L]
# Rarely (once in fread.c as of initialization), error checking is handled
# but not immediately, so use 'NOCHECK' to escape.
if (!grepl(check_regex, check_line) && !grepl("NOCHECK", check_line, fixed=TRUE)) {
bad_lines_idx <- seq(alloc_lines[grp_idx[1L]], length.out=length(grp_idx)+1L)
cat("FILE: ", c_file, "; LINES: ", head(bad_lines_idx, 1L), "-", tail(bad_lines_idx, 1L), "\n", sep="")
writeLines(lines[bad_lines_idx])
cat(strrep("-", max(nchar(lines[bad_lines_idx]))), "\n", sep="")
stop("Expected the malloc()/calloc() usage above to be followed immediately by error checking.", call.=FALSE)
}
})
}
File renamed without changes.
File renamed without changes.
3 changes: 2 additions & 1 deletion .dev/CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ Rscript -e "tools::update_pkg_po('.')"

# 2) Open a PR with the new templates & contact the translators
# * zh_CN: @hongyuanjia
# * pt_BR: @rffontenelle
## Translators to submit commits with translations to this PR
## [or perhaps, if we get several languages, each to open
## its own PR and merge to main translation PR]

## 3) Check validity with tools::checkPoFiles("zh_CN")
## 3) Check validity with tools::checkPoFiles(".*")

## 4) Compile the new .mo binary files with potools::po_compile()

Expand Down
60 changes: 37 additions & 23 deletions .github/workflows/R-CMD-check-occasional.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
on:
schedule:
- cron: '17 13 14 * *' # 14th of month at 13:17 UTC
- cron: '17 13 18 * *' # 18th of month at 13:17 UTC

# A more complete suite of checks to run monthly; each PR/merge need not pass all these, but they should pass before CRAN release
name: R-CMD-check-occasional
Expand Down Expand Up @@ -83,11 +83,15 @@ jobs:
run: brew install gdal proj

- name: Install remotes
env:
R_LIBS_USER: /home/runner/work/r-lib
run: install.packages("remotes")
shell: Rscript {0}

- name: Install system dependencies
if: runner.os == 'Linux'
env:
R_LIBS_USER: /home/runner/work/r-lib
run: |
while read -r cmd
do
Expand All @@ -100,46 +104,56 @@ jobs:
_R_CHECK_FORCE_SUGGESTS_: false
_R_CHECK_CRAN_INCOMING_REMOTE_: false
_R_CHECK_TESTS_NLINES_: 0
R_LIBS_USER: /home/runner/work/r-lib
run: |
options(crayon.enabled = TRUE)
install.packages("remotes") # different R_LIBS_USER now...
remotes::install_deps(dependencies=TRUE, force=TRUE)
other_deps_expr = parse('inst/tests/other.Rraw', n=1L)
eval(other_deps_expr)
pkgs <- get(as.character(other_deps_expr[[1L]][[2L]]))
# Many will not install on oldest R versions
try(remotes::install_cran(c(pkgs, "rcmdcheck"), force=TRUE))
# we define this in data.table namespace, but it appears to be exec
if (!exists("isFALSE", "package:base")) {
if (!exists("isFALSE", "package:base")) { # R>=3.5.0
if (!exists("isFALSE", asNamespace("data.table"))) {
message("isFALSE not found in base, but data.table did not define it either!\n")
}
# attempt defining it here as a workaround...
isFALSE = function(x) is.logical(x) && length(x) == 1L && !is.na(x) && !x
}
has_pkg = sapply(pkgs, requireNamespace, quietly=TRUE)
run_other = all(has_pkg)
other_deps_expr = parse('inst/tests/other.Rraw', n=1L)
eval(other_deps_expr)
other_pkgs = get(as.character(other_deps_expr[[1L]][[2L]]))
# Many will not install on oldest R versions
try(remotes::install_cran(c(other_pkgs, "rcmdcheck"), force=TRUE))
has_other_pkg = sapply(other_pkgs, requireNamespace, quietly=TRUE)
run_other = all(has_other_pkg)
if (!run_other) {
message(sprintf("Skipping other.Rraw since some required packages are not available: %s\n", toString(pkgs[!has_pkg])))
message(sprintf("Skipping other.Rraw since some required packages are not available: %s\n", toString(other_pkgs[!has_other_pkg])))
}
message("Will try and set TEST_DATA_TABLE_WITH_OTHER_PACKAGES=", as.character(run_other), " in R CMD check.")
# IINM rcmdcheck isolates its env from the calling process', besides what's passed to env=
env = c(
TEST_DATA_TABLE_WITH_OTHER_PACKAGES=as.character(run_other),
R_LIBS_USER=Sys.getenv("R_LIBS_USER")
)
do_vignettes = requireNamespace("knitr", quietly=TRUE)
build_args = NULL
check_args = c("--no-manual", "--as-cran")
if (!do_vignettes) {
message("Skipping vignettes since knitr is unavailable.")
build_args = "--no-build-vignettes"
check_args = c(check_args, "--no-build-vignettes", "--ignore-vignettes")
}
Sys.setenv(TEST_DATA_TABLE_WITH_OTHER_PACKAGES=as.character(run_other))
do_vignettes <- requireNamespace("knitr", quietly=TRUE)
vignette_args <- if (!do_vignettes) "--no-build-vignettes"
args <- c("--no-manual", "--as-cran", vignette_args)
if (requireNamespace("rcmdcheck", quietly=TRUE)) {
rcmdcheck::rcmdcheck(args = args, error_on = "warning", check_dir = "check")
rcmdcheck::rcmdcheck(args = check_args, build_args = build_args, error_on = "warning", check_dir = "check", env=env)
} else {
Rbin = if (.Platform$OS.type == "windows") "R.exe" else "R"
system2(Rbin, c("CMD", "build", ".", vignette_args))
dt_tar <- list.files(pattern = "^data[.]table_.*[.]tar[.]gz$")
system2(Rbin, c("CMD", "build", ".", build_args))
dt_tar = list.files(pattern = "^data[.]table_.*[.]tar[.]gz$")
if (!length(dt_tar)) stop("Built tar.gz not found among: ", toString(list.files()))
# --no-build-vignettes is not enough for R CMD check
if (!do_vignettes) args <- c(args, "--ignore-vignettes")
res = system2(Rbin, c("CMD", "check", dt_tar[1L], args), stdout=TRUE)
res = system2(Rbin, c("CMD", "check", dt_tar[1L], check_args), stdout=TRUE, env=sprintf("%s=%s", names(env), env))
if (!is.null(attr(res, "status")) || grep("^Status:.*(ERROR|WARNING)", res)) {
writeLines(res)
stop("R CMD check failed")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ on:
branches:
- master

name: lint
name: code-quality

jobs:
lint:
lint-r:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -28,8 +28,21 @@ jobs:
needs: lint

- name: Lint
run: lintr::lint_package(pattern = "(?i)[.](r|rmd|rraw)$") # TODO(#5830): use the default pattern
run: lintr::lint_package(pattern = "(?i)[.](r|rmd)$") # TODO(#5830): use the default pattern
shell: Rscript {0}
env:
LINTR_ERROR_ON_LINT: true
R_LINTR_LINTER_FILE: .ci/.lintr
lint-c:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: r-lib/actions/setup-r@v2
- name: Lint
run: |
for (f in list.files('.ci/linters/c', full.names=TRUE)) source(f)
for (f in list.files('src', pattern='[.]c$', full.names=TRUE)) {
alloc_linter(f)
# TODO(#6272): Incorporate more checks from CRAN_Release
}
shell: Rscript {0}
2 changes: 2 additions & 0 deletions .github/workflows/pkgup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ on:
branches:
- 'master'

name: pkgdown-deploy

jobs:
build:
name: data.table
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ config.log
.Rproj.user
data.table.Rproj

# zed editor
.zed

# produced vignettes
vignettes/*.html
vignettes/*.pdf
Expand Down
20 changes: 10 additions & 10 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ test-lin-rel:
OPENBLAS_MAIN_FREE: "1"
script:
- *install-deps
- echo 'CFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- echo 'CFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- R CMD check $(ls -1t data.table_*.tar.gz | head -n 1)
- (! grep "warning:" data.table.Rcheck/00install.out)

Expand All @@ -132,8 +132,8 @@ test-lin-rel-vanilla:
<<: *test-lin
image: registry.gitlab.com/jangorecki/dockerfiles/r-base-gcc
script:
- echo 'CFLAGS=-g -O0 -fno-openmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O0 -fno-openmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- echo 'CFLAGS=-g -O0 -fno-openmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O0 -fno-openmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- R CMD check --no-manual --ignore-vignettes $(ls -1t data.table_*.tar.gz | head -n 1)

## R-release on Linux
Expand All @@ -149,8 +149,8 @@ test-lin-rel-cran:
_R_CHECK_PKG_SIZES_THRESHOLD_: "7" ## MB 'checking installed package size' NOTE
script:
- *install-deps
- echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- echo 'CFLAGS=-g -O2 -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
- >-
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: OK")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: OK"), " but ", shQuote(l)) else q("no")'
Expand All @@ -168,8 +168,8 @@ test-lin-dev-gcc-strict-cran:
_R_S3_METHOD_LOOKUP_BASEENV_AFTER_GLOBALENV_: "FALSE" ## detects S3 method lookup found on search path #4777
_R_S3_METHOD_LOOKUP_REPORT_SEARCH_PATH_USES_: "TRUE"
script:
- echo 'CFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- echo 'CFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- *install-deps
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
- (! grep "warning:" data.table.Rcheck/00install.out)
Expand All @@ -189,8 +189,8 @@ test-lin-dev-clang-cran:
_R_S3_METHOD_LOOKUP_BASEENV_AFTER_GLOBALENV_: "FALSE"
_R_S3_METHOD_LOOKUP_REPORT_SEARCH_PATH_USES_: "TRUE"
script:
- echo 'CFLAGS=-g -O2 -fno-common -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -fno-common -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- echo 'CFLAGS=-g -O2 -fno-common -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
- echo 'CXXFLAGS=-g -O2 -fno-common -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
- *install-deps
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
- (! grep "warning:" data.table.Rcheck/00install.out)
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Authors@R: c(
person("Jan","Gorecki", role="aut"),
person("Michael","Chirico", role="aut", comment = c(ORCID="0000-0003-0787-087X")),
person("Toby","Hocking", role="aut", comment = c(ORCID="0000-0002-3146-0865")),
person("Benjamin","Schwendinger",role="aut"),
person("Benjamin","Schwendinger",role="aut", comment = c(ORCID="0000-0003-3315-8114")),
person("Pasha","Stetsenko", role="ctb"),
person("Tom","Short", role="ctb"),
person("Steve","Lianoglou", role="ctb"),
Expand Down
10 changes: 9 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@

12. data.table's `all.equal()` method now dispatches to each column's own `all.equal()` method as appropriate, [#4543](https://github.com/Rdatatable/data.table/issues/4543). Thanks @MichaelChirico for the report and fix. Note that this had two noteworthy changes to data.table's own test suite that might affect you: (1) comparisons of POSIXct columns compare absolute, not relative differences, meaning that millisecond-scale differences might trigger a "not equal" report that was hidden before; and (2) comparisons of integer64 columns could be totally wrong since they were being compared on the basis of their representation as doubles, not long integers. The former might be a matter of preference requiring you to specify a different `tolerance=`, while the latter was clearly a bug.

12. `rbindlist` could lead to a protection stack overflow when applied to a list containing many nested lists exceeding the pointer protection stack size, [#4536](https://github.com/Rdatatable/data.table/issues/4536). Thanks to @ProfFancyPants for reporting, and Benjamin Schwendinger for the fix.
13. `rbindlist` could lead to a protection stack overflow when applied to a list containing many nested lists exceeding the pointer protection stack size, [#4536](https://github.com/Rdatatable/data.table/issues/4536). Thanks to @ProfFancyPants for reporting, and Benjamin Schwendinger for the fix.

14. `fread(x, colClasses="POSIXct")` now also works for columns containing only NA values, [#6208](https://github.com/Rdatatable/data.table/issues/6208). Thanks to @markus-schaffer for the report, and Benjamin Schwendinger for the fix.

## NOTES

Expand Down Expand Up @@ -116,10 +118,16 @@
21. Refactored some non-API calls to R macros for S4 objects (#6180)[https://github.com/Rdatatable/data.table/issues/6180]. There should be no user-visible change. Thanks to various R users & R core for pushing to have a clearer definition of "API" for R, and thanks @MichaelChirico for implementing here.
22. C code was unified more in how failures to allocate memory (`malloc()`/`calloc()`) are handled, (#1115)[https://github.com/Rdatatable/data.table/issues/1115]. No OOM issues were reported, as these regions of code typically request relatively small blocks of memory, but it is good to handle memory pressure consistently. Thanks @elfring for the report and @MichaelChirico for the clean-up effort and future-proofing linter.
## TRANSLATIONS
1. Fix a typo in a Mandarin translation of an error message that was hiding the actual error message, [#6172](https://github.com/Rdatatable/data.table/issues/6172). Thanks @trafficfan for the report and @MichaelChirico for the fix.
2. data.table is now translated into Brazilian Portuguese (`pt_BR`) as well as Mandarin (`zh_CN`). Thanks to the [new translation team](https://github.com/orgs/Rdatatable/teams/brazil) consisting initially of @rffontenelle, @leofontenelle, and @italo-07. The team is open if you'd also like to join and support maintenance of these translations.

3. A more helpful error message for using `:=` inside the first argument (`i`) of `[.data.table` is now available in translation, [#6293](https://github.com/Rdatatable/data.table/issues/6293). Previously, the code to display this assumed an earlier message was printed in English. The solution is for calling `:=` directly (i.e., outside the second argument `j` of `[.data.table`) to throw an error of class `dt_invalid_let_error`. Thanks to Spanish translator @rikivillalba for spotting the issue and @MichaelChirico for the fix.

# data.table [v1.15.4](https://github.com/Rdatatable/data.table/milestone/33) (27 March 2024)

## BUG FIXES
Expand Down
2 changes: 1 addition & 1 deletion R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
if (!isReallyReal(i[[ic]])) {
# common case of ad hoc user-typed integers missing L postfix joining to correct integer keys
# we've always coerced to int and returned int, for convenience.
if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s", iname, xname)
if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s.\n", iname, xname)
val = as.integer(i[[ic]])
if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679
set(i, j=ic, value=val)
Expand Down
Loading

0 comments on commit 1fea14e

Please sign in to comment.