Skip to content

Commit

Permalink
Merge branch 'master' into root-call
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico committed Jul 14, 2024
2 parents 919c262 + bbe7563 commit c29fe93
Show file tree
Hide file tree
Showing 127 changed files with 3,632 additions and 2,062 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.dir-locals.el
^\.Rprofile$
^data\.table_.*\.tar\.gz$
^config\.log$
^vignettes/plots/figures$
^\.Renviron$
^[^/]+\.R$
Expand All @@ -16,7 +17,6 @@
^\.graphics$
^\.github$

^\.appveyor\.yml$
^\.gitlab-ci\.yml$

^Makefile$
Expand Down
71 changes: 0 additions & 71 deletions .appveyor.yml

This file was deleted.

112 changes: 112 additions & 0 deletions .ci/.lintr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
dt_linters = new.env()
for (f in list.files('.ci/linters', full.names=TRUE)) sys.source(f, dt_linters)
rm(f)

# NB: Could do this inside the linter definition, this separation makes those files more standardized
dt_linters <- eapply(dt_linters, function(linter_factory) linter_factory())

linters = c(dt_linters, all_linters(
packages = "lintr", # TODO(lintr->3.2.0): Remove this.
# eq_assignment_linter(),
brace_linter(allow_single_line = TRUE),
# TODO(michaelchirico): Activate these incrementally. These are the
# parameterizations that match our style guide.
# implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE),
# implicit_integer_linter(allow_colon = TRUE),
# system_time_linter = undesirable_function_linter(c(
# system.time = "Only run timings in benchmark.Rraw"
# )),
# undesirable_function_linter(modify_defaults(
# default_undesirable_functions,
# ifelse = "Use fifelse instead.",
# Sys.setenv = NULL,
# library = NULL,
# options = NULL,
# par = NULL,
# setwd = NULL
# )),
undesirable_operator_linter(),
# TODO(lintr#2441): Use upstream implementation.
assignment_linter = NULL,
absolute_path_linter = NULL, # too many false positives
# TODO(lintr#2442): Use this once x[ , j, by] is supported.
commas_linter = NULL,
commented_code_linter = NULL,
# TODO(linter->3.2.0): Activate this.
consecutive_assertion_linter = NULL,
cyclocomp_linter = NULL,
function_argument_linter = NULL,
indentation_linter = NULL,
infix_spaces_linter = NULL,
line_length_linter = NULL,
missing_package_linter = NULL,
namespace_linter = NULL,
nonportable_path_linter = NULL,
object_name_linter = NULL,
object_usage_linter = NULL,
quotes_linter = NULL,
semicolon_linter = NULL,
spaces_inside_linter = NULL,
spaces_left_parentheses_linter = NULL,
# TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong.
strings_as_factors_linter = NULL,
# TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate.
todo_comment_linter = NULL,
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
brace_linter = NULL,
fixed_regex_linter = NULL,
if_not_else_linter = NULL,
implicit_assignment_linter = NULL,
implicit_integer_linter = NULL,
keyword_quote_linter = NULL,
object_overwrite_linter = NULL,
paren_body_linter = NULL,
redundant_equals_linter = NULL,
undesirable_function_linter = NULL,
unnecessary_concatenation_linter = NULL,
unnecessary_nesting_linter = NULL,
unreachable_code_linter = NULL,
unused_import_linter = NULL
))
rm(dt_linters)

# TODO(lintr#2172): Glob with lintr itself.
exclusions = c(local({
exclusion_for_dir <- function(dir, exclusions) {
files = file.path("..", list.files(dir, pattern = "\\.(R|Rmd|Rraw)$", full.names=TRUE))
stats::setNames(rep(list(exclusions), length(files)), files)
}
c(
exclusion_for_dir("tests", list(
quotes_linter = Inf,
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
implicit_integer_linter = Inf,
infix_spaces_linter = Inf,
undesirable_function_linter = Inf
)),
exclusion_for_dir("vignettes", list(
quotes_linter = Inf,
sample_int_linter = Inf
# strings_as_factors_linter = Inf
# system_time_linter = Inf
)),
exclusion_for_dir("inst/tests", list(
library_call_linter = Inf,
numeric_leading_zero_linter = Inf,
undesirable_operator_linter = Inf, # For ':::', possibly we could be more careful to only exclude ':::'.
# TODO(michaelchirico): Enforce these and re-activate them one-by-one.
comparison_negation_linter = Inf,
condition_call_linter = Inf,
duplicate_argument_linter = Inf,
equals_na_linter = Inf,
missing_argument_linter = Inf,
paste_linter = Inf,
rep_len_linter = Inf,
sample_int_linter = Inf,
seq_linter = Inf,
unnecessary_lambda_linter = Inf
))
)
}),
list(`../inst/tests/froll.Rraw` = list(dt_test_literal_linter = Inf)) # TODO(michaelchirico): Fix these once #5898, #5692, #5682, #5576, #5575, #5441 are merged.
)
4 changes: 0 additions & 4 deletions .ci/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ Artifacts:

TODO document

### [Appveyor](./../.appveyor.yml)

TODO document

## CI tools

### [`ci.R`](./ci.R)
Expand Down
124 changes: 124 additions & 0 deletions .ci/atime/tests.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# A list of performance tests.
#
# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments:
# - N: A numeric sequence of data sizes to vary.
# - setup: An expression evaluated for every data size before measuring time/memory.
# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions.
# This must call a function from data.table using a syntax with double or triple colon prefix.
# The package name before the colons will be replaced by a new package name that uses the commit SHA hash.
# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table)
#
# Optional parameters that may be useful to configure tests:
# - times: Number of times each expression is evaluated (default is 10).
# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. Default of 0.01 seconds should be sufficient for most tests.
# - Historical references (short version name = commit SHA1).
# For historical regressions, use version names 'Before', 'Regression', and 'Fixed'
# When there was no regression, use 'Slow' and 'Fast'
# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information.
# nolint start: undesirable_operator_linter. ':::' needed+appropriate here.
test.list <- atime::atime_test_list(
# A function to customize R package metadata and source files to facilitate version-specific installation and testing.
#
# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R)
# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package.
# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately.
#
# @param old.Package Current name of the package.
# @param new.Package New name of the package, including a SHA hash.
# @param sha SHA1 hash used for differentiating versions.
# @param new.pkg.path Path to the package files.
#
# @details
# The function modifies:
# - DESCRIPTION, updating the package name.
# - Makevars, customizing the shared object file name and adjusting the build settings.
# - R/onLoad.R, adapting custom version checking for package loading operations.
# - NAMESPACE, changing namespace settings for dynamic linking.
#
# @examples
# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table")
#
# @return None (performs in-place file modifications)
# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration.
pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
pkg_find_replace <- function(glob, FIND, REPLACE) {
atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
}
Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
new.Package_ <- paste0(Package_, "_", sha)
pkg_find_replace(
"DESCRIPTION",
paste0("Package:\\s+", old.Package),
paste("Package:", new.Package))
pkg_find_replace(
file.path("src", "Makevars.*in"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
Package_regex,
new.Package_)
pkg_find_replace(
file.path("R", "onLoad.R"),
sprintf('packageVersion\\("%s"\\)', old.Package),
sprintf('packageVersion\\("%s"\\)', new.Package))
pkg_find_replace(
file.path("src", "init.c"),
paste0("R_init_", Package_regex),
paste0("R_init_", gsub("[.]", "_", new.Package_)))
pkg_find_replace(
"NAMESPACE",
sprintf('useDynLib\\("?%s"?', Package_regex),
paste0('useDynLib(', new.Package_))
},

# Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311
# Fixed in: https://github.com/Rdatatable/data.table/pull/4440
"shallow regression fixed in #4440" = atime::atime_test(
N = 10^seq(3,8),
setup = {
set.seed(1L)
dt <- data.table(a = sample.int(N))
setindexv(dt, "a")
},
expr = data.table:::shallow(dt),
# Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created)
Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression
Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440)

# Test based on: https://github.com/Rdatatable/data.table/issues/5424
# Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491
# Fixed in: https://github.com/Rdatatable/data.table/pull/5463
"memrecycle regression fixed in #5463" = atime::atime_test(
N = 10^seq(3, 8),
setup = {
n <- N/100
set.seed(2L)
dt <- data.table(
g = sample(seq_len(n), N, TRUE),
x = runif(N),
key = "g")
dt_mod <- copy(dt)
},
expr = data.table:::`[.data.table`(dt_mod, , N := .N, by = g),
Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits)
Fixed = "58409197426ced4714af842650b0cc3b9e2cb842"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits)

# Issue reported in: https://github.com/Rdatatable/data.table/issues/5426
# To be fixed in: https://github.com/Rdatatable/data.table/pull/5427
"setDT improved in #5427" = atime::atime_test(
N = 10^seq(1, 7),
setup = {
L <- replicate(N, 1, simplify = FALSE)
setDT(L)
},
expr = {
data.table:::setattr(L, "class", NULL)
data.table:::setDT(L)
},
Slow = "c4a2085e35689a108d67dacb2f8261e4964d7e12", # Parent of the first commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/commit/7cc4da4c1c8e568f655ab5167922dcdb75953801)
Fast = "1872f473b20fdcddc5c1b35d79fe9229cd9a1d15") # Last commit in the PR that fixes the issue (https://github.com/Rdatatable/data.table/pull/5427/commits)
)
# nolint end: undesirable_operator_linter.
13 changes: 13 additions & 0 deletions .ci/linters/dt_lengths_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# This is lintr::lengths_linter(), but including 'vapply_1i()' which is more common internally in data.table
# alternatively, this could be defined with something like
# dt_lengths_linter <- lintr::lengths_linter()
# local({ # to remove 'e'
# e <- environment(dt_lengths_linter)
# e$function_names <- c(evalq(function_names, e), "vapply_1i")
# })
# but that feels more fragile than this.
dt_lengths_linter <- lintr::make_linter_from_function_xpath(
function_names = c("sapply", "vapply", "vapply_1i", "map_int", "map_dbl"),
xpath = "parent::expr/parent::expr[expr/SYMBOL[text() = 'length']]",
lint_message = "Use lengths() to find the length of each element in a list."
)
10 changes: 10 additions & 0 deletions .ci/linters/dt_test_literal_linter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Avoid test(SYMBOL) and test(SYMBOL+...) except in rare cases.
# in the latter case, prefer test(NUMBER+expr).
dt_test_literal_linter <- lintr::make_linter_from_xpath(
"
//SYMBOL_FUNCTION_CALL[text() = 'test']
/parent::expr
/following-sibling::expr[1][SYMBOL or expr[1]/SYMBOL]
",
"Prefer test's 'num' argument to start with a numeric literal except in rare cases, e.g. test(123, ...) or test(123+x/6, ...)"
)
21 changes: 14 additions & 7 deletions .dev/CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -630,19 +630,26 @@ bunzip2 inst/tests/*.Rraw.bz2 # decompress *.Rraw again so as not to commit com
# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.15.99 to 1.16.99 inc below, 1.16.0 to 1.17.0 above, 1.15.0 to 1.16.0 below
# 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
# 8. Push to master with this consistent commit message: "1.16.0 on CRAN. Bump to 1.16.99"
# 9. Take sha from step 8 and run `git tag 1.16.0 96c..sha..d77` then `git push origin 1.16.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
# 9. Take sha from the previous step and run `git tag 1.16.0 96c..sha..d77` then `git push origin 1.16.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
######

###### Branching policy for PATCH RELEASE
# Patch releases handle regressions and CRAN-required off-cycle fixes (e.g. when changes to r-devel break data.table).
# * Start a branch `patch-x.y.z` corresponding to the target release version. It should be branched from the previous CRAN release,
# e.g. branch `patch-1.15.4` from tag `v1.15.2`, `patch-1.15.6` from tag `v1.15.4`, etc.
# * Add fixes for issues tagged to the corresponding milestone as PRs targeting the `master` branch. These PRs should be reviewed as usual
# and include a NEWS item under the ongoing development release.
# * After merging a patch PR, `cherry-pick` the corresponding commit into the target patch branch. Edit the NEWS item to be under the heading
# for the patch release.
# * Delete the patch branch after the corresponding tag is pushed.

###### Bump dev for PATCH RELEASE
## WARNING: review this process during the next first patch release (x.y.2) from a regular release (x.y.0), possibly during 1.15.2 release.
# 0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone.
# 1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd
# 2. Bump patch version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git.
# 3. Add new heading in NEWS for the next dev PATCH version. Add "(submitted to CRAN on <today>)" on the released heading.
# 3. Add "(submitted to CRAN on <today>)" on the released heading for the NEWS.
# 4. Bump patch version in dllVersion() in init.c
# 5. Bump 3 patch version numbers in Makefile
# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.99 to 1.15.99
# 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
# 8. Push to master with this consistent commit message: "1.15.0 on CRAN. Bump to 1.15.99"
# 9. Take sha from step 8 and run `git tag 1.14.8 96c..sha..d77` then `git push origin 1.14.8` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
# 6. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/)
# 7. Commit the changes, then take the SHA and run `git tag 1.15.{2,4,6,...} ${SHA}` then `git push origin 1.15.{2,4,6,...}` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310)
######
Loading

0 comments on commit c29fe93

Please sign in to comment.