From 94e4be689733ea50b0a0cae5e13e97efd0d4255b Mon Sep 17 00:00:00 2001 From: Anirban Date: Wed, 10 Apr 2024 22:58:22 -0700 Subject: [PATCH 01/15] Added my workflow (Marketplace version) and the two tests I used in the examples --- .github/workflows/autocomment.yml | 21 ++++++++++ inst/atime/tests.R | 69 +++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 .github/workflows/autocomment.yml create mode 100644 inst/atime/tests.R diff --git a/.github/workflows/autocomment.yml b/.github/workflows/autocomment.yml new file mode 100644 index 000000000..94a906a36 --- /dev/null +++ b/.github/workflows/autocomment.yml @@ -0,0 +1,21 @@ +name: Autocomment atime-based performance regression analysis on PRs + +on: + pull_request: + branches: + - '*' + types: + - opened + - reopened + - synchronize + +jobs: + comment: + runs-on: ubuntu-latest + container: ghcr.io/iterative/cml:0-dvc2-base1 + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + repo_token: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: Anirban166/Autocomment-atime-results@v1.1.6 \ No newline at end of file diff --git a/inst/atime/tests.R b/inst/atime/tests.R new file mode 100644 index 000000000..7095ae350 --- /dev/null +++ b/inst/atime/tests.R @@ -0,0 +1,69 @@ +pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { + pkg_find_replace <- function(glob, FIND, REPLACE) { + atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) + } + Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE) + Package_ <- gsub(".", "_", old.Package, fixed = TRUE) + new.Package_ <- paste0(Package_, "_", sha) + pkg_find_replace( + "DESCRIPTION", + paste0("Package:\\s+", old.Package), + paste("Package:", new.Package)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + sprintf('packageVersion\\("%s"\\)', old.Package), + sprintf('packageVersion\\("%s"\\)', new.Package)) + pkg_find_replace( + file.path("src", "init.c"), + paste0("R_init_", Package_regex), + paste0("R_init_", gsub("[.]", "_", new.Package_))) + pkg_find_replace( + "NAMESPACE", + sprintf('useDynLib\\("?%s"?', Package_regex), + paste0('useDynLib(', new.Package_)) + } + +test.list <- list( + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 + "Test regression fixed in #4440" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3,8), + setup = quote({ + set.seed(1L) + dt <- data.table(a = sample(N, N)) + setindex(dt, a) + }), + expr = quote(data.table:::shallow(dt)), + "Before" = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This should be changed later. Currently, the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) + "Regression" = "752012f577f8e268bb6d0084ca39a09fa7fbc1c4", # A commit that is affected by the regression: https://github.com/Rdatatable/data.table/commit/752012f577f8e268bb6d0084ca39a09fa7fbc1c4 + "Fixed" = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # The merge commit in #4440, the PR that fixed the regression: https://github.com/Rdatatable/data.table/commit/9d3b9202fddb980345025a4f6ac451ed26a423be + + # Test based on: https://github.com/Rdatatable/data.table/issues/5424 + # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + "Test regression fixed in #5463" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3, 8), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + setup = quote({ + n <- N/100 + set.seed(1L) + dt <- data.table( + g = sample(seq_len(n), N, TRUE), + x = runif(N), + key = "g") + dt_mod <- copy(dt) + }), + "Before" = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # The commit in PR #4491 that comes before the regression introducting commit: https://github.com/Rdatatable/data.table/pull/4491/commits/be2f72e6f5c90622fe72e1c315ca05769a9dc854 + "Regression" = "e793f53466d99f86e70fc2611b708ae8c601a451", # The commit in #4491 that introduced the regression: https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451 + "Fixed" = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in #5463, the PR that fixed the regression: https://github.com/Rdatatable/data.table/pull/5463/commits/58409197426ced4714af842650b0cc3b9e2cb842 +) \ No newline at end of file From 523e3cc90553060f072e647a4ac68e921deb89b3 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 12:12:03 -0700 Subject: [PATCH 02/15] Renamed workflow as per Toby's suggestion --- .github/workflows/{autocomment.yml => performance-tests.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{autocomment.yml => performance-tests.yml} (87%) diff --git a/.github/workflows/autocomment.yml b/.github/workflows/performance-tests.yml similarity index 87% rename from .github/workflows/autocomment.yml rename to .github/workflows/performance-tests.yml index 94a906a36..027854faa 100644 --- a/.github/workflows/autocomment.yml +++ b/.github/workflows/performance-tests.yml @@ -18,4 +18,4 @@ jobs: repo_token: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: Anirban166/Autocomment-atime-results@v1.1.6 \ No newline at end of file + - uses: Anirban166/Autocomment-atime-results@v1.1.6 From e07565ceb79fa2153b903aa3440dd0e27adf57c0 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 17:24:46 -0700 Subject: [PATCH 03/15] Made the suggested changes --- inst/atime/tests.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 7095ae350..94844b7b3 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -43,9 +43,9 @@ test.list <- list( setindex(dt, a) }), expr = quote(data.table:::shallow(dt)), - "Before" = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This should be changed later. Currently, the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) - "Regression" = "752012f577f8e268bb6d0084ca39a09fa7fbc1c4", # A commit that is affected by the regression: https://github.com/Rdatatable/data.table/commit/752012f577f8e268bb6d0084ca39a09fa7fbc1c4 - "Fixed" = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # The merge commit in #4440, the PR that fixed the regression: https://github.com/Rdatatable/data.table/commit/9d3b9202fddb980345025a4f6ac451ed26a423be + Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) + Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression + Fixed = "769f02c6fbbb031391a79f46c6042de99f1ea712"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440/commits) # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 @@ -63,7 +63,7 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - "Before" = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # The commit in PR #4491 that comes before the regression introducting commit: https://github.com/Rdatatable/data.table/pull/4491/commits/be2f72e6f5c90622fe72e1c315ca05769a9dc854 - "Regression" = "e793f53466d99f86e70fc2611b708ae8c601a451", # The commit in #4491 that introduced the regression: https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451 - "Fixed" = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in #5463, the PR that fixed the regression: https://github.com/Rdatatable/data.table/pull/5463/commits/58409197426ced4714af842650b0cc3b9e2cb842 -) \ No newline at end of file + Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) + Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) + Fixed = "1e03fe7b890e63da9651d997ea52548c90b3ae32") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) +) From a3d5cf938d6831cda483c79d7b6e4194865dde7c Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 18:10:32 -0700 Subject: [PATCH 04/15] Reverted changes to the 'Fixed' commit SHA for the first test case since the last commit of #4440 failed to check out --- inst/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 94844b7b3..e023eec21 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -45,7 +45,7 @@ test.list <- list( expr = quote(data.table:::shallow(dt)), Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression - Fixed = "769f02c6fbbb031391a79f46c6042de99f1ea712"), # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440/commits) + Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 From 3093a35db700441e0cb2b3fca5a67f36a17ddce1 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 19:11:03 -0700 Subject: [PATCH 05/15] Reverted changes to the 'Fixed' commit SHA for the second test case as well since the newly provided commit SHA is wrong --- inst/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index e023eec21..6f38660b4 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -65,5 +65,5 @@ test.list <- list( }), Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) - Fixed = "1e03fe7b890e63da9651d997ea52548c90b3ae32") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5493/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) ) From 48b2dddc551b7a2708cc3f6de061bcabf26513b9 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 19:29:09 -0700 Subject: [PATCH 06/15] Added the suggested path filters --- .github/workflows/performance-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 027854faa..0473dcbc2 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -8,6 +8,9 @@ on: - opened - reopened - synchronize + paths: + - 'R/**' + - 'src/**' jobs: comment: From fb8ca6f696624cee3a13e02cf69356341c6d7763 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 19:42:50 -0700 Subject: [PATCH 07/15] Don't need R to retain the source code attributes when parsing and saving functions --- .github/workflows/performance-tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 0473dcbc2..9c8cc664f 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -19,6 +19,5 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} repo_token: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes steps: - uses: Anirban166/Autocomment-atime-results@v1.1.6 From cd0331ff7942e3dce4b39a6c68deefd8e406fbfb Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 21:28:50 -0700 Subject: [PATCH 08/15] Added pseudo-roxygen style comments for the pkg.edit.fun function. --- inst/atime/tests.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 6f38660b4..cdbc7326e 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,3 +1,26 @@ +# A function to customize R package metadata and source files to facilitate version-specific installation and testing. +# +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in `Makevars` and version checking code in `onLoad.R`) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# It appends a SHA1 hash to the package name (`PKG.SHA`), ensuring each version can be installed and tested separately. +# +# @param old.Package Current name of the package. +# @param new.Package New name of the package, including a SHA hash. +# @param sha SHA1 hash used for differentiating versions. +# @param new.pkg.path Path to the package files. +# +# @details +# The function modifies: +# - DESCRIPTION, updating the package name. +# - Makevars, customizing the shared object file name and adjusting the build settings. +# - R/onLoad.R, adapting custom version checking for package loading operations. +# - NAMESPACE, changing namespace settings for dynamic linking. +# +# @examples +# pkg.edit.fun("data.table", "data.table.some_40_digit_SHA1_hash", "some_40_digit_SHA1_hash", "/path/to/data.table") +# +# @return None (performs in-place file modifications) +# @note This setup is typically unnecessary for most packages but essential for `data.table` due to its unique configuration. pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { pkg_find_replace <- function(glob, FIND, REPLACE) { atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) From 716da67b6045bf57a8049102020d6cd2f2a7e033 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 11 Apr 2024 22:55:33 -0700 Subject: [PATCH 09/15] Made the suggested changes (and reverted to the correct commits for the second test case) --- inst/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index cdbc7326e..a02f07b52 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -86,7 +86,7 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - Before = "19b7866112614db53eb3e909c097407d91cd6738", # Parent of the regression commit (https://github.com/Rdatatable/data.table/commit/0895fa247afcf6b38044bd5f56c0d209691ddb31), which is the parent of the first commit in the PR that causes the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) - Regression = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # The regression commit is the parent of the first commit in the PR that fixed the issue (https://github.com/Rdatatable/data.table/pull/5493/commits) + Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) ) From 33736725e8d1e48552248f5f8d63628b88ae913a Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 20:24:27 -0700 Subject: [PATCH 10/15] Documented test.list, made some formatting edits to what I documented yesterday (tick removals), added a link to the related atime vignette, tried to write in as much detail as I understand and included optional parameters for test.list --- inst/atime/tests.R | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index a02f07b52..3ef0f8bd7 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,8 +1,8 @@ # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in `Makevars` and version checking code in `onLoad.R`) +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) # to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. -# It appends a SHA1 hash to the package name (`PKG.SHA`), ensuring each version can be installed and tested separately. +# It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. # # @param old.Package Current name of the package. # @param new.Package New name of the package, including a SHA hash. @@ -17,10 +17,10 @@ # - NAMESPACE, changing namespace settings for dynamic linking. # # @examples -# pkg.edit.fun("data.table", "data.table.some_40_digit_SHA1_hash", "some_40_digit_SHA1_hash", "/path/to/data.table") +# pkg.edit.fun("data.table", "data.table.some_SHA1_hash", "some_SHA1_hash", "/path/to/data.table") # # @return None (performs in-place file modifications) -# @note This setup is typically unnecessary for most packages but essential for `data.table` due to its unique configuration. +# @note This setup is typically unnecessary for most packages but essential for data.table due to its unique configuration. pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { pkg_find_replace <- function(glob, FIND, REPLACE) { atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) @@ -54,6 +54,22 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { paste0('useDynLib(', new.Package_)) } +# A list of performance tests. +# +# Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: +# - N: A numeric sequence of data sizes to vary. +# - setup: An expression evaluated for every data size before measuring time/memory. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# This must call a function from data.table using a syntax with double or triple colon prefix. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) +# +# Optional parameters that may be useful to configure tests: +# - times: Number of times each expression is evaluated (default is 10). +# - seconds.limit: The maximum median timing (in seconds) of an expression. No timings for larger N are computed past that threshold. +# - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. +# For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). +# @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. test.list <- list( # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 From a65b08873d3145a0576a545a358d0e1e9f8b69d0 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 20:25:47 -0700 Subject: [PATCH 11/15] Temporarily removing the path filters to run a final check for the current commits to be working (tested locally, but just to ensure..) --- .github/workflows/performance-tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 9c8cc664f..5750bf209 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -7,10 +7,7 @@ on: types: - opened - reopened - - synchronize - paths: - - 'R/**' - - 'src/**' + - synchronize jobs: comment: From 1d151e0cd63970fcd6db2a1e31f2a575a8c46459 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 20:46:15 -0700 Subject: [PATCH 12/15] Added back the path filters now that I'm confirmed the commit SHAs are working as expected. --- .github/workflows/performance-tests.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/performance-tests.yml b/.github/workflows/performance-tests.yml index 5750bf209..2fc3a76f5 100644 --- a/.github/workflows/performance-tests.yml +++ b/.github/workflows/performance-tests.yml @@ -7,7 +7,10 @@ on: types: - opened - reopened - - synchronize + - synchronize + paths: + - 'R/**' + - 'src/**' jobs: comment: From 27adaad7fb55687df4920a458564817d50f30564 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 22:29:06 -0700 Subject: [PATCH 13/15] Made the changes Michael suggested --- inst/atime/tests.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 3ef0f8bd7..83bb34ecf 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -79,7 +79,7 @@ test.list <- list( setup = quote({ set.seed(1L) dt <- data.table(a = sample(N, N)) - setindex(dt, a) + setindexv(dt, "a") }), expr = quote(data.table:::shallow(dt)), Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) @@ -92,16 +92,16 @@ test.list <- list( "Test regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), setup = quote({ n <- N/100 - set.seed(1L) + set.seed(2L) dt <- data.table( g = sample(seq_len(n), N, TRUE), x = runif(N), key = "g") dt_mod <- copy(dt) }), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) From eaa70106493be5ad4b82052af82f64321889e652 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 12 Apr 2024 22:30:25 -0700 Subject: [PATCH 14/15] Oops forgot one Co-authored-by: Michael Chirico --- inst/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 83bb34ecf..68770347f 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -78,7 +78,7 @@ test.list <- list( N = 10^seq(3,8), setup = quote({ set.seed(1L) - dt <- data.table(a = sample(N, N)) + dt <- data.table(a = sample.int(N)) setindexv(dt, "a") }), expr = quote(data.table:::shallow(dt)), From c823c615dd31237e3525da75ff8f3e9fef9c9016 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 16 Apr 2024 13:37:48 -0700 Subject: [PATCH 15/15] Made the suggested changes that Toby and I discussed this morning --- inst/atime/tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 68770347f..a0635d063 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -82,7 +82,7 @@ test.list <- list( setindexv(dt, "a") }), expr = quote(data.table:::shallow(dt)), - Before = "9d3b9202fddb980345025a4f6ac451ed26a423be", # This needs to be changed later. Currently assigned to the merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) as the source of regression (or the particular commit that led to it) is not clear. In addition, older versions of data.table are having problems when being installed in this manner. (This includes commits from before Mar 20, 2020 or when the issue that discovered or first mentioned the regression was created) + # Before = "", This needs to be updated later as there are two issues here: A) The source of regression (or the particular commit that led to it) is not clear; B) Older versions of data.table are having problems when being installed in this manner (This includes commits from before March 20 2020, when the issue that discovered or first mentioned the regression was created) Regression = "b1b1832b0d2d4032b46477d9fe6efb15006664f4", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/0f0e7127b880df8459b0ed064dc841acd22f5b73) in the PR (https://github.com/Rdatatable/data.table/pull/4440/commits) that fixes the regression Fixed = "9d3b9202fddb980345025a4f6ac451ed26a423be"), # Merge commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/4440) @@ -102,7 +102,7 @@ test.list <- list( dt_mod <- copy(dt) }), expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), - Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Commit preceding the regression causing commit (https://github.com/Rdatatable/data.table/pull/4491/commits/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) + Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) )