Skip to content

Commit

Permalink
Add performance regression test for fread Date coercion (#6485)
Browse files Browse the repository at this point in the history
* Refactor colClasses handling for readability

* remove debug prints

* C-level changes needed

* update git hash again :\

* revert atime test, defer to follow-up

* re-site NEWS

* Add performance regression test for fread Date coercion

* correct comment

* for loop over fread args

* tidy up

* use atime_test_list(tests=extra.test.list)

* undo addition of empty lines

---------

Co-authored-by: Toby Dylan Hocking <toby.hocking@r-project.org>
Co-authored-by: Toby Dylan Hocking <toby.dylan.hocking@usherbrooke.ca>
Co-authored-by: Toby Dylan Hocking <tdhock5@gmail.com>
  • Loading branch information
4 people authored Sep 21, 2024
1 parent f4a3d92 commit a673460
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions .ci/atime/tests.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
# #6107 fixed performance across 3 ways to specify a column as Date, test each individually
# Each string below is the text of one extra fread() argument; it is spliced
# into the benchmarked call further down via sprintf().
extra.args.6107 <- c(
"colClasses=list(Date='date')",
"colClasses='Date'",
"select=list(Date='date')")
# Named list of test definitions, one per extra argument, filled in by the loop below.
extra.test.list <- list()
for (extra.arg in extra.args.6107){
# Everything except the benchmarked expression is the same for all three tests.
this.test <- atime::atime_test(
# Data sizes from 10 to 10^7, in steps of a quarter of an order of magnitude.
N = 10^seq(1, 7, by=0.25),
# For a given N: write a one-column CSV of N Dates (day numbers sampled with
# replacement from 1..20000, fixed seed) to a temporary file.
setup = {
set.seed(1)
DT = data.table(date=.Date(sample(20000, N, replace=TRUE)))
tmp_csv = tempfile()
fwrite(DT, tmp_csv)
},
Slow = "e9087ce9860bac77c51467b19e92cf4b72ca78c7", # Parent of the merge commit (https://github.com/Rdatatable/data.table/commit/a77e8c22e44e904835d7b34b047df2eff069d1f2) of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
Fast = "a77e8c22e44e904835d7b34b047df2eff069d1f2") # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/6107) that fixes the issue
# The expression to time is built from text so that the current extra.arg ends
# up inside the fread() call, then stored as the test's expr element.
# NOTE(review): presumably this is assigned after the call because atime_test()
# captures its arguments unevaluated -- confirm against the atime documentation.
this.test$expr = str2lang(sprintf("data.table::fread(tmp_csv, %s)", extra.arg))
# Key the test by a label that embeds the argument text, so the three variants get distinct names.
extra.test.list[[sprintf("fread(%s) improved in #6107", extra.arg)]] <- this.test
}

# A list of performance tests.
#
# See documentation in https://github.com/Rdatatable/data.table/wiki/Performance-testing for best practices.
Expand Down Expand Up @@ -176,6 +197,6 @@ test.list <- atime::atime_test_list(
expr = data.table:::transform.data.table(dt, y = round(x)),
Slow = "0895fa247afcf6b38044bd5f56c0d209691ddb31", # Parent of the first commit (https://github.com/Rdatatable/data.table/commit/93ce3ce1373bf733ebd2036e2883d2ffe377ab58) in the PR (https://github.com/Rdatatable/data.table/pull/5493/commits) that fixes the issue
Fast = "2d1a0575f87cc50e90f64825c30d7a6cb6b05dd7"), # Merge commit of the PR (https://github.com/Rdatatable/data.table/pull/5493) that fixes the issue
NULL)

tests=extra.test.list)
# nolint end: undesirable_operator_linter.

0 comments on commit a673460

Please sign in to comment.