-
Notifications
You must be signed in to change notification settings - Fork 0
/
groupby with dogroups (R expression) performance regression #4200.Rmd
90 lines (74 loc) · 2.44 KB
/
groupby with dogroups (R expression) performance regression #4200.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
---
title: "groupby with dogroups 4200"
author: "Doris Amoakohene"
date: '`r Sys.Date()`'
output: html_document
---

- Issue discussing the regression: <https://github.com/Rdatatable/data.table/issues/4200>
- Relevant comment in that issue: <https://github.com/Rdatatable/data.table/issues/4200#issuecomment-578629820>
- Pull request fixing the regression: <https://github.com/Rdatatable/data.table/pull/4558>

```{r}
library(atime)
library(data.table)
```
```{r}
# Reserve a unique temporary path, create it as a directory, and clone the
# data.table repository into it. `tdir` is reused later as the package path
# handed to atime.
tdir <- tempfile()
dir.create(path = tdir)
git2r::clone(
  url = "https://github.com/Rdatatable/data.table",
  local_path = tdir)
```
```{r}
# Time the grouped query from data.table issue #4200 at three commits of the
# cloned repository in `tdir`: before the regression, at the regression, and
# after the fix. The result is stored in atime.list.4200 for plotting.
atime.list.4200 <- atime::atime_versions(
  pkg.path = tdir,
  # Edits the package copy at new.pkg.path: the DESCRIPTION Package field
  # becomes new.Package, and the names tied to the compiled library get the
  # commit sha appended (presumably so that several commits can be installed
  # and loaded at the same time -- see the atime_versions documentation).
  pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) {
    # Find/replace helper for files matched by `glob` inside the package copy.
    pkg_find_replace <- function(glob, FIND, REPLACE) {
      atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE)
    }
    # The literal "." in the package name becomes "_?" in the pattern, so the
    # pattern matches the name with or without an underscore at that position.
    Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE)
    Package_ <- gsub(".", "_", old.Package, fixed = TRUE)
    new.Package_ <- paste0(Package_, "_", sha)
    pkg_find_replace(
      "DESCRIPTION",
      paste0("Package:\\s+", old.Package),
      paste("Package:", new.Package))
    pkg_find_replace(
      file.path("src", "Makevars.*in"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      Package_regex,
      new.Package_)
    pkg_find_replace(
      file.path("R", "onLoad.R"),
      sprintf('packageVersion\\("%s"\\)', old.Package),
      sprintf('packageVersion\\("%s"\\)', new.Package))
    pkg_find_replace(
      file.path("src", "init.c"),
      paste0("R_init_", Package_regex),
      paste0("R_init_", gsub("[.]", "_", new.Package_)))
    pkg_find_replace(
      "NAMESPACE",
      sprintf('useDynLib\\("?%s"?', Package_regex),
      paste0('useDynLib(', new.Package_))
  },
  # Candidate data sizes. NOTE(review): the upper end is far beyond what can
  # be allocated; this relies on atime not timing sizes past its time limit.
  N = 10^seq(1, 20),
  # Evaluated for each N: a table with N rows whose grouping column id3 holds
  # the values 1..0.9*N plus 0.1*N values resampled from that range, shuffled,
  # and two integer columns drawn from 1:5. The seed is fixed so every version
  # is timed on the same data.
  setup = {
    set.seed(108)
    d <- data.table(
      id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))),
      v1 = sample(5L, N, TRUE),
      v2 = sample(5L, N, TRUE))
  },
  # The grouped query is passed as the j/by arguments of the explicitly
  # namespaced method (i is left empty). atime swaps the `data.table:::`
  # prefix for each version-specific package, so the grouping has to happen
  # inside this call. Wrapping an already evaluated `d[...]` would run the
  # grouped work through ordinary `[` dispatch instead of the version being
  # timed.
  expr = data.table:::`[.data.table`(d, , max(v1) - min(v2), by = id3),
  "Before" = "20d485587d258f7d820e5e4cc0089dd6bda6a141",
  "Regression" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f",
  "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6")
```
```{r}
# Build the titled figure once, show it in the rendered document, and write a
# PNG copy to atime.list.4200.png in the working directory.
# labs() is called through the ggplot2 namespace because the visible part of
# this document only attaches atime and data.table, so a bare labs() is not
# on the search path.
gg.4200 <- plot(atime.list.4200) +
  ggplot2::labs(
    title = "groupby with dogroups (R expression) performance regression")
print(gg.4200)
# Explicit print() so the figure is drawn on the png device regardless of how
# the chunk is evaluated; dev.off() closes the device and flushes the file.
png("atime.list.4200.png")
print(gg.4200)
dev.off()
```