Skip to content

Commit

Permalink
new argument to print indices alongside x
Browse files Browse the repository at this point in the history
  • Loading branch information
joshhwuu committed Jun 17, 2024
1 parent f5a1e09 commit 68e2df5
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 2 deletions.
13 changes: 12 additions & 1 deletion R/print.data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
trunc.cols=getOption("datatable.print.trunc.cols"),
quote=FALSE,
na.print=NULL,
timezone=FALSE, ...) {
timezone=FALSE,
indices=FALSE, ...) {
# topn - print the top topn and bottom topn rows with '---' inbetween (5)
# nrows - under this the whole (small) table is printed, unless topn is provided (100)
# class - should column class be printed underneath column name? (FALSE)
Expand Down Expand Up @@ -64,15 +65,25 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
}
return(invisible(x))
}
# Optionally show the secondary indices of x as extra printed columns.
# Note: the logical argument `indices` shares its name with the indices() function; R skips
#   non-function bindings when resolving a call, so indices(x) below still reaches the function.
n_x = nrow(x)
index_cols = NULL
if (indices) {
  index_names = indices(x)
  if (is.null(index_names)) {
    # no index set on x: print exactly as if indices=FALSE
    indices = FALSE
  } else {
    index_attr = attr(x, 'index')
    # one printed column per index; attr() only accepts a single name, so look each one up in turn
    index_cols = lapply(index_names, function(nm) {
      ord = attr(index_attr, paste0('__', nm), exact=TRUE)
      # NOTE(review): a zero-length order vector is taken to mean x is already sorted by this
      #   index, i.e. the order is the identity -- confirm against setindex internals
      if (length(ord)) ord else seq_len(n_x)
    })
    names(index_cols) = paste0("index__", index_names)
  }
}
if ((topn*2L+1L)<n_x && (n_x>nrows || !topnmiss)) {
  toprint = rbindlist(list(head(x, topn), tail(x, topn)), use.names=FALSE) # no need to match names because head and tail of same x, and #3306
  rn = c(seq_len(topn), seq.int(to=n_x, length.out=topn))
  printdots = TRUE
  # toprint is a new table built by rbindlist(), so set() here cannot touch x;
  # rn holds the row numbers of the printed head and tail rows
  if (indices) for (nm in names(index_cols)) set(toprint, j=nm, value=index_cols[[nm]][rn])
} else {
  # set() adds columns by reference, so work on a copy when index columns are appended;
  # otherwise printing would silently add the index columns to the caller's table
  toprint = if (indices) copy(x) else x
  rn = seq_len(n_x)
  printdots = FALSE
  if (indices) for (nm in names(index_cols)) set(toprint, j=nm, value=index_cols[[nm]])
}
toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as <NA>
require_bit64_if_needed(x)
Expand Down
46 changes: 46 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18612,3 +18612,49 @@ test(2263.3, options=list(datatable.verbose=TRUE, datatable.optimize=0L), names(
test(2263.4, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, b], data.table(b=dt$b, N=1L), output="GForce optimized j to")
test(2263.5, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, .(b,c)], data.table(b=dt$b, c=dt$c, N=1L), output="GForce optimized j to")
test(2263.6, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), names(attributes(dt[, .N, b]$b)), c("class", "att"), output="GForce optimized j to")

# tests for printing indices alongside data.tables
# Fixture: 200 rows of seeded random integers in three grouping columns.
NN = 200
set.seed(2024)
DT = data.table(
grp1 = sample(100, NN, TRUE),
grp2 = sample(90, NN, TRUE),
grp3 = sample(80, NN, TRUE)
)
# key on (grp1, grp2), then a secondary index on (grp1, grp3)
setkey(DT, grp1, grp2)
setindex(DT, grp1, grp3)
# NOTE(review): dt2 is not referenced by any test visible here -- confirm it is still needed
dt2 = copy(DT)
# expected output: the three data columns plus one extra column named after the index
ans = c(
" grp1 grp2 grp3 index__grp1__grp3",
" 1: 1 5 15 1",
" 2: 1 24 60 2",
" 3: 2 26 32 5",
" 4: 2 36 57 3",
" 5: 2 51 30 4",
" --- ",
"196: 98 77 45 195",
"197: 98 87 70 197",
"198: 100 18 21 198",
"199: 100 36 51 199",
"200: 100 38 56 200"
)
# 2264.1: index on two columns is printed as a single extra column
test(2264.1, print(DT, indices=TRUE), output=ans)
setindex(DT, NULL) # clear indices
# if no indices are set, simply ignore
# 2264.2: with no index present, indices=TRUE must give the same output as indices=FALSE
test(2264.2, capture.output(print(DT, indices=TRUE)), capture.output(print(DT, indices=FALSE)))
# re-index on a single column that is not part of the key
setindex(DT, grp3)
ans = c(
" grp1 grp2 grp3 index__grp3",
" 1: 1 5 15 10",
" 2: 1 24 60 119",
" 3: 2 26 32 164",
" 4: 2 36 57 192",
" 5: 2 51 30 63",
" --- ",
"196: 98 77 45 11",
"197: 98 87 70 66",
"198: 100 18 21 31",
"199: 100 36 51 139",
"200: 100 38 56 159"
)
# 2264.3: single-column index is printed under the name index__grp3
test(2264.3, print(DT, indices=TRUE), output=ans)
15 changes: 14 additions & 1 deletion man/print.data.table.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
trunc.cols=getOption("datatable.print.trunc.cols"), # default: FALSE
quote=FALSE,
na.print=NULL,
timezone=FALSE, \dots)
timezone=FALSE,
indices=FALSE, \dots)

format_col(x, \dots)
\method{format_col}{default}(x, \dots)
Expand All @@ -49,6 +50,7 @@
\item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
\item{timezone}{ If \code{TRUE}, time columns of class POSIXct or POSIXlt will be printed with their timezones (if attribute is available). }
\item{na.print}{ The string to be printed in place of \code{NA} values, as in \code{print.default}. }
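\item{indices}{ If \code{TRUE}, the secondary index of \code{x} (as set by \code{setindex}) is printed as an additional column alongside the columns of \code{x}, named \code{index__} followed by the indexed column names. Has no effect when \code{x} has no index. }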
\item{\dots}{ Other arguments ultimately passed to \code{format}. }
}
\value{
Expand Down Expand Up @@ -116,6 +118,17 @@
x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
print(x)

# printing the secondary index alongside the data
NN = 200
set.seed(2024)
DT = data.table(
grp1 = sample(100, NN, TRUE),
grp2 = sample(90, NN, TRUE),
grp3 = sample(80, NN, TRUE)
)
# key on (grp1, grp2) plus a secondary index on (grp1, grp3)
setkey(DT, grp1, grp2)
setindex(DT, grp1, grp3)
# the index is shown as an extra column next to the data columns
print(DT, indices = TRUE)

iris = as.data.table(iris)
iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species]
format_list_item.lm = function(x, ...) sprintf('<lm:\%s>', format(x$call$formula))
Expand Down

0 comments on commit 68e2df5

Please sign in to comment.