Add `indices` argument to print.data.table to print indices alongside x

Rdatatable · Jun 17, 2024 · 68e2df5 · 68e2df5
1 parent f5a1e09
commit 68e2df5
Show file tree

Hide file tree

Showing 3 changed files with 72 additions and 2 deletions.
diff --git a/R/print.data.table.R b/R/print.data.table.R
@@ -9,7 +9,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
                trunc.cols=getOption("datatable.print.trunc.cols"),
                quote=FALSE,
                na.print=NULL,
-               timezone=FALSE, ...) {
+               timezone=FALSE,
+               indices=FALSE, ...) {
   # topn  - print the top topn and bottom topn rows with '---' inbetween (5)
   # nrows - under this the whole (small) table is printed, unless topn is provided (100)
   # class - should column class be printed underneath column name? (FALSE)
@@ -64,15 +65,25 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"),
     }
     return(invisible(x))
   }
+  if (indices) {
+    if (is.null(indices(x))) {
+      indices = FALSE
+    } else {
+      index_col_name = paste0("index__", indices(x))
+      index_col = attr(attr(x, 'index'), paste0('__', indices(x)))
+    }
+  }
   n_x = nrow(x)
   if ((topn*2L+1L)<n_x && (n_x>nrows || !topnmiss)) {
     toprint = rbindlist(list(head(x, topn), tail(x, topn)), use.names=FALSE)  # no need to match names because head and tail of same x, and #3306
     rn = c(seq_len(topn), seq.int(to=n_x, length.out=topn))
     printdots = TRUE
+    if (indices) set(toprint, j=index_col_name, value=c(head(index_col, topn), tail(index_col, topn)))
   } else {
     toprint = x
     rn = seq_len(n_x)
     printdots = FALSE
+    if (indices) set(toprint, j=index_col_name, value=index_col)
   }
   toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...)  # na.encode=FALSE so that NA in character cols print as <NA>
   require_bit64_if_needed(x)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -18612,3 +18612,49 @@ test(2263.3, options=list(datatable.verbose=TRUE, datatable.optimize=0L), names(
 test(2263.4, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, b], data.table(b=dt$b, N=1L), output="GForce optimized j to")
 test(2263.5, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), dt[, .N, .(b,c)], data.table(b=dt$b, c=dt$c, N=1L), output="GForce optimized j to")
 test(2263.6, options=list(datatable.verbose=TRUE, datatable.optimize=Inf), names(attributes(dt[, .N, b]$b)), c("class", "att"), output="GForce optimized j to")
+
+# tests for printing indices alongside data.tables
+NN = 200
+set.seed(2024)
+DT = data.table(
+ grp1 = sample(100, NN, TRUE),
+ grp2 = sample(90, NN, TRUE),
+ grp3 = sample(80, NN, TRUE)
+)
+setkey(DT, grp1, grp2)
+setindex(DT, grp1, grp3)
+dt2 = copy(DT)
+ans = c(
+ "     grp1 grp2 grp3 index__grp1__grp3",
+ "  1:    1    5   15                 1",
+ "  2:    1   24   60                 2",
+ "  3:    2   26   32                 5",
+ "  4:    2   36   57                 3",
+ "  5:    2   51   30                 4",
+ " ---                                 ",
+ "196:   98   77   45               195",
+ "197:   98   87   70               197",
+ "198:  100   18   21               198",
+ "199:  100   36   51               199",
+ "200:  100   38   56               200"
+ )
+test(2264.1, print(DT, indices=TRUE), output=ans)
+setindex(DT, NULL) # clear indices
+# if no indices are set, indices=TRUE is ignored and the output matches indices=FALSE
+test(2264.2, capture.output(print(DT, indices=TRUE)), capture.output(print(DT, indices=FALSE)))
+setindex(DT, grp3)
+ans = c(
+ "     grp1 grp2 grp3 index__grp3",
+ "  1:    1    5   15          10",
+ "  2:    1   24   60         119",
+ "  3:    2   26   32         164",
+ "  4:    2   36   57         192",
+ "  5:    2   51   30          63",
+ " ---                           ",
+ "196:   98   77   45          11",
+ "197:   98   87   70          66",
+ "198:  100   18   21          31",
+ "199:  100   36   51         139",
+ "200:  100   38   56         159"
+ )
+test(2264.3, print(DT, indices=TRUE), output=ans)
diff --git a/man/print.data.table.Rd b/man/print.data.table.Rd
@@ -27,7 +27,8 @@
     trunc.cols=getOption("datatable.print.trunc.cols"), # default: FALSE
     quote=FALSE,
     na.print=NULL,
-    timezone=FALSE, \dots)
+    timezone=FALSE,
+    indices=FALSE, \dots)
 
   format_col(x, \dots)
   \method{format_col}{default}(x, \dots)
@@ -49,6 +50,7 @@
   \item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
   \item{timezone}{ If \code{TRUE}, time columns of class POSIXct or POSIXlt will be printed with their timezones (if attribute is available). }
   \item{na.print}{ The string to be printed in place of \code{NA} values, as in \code{print.default}. }
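+  \item{indices}{ If \code{TRUE}, the index set on \code{x} (see \code{\link{setindex}}) is printed as an extra column alongside \code{x}, named \code{index__} followed by the index name. Ignored if \code{x} has no index. }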
   \item{\dots}{ Other arguments ultimately passed to \code{format}. }
 }
 \value{
@@ -116,6 +118,17 @@
   x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
   print(x)
 
+  NN = 200
+  set.seed(2024)
+  DT = data.table(
+    grp1 = sample(100, NN, TRUE),
+    grp2 = sample(90, NN, TRUE),
+    grp3 = sample(80, NN, TRUE)
+  )
+  setkey(DT, grp1, grp2)
+  setindex(DT, grp1, grp3)
+  print(DT, indices = TRUE)
+
   iris = as.data.table(iris)
   iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species]
   format_list_item.lm = function(x, ...) sprintf('<lm:\%s>', format(x$call$formula))