diff --git a/NAMESPACE b/NAMESPACE index cb45cf86f..d6abdde07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -163,6 +163,14 @@ S3method(split, IDate) S3method(unique, IDate) S3method(unique, ITime) +# generic to support custom column formatters +export(format_col) +S3method(format_col, default) +S3method(format_col, POSIXct) +S3method(format_col, expression) +export(format_list_item) +S3method(format_list_item, default) + # duplist # getdots # NCOL diff --git a/NEWS.md b/NEWS.md index 478ffb855..e8dc4d192 100644 --- a/NEWS.md +++ b/NEWS.md @@ -71,6 +71,8 @@ 7. New variable `.Last.updated` (similar to R's `.Last.value`) contains the number of rows affected by the most recent `:=` or `set()`, [#1885](https://github.com/Rdatatable/data.table/issues/1885). +8. `data.table` printing now supports customizable methods for both columns and list column row items, part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). `format_col` is S3-generic for customizing how to print whole columns; `format_list_item` is S3-generic for customizing how to print each row of a list column. Thanks variously to @mllg, who initially filed [#3338](https://github.com/Rdatatable/data.table/pulls/3338) with the seed of the idea, @franknarf1 who earlier suggested the idea of providing custom formatters, @fparages who submitted a patch to improve the printing of timezones for [#2842](https://github.com/Rdatatable/data.table/issues/2842), @RichardRedding for pointing out an error relating to printing wide `expression` columns in [#3011](https://github.com/Rdatatable/data.table/issues/3011), and @MichaelChirico for the ultimate implementation. See `?print.data.table` for examples. + #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/R/print.data.table.R b/R/print.data.table.R index 6edf0d195..8bd10735b 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -6,8 +6,7 @@ print.data.table <- function(x, topn=getOption("datatable.print.topn"), row.names=getOption("datatable.print.rownames"), col.names=getOption("datatable.print.colnames"), print.keys=getOption("datatable.print.keys"), - quote=FALSE, - timezone=FALSE, ...) { + quote=FALSE, ...) { # topn - print the top topn and bottom topn rows with '---' inbetween (5) # nrows - under this the whole (small) table is printed, unless topn is provided (100) # class - should column class be printed underneath column name? (FALSE) @@ -64,7 +63,7 @@ print.data.table <- function(x, topn=getOption("datatable.print.topn"), rn = seq_len(nrow(x)) printdots = FALSE } - toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as + toprint=format.data.table(toprint, na.encode=FALSE, ...) # na.encode=FALSE so that NA in character cols print as if ((!"bit64" %chin% loadedNamespaces()) && any(sapply(x,inherits,"integer64"))) require_bit64() # When we depend on R 3.2.0 (Apr 2015) we can use isNamespaceLoaded() added then, instead of %chin% above @@ -111,44 +110,11 @@ print.data.table <- function(x, topn=getOption("datatable.print.topn"), invisible(x) } -format.data.table <- function (x, ..., justify="none", timezone = FALSE) { +format.data.table <- function (x, ..., justify="none") { if (is.atomic(x) && !is.null(x)) { stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") } - format.item <- function(x) { - if (is.null(x)) # NULL item in a list column - "" - else if (is.atomic(x) || inherits(x,"formula")) # FR #2591 - format.data.table issue with columns of class "formula" - paste(c(format(head(x, 6L), justify=justify, ...), if (length(x) > 6L) "..."), collapse=",") # fix for #5435 - format has to be added here... - else - paste0("<", class(x)[1L], ">") - } - # FR #2842 add timezone for posix timestamps - format.timezone <- function(col) { # paste timezone to a time object - tz = attr(col,'tzone', exact = TRUE) - if (!is.null(tz)) { # date object with tz - nas = is.na(col) - col = paste0(as.character(col)," ",tz) # parse to character - col[nas] = NA_character_ - } - return(col) - } - # FR #1091 for pretty printing of character - # TODO: maybe instead of doing "this is...", we could do "this ... test"? - char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) { - trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) - if (!is.character(x) || trunc.char <= 0L) return(x) - idx = which(nchar(x) > trunc.char) - x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...") - x - } - do.call("cbind",lapply(x,function(col,...){ - if (!is.null(dim(col))) stop("Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.") - if(timezone) col = format.timezone(col) - if (is.list(col)) col = vapply_1c(col, format.item) - else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435 - col - },...)) + do.call("cbind", lapply(x, format_col, ..., justify=justify)) } mimicsAutoPrint = c("knit_print.default") @@ -165,3 +131,40 @@ shouldPrint = function(x) { # as opposed to printing a blank line, for excluding col.names per PR #1483 cut_top = function(x) cat(capture.output(x)[-1L], sep = '\n') +format_col = function(x, ...) { + UseMethod("format_col") +} + +format_list_item = function(x, ...) { + UseMethod("format_list_item") +} + +format_col.default = function(x, ...) { + if (!is.null(dim(x))) stop("Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.") + if (is.list(x)) return(vapply_1c(x, format_list_item)) + format(char.trunc(x), ...) # added an else here to fix #5435 +} + +# #2842 -- different columns can have different tzone, so force usage in output +format_col.POSIXct = function(x, ...) format(x, usetz = TRUE, ...) + +# #3011 -- expression columns can wrap to newlines which breaks printing +format_col.expression = function(x, ...) format(char.trunc(as.character(x)), ...) + +format_list_item.default = function(x, ...) { + if (is.null(x)) return ("") # NULL item in a list column + if (is.atomic(x) || inherits(x, "formula")) # FR #2591 - format.data.table issue with columns of class "formula" + paste(c(format(head(x, 6L), ...), if (length(x) > 6L) "..."), collapse=",") # fix for #5435 - format has to be added here... + else + paste0("<", class(x)[1L], ">") +} + +# FR #1091 for pretty printing of character +# TODO: maybe instead of doing "this is...", we could do "this ... test"? +char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) { + trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) + if (!is.character(x) || trunc.char <= 0L) return(x) + idx = which(nchar(x) > trunc.char) + x[idx] = paste0(substr(x[idx], 1L, as.integer(trunc.char)), "...") + x +} diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a3a88f6ef..beb0674dd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14178,12 +14178,13 @@ test(2025.11, fread(f), data.table(A=1:2, B=c("foobar","baz"), C=3:4)) # printing timezone, #2842 DT = data.table(t1 = as.POSIXct("1982-04-26 13:34:56", tz = "Europe/Madrid"),t2 = as.POSIXct("2019-01-01 19:00:01",tz = "UTC")) -test(2026.1, capture.output(print(DT))[2], "1: 1982-04-26 13:34:56 2019-01-01 19:00:01") -test(2026.2, capture.output(print(DT,timezone = TRUE))[2], "1: 1982-04-26 13:34:56 Europe/Madrid 2019-01-01 19:00:01 UTC") -DT = data.table(v1 = c(1,as.numeric(NA))) -DT[2,t:= as.POSIXct("2019-01-01 19:00:01",tz = "UTC")] -test(2026.3, capture.output(print(DT)), c(" v1 t","1: 1 ", "2: NA 2019-01-01 19:00:01")) -test(2026.4, capture.output(print(DT, timezone = TRUE)), c(" v1 t","1: 1 ","2: NA 2019-01-01 19:00:01 UTC")) +test(2026.1, capture.output(print(DT))[2L], "1: 1982-04-26 13:34:56 CEST 2019-01-01 19:00:01 UTC") +DT = data.table(v1 = c(1, as.numeric(NA))) +DT[2L, t:= as.POSIXct("2019-01-01 19:00:01", tz = "UTC")] +test(2026.2, capture.output(print(DT)), + c(" v1 t", + "1: 1 ", + "2: NA 2019-01-01 19:00:01 UTC")) # empty item in j=list(x, ) errors gracefully, #3507 DT = data.table(a = 1:5) @@ -14405,6 +14406,28 @@ test(2034.1, fread('A,B\n"foo","ba"r"', quote="''"), error='quote= must be a sin test(2034.2, fread('A,B\n"foo","ba"r"', quote=FALSE), ans<-data.table(A='"foo"', B='"ba"r"')) test(2034.3, fread('A,B\n"foo","ba"r"', quote=""), ans) +# format_col and format_list_item printing helpers/generics +## Use case: solve #2842 by defining format_col.POSIXct to have usetz = TRUE +DT = data.table( + t1 = as.POSIXct('2018-05-01 12:34:56', tz = 'UTC'), + t2 = as.POSIXct('2018-05-01 12:34:56', tz = 'Asia/Singapore') +) +test(2035.1, DT, output = 'UTC.*\\+08') + +# #3011 -- default expression printing can break format_col.default +test(2035.2, print(data.table(e = expression(1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13))), output = '1 + 2 + 3') + +# format_col generic is used +format_col.complex = function(x, ...) sprintf('(%.1f, %.1fi)', Re(x), Im(x)) +registerS3method("format_col", "complex", format_col.complex) +x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i)) +test(2035.3, x, output = '(1.0, 3.0i)') + +# format_list_item() generic is used +format_list_item.myclass <- function(x, ...) paste0("<", class(x)[1L], ":", x$id, ">") +registerS3method("format_list_item", "myclass", format_list_item.myclass) +DT = data.table(row = 1:2, objs = list(structure(list(id = "foo"), class = "myclass"), structure(list(id = "bar"), class = "myclass"))) +test(2035.4, print(DT), output = "myclass:foo.*myclass:bar") ################################### # Add new tests above this line # diff --git a/man/print.data.table.Rd b/man/print.data.table.Rd index 3b7fe7e16..9ba6c712b 100644 --- a/man/print.data.table.Rd +++ b/man/print.data.table.Rd @@ -1,10 +1,18 @@ \name{print.data.table} \alias{print.data.table} +\alias{format_col} +\alias{format_col.default} +\alias{format_col.POSIXct} +\alias{format_col.expression} +\alias{format_list_item} +\alias{format_list_item.default} \title{ data.table Printing Options } \description{ \code{print.data.table} extends the functionalities of \code{print.data.frame}. Key enhancements include automatic output compression of many observations and concise column-wise \code{class} summary. + + \code{format_col} and \code{format_list_item} generics provide flexibility for end-users to define custom printing methods for generic classes. } \usage{ \method{print}{data.table}(x, @@ -14,8 +22,15 @@ row.names=getOption("datatable.print.rownames"), # default: TRUE col.names=getOption("datatable.print.colnames"), # default: "auto" print.keys=getOption("datatable.print.keys"), # default: FALSE - quote=FALSE, - timezone=FALSE, \dots) + quote=FALSE, \dots) + + format_col(x, \dots) + \method{format_col}{default}(x, \dots) + \method{format_col}{POSIXct}(x, \dots) + \method{format_col}{expression}{x, \dots) + + format_list_item(x, \dots) + \method{format_list_item}{default}(x, \dots) } \arguments{ \item{x}{ A \code{data.table}. } @@ -26,11 +41,20 @@ \item{col.names}{ One of three flavours for controlling the display of column names in output. \code{"auto"} includes column names above the data, as well as below the table if \code{nrow(x) > 20}. \code{"top"} excludes this lower register when applicable, and \code{"none"} suppresses column names altogether (as well as column classes if \code{class = TRUE}. } \item{print.keys}{ If \code{TRUE}, any \code{\link{key}} and/or \code{\link[=indices]{index}} currently assigned to \code{x} will be printed prior to the preview of the data. } \item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. } - \item{timezone}{ If \code{TRUE}, time columns of class POSIXct or POSIXlt will be printed with their timezones (if attribute is available). } \item{\dots}{ Other arguments ultimately passed to \code{format}. } } +\value{ + \code{print.data.table} returns \code{x} invisibly. + + \code{format_col} returns a \code{length(x)}-size \code{character} vector. + + \code{format_list_item} returns a length-1 \code{character} scalar. +} \details{ By default, with an eye to the typically large number of observations in a code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print). + + \code{format_col} is applied at a column level; for example, \code{format_col.POSIXct} is used to tag the time zones of \code{POSIXct} columns. \code{format_list_item} is applied to the elements (rows) of \code{list} columns; see Examples. + } \seealso{\code{\link{print.default}}} \examples{ @@ -58,5 +82,17 @@ setindexv(DT, c("a", "b")) setindexv(DT, "a") print(DT, print.keys=TRUE) -} + + # Formatting customization + format_col.complex = function(x, ...) sprintf('(\%.1f, \%.1fi)', Re(x), Im(x)) + registerS3method("format_col", "complex", format_col.complex) + x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i)) + print(x) + iris = as.data.table(iris) + iris_agg = iris[ , .(reg = list(lm(Sepal.Length ~ Petal.Length))), by = Species] + format_list_item.lm = function(x) sprintf('', format(x$call$formula)) + registerS3method("format_list_item", "lm", format_list_item.lm) + print(iris_agg) + +}