From 0389de1aa3fa5d746f278d03e1901cb5c48e20c3 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 23 Mar 2018 09:03:24 +1100 Subject: [PATCH 01/18] rownames argument to as.matrix and as.data.frame --- R/data.table.R | 104 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index aad2d328b..c961256b8 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1859,17 +1859,58 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # x #} - -as.matrix.data.table <- function(x,...) -{ - dm <- dim(x) - cn <- names(x) +as.matrix.data.table <- function(x, rownames, ...) { + rn <- NULL + rnc <- NULL + if (!missing(rownames)) { # Convert rownames to a column index if possible + if (is.null(rownames)) { + warning("rownames is NULL, ignoring rownames") + } else if (length(rownames) != 1) { + stop("rownames must be a single column in x") + } else if (is.na(rownames)) { + warning("rownames is NA, ignoring rownames") + } else if (is.logical(rownames) && !isTRUE(rownames)) { + warning("rownames is FALSE, ignoring rownames") + } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { + # E.g. because rownames is some sort of object that cant be converted to a column index + stop("rownames must be TRUE, a column index, or a column name in x") + } else { + if (is.logical(rownames) && isTRUE(rownames)) { + if (haskey(x)) { + rownames <- key(x) + if (length(rownames) > 1) { + warning("rownames is TRUE but multiple keys found in key(x), using first column instead") + rownames <- 1 + } + } else { + rownames <- 1 + } + } + if (is.character(rownames)) { # Handles cases where rownames is a column name, or key(x) from TRUE + rnc <- chmatch(rownames, names(x)) + if (is.na(rnc)) stop(rownames, " is not a column of x") + } else { # rownames is an index already + if (rownames < 1 || rownames > ncol(x)) + stop("rownames is ", rownames, " which is outside the column number range [1,ncol=", ncol(x), "]") + rnc <- rownames + } + } + } + if (!is.null(rnc)) { # If there are rownames, extract and drop that column + rn <- x[, ..rnc][[1]] + dm <- dim(x) - c(0, 1) + cn <- names(x)[-rnc] + X <- x[, -rnc, with = FALSE] + } else { + dm <- dim(x) + cn <- names(x) + X <- x + } if (any(dm == 0L)) - return(array(NA, dim = dm, dimnames = list(NULL, cn))) + return(array(NA, dim = dm, dimnames = list(rn, cn))) p <- dm[2L] n <- dm[1L] collabs <- as.list(cn) - X <- x class(X) <- NULL non.numeric <- non.atomic <- FALSE all.logical <- TRUE @@ -1914,7 +1955,7 @@ as.matrix.data.table <- function(x,...) } X <- unlist(X, recursive = FALSE, use.names = FALSE) dim(X) <- c(n, length(X)/n) - dimnames(X) <- list(NULL, unlist(collabs, use.names = FALSE)) + dimnames(X) <- list(rn, unlist(collabs, use.names = FALSE)) X } @@ -2006,10 +2047,51 @@ tail.data.table <- function(x, n=6L, ...) { `[<-.data.table`(x,j=name,value=value) # important i is missing here } -as.data.frame.data.table <- function(x, ...) +as.data.frame.data.table <- function(x, rownames, ...) { - ans = copy(x) - setattr(ans,"row.names",.set_row_names(nrow(x))) # since R 2.4.0, data.frames can have non-character row names + rnc <- NULL + if (!missing(rownames)) { # Convert rownames to a column index if possible + if (is.null(rownames)) { + warning("rownames is NULL, ignoring rownames") + } else if (length(rownames) != 1) { + stop("rownames must be a single column in x") + } else if (is.na(rownames)) { + warning("rownames is NA, ignoring rownames") + } else if (is.logical(rownames) && !isTRUE(rownames)) { + warning("rownames is FALSE, ignoring rownames") + } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { + # E.g. because rownames is some sort of object that cant be converted to a column index + stop("rownames must be TRUE, a column index, or a column name in x") + } else { + if (is.logical(rownames) && isTRUE(rownames)) { + if (haskey(x)) { + rownames <- key(x) + if (length(rownames) > 1) { + warning("rownames is TRUE but multiple keys found in key(x), using first column instead") + rownames <- 1 + } + } else { + rownames <- 1 + } + } + if (is.character(rownames)) { # Handles cases where rownames is a column name, or key(x) from TRUE + rnc <- chmatch(rownames, names(x)) + if (is.na(rnc)) stop(rownames, " is not a column of x") + } else { # rownames is an index already + if (rownames < 1 || rownames > ncol(x)) + stop("rownames is ", rownames, " which is outside the column number range [1,ncol=", ncol(x), "]") + rnc <- rownames + } + } + } + if (!is.null(rnc)) { # If there are rownames, extract and drop that column + rn <- x[, ..rnc][[1]] + ans <- x[, -rnc, with = FALSE] + rownames(ans) <- rn # settatr(ans, "row.names", rn) only wokrs for character row names + } else { + ans = copy(x) + setattr(ans,"row.names",.set_row_names(nrow(x))) # since R 2.4.0, data.frames can have non-character row names + } setattr(ans,"class","data.frame") setattr(ans,"sorted",NULL) # remove so if you convert to df, do something, and convert back, it is not sorted setattr(ans,".internal.selfref",NULL) From b1590ae43ddac3f4469220b3e82bce56aca6718a Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 23 Mar 2018 12:23:11 +1100 Subject: [PATCH 02/18] Added documentation --- man/as.data.frame.Rd | 66 ++++++++++++++++++++++++++++++++++++++++++++ man/as.matrix.Rd | 62 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 man/as.data.frame.Rd create mode 100644 man/as.matrix.Rd diff --git a/man/as.data.frame.Rd b/man/as.data.frame.Rd new file mode 100644 index 000000000..c42779cd5 --- /dev/null +++ b/man/as.data.frame.Rd @@ -0,0 +1,66 @@ +\name{as.data.frame} +\alias{as.data.frame} +\alias{as.data.frame.data.table} +\title{Convert a data.table to a data.frame} +\description{ +Converts a \code{data.table} into a \code{data.frame}, optionally using one +of the columns in the \code{data.table} as the \code{data.frame} \code{rownames}. + +If you do not need to use one of the columns as the \code{rownames}, use +\code{\link{setDF}} to coerce a \code{data.table} to a \code{data.frame} +by reference (i.e. without making another copy in memory). +} +\usage{ +\method{as.data.frame}{data.table}(x, rownames, ...)} + +\arguments{ +\item{x}{a \code{data.table}} +\item{rownames}{optional, a single column name or column index to use as +the \code{rownames} inthe returned \code{matrix}. If \code{TRUE} the +\code{\link{key}} of the \code{data.table} will be used if it is a +single column, otherwise the first column in the \code{data.table} will +be used.} +\item{\dots}{additional arguments to be passed to or from methods.} +} + +\details{ +\code{\link{as.data.frame}} is a generic function in base R. It dispatches to +\code{as.data.frame.data.table} if its \code{x} argument is a \code{data.table}. + +The method for \code{data.table}s will create a copy of \code{x} with +the \code{data.table} class attributes removed. An additional argument +\code{rownames} is provided for \code{as.data.frame.data.table} that +allows a single column to be removed from \code{x} to be used as the +\code{\link{rownames}} of the resulting \code{data.frame}. An example +of where this is useful is where a function from another package expects +the data stored in the first column of \code{x} to be the \code{rownames} +of the data frame for the purposes of subsetting or matching rows in +its internal workings. + +If you do not wish to keep a copy of \code{x} as a \code{data.table} and +do not need to use the \code{rownames} argument you should use the +\code{\link{setDF}} function instead, which will coerce \code{x} to a +\code{data.frame} by reference (i.e. without making another copy in memory). +} + +\value{ +A new \code{data.frame} containing the contents of \code{x}. +} + +\seealso{ +\code{\link{data.table}}, \code{\link{as.data.frame}}, \code{\link{setDF}} +} + +\examples{ +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +as.data.frame(dt1) +as.data.frame(dt1, rownames = "A") +as.data.frame(dt1, rownames = 1) +as.data.frame(dt1, rownames = TRUE) + +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +setkey(dt1, A) +as.data.frame(dt1, rownames = TRUE) + +setDF(dt1) # dt1 is a data.frame now +} diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd new file mode 100644 index 000000000..5e12239d9 --- /dev/null +++ b/man/as.matrix.Rd @@ -0,0 +1,62 @@ +\name{as.matrix} +\alias{as.matrix} +\alias{as.matrix.data.table} +\title{Convert a data.table to a matrix} +\description{ +Converts a \code{data.table} into a \code{matrix}, optionally using one +of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. +} +\usage{ +\method{as.matrix}{data.table}(x, rownames, ...)} + +\arguments{ +\item{x}{a \code{data.table}} +\item{rownames}{optional, a single column name or column index to use as +the \code{rownames} inthe returned \code{matrix}. If \code{TRUE} the +\code{\link{key}} of the \code{data.table} will be used if it is a +single column, otherwise the first column in the \code{data.table} will +be used.} +\item{\dots}{additional arguments to be passed to or from methods.} +} + +\details{ +\code{\link{as.matrix}} is a generic function in base R. It dispatches to +\code{as.matrix.data.table} if its \code{x} argument is a \code{data.table}. + +The method for \code{data.table}s will return a character matrix if there +are only atomic columns and any non-(numeric/logical/complex) column, +applying \code{\link{as.vector}} to factors and \code{\link{format}} to other +non-character columns. Otherwise, the usual coercion hierarchy (logical < +integer < double < complex) will be used, e.g., all-logical data frames +will be coerced to a logical matrix, mixed logical-integer will give an +integer matrix, etc. + +An additional argument \code{rownames} is provided for \code{as.matrix.data.table} +to facilitate conversions to matrices where the \code{\link{rownames}} are stored +in a single column of \code{x}, e.g. the first column after using +\code{\link{dcast.data.table}}. +} + +\value{ +A new \code{matrix} containing the contents of \code{x}. +} + +\seealso{ +\code{\link{data.table}}, \code{\link{as.matrix}}, \code{\link{data.matrix}} +\code{\link{array}} +} + +\examples{ +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +as.matrix(dt1) # character matrix +as.matrix(dt1, rownames = "A") +as.matrix(dt1, rownames = 1) +as.matrix(dt1, rownames = TRUE) + +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +setkey(dt1, A) +as.matrix(dt1, rownames = TRUE) +} + +\keyword{ array } + From 1323be04461cb9b808d80d52c1dbb52c440a3a9f Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 23 Mar 2018 14:33:47 +1100 Subject: [PATCH 03/18] some unit tests --- inst/tests/tests.Rraw | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b4888c9e3..f21edca16 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11522,6 +11522,24 @@ test(1893.2, print(DF), output=txt) txt = 'A,B\n109,MT\n7,N\n11,NA\n41,NB\n60,ND\n1,""\n2,\n3,"NA"\n4,NA\n' test(1893.3, print(fread(txt,na.strings="")), output="A B\n1: 109 MT\n2: 7 N\n3: 11 NA\n4: 41 NB\n5: 60 ND\n6: 1 \n7: 2 \n8: 3 NA\n9: 4 NA") +# Allow column to be used as rownames when converting to matrix or data.frame # 2702 +DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) +mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) +test(2702.1, as.matrix(DT, 1), mat) +test(2702.2, as.matrix(DT, "id"), mat) +test(2702.3, as.matrix(DT, TRUE), mat) +setkey(DT, id) +test(2702.4, as.matrix(DT, TRUE), mat) + +DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) +DF <- data.frame(X = 1:4, Y = 5:8) +rownames(DF) <- letters[1:4] +test(2702.5, as.data.frame(DT, 1), DF) +test(2702.6, as.data.frame(DT, "id"), DF) +test(2702.7, as.data.frame(DT, TRUE), DF) +setkey(DT, id) +test(2702.8, as.data.frame(DT, TRUE), DF) + ################################### # Add new tests above this line # ################################### From ddaeb6ac026d5362653fd184bfe21a027eb01be5 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 23 Mar 2018 14:39:35 +1100 Subject: [PATCH 04/18] added news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index fe9b2ab7f..9f41a12aa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,6 +103,8 @@ Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementat 17. `update.dev.pkg` is new function to update package from development repository, it will download package sources only when newer commit is available in repository. `data.table::update.dev.pkg()` defaults updates `data.table`, but any package can be used. +18. `as.matrix.data.table` and `as.data.frame.data.table` methods now have an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix` or `data.frame` resepctively. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. + #### BUG FIXES 1. The new quote rules handles this single field `"Our Stock Screen Delivers an Israeli Software Company (MNDO, CTCH)<\/a> SmallCapInvestor.com - Thu, May 19, 2011 10:02 AM EDT<\/cite><\/div>Yesterday in \""Google, But for Finding From 0ab7e4f980ad9a100cc044c03f5aaadb7b232c66 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 24 Mar 2018 10:03:53 +1100 Subject: [PATCH 05/18] Resolve CRAN notes about global variable bindings --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index c961256b8..c86fe645c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1897,7 +1897,7 @@ as.matrix.data.table <- function(x, rownames, ...) { } } if (!is.null(rnc)) { # If there are rownames, extract and drop that column - rn <- x[, ..rnc][[1]] + rn <- x[[rnc]] dm <- dim(x) - c(0, 1) cn <- names(x)[-rnc] X <- x[, -rnc, with = FALSE] @@ -2085,7 +2085,7 @@ as.data.frame.data.table <- function(x, rownames, ...) } } if (!is.null(rnc)) { # If there are rownames, extract and drop that column - rn <- x[, ..rnc][[1]] + rn <- x[[rnc]] ans <- x[, -rnc, with = FALSE] rownames(ans) <- rn # settatr(ans, "row.names", rn) only wokrs for character row names } else { From 8477788b02f254b899d8c74d645fc77fe8af2af9 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 24 Mar 2018 10:17:55 +1100 Subject: [PATCH 06/18] Reverted as.data.frame.data.table --- NEWS.md | 2 +- R/data.table.R | 47 ++---------------------------- inst/tests/tests.Rraw | 11 +------- man/as.data.frame.Rd | 66 ------------------------------------------- 4 files changed, 5 insertions(+), 121 deletions(-) delete mode 100644 man/as.data.frame.Rd diff --git a/NEWS.md b/NEWS.md index 9f41a12aa..1a47b1f66 100644 --- a/NEWS.md +++ b/NEWS.md @@ -103,7 +103,7 @@ Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementat 17. `update.dev.pkg` is new function to update package from development repository, it will download package sources only when newer commit is available in repository. `data.table::update.dev.pkg()` defaults updates `data.table`, but any package can be used. -18. `as.matrix.data.table` and `as.data.frame.data.table` methods now have an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix` or `data.frame` resepctively. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. +18. `as.matrix.data.table` method now has an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix`. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. #### BUG FIXES diff --git a/R/data.table.R b/R/data.table.R index c86fe645c..78e6f02f9 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2047,51 +2047,10 @@ tail.data.table <- function(x, n=6L, ...) { `[<-.data.table`(x,j=name,value=value) # important i is missing here } -as.data.frame.data.table <- function(x, rownames, ...) +as.data.frame.data.table <- function(x, ...) { - rnc <- NULL - if (!missing(rownames)) { # Convert rownames to a column index if possible - if (is.null(rownames)) { - warning("rownames is NULL, ignoring rownames") - } else if (length(rownames) != 1) { - stop("rownames must be a single column in x") - } else if (is.na(rownames)) { - warning("rownames is NA, ignoring rownames") - } else if (is.logical(rownames) && !isTRUE(rownames)) { - warning("rownames is FALSE, ignoring rownames") - } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { - # E.g. because rownames is some sort of object that cant be converted to a column index - stop("rownames must be TRUE, a column index, or a column name in x") - } else { - if (is.logical(rownames) && isTRUE(rownames)) { - if (haskey(x)) { - rownames <- key(x) - if (length(rownames) > 1) { - warning("rownames is TRUE but multiple keys found in key(x), using first column instead") - rownames <- 1 - } - } else { - rownames <- 1 - } - } - if (is.character(rownames)) { # Handles cases where rownames is a column name, or key(x) from TRUE - rnc <- chmatch(rownames, names(x)) - if (is.na(rnc)) stop(rownames, " is not a column of x") - } else { # rownames is an index already - if (rownames < 1 || rownames > ncol(x)) - stop("rownames is ", rownames, " which is outside the column number range [1,ncol=", ncol(x), "]") - rnc <- rownames - } - } - } - if (!is.null(rnc)) { # If there are rownames, extract and drop that column - rn <- x[[rnc]] - ans <- x[, -rnc, with = FALSE] - rownames(ans) <- rn # settatr(ans, "row.names", rn) only wokrs for character row names - } else { - ans = copy(x) - setattr(ans,"row.names",.set_row_names(nrow(x))) # since R 2.4.0, data.frames can have non-character row names - } + ans = copy(x) + setattr(ans,"row.names",.set_row_names(nrow(x))) # since R 2.4.0, data.frames can have non-character row names setattr(ans,"class","data.frame") setattr(ans,"sorted",NULL) # remove so if you convert to df, do something, and convert back, it is not sorted setattr(ans,".internal.selfref",NULL) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f21edca16..1fe2bb20b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11522,7 +11522,7 @@ test(1893.2, print(DF), output=txt) txt = 'A,B\n109,MT\n7,N\n11,NA\n41,NB\n60,ND\n1,""\n2,\n3,"NA"\n4,NA\n' test(1893.3, print(fread(txt,na.strings="")), output="A B\n1: 109 MT\n2: 7 N\n3: 11 NA\n4: 41 NB\n5: 60 ND\n6: 1 \n7: 2 \n8: 3 NA\n9: 4 NA") -# Allow column to be used as rownames when converting to matrix or data.frame # 2702 +# Allow column to be used as rownames when converting to matrix # 2702 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) test(2702.1, as.matrix(DT, 1), mat) @@ -11531,15 +11531,6 @@ test(2702.3, as.matrix(DT, TRUE), mat) setkey(DT, id) test(2702.4, as.matrix(DT, TRUE), mat) -DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) -DF <- data.frame(X = 1:4, Y = 5:8) -rownames(DF) <- letters[1:4] -test(2702.5, as.data.frame(DT, 1), DF) -test(2702.6, as.data.frame(DT, "id"), DF) -test(2702.7, as.data.frame(DT, TRUE), DF) -setkey(DT, id) -test(2702.8, as.data.frame(DT, TRUE), DF) - ################################### # Add new tests above this line # ################################### diff --git a/man/as.data.frame.Rd b/man/as.data.frame.Rd deleted file mode 100644 index c42779cd5..000000000 --- a/man/as.data.frame.Rd +++ /dev/null @@ -1,66 +0,0 @@ -\name{as.data.frame} -\alias{as.data.frame} -\alias{as.data.frame.data.table} -\title{Convert a data.table to a data.frame} -\description{ -Converts a \code{data.table} into a \code{data.frame}, optionally using one -of the columns in the \code{data.table} as the \code{data.frame} \code{rownames}. - -If you do not need to use one of the columns as the \code{rownames}, use -\code{\link{setDF}} to coerce a \code{data.table} to a \code{data.frame} -by reference (i.e. without making another copy in memory). -} -\usage{ -\method{as.data.frame}{data.table}(x, rownames, ...)} - -\arguments{ -\item{x}{a \code{data.table}} -\item{rownames}{optional, a single column name or column index to use as -the \code{rownames} inthe returned \code{matrix}. If \code{TRUE} the -\code{\link{key}} of the \code{data.table} will be used if it is a -single column, otherwise the first column in the \code{data.table} will -be used.} -\item{\dots}{additional arguments to be passed to or from methods.} -} - -\details{ -\code{\link{as.data.frame}} is a generic function in base R. It dispatches to -\code{as.data.frame.data.table} if its \code{x} argument is a \code{data.table}. - -The method for \code{data.table}s will create a copy of \code{x} with -the \code{data.table} class attributes removed. An additional argument -\code{rownames} is provided for \code{as.data.frame.data.table} that -allows a single column to be removed from \code{x} to be used as the -\code{\link{rownames}} of the resulting \code{data.frame}. An example -of where this is useful is where a function from another package expects -the data stored in the first column of \code{x} to be the \code{rownames} -of the data frame for the purposes of subsetting or matching rows in -its internal workings. - -If you do not wish to keep a copy of \code{x} as a \code{data.table} and -do not need to use the \code{rownames} argument you should use the -\code{\link{setDF}} function instead, which will coerce \code{x} to a -\code{data.frame} by reference (i.e. without making another copy in memory). -} - -\value{ -A new \code{data.frame} containing the contents of \code{x}. -} - -\seealso{ -\code{\link{data.table}}, \code{\link{as.data.frame}}, \code{\link{setDF}} -} - -\examples{ -(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) -as.data.frame(dt1) -as.data.frame(dt1, rownames = "A") -as.data.frame(dt1, rownames = 1) -as.data.frame(dt1, rownames = TRUE) - -(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) -setkey(dt1, A) -as.data.frame(dt1, rownames = TRUE) - -setDF(dt1) # dt1 is a data.frame now -} From ac52d9ada9d8a0e970222ba95e5170133a597988 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 24 Mar 2018 11:41:45 +1100 Subject: [PATCH 07/18] Unit tests for errors and warnings --- inst/tests/tests.Rraw | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1fe2bb20b..3897a731f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11530,6 +11530,19 @@ test(2702.2, as.matrix(DT, "id"), mat) test(2702.3, as.matrix(DT, TRUE), mat) setkey(DT, id) test(2702.4, as.matrix(DT, TRUE), mat) +# errors +test(2702.5, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") +test(2702.6, as.matrix(DT, "Z"), error="Z is not a column of x") +test(2702.7, as.matrix(DT, c(1,2)), error="rownames must be a single column of x") +test(2702.8, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") +# warnings +mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) +test(2702.9, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") +test(2702.10, as.matrix(DT, NULL), mat2, warning="rownames is NULL, ignoring rownames") +test(2702.11, as.matrix(DT, FALSE), mat2, warning="rownames is FALSE, ignoring rownames") +setkey(DT, id, X) +test(2702.12, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys found in key(x), using first column instead") + ################################### # Add new tests above this line # From b9eab65c03c59900bdc38e8be87633575f1670ca Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 24 Mar 2018 12:15:38 +1100 Subject: [PATCH 08/18] Fixed error message in tests and test increment numbers --- inst/tests/tests.Rraw | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3897a731f..b26668672 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11525,19 +11525,19 @@ test(1893.3, print(fread(txt,na.strings="")), output="A B\n1: 109 MT\n2: # Allow column to be used as rownames when converting to matrix # 2702 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) -test(2702.1, as.matrix(DT, 1), mat) -test(2702.2, as.matrix(DT, "id"), mat) -test(2702.3, as.matrix(DT, TRUE), mat) +test(2702.01, as.matrix(DT, 1), mat) +test(2702.02, as.matrix(DT, "id"), mat) +test(2702.03, as.matrix(DT, TRUE), mat) setkey(DT, id) -test(2702.4, as.matrix(DT, TRUE), mat) +test(2702.04, as.matrix(DT, TRUE), mat) # errors -test(2702.5, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") -test(2702.6, as.matrix(DT, "Z"), error="Z is not a column of x") -test(2702.7, as.matrix(DT, c(1,2)), error="rownames must be a single column of x") -test(2702.8, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") -# warnings +test(2702.05, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") +test(2702.06, as.matrix(DT, "Z"), error="Z is not a column of x") +test(2702.07, as.matrix(DT, c(1,2)), error="rownames must be a single column in x") +test(2702.08, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") +# warningsi mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) -test(2702.9, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") +test(2702.09, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") test(2702.10, as.matrix(DT, NULL), mat2, warning="rownames is NULL, ignoring rownames") test(2702.11, as.matrix(DT, FALSE), mat2, warning="rownames is FALSE, ignoring rownames") setkey(DT, id, X) From 11da1448c2f98c761115add34e0b0b41e2632822 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 25 Mar 2018 17:52:25 +1100 Subject: [PATCH 09/18] Removed with=FALSE --- R/data.table.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5e76e4678..f30d94093 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1918,11 +1918,13 @@ as.matrix.data.table <- function(x, rownames, ...) { } } } - if (!is.null(rnc)) { # If there are rownames, extract and drop that column + # If the rownames argument has been used, and is a single column, + # extract that column's index (rnc) and drop it from x + if (!is.null(rnc)) { rn <- x[[rnc]] dm <- dim(x) - c(0, 1) cn <- names(x)[-rnc] - X <- x[, -rnc, with = FALSE] + X <- x[, .SD, .SDcols = cn] } else { dm <- dim(x) cn <- names(x) From 5b4bca72324cebdafe87cb4750c6c3ade99259d1 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 25 Mar 2018 18:10:38 +1100 Subject: [PATCH 10/18] Enhanced clarity of an error check --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index f30d94093..4b593f2e2 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1891,7 +1891,7 @@ as.matrix.data.table <- function(x, rownames, ...) { stop("rownames must be a single column in x") } else if (is.na(rownames)) { warning("rownames is NA, ignoring rownames") - } else if (is.logical(rownames) && !isTRUE(rownames)) { + } else if (identical(rownames, FALSE)) { warning("rownames is FALSE, ignoring rownames") } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { # E.g. because rownames is some sort of object that cant be converted to a column index From c5ae94b4bced6399bde4e837589cc9454eac19e0 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 25 Mar 2018 18:16:49 +1100 Subject: [PATCH 11/18] replaced isTRUE --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 4b593f2e2..75860e730 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1897,7 +1897,7 @@ as.matrix.data.table <- function(x, rownames, ...) { # E.g. because rownames is some sort of object that cant be converted to a column index stop("rownames must be TRUE, a column index, or a column name in x") } else { - if (is.logical(rownames) && isTRUE(rownames)) { + if (identical(rownames, TRUE)) { if (haskey(x)) { rownames <- key(x) if (length(rownames) > 1) { From f07b813296adda4118197b46062b5da27571c634 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 25 Mar 2018 18:37:05 +1100 Subject: [PATCH 12/18] Vector of rownames may be used in as.matrix --- R/data.table.R | 8 ++++++-- inst/tests/tests.Rraw | 22 ++++++++++++---------- man/as.matrix.Rd | 5 +++-- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 75860e730..3bc2b5771 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1887,15 +1887,19 @@ as.matrix.data.table <- function(x, rownames, ...) { if (!missing(rownames)) { # Convert rownames to a column index if possible if (is.null(rownames)) { warning("rownames is NULL, ignoring rownames") + } else if (length(rownames) == nrow(x)) { + # rownames argument is a vector of row names, no column in x to drop. + rn <- rownames + rnc <- NULL } else if (length(rownames) != 1) { - stop("rownames must be a single column in x") + stop("rownames must be a single column in x or a vector of row names of length nrow(x)") } else if (is.na(rownames)) { warning("rownames is NA, ignoring rownames") } else if (identical(rownames, FALSE)) { warning("rownames is FALSE, ignoring rownames") } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { # E.g. because rownames is some sort of object that cant be converted to a column index - stop("rownames must be TRUE, a column index, or a column name in x") + stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") } else { if (identical(rownames, TRUE)) { if (haskey(x)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2541ebb5b..b15443ee4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11547,23 +11547,25 @@ test(1894.12, DT[, sum(y)*..z], error="..z in j is looking for z in calling scop # Allow column to be used as rownames when converting to matrix # 1895 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) +mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) +mat3 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(1:4, c("id", "X", "Y"))) test(1895.01, as.matrix(DT, 1), mat) test(1895.02, as.matrix(DT, "id"), mat) test(1895.03, as.matrix(DT, TRUE), mat) setkey(DT, id) test(1895.04, as.matrix(DT, TRUE), mat) +test(1895.05, as.matrix(DT, 1:4), mat3) # errors -test(1895.05, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") -test(1895.06, as.matrix(DT, "Z"), error="Z is not a column of x") -test(1895.07, as.matrix(DT, c(1,2)), error="rownames must be a single column in x") -test(1895.08, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") -# warningsi -mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) -test(1895.09, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") -test(1895.10, as.matrix(DT, NULL), mat2, warning="rownames is NULL, ignoring rownames") -test(1895.11, as.matrix(DT, FALSE), mat2, warning="rownames is FALSE, ignoring rownames") +test(1895.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") +test(1895.07, as.matrix(DT, "Z"), error="Z is not a column of x") +test(1895.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)") +test(1895.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names") +# warnings +test(1895.10, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") +test(1895.11, as.matrix(DT, NULL), mat2, warning="rownames is NULL, ignoring rownames") +test(1895.12, as.matrix(DT, FALSE), mat2, warning="rownames is FALSE, ignoring rownames") setkey(DT, id, X) -test(1895.12, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys found in key(x), using first column instead") +test(1895.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys found in key(x), using first column instead") ################################### diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index 5e12239d9..f93f3ba89 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -12,10 +12,11 @@ of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. \arguments{ \item{x}{a \code{data.table}} \item{rownames}{optional, a single column name or column index to use as -the \code{rownames} inthe returned \code{matrix}. If \code{TRUE} the +the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the \code{\link{key}} of the \code{data.table} will be used if it is a single column, otherwise the first column in the \code{data.table} will -be used.} +be used. Alternative a vector of length \code{nrow(x)} to assign as the +row names of the returned \code{matrix}.} \item{\dots}{additional arguments to be passed to or from methods.} } From 3d4681c684643b02a24daff1aeced0ccf4cfb215 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 25 Mar 2018 20:31:00 +1100 Subject: [PATCH 13/18] Fixed number in tests to reflect PR --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b15443ee4..a1a6ed06f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11544,7 +11544,7 @@ test(1894.11, DT[, sum(z)*..z], 72L) setnames(DT, "z", "..z") test(1894.12, DT[, sum(y)*..z], error="..z in j is looking for z in calling scope, but a column '..z' exists. Column names should not start with ..") -# Allow column to be used as rownames when converting to matrix # 1895 +# Allow column to be used as rownames when converting to matrix #2702 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) From d9a4a54dbde72bb6a663e2ee4cde3888715ff0eb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 27 Mar 2018 23:13:26 +0800 Subject: [PATCH 14/18] mainly cosmetic changes (hopefully no typos; on browser) --- R/data.table.R | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 3bc2b5771..a2b7f1697 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1891,33 +1891,34 @@ as.matrix.data.table <- function(x, rownames, ...) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames rnc <- NULL - } else if (length(rownames) != 1) { - stop("rownames must be a single column in x or a vector of row names of length nrow(x)") + } else if (length(rownames) != 1L) { + stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) } else if (is.na(rownames)) { warning("rownames is NA, ignoring rownames") } else if (identical(rownames, FALSE)) { warning("rownames is FALSE, ignoring rownames") } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { - # E.g. because rownames is some sort of object that cant be converted to a column index + # E.g. because rownames is some sort of object that can't be converted to a column index stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") - } else { + } else { # Handles cases where rownames is a column name, or key(x) from TRUE if (identical(rownames, TRUE)) { if (haskey(x)) { rownames <- key(x) - if (length(rownames) > 1) { - warning("rownames is TRUE but multiple keys found in key(x), using first column instead") - rownames <- 1 + if (length(rownames) > 1L) { + warning(sprintf("rownames is TRUE but multiple keys [%s] found for x; defaulting to first key column [%s]", + paste(rownames, collapse = ','), rownames[1L])) + rownames <- 1L } } else { - rownames <- 1 + rownames <- 1L } } - if (is.character(rownames)) { # Handles cases where rownames is a column name, or key(x) from TRUE + if (is.character(rownames)) { rnc <- chmatch(rownames, names(x)) if (is.na(rnc)) stop(rownames, " is not a column of x") } else { # rownames is an index already - if (rownames < 1 || rownames > ncol(x)) - stop("rownames is ", rownames, " which is outside the column number range [1,ncol=", ncol(x), "]") + if (rownames < 1L || rownames > ncol(x)) + stop(sprintf("rownames is %s which is outside the column number range [1,ncol=%d]", rownames, ncol(x))) rnc <- rownames } } From de48d84cbed34498cfc6b7bd3346db7bb567df17 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 27 Mar 2018 23:15:44 +0800 Subject: [PATCH 15/18] typo (rownames is integer not string here) --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index a2b7f1697..4ece7ec27 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1918,7 +1918,7 @@ as.matrix.data.table <- function(x, rownames, ...) { if (is.na(rnc)) stop(rownames, " is not a column of x") } else { # rownames is an index already if (rownames < 1L || rownames > ncol(x)) - stop(sprintf("rownames is %s which is outside the column number range [1,ncol=%d]", rownames, ncol(x))) + stop(sprintf("rownames is %d which is outside the column number range [1,ncol=%d]", rownames, ncol(x))) rnc <- rownames } } From 895554e6f23d3fc9fd82e091985a6fe8fd7fdfe5 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 1 Apr 2018 15:56:11 +1000 Subject: [PATCH 16/18] NULL, FALSE, and NA now passthrough instead of warning --- R/data.table.R | 14 ++++---------- inst/tests/tests.Rraw | 11 ++++++----- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 4ece7ec27..eb6333483 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1885,22 +1885,16 @@ as.matrix.data.table <- function(x, rownames, ...) { rn <- NULL rnc <- NULL if (!missing(rownames)) { # Convert rownames to a column index if possible - if (is.null(rownames)) { - warning("rownames is NULL, ignoring rownames") - } else if (length(rownames) == nrow(x)) { + if (length(rownames) == nrow(x)) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames rnc <- NULL - } else if (length(rownames) != 1L) { + } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) - } else if (is.na(rownames)) { - warning("rownames is NA, ignoring rownames") - } else if (identical(rownames, FALSE)) { - warning("rownames is FALSE, ignoring rownames") - } else if (!(is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { + } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { # E.g. because rownames is some sort of object that can't be converted to a column index stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") - } else { # Handles cases where rownames is a column name, or key(x) from TRUE + } else if (!is.null(rownames) && !is.na(rownames) && !identical(rownames, FALSE)) { # Handles cases where rownames is a column name, or key(x) from TRUE if (identical(rownames, TRUE)) { if (haskey(x)) { rownames <- key(x) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1d083e6fb..308b8c6ba 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11589,12 +11589,13 @@ test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the colu test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x") test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)") test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names") -# warnings -test(1899.10, as.matrix(DT, NA), mat2, warning="rownames is NA, ignoring rownames") -test(1899.11, as.matrix(DT, NULL), mat2, warning="rownames is NULL, ignoring rownames") -test(1899.12, as.matrix(DT, FALSE), mat2, warning="rownames is FALSE, ignoring rownames") +# values that pass through (rownames ignored) +test(1899.10, as.matrix(DT, NA), mat2) +test(1899.11, as.matrix(DT, NULL), mat2) +test(1899.12, as.matrix(DT, FALSE), mat2) +# Warnings: setkey(DT, id, X) -test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys found in key(x), using first column instead") +test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys") ################################### From a887594f3bd8fd317ef1c31c99d88719fe8d7c37 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 1 Apr 2018 15:57:30 +1000 Subject: [PATCH 17/18] Fixed incorrect warning message --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index eb6333483..9c2d1c61f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1899,7 +1899,7 @@ as.matrix.data.table <- function(x, rownames, ...) { if (haskey(x)) { rownames <- key(x) if (length(rownames) > 1L) { - warning(sprintf("rownames is TRUE but multiple keys [%s] found for x; defaulting to first key column [%s]", + warning(sprintf("rownames is TRUE but multiple keys [%s] found for x; defaulting to first column x[,1]", paste(rownames, collapse = ','), rownames[1L])) rownames <- 1L } From cde74a29aeeb8399431bec51b7de322fba094d5b Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Tue, 3 Apr 2018 14:20:56 +1000 Subject: [PATCH 18/18] Removed extraneous newline before tests --- inst/tests/tests.Rraw | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 308b8c6ba..ea3f1bffc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11572,7 +11572,6 @@ test(1898.1, set2key(DT, a), error="deprecated. Please use setindex() instead.") test(1898.2, set2keyv(DT, "a"), error="deprecated. Please use setindexv() instead.") test(1898.3, key2(DT), error="deprecated. Please use indices() instead.") - # Allow column to be used as rownames when converting to matrix #2702 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y")))