diff --git a/R/fread.R b/R/fread.R index 685aff3b5..237cb0847 100644 --- a/R/fread.R +++ b/R/fread.R @@ -1,5 +1,5 @@ -fread <- function(input="",sep="auto",sep2="auto",nrows=-1L,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),autostart=1L,skip=0L,select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"),dec=if (sep!=".") "." else ",", col.names, check.names=FALSE, encoding="unknown", quote="\"", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, showProgress=getOption("datatable.showProgress"),data.table=getOption("datatable.fread.datatable")) { +fread <- function(input="",sep="auto",sep2="auto",nrows=-1L,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),autostart=1L,skip=0L,select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"),dec=if (sep!=".") "." else ",", col.names, check.names=FALSE, encoding="unknown", quote="\"", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=getOption("datatable.showProgress"),data.table=getOption("datatable.fread.datatable")) { if (!is.character(dec) || length(dec)!=1L || nchar(dec)!=1) stop("dec must be a single character e.g. '.' or ','") # handle encoding, #563 if (length(encoding) != 1L || !encoding %in% c("unknown", "UTF-8", "Latin-1")) { @@ -130,5 +130,13 @@ fread <- function(input="",sep="auto",sep2="auto",nrows=-1L,header="auto",na.str # FR #768 if (!missing(col.names)) setnames(ans, col.names) # setnames checks and errors automatically + if (!is.null(key) && data.table) { # FR #590: key is only applied when a data.table is returned + if (!is.character(key)) + stop("key argument of fread() must be character") + if (length(key) == 1L) { # allow a single comma-separated string such as "x,y,z" + key = strsplit(key, split = ",")[[1L]] + } + setkeyv(ans, key) + } ans } diff --git a/README.md b/README.md index 6d8f07867..26c265341 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,8 @@ 14. `fread` gains `fill` argument with default `FALSE` for backwards compatibility. 
Closes [#536](https://github.com/Rdatatable/data.table/issues/536). + 15. `fread` gains `key` argument, [#590](https://github.com/Rdatatable/data.table/issues/590). + #### BUG FIXES 1. Now compiles and runs on IBM AIX gcc. Thanks to Vinh Nguyen for investigation and testing, [#1351](https://github.com/Rdatatable/data.table/issues/1351). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fc23294b4..9071e83a8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7268,6 +7268,10 @@ text="x,y\n1,a\n2,b\n" test(1586.1, fread(text, colClasses=c("integer", "factor")), data.table(x=1:2, y=factor(letters[1:2]))) test(1586.2, fread(text, colClasses=c(x="factor")), data.table(x=factor(1:2), y=letters[1:2])) +# FR #590 +text="x,y\n2,a\n1,q\n3,c\n" +test(1587, fread(text, key="y"), setDT(fread(text), key="y")) + ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. diff --git a/man/fread.Rd b/man/fread.Rd index 445f22bd0..e847e0f37 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -15,7 +15,7 @@ skip=0L, select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64"), # default: "integer64" dec=if (sep!=".") "." else ",", col.names, check.names=FALSE, encoding="unknown", quote="\"", -strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, +strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=getOption("datatable.showProgress"), # default: TRUE data.table=getOption("datatable.fread.datatable") # default: TRUE ) @@ -43,6 +43,7 @@ data.table=getOption("datatable.fread.datatable") # default: TRUE \item{strip.white}{ default is \code{TRUE}. Strips leading and trailing whitespaces of unquoted fields. If \code{FALSE}, only header trailing spaces are removed. } \item{fill}{logical (default is \code{FALSE}). 
If \code{TRUE} then in case the rows have unequal length, blank fields are implicitly filled.} \item{blank.lines.skip}{\code{logical}, default is \code{FALSE}. If \code{TRUE} blank lines in the input are ignored.} + \item{key}{Character vector of one or more column names which is passed to \code{\link{setkey}}. It may be a single comma-separated string such as \code{key="x,y,z"}, or a vector of names such as \code{key=c("x","y","z")}. Only applied when argument \code{data.table=TRUE}; otherwise it is ignored.} \item{showProgress}{ TRUE displays progress on the console using \code{\\r}. It is produced in fread's C code where the very nice (but R level) txtProgressBar and tkProgressBar are not easily available. } \item{data.table}{ TRUE returns a \code{data.table}. FALSE returns a \code{data.frame}. } }