R/data.table.R


dim.data.table <- function(x) {
    # dim() method for data.table: the work is delegated entirely to the
    # compiled routine Cdim, whose result is returned unchanged.
    dims <- .Call(Cdim, x)
    dims
}

# Package-private environment used for mutable state shared between calls.
# Approach thanks to: http://stackoverflow.com/a/12605694/403310
.global <- new.env()
setPackageName("data.table", .global)
# 'print' holds the address of an object whose next print should be suppressed
# (see print.data.table); the empty string means nothing is pending.
assign("print", "", envir = .global)

# Special symbols used inside [.data.table. Each is bound to NULL and exported:
#  * exporting prevents NOTEs from R CMD check, and from checkUsage via compiler;
#  * exporting also makes it clear (to users and other packages) that data.table
#    uses these names as symbols;
#  * the NULL value makes it clear (to R's mask check on loading) that they are
#    variables, not functions.
# utils::globalVariables(c(".SD",".N")) was tried as well, but exporting seems better.
# Even though .BY doesn't appear in this file, it should still be NULL here and
# exported because it's defined in SDenv and can be used by users.
.EACHI <- NULL
.BY <- NULL
.GRP <- NULL
.I <- NULL
.N <- NULL
.SD <- NULL

print.data.table <- function(x, topn=getOption("datatable.print.topn"), 
                             nrows=getOption("datatable.print.nrows"), 
                             class=getOption("datatable.print.class"), 
                             row.names=getOption("datatable.print.rownames"), 
                             quote=FALSE, ...) {
    # Print method for data.table. Always returns invisibly.
    #   topn      - print the top topn and bottom topn rows with '---' inbetween (falls back to 5 if not numeric)
    #   nrows     - under this the whole (small) table is printed, unless topn is provided (falls back to 100 if not numeric);
    #               a value <= 0 turns printing off
    #   class     - should column class be printed underneath column name?
    #   row.names - should row numbers be printed?
    #   quote     - passed to the print method of the formatted character matrix
    #   ...       - passed to format.data.table
    if (.global$print!="" && address(x)==.global$print) {   # The !="" is to save address() calls and R's global cache of address strings
        #  := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5
        # The issue is distinguishing "> DT" (after a previous := in a function) from "> DT[,foo:=1]". To print.data.table(), there
        # is no difference. Now from R 3.2.0 a side effect of the very welcome and requested change to avoid silent deep copy is that
        # there is now no longer a difference between > DT and > print(DT). So decided that DT[] is now needed to guarantee print; simpler.
        # This applies just at the prompt. Inside functions, print(DT) will of course print.
        # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(),
        #   topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles
        SYS <- sys.calls()
        if (length(SYS) <= 2 ||  # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok)
            ( length(SYS) > 3L &&
              SYS[[length(SYS)-3L]][[1L]] == "knit_print.default") ) {   # knitr auto print to mimic the prompt
            .global$print = ""
            return(invisible())
        }
    }
    .global$print = ""
    # missing() is only reliable before the argument is altered, so record it
    # before topn is possibly replaced by its fallback value below.
    topnmiss = missing(topn)
    if (!is.numeric(nrows)) nrows = 100L
    if (!is.infinite(nrows)) nrows = as.integer(nrows)
    if (nrows <= 0L) return(invisible())   # ability to turn off printing
    if (!is.numeric(topn)) topn = 5L
    topn = max(as.integer(topn),1L)
    if (nrow(x) == 0L) {
        if (length(x)==0L)
           cat("Null data.table (0 rows and 0 cols)\n")  # See FAQ 2.5 and NEWS item in v1.8.9
        else
           cat("Empty data.table (0 rows) of ",length(x)," col",if(length(x)>1L)"s",": ",paste(head(names(x),6),collapse=","),if(ncol(x)>6)"...","\n",sep="")
        # invisible, like every other exit; a bare return() yields a visible NULL
        # which an explicit print(DT) at the prompt would then echo as "NULL"
        return(invisible())
    }
    if (topn*2<nrow(x) && (nrow(x)>nrows || !topnmiss)) {
        toprint = rbind(head(x, topn), tail(x, topn))
        rn = c(seq_len(topn), seq.int(to=nrow(x), length.out=topn))
        printdots = TRUE
    } else {
        toprint = x
        rn = seq_len(nrow(x))
        printdots = FALSE
    }
    toprint=format.data.table(toprint, ...)
    # fix for #975.
    if (any(vapply(x, function(col) "integer64" %in% class(col), TRUE)) && !"package:bit64" %in% search()) {
        warning("Some columns have been read as type 'integer64' but package bit64 isn't loaded. Those columns will display as strange looking floating point data. There is no need to reload the data. Just require(bit64) to obtain the integer64 print method and print the data again.")
    }
    # FR #5020 - add row.names = logical argument to print.data.table
    if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint))
    # scalar condition, so short-circuit || rather than elementwise |
    if (is.null(names(x)) || all(names(x) == "")) colnames(toprint)=rep("", ncol(toprint)) # fixes bug #97 (RF#4934) and #545 (RF#5253)
    if (isTRUE(class)) {
      #Matching table for most common types & their abbreviations
      class_abb = c(list = "<list>", integer = "<int>", numeric = "<num>",
            character = "<char>", Date = "<Date>", complex = "<cplx>",
            factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
            IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
            expression = "<expr>", ordered = "<ord>")
      classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE)
      abbs = unname(class_abb[classes])
      # classes without a known abbreviation are shown in full as <classname>
      if ( length(idx <- which(is.na(abbs))) )
        abbs[idx] = paste("<", classes[idx], ">", sep="")
      toprint = rbind(abbs, toprint)
      rownames(toprint)[1L] = ""
    }
    if (printdots) {
        # when the class row has been prepended it occupies the first row of toprint, so
        # one extra row is taken from the top to still show topn data rows above '---'
        toprint = rbind(head(toprint,topn + isTRUE(class)),"---"="",tail(toprint,topn))
        rownames(toprint) = format(rownames(toprint),justify="right")
        print(toprint,right=TRUE,quote=quote)
        return(invisible())
    }
    if (nrow(toprint)>20L)
        # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
        toprint=rbind(toprint,matrix(colnames(toprint),nrow=1)) # fixes bug #4934
    print(toprint,right=TRUE,quote=quote)
    invisible()
}

# FR #2591 - format.data.table issue with columns of class "formula"
# Returns a single TRUE/FALSE: does x inherit from class "formula"?
# inherits() is used rather than class(x) == "formula" because the latter returns
# one value per class for multi-class objects, which is not usable in if / ||.
is.formula <- function(x) inherits(x, "formula")

format.data.table <- function (x, ..., justify="none") {
    # Formats each column of x as character and cbind()s the results.
    #   x       - a list of columns (a data.table); an atomic non-NULL x is an error
    #   ...     - passed on to format() for every column / list element
    #   justify - passed on to format()
    # Errors if any column has a dim attribute.
    if (is.atomic(x) && !is.null(x)) {
        stop("Internal structure doesn't seem to be a list. Possibly corrupt data.table.")
    }
    # One element of a list column -> a single string: up to the first 6 values
    # comma separated (with a trailing comma if there are more), or "<class>" for
    # anything that is neither atomic nor a formula.
    format.item <- function(x) {
        if (is.atomic(x) || is.formula(x)) # FR #2591 - format.data.table issue with columns of class "formula"
            paste(c(format(head(x,6), justify=justify, ...), if(length(x)>6)""),collapse=",")  # fix for #5435 - format has to be added here...
        else
            paste("<",class(x)[1L],">",sep="")
    }
    # FR #1091 for pretty printing of character
    # TODO: maybe instead of doing "this is...", we could do "this ... test"?
    # Character strings longer than trunc.char are cut and suffixed with "...";
    # a missing, non-numeric or non-positive option leaves x untouched.
    char.trunc <- function(x, trunc.char = getOption("datatable.prettyprint.char")) {
        trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE)
        if (!is.character(x) || trunc.char <= 0L) return(x)
        idx = which(nchar(x) > trunc.char)
        x[idx] = paste(substr(x[idx], 1L, trunc.char), "...", sep="")
        x
    }
    do.call("cbind",lapply(x,function(col,...){
        if (!is.null(dim(col))) stop("Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.")
        # format.item always returns exactly one string (paste with collapse / scalar paste), so
        # vapply is safe and, unlike sapply, still returns character(0) for a zero-length list column
        if (is.list(col)) col = vapply(col, format.item, "")
        else col = format(char.trunc(col), justify=justify, ...) # added an else here to fix #5435
        col
    },...))
}

is.data.table <- function(x) {
    # TRUE when "data.table" is among the classes of x
    inherits(x, what = "data.table")
}
is.ff <- function(x) {
    # Defined here so that data.table does not have to require(ff); but if the
    # user is using ff we'd like it to work.
    inherits(x, what = "ff")
}

#NCOL <- function(x) {
#    # copied from base, but additionally covers data.table via is.list()
#    # because NCOL in base explicitly tests using is.data.frame()
#    if (is.list(x) && !is.ff(x)) return(length(x))
#    if (is.array(x) && length(dim(x)) > 1L) ncol(x) else as.integer(1L)
#}
#NROW <- function(x) {
#    if (is.data.frame(x) || is.data.table(x)) return(nrow(x))
#    if (is.list(x) && !is.ff(x)) stop("List is not a data.frame or data.table. Convert first before using NROW")   # list may have different length elements, which data.table and data.frame's resolve.
#    if (is.array(x)) nrow(x) else length(x)
#}

null.data.table <- function() {
    # Builds the empty (0 column) data.table: an empty list given the
    # data.table/data.frame class and zero row names via setattr(), and then
    # handed to alloc.col(), whose result is returned.
    empty <- list()
    setattr(empty, "class", c("data.table","data.frame"))
    zero_rows <- .set_row_names(0L)
    setattr(empty, "row.names", zero_rows)
    alloc.col(empty)
}

data.table <-function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFactors=FALSE)
{
    # Constructs a data.table from the inputs in '...'.
    #   ...              - vectors, lists, matrices, tables, data.frames or data.tables; shorter inputs are recycled
    #                      to the longest one (with a warning if the recycling leaves a remainder)
    #   keep.rownames    - passed to as.data.table() for matrix, data.frame and table inputs
    #   check.names      - if TRUE, column names are passed through make.names(unique=TRUE)
    #   key              - NULL, or a character vector of columns to setkeyv() on; a single string is split on ","
    #   stringsAsFactors - if TRUE, character columns are converted using setfactor()
    # Returns the result of alloc.col() on the assembled list of columns.
    #
    # NOTE: It may be faster in some circumstances to create a data.table by creating a list l first, and then setattr(l,"class",c("data.table","data.frame")) at the expense of checking.
    # TO DO: rewrite data.table(), one of the oldest functions here. Many people use data.table() to convert data.frame rather than
    # as.data.table which is faster; speed could be better.  Revisit how many copies are taken in for example data.table(DT1,DT2) which
    # cbind directs to.  And the nested loops for recycling lend themselves to being C level.

    x <- list(...)   # doesn't copy named inputs as from R >= 3.1.0 (a very welcome change)
    if (!.R.listCopiesNamed) .Call(CcopyNamedInList,x)   # to maintain the old behaviour going forwards, for now. See test 548.2.
    # **TO DO** Something strange with NAMED on components of `...`. To investigate. Or just port data.table() to C. This is why
    # it's switched, because extra copies would be introduced in R <= 3.1.0, iiuc.

    # fix for #5377 - data.table(null list, data.frame and data.table) should return null data.table. Simple fix: check all scenarios here at the top.
    if (identical(x, list(NULL)) || identical(x, list(list())) ||
           identical(x, list(data.frame(NULL))) || identical(x, list(data.table(NULL)))) return( null.data.table() )
    tt <- as.list(substitute(list(...)))[-1L]  # Intention here is that data.table(X,Y) will automatically put X and Y as the column names.  For longer expressions, name the arguments to data.table(). But in a call to [.data.table, wrap in list() e.g. DT[,list(a=mean(v),b=foobarzoo(zang))] will get the col names
    vnames = names(tt)
    if (is.null(vnames)) vnames = rep.int("",length(x))
    vnames[is.na(vnames)] = ""
    novname = vnames==""
    if (any(!novname)) {
        if (any(vnames[!novname] == ".SD")) stop("A column may not be called .SD. That has special meaning.")
    }
    # Unnamed, dimensionless inputs take their name from the expression supplied, but only if that
    # expression is already a syntactically valid name (i.e. unchanged by make.names)
    for (i in which(novname)) {
        # if (ncol(as.data.table(x[[i]])) <= 1) { # cbind call in test 230 fails if I write ncol(as.data.table(eval(tt[[i]], parent.frame()))) <= 1, no idea why... (keep this for later even though all tests pass with ncol(.).. because base uses as.data.frame(.))
        if (is.null(ncol(x[[i]]))) {
            if ((tmp <- deparse(tt[[i]])[1]) == make.names(tmp))
                vnames[i] <- tmp
        }
    }
    tt = vnames==""
    if (any(tt)) vnames[tt] = paste("V", which(tt), sep = "")
    # so now finally we have good column names. We also will use novname later to know which were explicitly supplied in the call.
    n <- length(x)
    if (n < 1L)
        return( null.data.table() )
    if (length(vnames) != n) stop("logical error in vnames")
    vnames <- as.list.default(vnames)
    nrows = integer(n)          # vector of lengths of each column. may not be equal if silent repetition is required.
    numcols = integer(n)         # the ncols of each of the inputs (e.g. if inputs contain matrix or data.table)
    for (i in seq_len(n)) {
        xi = x[[i]]
        if (is.null(xi)) stop("column or argument ",i," is NULL")
        if ("POSIXlt" %chin% class(xi)) {
            warning("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
            x[[i]] = as.POSIXct(xi)
        } else if (is.matrix(xi) || is.data.frame(xi)) {  # including data.table (a data.frame, too)
            xi = as.data.table(xi, keep.rownames=keep.rownames)       # TO DO: allow a matrix to be a column of a data.table. This could allow a key'd lookup to a matrix, not just by a single rowname vector, but by a combination of several columns. A matrix column could be stored either by row or by column contiguous in memory.
            x[[i]] = xi
            numcols[i] = length(xi)
        } else if (is.table(xi)) {
            x[[i]] = xi = as.data.table.table(xi, keep.rownames=keep.rownames)
            numcols[i] = length(xi)
        } else if (is.function(xi)) {
            x[[i]] = xi = list(xi)
        }
        nrows[i] <- NROW(xi)    # for a vector (including list() columns) returns the length
        if (numcols[i]>0L) {
            namesi <- names(xi)  # works for both data.frame's, matrices and data.tables's
            if (length(namesi)==0L) namesi = rep.int("",ncol(xi))
            namesi[is.na(namesi)] = ""
            tt = namesi==""
            if (any(tt)) namesi[tt] = paste("V", which(tt), sep = "")
            # multi-column inputs keep their own column names when the argument was unnamed,
            # otherwise they are prefixed with the argument name
            if (novname[i]) vnames[[i]] = namesi
            else vnames[[i]] = paste(vnames[[i]], namesi, sep=".")
        }
    }
    nr <- max(nrows)
    ckey = NULL
    recycledkey = FALSE
    for (i in seq_len(n)) {
        xi = x[[i]]
        if (is.data.table(xi) && haskey(xi)) {
            if (nrows[i]<nr) recycledkey = TRUE
            else ckey = c(ckey, key(xi))
        }
    }
    for (i in which(nrows < nr)) {
        # TO DO ... recycle in C, but not high priority as large data already regular from database or file
        xi <- x[[i]]
        if (identical(xi,list())) {
            x[[i]] = vector("list", nr)
            next
        }
        if (nrows[i]==0L) stop("Item ",i," has no length. Provide at least one item (such as NA, NA_integer_ etc) to be repeated to match the ",nr," rows in the longest column. Or, all columns can be 0 length, for insert()ing rows into.")
        # Implementing FR #4813 - recycle with warning when nr %% nrows[i] != 0L
        if (nr%%nrows[i] != 0L) warning("Item ", i, " is of size ", nrows[i], " but maximum size is ", nr, " (recycled leaving remainder of ", nr%%nrows[i], " items)")
        if (is.data.frame(xi)) {   # including data.table
            ..i = rep(seq_len(nrow(xi)), length.out = nr)
            x[[i]] = xi[..i,,drop=FALSE]
            next
        }
        if (is.atomic(xi) || is.list(xi)) {
            # TO DO: surely use set() here, or avoid the coercion
            x[[i]] = rep(xi, length.out = nr)
            next
        }
        # every branch above leaves the iteration via next or stop, so this is the only remaining outcome
        stop("problem recycling column ",i,", try a simpler type")
    }
    if (any(numcols>0L)) {
        # at least one input has several columns: flatten everything into one list of columns
        value = vector("list",sum(pmax(numcols,1L)))
        k = 1L
        for(i in seq_len(n)) {
            if (is.list(x[[i]]) && !is.ff(x[[i]])) {
                for(j in seq_len(length(x[[i]]))) {
                    value[[k]] = x[[i]][[j]]
                    k=k+1L
                }
            } else {
                value[[k]] = x[[i]]
                k=k+1L
            }
        }
    } else {
        value = x
    }
    vnames <- unlist(vnames)
    if (check.names)   # default FALSE
        vnames <- make.names(vnames, unique = TRUE)
    setattr(value,"names",vnames)
    setattr(value,"row.names",.set_row_names(nr))
    setattr(value,"class",c("data.table","data.frame"))
    if (!is.null(key)) {
      if (!is.character(key)) stop("key argument of data.table() must be character")
      if (length(key)==1L) {
          key = strsplit(key,split=",")[[1L]]
          # eg key="A,B"; a syntax only useful in key argument to data.table(), really.
      }
      setkeyv(value,key)
    } else {
       # retain key of cbind(DT1, DT2, DT3) where DT2 is keyed but not DT1. cbind calls data.table().
       # If DT inputs with keys have been recycled then can't retain key
       if (length(ckey)
           && !recycledkey
           && !any(duplicated(ckey))
           && all(ckey %in% names(value))
           && !any(duplicated(names(value)[names(value) %in% ckey])))
           setattr(value, "sorted", ckey)
    }
    # FR #643, setfactor is an internal function in fread.R
    if (isTRUE(stringsAsFactors)) setfactor(value, which(vapply(value, is.character, TRUE)), FALSE)
    alloc.col(value)  # returns a NAMED==0 object, unlike data.frame()
}

replace_dot_alias <- function(e) {
    # Recursively rewrites every call to `.` inside e as a call to `list`.
    # Anything that is not a call is returned untouched.
    # We don't just simply alias .=list because i) list is a primitive (faster to iterate) and ii) we test for use
    # of "list" in several places so it saves having to remember to write "." || "list" in those places
    if (!is.call(e)) return(e)
    if (e[[1L]] == ".") e[[1L]] = quote(list)
    for (k in seq_along(e)[-1L]) {
        # NULL arguments are skipped: assigning NULL back would drop the element
        if (is.null(e[[k]])) next
        e[[k]] = replace_dot_alias(e[[k]])
    }
    e
}

.massagei <- function(x) {
    # J alias for list as well in i, just if the first symbol
    # If x is a call whose function is the symbol J or ., that symbol is replaced by
    # list; any other x is returned unchanged. Only the top level is inspected.
    # The is.name() guard matters when the call head is itself a call (e.g. pkg::f(a) or a$b(a)):
    # as.character() of such a head has length > 1, which is not a valid operand for &&.
    if (is.call(x) && is.name(x[[1L]]) && as.character(x[[1L]]) %chin% c("J","."))
        x[[1L]] = quote(list)
    x
}

# A (relatively) fast (uses DT grouping) wrapper for matching two vectors, BUT:
# it behaves like 'pmatch' but only the 'exact' matching part. That is, a value in 
# 'x' is matched to 'table' only once. No index will be present more than once. 
# This should make it even clearer:
# chmatch2(c("a", "a"), c("a", "a")) # 1,2 - the second 'a' in 'x' has a 2nd match in 'table'
# chmatch2(c("a", "a"), c("a", "b")) # 1,NA - the second one doesn't 'see' the first 'a'
# chmatch2(c("a", "a"), c("a", "a.1")) # 1,NA - this is where it differs from pmatch - we don't need the partial match.
chmatch2 <- function(x, table, nomatch=NA_integer_) {
    # nomatch is coerced to integer before being handed to the C routine
    # (Cchmatch2 lives in 'rbindlist.c' for now).
    nomatch_int <- as.integer(nomatch)
    .Call(Cchmatch2, x, table, nomatch_int)
}

"[.data.table" <- function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch"), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL)
{
    # ..selfcount <<- ..selfcount+1  # in dev, we check no self calls, each of which doubles overhead, or could
    # test explicitly if the caller is [.data.table (even stronger test. TO DO.)
    # the drop=NULL is to sink drop argument when dispatching to [.data.frame; using '...' stops test 147
    if (!cedta()) {
        # Fix for #5070 (to do)
        Nargs = nargs() - (!missing(drop))
        ans = if (Nargs<3L) `[.data.frame`(x,i)  # drop ignored anyway by DF[i]
              else if (missing(drop)) `[.data.frame`(x,i,j)
              else `[.data.frame`(x,i,j,drop)
        # added is.data.table(ans) check to fix bug #5069
        if (!missing(i) & is.data.table(ans)) setkey(ans,NULL)  # See test 304
        return(ans)
    }
    if (!mult %chin% c("first","last","all")) stop("mult argument can only be 'first','last' or 'all'")
    if (length(roll)!=1L || is.na(roll)) stop("roll must be a single TRUE, FALSE, positive/negative integer/double including +Inf and -Inf or 'nearest'")
    if (is.character(roll)) {
        if (roll!="nearest") stop("roll is '",roll,"' (type character). Only valid character value is 'nearest'.")
    } else {
        roll = if (isTRUE(roll)) +Inf else as.double(roll)
    }
    force(rollends)
    if (!is.logical(rollends)) stop("rollends must be a logical vector")
    if (length(rollends)>2) stop("rollends must be length 1 or 2")
    if (length(rollends)==1) rollends=rep.int(rollends,2L)
    # TO DO (document/faq/example). Removed for now ... if ((roll || rolltolast) && missing(mult)) mult="last" # for when there is exact match to mult. This does not control cases where the roll is mult, that is always the last one.
    missingnomatch = missing(nomatch)
    if (!is.na(nomatch) && nomatch!=0L) stop("nomatch must either be NA or 0, or (ideally) NA_integer_ or 0L")
    nomatch = as.integer(nomatch)
    if (!is.logical(which) || length(which)>1) stop("'which' must be a logical vector length 1. Either FALSE, TRUE or NA.")
    if ((isTRUE(which)||is.na(which)) && !missing(j)) stop("'which' is ",which," (meaning return row numbers) but 'j' is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.")
    if (!is.na(nomatch) && is.na(which)) stop("which=NA with nomatch=0 would always return an empty vector. Please change or remove either which or nomatch.")
    .global$print=""
    if (missing(i) && missing(j)) {
        # ...[] == oops at console, forgot print(...)
        # or some kind of dynamic construction that has edge case of no contents inside [...]
        return(x)
    }
    if (!with && missing(j)) stop("j must be provided when with=FALSE")
    if (!missing(j)) {
        jsub = replace_dot_alias(substitute(j))
        if (is.call(jsub) && jsub[[1L]]=="{") {
            if (length(jsub)==2) jsub = jsub[[2L]]  # to allow {} wrapping of := e.g. [,{`:=`(...)},] [#376]
            else if (is.call(jsub[[2L]]) && jsub[[2L]][[1L]] == ":=")
                stop("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}")
        }
        if (is.call(jsub)) {
            if (jsub[[1L]] == "eval" && !any(all.vars(jsub[[2]]) %in% names(x))) {
                # Grab the dynamic expression from calling scope now to give the optimizer a chance to optimize it
                # Only when top level is eval call.  Not nested like x:=eval(...) or `:=`(x=eval(...), y=eval(...))
                jsub = eval(jsub[[2L]], parent.frame(), parent.frame())  # this evals the symbol to return the dynamic expression
                if (is.expression(jsub)) jsub = jsub[[1L]]    # if expression, convert it to call
                # Note that the dynamic expression could now be := (new in v1.9.7)
            }
            if (is.call(jsub) && jsub[[1L]] == ":=") allow.cartesian=TRUE   # (see #800)
        }
    }
    bysub=NULL
    if (!missing(by)) bysub=substitute(by)
    if (!missing(keyby)) {
        if (!missing(by)) stop("Provide either 'by' or 'keyby' but not both")
        by=bysub=substitute(keyby)
        # Assign to 'by' so that by is no longer missing and we can proceed as if there were one by
    }
    byjoin = FALSE
    if (!missing(by)) {
        if (missing(j)) stop("'by' or 'keyby' is supplied but not j")
        byjoin = is.symbol(bysub) && bysub==".EACHI"
    }
    irows = NULL  # Meaning all rows. We avoid creating 1:nrow(x) for efficiency.
    notjoin = FALSE
    rightcols = leftcols = integer()
    
    # To take care of duplicate column names properly (see chmatch2 function above `[data.table`) for description
    dupmatch <- function(x, y, ...) {
        if (anyDuplicated(x))
            pmax(chmatch(x,y, ...), chmatch2(x,y,0L))
        else chmatch(x,y)
    }
    
    # setdiff removes duplicate entries, which'll create issues with duplicated names. Use '%chin% instead.
    dupdiff <- function(x, y) x[!x %chin% y]
    
    if (!missing(i)) {
        xo = NULL
        isub = substitute(i)
        isnull_inames = FALSE
        nqgrp = integer(0)  # for non-equi join
        nqmaxgrp = 1L       # for non-equi join
        # Fixes 4994: a case where quoted expression with a "!", ex: expr = quote(!dt1); dt[eval(expr)] requires 
        # the "eval" to be checked before `as.name("!")`. Therefore interchanged.
        restore.N = remove.N = FALSE
        if (exists(".N", envir=parent.frame(), inherits=FALSE)) {
            old.N = get(".N", envir=parent.frame(), inherits=FALSE)
            locked.N = bindingIsLocked(".N", parent.frame())
            if (locked.N) eval(call("unlockBinding", ".N", parent.frame()))  # eval call to pass R CMD check NOTE until we find cleaner way
            assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE)
            restore.N = TRUE
            # the comment below is invalid hereafter (due to fix for #1145)
            # binding locked when .SD[.N] but that's ok as that's the .N we want anyway

            # TO DO: change isub at C level s/.N/nrow(x); changing a symbol to a constant should be ok
        } else {
           assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE)
           remove.N = TRUE
        }
        if (is.call(isub) && isub[[1L]]=="eval") {  # TO DO: or ..()
            isub = eval(.massagei(isub[[2L]]), parent.frame(), parent.frame())
            if (is.expression(isub)) isub=isub[[1L]]
        }
        if (is.call(isub) && isub[[1L]] == as.name("!")) {
            notjoin = TRUE
            if (!missingnomatch) stop("not-join '!' prefix is present on i but nomatch is provided. Please remove nomatch.");
            nomatch = 0L
            isub = isub[[2L]]
            # #932 related so that !(v1 == 1) becomes v1 == 1 instead of (v1 == 1) after removing "!"
            if (is.call(isub) && isub[[1L]] == "(" && !is.name(isub[[2L]]))
                isub = isub[[2L]]
        }
        if (is.call(isub) && isub[[1L]] == as.name("order") && getOption("datatable.optimize") >= 1) { # optimize here so that we can switch it off if needed
            if (verbose) cat("order optimisation is on, i changed from 'order(...)' to 'forder(DT, ...)'.\n")
            isub = as.list(isub)
            isub = as.call(c(list(quote(forder), quote(x)), isub[-1L]))
        }
        if (is.null(isub)) return( null.data.table() )
        if (is.call(isub) && isub[[1L]] == quote(forder)) {
            order_env = new.env(parent=parent.frame())            # until 'forder' is exported
            assign("forder", forder, order_env)
            assign("x", x, order_env)
            i = eval(isub, order_env, parent.frame())             # for optimisation of 'order' to 'forder'
            # that forder returns integer(0) is taken care of internally within forder
          } else if (is.call(isub) &&
                     getOption("datatable.use.index") && # #1422
                     as.character(isub[[1L]]) %chin% c("==","%in%") &&
                     is.name(isub[[2L]]) &&
                     (isub2<-as.character(isub[[2L]])) %chin% names(x) &&
                     (getOption("datatable.auto.index") || (isub2 %chin% indices(x))) && # `||` used to either auto.index or already have index #1422
                     is.null(attr(x, '.data.table.locked'))) {  # fix for #958, don't create auto index on '.SD'.
            # LHS is a column name symbol
            # simplest case for now (single ==).  Later, top level may be &,|,< or >
            # TO DO: print method could print physical and secondary keys at end.
            # TO DO: move down to if (is.data.table) clause below, later ...
            RHS = eval(isub[[3L]], x, parent.frame())
            # fix for #961
            if (is.list(RHS)) RHS = as.character(RHS)
            if (isub[[1L]] == "==" && length(RHS)>1) {
                if (length(RHS)!=nrow(x)) stop("RHS of == is length ",length(RHS)," which is not 1 or nrow (",nrow(x),"). For robustness, no recycling is allowed (other than of length 1 RHS). Consider %in% instead.")
                i = x[[isub2]] == RHS    # DT[colA == colB] regular element-wise vector scan
            } else if ( (is.integer(x[[isub2]]) && is.double(RHS) && isReallyReal(RHS)) || (mode(x[[isub2]]) != mode(RHS) && !(class(x[[isub2]]) %in% c("character", "factor") && 
                         class(RHS) %in% c("character", "factor"))) || 
                         (is.factor(x[[isub2]]) && !is.factor(RHS) && mode(RHS)=="numeric") ) { # fringe case, #1361. TODO: cleaner way of doing these checks.
                    # re-direct all non-matching mode cases to base R, as data.table's binary 
                    # search based join is strict in types. #957 and #961.
                    i = if (isub[[1L]] == "==") x[[isub2]] == RHS else x[[isub2]] %in% RHS
            } else {
                # fix for #932 (notjoin) and also when RHS is NA (and notjoin is also TRUE)
                if (isub[[1L]] == "==") {
                    # RHS is of length=1 or n
                    if (any_na(as_list(RHS))) {
                        notjoin = FALSE
                        RHS = RHS[0L]
                    } else if (notjoin) {
                        RHS = c(RHS, if (is.double(RHS)) c(NA, NaN) else NA)
                    }
                }
                if (haskey(x) && isub2 == key(x)[1L]) {
                    # join to key(x)[1L]
                    xo <- integer()
                    rightcols = chmatch(key(x)[1],names(x))
                } else {
                    xo = get2key(x,isub2)  # Can't be any index with that col as the first one because those indexes will reorder within each group
                    if (is.null(xo)) {   # integer() would be valid and signifies o=1:.N
                        if (verbose) {cat("Creating new index '",isub2,"'\n",sep="");flush.console()}
                        if (identical(getOption("datatable.auto.index"), FALSE)) warning("Index is being created on '",isub2,"' besides the fact that option 'datatable.auto.index' is FALSE. Please report to data.table#1422.") # why not double check that, even if won't happen now may be a good check for future changes
                        setindexv(x,isub2)
                        xo = get2key(x,isub2)
                    } else {
                        if (verbose) {cat("Using existing index '",isub2,"'\n",sep="");flush.console()}
                    }
                    rightcols = chmatch(isub2, names(x))
                }
                # convert RHS to list to join to key (either physical or secondary)
                i = as.data.table( unique(RHS) )
                # To do: wrap isub[[3L]] with as.data.table() first before eval to save copy
                leftcols = 1L
                ans = bmerge(i, x, leftcols, rightcols, io<-FALSE, xo, roll=0.0, rollends=c(FALSE,FALSE), nomatch=0L, mult="all", 1L, nqgrp, nqmaxgrp, verbose=verbose)
                # No need to shallow copy i before passing to bmerge; we just created i above ourselves
                i = if (ans$allLen1 && !identical(suppressWarnings(min(ans$starts)), 0L)) ans$starts else vecseq(ans$starts, ans$lens, NULL)
                if (length(xo)) i = fsort(xo[i]) else i = fsort(i) # fix for #1495
                leftcols = rightcols = NULL  # these are used later to know whether a join was done, affects column order of result. So reset.
            }
        } else if (!is.name(isub)) i = eval(.massagei(isub), x, parent.frame())
          else i = eval(isub, parent.frame(), parent.frame())
        if (restore.N) {
            assign(".N", old.N, envir=parent.frame())
            if (locked.N) lockBinding(".N", parent.frame())
        }
        if (remove.N) rm(list=".N", envir=parent.frame())
        if (is.matrix(i)) {
            if (is.numeric(i) && ncol(i)==1L) { # #826 - subset DT on single integer vector stored as matrix
                i = as.integer(i)
            } else {
                stop("i is invalid type (matrix). Perhaps in future a 2 column matrix could return a list of elements of DT (in the spirit of A[B] in FAQ 2.14). Please let datatable-help know if you'd like this, or add your comments to FR #657.")
            }
        }
        if (is.logical(i)) {
            if (notjoin) {
                notjoin = FALSE
                i = !i
            }
        }
        if (is.null(i)) return( null.data.table() )
        if (is.character(i)) {
            isnull_inames = TRUE
            i = data.table(V1=i)   # for user convenience; e.g. DT["foo"] without needing DT[.("foo")]
        } else if (identical(class(i),"list") && length(i)==1L && is.data.frame(i[[1L]])) i = as.data.table(i[[1L]])
        else if (identical(class(i),"data.frame")) i = as.data.table(i)   # TO DO: avoid these as.data.table() and use a flag instead
        else if (identical(class(i),"list")) {
            isnull_inames = is.null(names(i))
            i = as.data.table(i)
        }
        if (is.data.table(i)) {
            if (!haskey(x) && missing(on) && is.null(xo)) {
                stop("When i is a data.table (or character vector), the columns to join by must be specified either using 'on=' argument (see ?data.table) or by keying x (i.e. sorted, and, marked as sorted, see ?setkey). Keyed joins might have further speed benefits on very large data due to x being sorted in RAM.")
            }
            if (!missing(on)) {
                # on = .() is now possible, #1257
                # Parse the substituted `on=` argument into join-column pairs plus their operators.
                #
                # onsub: the unevaluated `on` expression (result of substitute(on)). Handled forms:
                #   - eval(expr): expr is evaluated in the caller's frame (parent.frame(2L)) first; if
                #     that result is itself an eval(...) call, its argument is unwrapped once more.
                #   - list(...) / .(...): each element is deparsed to a string (blanks surrounding an
                #     operator are dropped) and the strings are combined with c(); see #1257.
                #   - anything else is evaluated as-is and must yield a character vector.
                # Each string is either "xcol OP icol" or a bare column name (treated as "==").
                #
                # Returns list(on=, ops=) where `on` is a character vector of the right-hand-side
                # names, named by the left-hand-side names, and `ops` is the position of each
                # operator within the local `ops` vector (1L is "==").
                # Stops if `on` does not evaluate to character, or if an unsupported operator
                # (e.g. "!=") is used.
                # Reads `isnull_inames` from the enclosing frame when `on` carries no names.
                parse_on <- function(onsub) {
                    ops = c("==", "<=", "<", ">=", ">", "!=")
                    pat = paste("(", ops, ")", sep = "", collapse = "|")
                    if (is.call(onsub) && onsub[[1L]] == "eval") {
                        onsub = eval(onsub[[2L]], parent.frame(2L), parent.frame(2L))
                        if (is.call(onsub) && onsub[[1L]] == "eval") onsub = onsub[[2L]]
                    }
                    # is.name() guard: when the call head is itself a call (e.g. pkg::fn(...)),
                    # as.character() returns several strings and `&&` would fail on a length>1 condition.
                    if (is.call(onsub) && is.name(onsub[[1L]]) && as.character(onsub[[1L]]) %in% c("list", ".")) {
                        spat = paste("[ ]+(", pat, ")[ ]+", sep="")
                        onsub = lapply(as.list(onsub)[-1L], function(x) gsub(spat, "\\1", deparse(x, width.cutoff=500L)))
                        onsub = as.call(c(quote(c), onsub))
                    }
                    on = eval(onsub, parent.frame(2L), parent.frame(2L))
                    if (!is.character(on))
                        stop("'on' argument should be a named atomic vector of column names indicating which columns in 'i' should be joined with which columns in 'x'.")
                    this_op = regmatches(on, gregexpr(pat, on))
                    idx = (vapply(this_op, length, 0L) == 0L)  # TRUE where no operator was written
                    this_op[idx] = "=="
                    this_op = unlist(this_op, use.names=FALSE)
                    idx_op = match(this_op, ops, nomatch=0L)
                    # 0L = unrecognised, 6L = "!=" (matched by the pattern but not supported in joins).
                    # Report exactly those, so the message names the offending operator(s).
                    invalid = idx_op %in% c(0L, 6L)
                    if (any(invalid))
                        stop("Invalid operators ", paste(this_op[invalid], collapse=","), ". Only allowed operators are ", paste(ops[1:5], collapse=" "), ".")
                    if (is.null(names(on))) {
                        # operator-less entries: pair with V1..Vn when i was built without names, else with the same name
                        on[idx] = if (isnull_inames) paste(on[idx], paste("V", seq_len(sum(idx)), sep=""), sep="==") else paste(on[idx], on[idx], sep="==")
                    } else {
                        on[idx] = paste(names(on)[idx], on[idx], sep="==")
                    }
                    split = tstrsplit(on, paste("[ ]*", pat, "[ ]*", sep=""))
                    on = setattr(split[[2L]], 'names', split[[1L]])
                    # an empty left-hand side falls back to the right-hand side name
                    if (length(empty_idx <- which(names(on) == "")))
                        names(on)[empty_idx] = on[empty_idx]
                    list(on = on, ops = idx_op)
                }
                on_ops = parse_on(substitute(on))
                on = on_ops[[1L]]
                ops = on_ops[[2L]]
                # TODO: collect all '==' ops first to speed up Cnestedid
                rightcols = chmatch(names(on), names(x))
                if (length(nacols <- which(is.na(rightcols))))
                    stop("Column(s) [", paste(names(on)[nacols], collapse=","), "] not found in x")
                leftcols  = chmatch(unname(on), names(i))
                if (length(nacols <- which(is.na(leftcols))))
                    stop("Column(s) [", paste(unname(on)[nacols], collapse=","), "] not found in i")
                # figure out the columns on which to compute groups on
                non_equi = which.first(ops != 1L) # 1 is "==" operator
                if (!is.na(non_equi)) { # non-equi conditions present.. investigate groups..
                    nqcols = rightcols[non_equi:length(rightcols)]
                    nqgrp = .Call(Cnestedid, x, nqcols, forderv(x, nqcols))
                    if ( (nqmaxgrp <- max(nqgrp)) > 1L) { # got some non-equi join work to do
                        if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.")
                        set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp)
                        xo = forderv(nqx, c(ncol(nqx), rightcols))
                    } else nqgrp = integer(0)
                }
                if (nqmaxgrp == 1L) { # equi join. Reuse secondary index, #1439
                    xo = if (isTRUE(getOption("datatable.use.index"))) {
                        if (verbose) cat("Looking for existing (secondary) index... ")
                        attr(attr(x, 'index'), paste("__", names(x)[rightcols], sep="", collapse=""))
                    }
                    if (is.null(xo)) {
                        if (verbose) {
                            if (isTRUE(getOption("datatable.use.index"))) cat("not found.\n")
                            tt = system.time(xo <- forderv(x, by=rightcols))
                            cat("forder took", tt["user.self"] + tt["sys.self"], "sec\n")
                        } else xo = forderv(x, by = rightcols)
                    } else {
                        if (verbose) cat("found. Reusing index.\n")
                    }
                } else {
                    if (!missing(by)) stop("by-joins are not yet implemented for multi-group non-equi-joins.")
                }
            } else if (is.null(xo)) {
                rightcols = chmatch(key(x),names(x))   # NAs here (i.e. invalid data.table) checked in bmerge()
                leftcols = if (haskey(i))
                    chmatch(head(key(i),length(rightcols)),names(i))
                else
                    seq_len(min(length(i),length(rightcols)))
                rightcols = head(rightcols,length(leftcols))
                xo = integer()  ## signifies 1:.N
                if (missing(by) && with && isTRUE(getOption("datatable.old.bywithoutby"))) {
                    # To revert to <=v1.9.2 behaviour.  TO DO: remove option after Sep 2015
                    warning("The data.table option 'datatable.old.bywithoutby' for grouping on join without providing `by` will be deprecated in the next release, use `by=.EACHI`.", call. = FALSE)
                    by=bysub=as.symbol(".EACHI")
                    byjoin=TRUE
                    txtav = c(names(x)[-rightcols], names(i)[-leftcols])
                    if (missing(j)) j = jsub = as.call(parse(text=paste(".(",paste(txtav, collapse=","),")",sep="")))[[1]]
                }
                ops = rep(1L, length(leftcols))
            }
            # Implementation for not-join along with by=.EACHI, #604
            if (notjoin && (byjoin || mult != "all")) { # mult != "all" needed for #1571 fix
                notjoin = FALSE
                if (verbose) {last.started.at=proc.time()[3];cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()}
                orignames = copy(names(i))
                i = setdiff_(x, i, rightcols, leftcols) # part of #547
                if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console}
                setnames(i, orignames[leftcols])
                setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted
            }
            io = if (missing(on)) haskey(i) else identical(unname(on), head(key(i), length(on)))
            i = .shallow(i, retain.key = io)
            ans = bmerge(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp, verbose=verbose)
            # temp fix for issue spotted by Jan. Ideally would like to avoid this 'setorder', as there's another 
            # 'setorder' in generating 'irows' below...
            if (length(ans$indices)) setorder(setDT(ans[1:3]), indices)
            allLen1 = ans$allLen1
            allGrp1 = ans$allGrp1
            f__ = ans$starts
            len__ = ans$lens
            indices__ = ans$indices
            # length of input nomatch (single 0 or NA) is 1 in both cases.
            # When no match, len__ is 0 for nomatch=0 and 1 for nomatch=NA, so len__ isn't .N
            # If using secondary key of x, f__ will refer to xo
            if (is.na(which)) {
                w = if (notjoin) f__!=0L else is.na(f__)
                return( if (length(xo)) fsort(xo[w]) else which(w) )
            }
            if (mult=="all") {
                if (!byjoin) {
                    # Really, `anyDuplicated` in base is AWESOME!
                    # allow.cartesian shouldn't error if a) not-join, b) 'i' has no duplicates
                    irows = if (allLen1) f__ else vecseq(f__,len__,
                        if( allow.cartesian || 
                            notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x).
                            !anyDuplicated(f__, incomparables = c(0L, NA_integer_)))  # #742. If 'i' has no duplicates, ignore 
                            NULL 
                        else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
                    # Fix for #1092 and #1074
                    # TODO: implement better version of "any"/"all"/"which" to avoid 
                    # unnecessary construction of logical vectors
                    if (identical(nomatch, 0L) && allLen1) irows = irows[irows != 0L]
                } else {
                    if (length(xo) && missing(on)) stop("Cannot by=.EACHI when joining to a secondary key, yet")
                    # since f__ refers to xo later in grouping, so xo needs to be passed through to dogroups too.
                    if (length(irows)) stop("Internal error. irows has length in by=.EACHI")
                }
            } else {
		# turning on mult = "first"/"last" for non-equi joins again to test..
		# if (nqmaxgrp>1L) stop("Non-equi joins aren't yet functional with mult='first' and mult='last'.")
                # mult="first"/"last" logic moved to bmerge.c, also handles non-equi cases, #1452
                if (!byjoin) { #1287 and #1271
                    irows = f__ # len__ is set to 1 as well, no need for 'pmin' logic
                    if (identical(nomatch,0L)) irows = irows[len__>0L]  # 0s are len 0, so this removes -1 irows
                }
                # TODO: when nomatch=NA, len__ need not be allocated / set at all for mult="first"/"last"?
                # TODO: how about when nomatch=0L, can we avoid allocating then as well?
            }
            if (length(xo) && length(irows)) {
                irows = xo[irows]   # TO DO: fsort here?
                if (mult=="all" && !allGrp1 && length(xo)) {
                    irows = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))$irows
                }
            }
        } else {
            if (!missing(on)) {
                stop("logical error. i is not a data.table, but 'on' argument is provided.")
            }
            # TO DO: TODO: Incorporate which_ here on DT[!i] where i is logical. Should avoid i = !i (above) - inefficient.
            # i is not a data.table
            if (!is.logical(i) && !is.numeric(i)) stop("i has not evaluated to logical, integer or double")
            if (is.logical(i)) {
                if (isTRUE(i)) irows = i = NULL  # fixes #1249
                else if (identical(i, NA)) irows=i=integer(0)  # DT[NA] thread recycling of NA logical exists,
                                # but for #1252 and consistency, we need to return 0-rows
                else if (length(i)==nrow(x)) irows = i = which(i)   # e.g. DT[colA>3,which=TRUE]
                                                               # also replacing 'i' here - to save memory, #926.
                else irows=seq_len(nrow(x))[i]  # e.g. recycling DT[c(TRUE,FALSE),which=TRUE], for completeness 
                # it could also be DT[!TRUE, which=TRUE] (silly cases, yes). 
                # replaced the "else if (!isTRUE(i))" to just "else". Fixes bug report #4930 
            } else {
                irows = as.integer(i)  # e.g. DT[c(1,3)] and DT[c(-1,-3)] ok but not DT[c(1,-3)] (caught as error)
                irows = .Call(CconvertNegativeIdx, irows, nrow(x)) # simplifies logic from here on (can assume positive subscripts)
                                                                   # maintains Arun's fix for #2697 (test 1042)
                                                                   # efficient in C with more detailed messages
                                                                   # falls through quickly (no R level allocs) if no negatives
                                                                   # minor TO DO: can we merge this with check_idx in fcast.c/subset ?
            }
        }
        if (notjoin) {
            if (byjoin || !is.integer(irows) || is.na(nomatch)) stop("Internal error: notjoin but byjoin or !integer or nomatch==NA")
            irows = irows[irows!=0L]
            i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL  # NULL meaning all rows i.e. seq_len(nrow(x))
            leftcols = integer()  # proceed as if row subset from now on, length(leftcols) is switched on later
            rightcols = integer()
            # Doing this once here, helps speed later when repeatedly subsetting each column. R's [irows] would do this for each
            # column when irows contains negatives.
        }
        if (which) return( if (is.null(irows)) seq_len(nrow(x)) else irows )
    } else {  # missing(i)
        i = NULL
    }

    byval = NULL
    xnrow = nrow(x)
    xcols = xcolsAns = icols = icolsAns = integer()
    othervars = character(0)
    if (missing(j)) {
        # missing(by)==TRUE was already checked above before dealing with i
        if (!length(x)) return(null.data.table())
        if (!length(leftcols)) {
            ansvars = nx = names(x)
            jisvars = character()
            xcols = xcolsAns = seq_along(x)
        } else {
            jisvars = names(i)[-leftcols]
            tt = jisvars %chin% names(x)
            if (length(tt)) jisvars[tt] = paste("i.",jisvars[tt],sep="")
            if (length(duprightcols <- rightcols[duplicated(rightcols)])) {
                nx = c(names(x), names(x)[duprightcols])
                rightcols = chmatch2(names(x)[rightcols], nx)
                nx = make.unique(nx)
            } else nx = names(x)
            ansvars = make.unique(c(nx, jisvars))
            icols = c(leftcols, seq_along(i)[-leftcols])
            icolsAns = c(rightcols, seq.int(length(nx)+1L, length.out=ncol(i)-length(unique(leftcols))))
            xcols = xcolsAns = seq_along(x)[-rightcols]
        }
        ansvals = chmatch(ansvars, nx)
    } else {
        # j was substituted before dealing with i so that := can set allow.cartesian=FALSE (#800) (used above in i logic)
        if (is.null(jsub)) return(NULL)
        if (is.call(jsub) && jsub[[1L]]==":=") {
            # short circuit do-nothing, don't do further checks on .SDcols for example
            if (identical(irows, integer())) {
                if (identical(nomatch, 0L)) {
                    .global$print = address(x)
                    return(invisible(x))          # irows=NULL means all rows at this stage
                } else irows = rep(NA_integer_, nrow(x)) # fix for #759
            }
            if (!with) {
                if (is.null(names(jsub)) && is.name(jsub[[2L]])) {
                    # TO DO: uncomment these warnings in next release. Later, make both errors.
                    ## warning("with=FALSE is deprecated when using :=. Please wrap the LHS of := with parentheses; e.g., DT[,(myVar):=sum(b),by=a] to assign to column name(s) held in variable myVar. See ?':=' for other examples.")
                    jsub[[2L]] = eval(jsub[[2L]], parent.frame(), parent.frame()) 
                } else {
                    ## warning("with=FALSE ignored, it isn't needed when using :=. See ?':=' for examples.")
                }
                with = TRUE
            }
        }
        if (!with) {
            # missing(by)==TRUE was already checked above before dealing with i
            if (is.call(jsub) && deparse(jsub[[1]], 500L) %in% c("!", "-")) {  # TODO is deparse avoidable here?
                notj = TRUE
                jsub = jsub[[2L]]
            } else notj = FALSE
            # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
            while (is.call(jsub) && jsub[[1L]] == "(") jsub = as.list(jsub)[[-1L]]
            if (is.call(jsub) && length(jsub) == 3L && jsub[[1L]] == ":") {
                j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names(x)), parent.frame()) # else j will be evaluated for the first time on next line
            } else {
                j = eval(jsub, parent.frame(), parent.frame())
            }
            if (is.logical(j)) j <- which(j)
            if (!length(j)) return( null.data.table() )
            if (is.factor(j)) j = as.character(j)  # fix for FR: #4867
            if (is.character(j)) {
                if (notj) {
                    w = chmatch(j, names(x))
                    if (any(is.na(w))) {
                        warning("column(s) not removed because not found: ",paste(j[is.na(w)],collapse=","))
                        w = w[!is.na(w)]
                    }
                    # changed names(x)[-w] to use 'setdiff'. Here, all instances of the column must be removed.
                    # Ex: DT <- data.table(x=1, y=2, x=3); DT[, !"x", with=FALSE] should just output 'y'.
                    # But keep 'dup cols' because it's basically DT[, !names(DT) %chin% "x", with=FALSE] which'll subset all cols not 'x'.
                    ansvars = if (length(w)) dupdiff(names(x), names(x)[w]) else names(x)
                    ansvals = dupmatch(ansvars, names(x))
                } else {
                    # once again, use 'setdiff'. Basically, unless indices are specified in `j`, we shouldn't care about duplicated columns.
                    ansvars = j   # x. and i. prefixes may be in here, and they'll be dealt with below 
                    # dups = FALSE here.. even if DT[, c("x", "x"), with=FALSE], we subset only the first.. No way to tell which one the OP wants without index.
                    ansvals = chmatch(ansvars, names(x))
                }
            } else if (is.numeric(j)) {
                if (all(j == 0L)) return (null.data.table())
                if (any(abs(j) > ncol(x) | j==0L)) stop("j out of bounds")
                if (any(j<0L) && any(j>0L)) stop("j mixes positive and negative")
                if (any(j<0L)) j = seq_len(ncol(x))[j]
                ansvars = names(x)[ if (notj) -j else j ]  # DT[,!"columntoexclude",with=FALSE], if a copy is needed, rather than :=NULL
                # DT[, c(1,3), with=FALSE] should clearly provide both 'x' columns
                ansvals = if (notj) setdiff(seq_along(x), as.integer(j)) else as.integer(j)
	    } else stop("When with=FALSE, j-argument should be of type logical/character/integer indicating the columns to select.") # fix for #1440.
            if (!length(ansvals)) return(null.data.table())
        } else {   # with=TRUE and byjoin could be TRUE
            bynames = NULL
            allbyvars = NULL
            if (byjoin) {
                bynames = names(x)[rightcols]
            } else if (!missing(by)) {
                # deal with by before j because we need byvars when j contains .SD 
                # may evaluate to NULL | character() | "" | list(), likely a result of a user expression where no-grouping is one case being loop'd through 
                bysubl = as.list.default(bysub)
                bysuborig = bysub
                if (is.name(bysub) && !(as.character(bysub) %chin% names(x))) {  # TO DO: names(x),names(i),and i. and x. prefixes
                    bysub = eval(bysub, parent.frame(), parent.frame())
                    # fix for # 5106 - http://stackoverflow.com/questions/19983423/why-by-on-a-vector-not-from-a-data-table-column-is-very-slow
                    # case where by=y where y is not a column name, and not a call/symbol/expression, but an atomic vector outside of DT.
                    # note that if y is a list, this'll return an error (not sure if it should).
                    if (is.atomic(bysub)) bysubl = list(bysuborig) else bysubl = as.list.default(bysub)
                }
                if (length(bysubl) && identical(bysubl[[1L]],quote(eval))) {    # TO DO: or by=..()
                    bysub = eval(bysubl[[2]], parent.frame(), parent.frame())
                    bysub = replace_dot_alias(bysub) # fix for #1298
                    if (is.expression(bysub)) bysub=bysub[[1L]]
                    bysubl = as.list.default(bysub)
                } else if (is.call(bysub) && as.character(bysub[[1L]]) %chin% c("c","key","names", "intersect", "setdiff")) {
                    # catch common cases, so we don't have to copy x[irows] for all columns
                    # *** TO DO ***: try() this eval first (as long as not list() or .()) and see if it evaluates to column names
                    # to avoid the explicit c,key,names which already misses paste("V",1:10) for example
                    #        tried before but since not wrapped in try() it failed on some tests
                    # or look for column names used in this by (since if none it wouldn't find column names anyway
                    # when evaled within full x[irows]).  Trouble is that colA%%2L is a call and should be within frame.
                    tt = eval(bysub, parent.frame(), parent.frame())
                    if (!is.character(tt)) stop("by=c(...), key(...) or names(...) must evaluate to 'character'")
                    bysub=tt
                } else if (is.call(bysub) && !as.character(bysub[[1L]]) %chin% c("list", "as.list", "{", ".", ":")) {
                    # potential use of function, ex: by=month(date). catch it and wrap with "(", because we need to set "bysameorder" to FALSE as we don't know if the function will return ordered results just because "date" is ordered. Fixes #2670.
                    bysub = as.call(c(as.name('('), list(bysub)))
                    bysubl = as.list.default(bysub)
                } else if (is.call(bysub) && bysub[[1L]] == ".") bysub[[1L]] = quote(list)
                
                if (mode(bysub) == "character") {
                    if (length(grep(",",bysub))) {
                        if (length(bysub)>1L) stop("'by' is a character vector length ",length(bysub)," but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities.")
                        bysub = strsplit(bysub,split=",")[[1L]]
                    }
                    tt = grep("^[^`]+$",bysub)
                    if (length(tt)) bysub[tt] = paste("`",bysub[tt],"`",sep="")
                    bysub = parse(text=paste("list(",paste(bysub,collapse=","),")",sep=""))[[1L]]
                    bysubl = as.list.default(bysub)
                }
                allbyvars = intersect(all.vars(bysub),names(x))
                
                orderedirows = .Call(CisOrderedSubset, irows, nrow(x))  # TRUE when irows is NULL (i.e. no i clause)
                # orderedirows = is.sorted(f__)
                bysameorder = orderedirows && haskey(x) && all(sapply(bysubl,is.name)) && length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars)))
                if (is.null(irows))
                    if (is.call(bysub) && length(bysub) == 3L && bysub[[1L]] == ":" && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) {
                        byval = eval(bysub, setattr(as.list(seq_along(x)), 'names', names(x)), parent.frame())
                        byval = as.list(x)[byval]
                    } else byval = eval(bysub, x, parent.frame())
                else {
                    if (!is.integer(irows)) stop("Internal error: irows isn't integer")  # length 0 when i returns no rows
                    # Passing irows as i to x[] below has been troublesome in a rare edge case.
                    # irows may contain NA, 0, negatives and >nrow(x) here. That's all ok.
                    # But we may need i join column values to be retained (where those rows have no match), hence we tried eval(isub)
                    # in 1.8.3, but this failed test 876.
                    # TO DO: Add a test like X[i,sum(v),by=i.x2], or where by includes a join column (both where some i don't match).
                    # TO DO: Make xss directly, rather than recursive call.
                    if (!is.na(nomatch)) irows = irows[irows!=0L]   # TO DO: can be removed now we have CisSortedSubset
                    if (length(allbyvars)) {    ###############  TO DO  TO DO  TO DO  ###############
                        if (verbose) cat("i clause present and columns used in by detected, only these subset:",paste(allbyvars,collapse=","),"\n")
                        xss = x[irows,allbyvars,with=FALSE,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends]
                    } else {
                        if (verbose) cat("i clause present but columns used in by not detected. Having to subset all columns before evaluating 'by': '",deparse(by),"'\n",sep="")
                        xss = x[irows,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends]
                    }
                    if (is.call(bysub) && length(bysub) == 3L && bysub[[1L]] == ":") {
                        byval = eval(bysub, setattr(as.list(seq_along(xss)), 'names', names(xss)), parent.frame())
                        byval = as.list(xss)[byval]
                    } else byval = eval(bysub, xss, parent.frame())
                    xnrow = nrow(xss)
                    # TO DO: pass xss (x subset) through into dogroups. Still need irows there (for :=), but more condense
                    # and contiguous to use xss to form .SD in dogroups than going via irows
                }
                if (!length(byval) && xnrow>0L) {
                    # see missing(by) up above for comments
                    # by could be NULL or character(0) for example (e.g. passed in as argument in a loop of different bys)
                    bysameorder = FALSE  # 1st and only group is the entire table, so could be TRUE, but FALSE to avoid
                                         # a key of empty character()
                    byval = list()
                    bynames = allbyvars = NULL
                    # the rest now fall through
                } else bynames = names(byval)
                if (is.atomic(byval)) {
                    if (is.character(byval) && length(byval)<=ncol(x) && !(is.name(bysub) && as.character(bysub)%chin%names(x)) ) {
                        stop("'by' appears to evaluate to column names but isn't c() or key(). Use by=list(...) if you can. Otherwise, by=eval",deparse(bysub)," should work. This is for efficiency so data.table can detect which columns are needed.")
                    } else {
                        # by may be a single unquoted column name but it must evaluate to list so this is a convenience to users. Could also be a single expression here such as DT[,sum(v),by=colA%%2]
                        byval = list(byval)
                        bysubl = c(as.name("list"),bysuborig)  # for guessing the column name below
                        if (is.name(bysuborig))
                            bynames = as.character(bysuborig)
                        else
                            bynames = names(byval)
                    }
                }
                if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to vector or list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
                for (jj in seq_len(length(byval))) {
                    if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
                }
                tt = sapply(byval,length)
                if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be same length as rows in x or number of rows returned by i (",xnrow,").")
                if (is.null(bynames)) bynames = rep.int("",length(byval))
                if (any(bynames=="")) {
                    if (length(bysubl)<2) stop("When 'by' or 'keyby' is list() we expect something inside the brackets")
                    for (jj in seq_along(bynames)) {
                        if (bynames[jj]=="") {
                            # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2
                            byvars = all.vars(bysubl[[jj+1L]], functions = TRUE)
                            if (length(byvars) == 1) tt = byvars
                            else {
                                tt = grep("^eval|^[^[:alpha:]. ]",byvars,invert=TRUE,value=TRUE)[1L]
                                if (!length(tt)) tt = all.vars(bysubl[[jj+1L]])[1L]
                            }
                            # fix for #497
                            if (length(byvars) > 1L && tt %in% all.vars(jsub, FALSE)) {
                                bynames[jj] = deparse(bysubl[[jj+1L]])
                                if (verbose)
                                    cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt, "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="")
                                
                            }
                            else bynames[jj] = tt
                            # if user doesn't like this inferred name, user has to use by=list() to name the column
                        }
                    }
                    # Fix for #1334
                    if (any(duplicated(bynames))) {
                        bynames = make.unique(bynames)
                    }
                }
                setattr(byval, "names", bynames)  # byval is just a list not a data.table hence setattr not setnames
            }
            
            jvnames = NULL
            if (is.name(jsub)) {
                # j is a single unquoted column name
                if (jsub!=".SD") {
                    jvnames = gsub("^[.](N|I|GRP|BY)$","\\1",as.character(jsub))
                    # jsub is list()ed after it's eval'd inside dogroups.
                }
            } else if (is.call(jsub) && as.character(jsub[[1L]]) %chin% c("list",".")) {
                jsub[[1L]] = quote(list)
                jsubl = as.list.default(jsub)  # TO DO: names(jsub) and names(jsub)="" seem to work so make use of that
                if (length(jsubl)>1) {
                    jvnames = names(jsubl)[-1L]   # check list(a=sum(v),v)
                    if (is.null(jvnames)) jvnames = rep.int("", length(jsubl)-1L)
                    for (jj in seq.int(2L,length(jsubl))) {
                        if (jvnames[jj-1L] == "" && mode(jsubl[[jj]])=="name")
                            jvnames[jj-1L] = gsub("^[.](N|I|GRP|BY)$","\\1",deparse(jsubl[[jj]]))
                        # TO DO: if call to a[1] for example, then call it 'a' too
                    }
                    setattr(jsubl, "names", NULL)  # drops the names from the list so it's faster to eval the j for each group. We'll put them back aftwards on the result.
                    jsub = as.call(jsubl)
                } # else empty list is needed for test 468: adding an empty list column
            } # else maybe a call to transform or something which returns a list.
            av = all.vars(jsub,TRUE)  # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
            use.I = ".I" %chin% av
            # browser()
            if (any(c(".SD","eval","get","mget") %chin% av)) {
                if (missing(.SDcols)) {
                    # here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'.
                    ansvars = dupdiff(names(x),union(bynames,allbyvars))   # TO DO: allbyvars here for vars used by 'by'. Document.
                    # just using .SD in j triggers all non-by columns in the subset even if some of
                    # those columns are not used. It would be tricky to detect whether the j expression
                    # really does use all of the .SD columns or not, hence .SDcols for grouping
                    # over a subset of columns

                    # all duplicate columns must be matched, because nothing is provided
                    ansvals = dupmatch(ansvars, names(x))
                } else {
                    # FR #4979 - negative numeric and character indices for SDcols
                    colsub = substitute(.SDcols)
                    # fix for #5190. colsub[[1L]] gave error when it's a symbol.
                    if (is.call(colsub) && deparse(colsub[[1L]], 500L) %in% c("!", "-")) {
                        colm = TRUE
                        colsub = colsub[[2L]]
                    } else colm = FALSE
                    # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4)))
                    while(is.call(colsub) && colsub[[1L]] == "(") colsub = as.list(colsub)[[-1L]]
                    if (is.call(colsub) && length(colsub) == 3L && colsub[[1L]] == ":") {
                        # .SDcols is of the format a:b
                        .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names(x)), parent.frame())
                    } else {
                        .SDcols = eval(colsub, parent.frame(), parent.frame())
                    }
                    if (is.logical(.SDcols)) {
                        ansvals = which_(rep(.SDcols, length.out=length(x)), !colm)
                        ansvars = names(x)[ansvals]
                    } else if (is.numeric(.SDcols)) {
                        # if .SDcols is numeric, use 'dupdiff' instead of 'setdiff'
                        if (length(unique(sign(.SDcols))) != 1L) stop(".SDcols is numeric but has both +ve and -ve indices")
                        if (any(is.na(.SDcols)) || any(abs(.SDcols)>ncol(x)) || any(abs(.SDcols)<1L)) stop(".SDcols is numeric but out of bounds (or NA)")
                        if (colm) ansvars = dupdiff(names(x)[-.SDcols], bynames) else ansvars = names(x)[.SDcols]
                        ansvals = if (colm) setdiff(seq_along(names(x)), c(as.integer(.SDcols), which(names(x) %chin% bynames))) else as.integer(.SDcols)
                    } else {
                        if (!is.character(.SDcols)) stop(".SDcols should be column numbers or names")
                        if (any(is.na(.SDcols)) || any(!.SDcols %chin% names(x))) stop("Some items of .SDcols are not column names (or are NA)")
                        if (colm) ansvars = setdiff(setdiff(names(x), .SDcols), bynames) else ansvars = .SDcols
                        # dups = FALSE here. DT[, .SD, .SDcols=c("x", "x")] again doesn't really help with which 'x' to keep (and if '-' which x to remove)
                        ansvals = chmatch(ansvars, names(x))
                    }
                }
                # fix for long standing FR/bug, #495 and #484
                allcols = c(names(x), paste("x.",names(x),sep=""), if (is.data.table(i)) c(names(i), paste("i.", names(i), sep="")))
                if ( length(othervars <- setdiff(intersect(av, allcols), c(bynames, ansvars))) ) {
                    # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or 
                    # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc., 
                    ansvars = union(ansvars, othervars)
                    ansvals = chmatch(ansvars, names(x))
                }
                # .SDcols might include grouping columns if users wants that, but normally we expect user not to include them in .SDcols
            } else {
                if (!missing(.SDcols)) warning("This j doesn't use .SD but .SDcols has been supplied. Ignoring .SDcols. See ?data.table.")
                allcols = c(names(x), paste("x.",names(x),sep=""), if (is.data.table(i)) c(names(i), paste("i.", names(i), sep="")))
                ansvars = setdiff(intersect(av,allcols), bynames)
                if (verbose) cat("Detected that j uses these columns:",if (!length(ansvars)) "<none>" else paste(ansvars,collapse=","),"\n")
                # using a few named columns will be faster
                # Consider:   DT[,max(diff(date)),by=list(month=month(date))]
                # and:        DT[,lapply(.SD,sum),by=month(date)]
                # We don't want date in .SD in the latter, but we do in the former; hence the union() above.
                ansvals = chmatch(ansvars, names(x))
            }
            # if (!length(ansvars)) Leave ansvars empty. Important for test 607.
            
            # TODO remove as (m)get is now folded in above.
            # added 'mget' - fix for #994
            if (any(c("get", "mget") %chin% av)) {
                if (verbose) {
                    cat("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:", paste(ansvars,collapse=","),"\n")
                    # get('varname') is too difficult to detect which columns are used in general
                    # eval(macro) column names are detected via the  if jsub[[1]]==eval switch earlier above.
                }
                allcols = c(names(x), paste("x.",names(x),sep=""), if (is.data.table(i)) c(names(i), paste("i.", names(i), sep="")))
                ansvars = setdiff(allcols,bynames) # fix for bug #5443
                ansvals = chmatch(ansvars, names(x))
                if (verbose) cat("New:",paste(ansvars,collapse=","),"\n")
            }

            lhs = NULL
            newnames = NULL
            suppPrint = identity
            if (length(av) && av[1L] == ":=") {
                if (identical(attr(x,".data.table.locked"),TRUE)) stop(".SD is locked. Using := in .SD's j is reserved for possible future use; a tortuously flexible way to modify by group. Use := in j directly to modify by group by reference.")
                # Stores the address of x in .global$print and hands x back unchanged.
                # print.data.table compares an object's address against .global$print.
                suppPrint <- function(x) {
                    .global$print = address(x)
                    x
                }
                # Suppress print when returns ok not on error, bug #2376. Thanks to: http://stackoverflow.com/a/13606880/403310
                # All appropriate returns following this point are wrapped; i.e. return(suppPrint(x)).
                
                # FR #4996 - verbose message and return when a join matches nothing with `:=` in j
                if (byjoin & !notjoin) {
                    # Note: !notjoin is here only until the notjoin is implemented as a "proper" byjoin
                    if (identical(nomatch,0L) && all(f__ == 0L)) {
                        if (verbose) cat("No rows pass i clause so quitting := early with no changes made.\n")
                        return(suppPrint(x))
                    } else if (all(is.na(f__))) { # nomatch can't be 0 here
                        # fix for #759
                        irows = rep(NA_integer_, nrow(x))
                        byjoin = FALSE
                    }
                }
                if (!is.null(irows)) {
                    # i was supplied, so := would only touch the rows in irows.
                    if (length(irows) == 0L) {
                        # No rows selected: hand x back untouched.
                        if (verbose) cat("No rows pass i clause so quitting := early with no changes made.\n")
                        return(suppPrint(x))
                    }
                    # Only reached for non-empty irows, because the branch above returns.
                    if (!with) irows <- irows[!is.na(irows)] # fixes 2445. TO DO: added a message if(verbose) or warning?
                    # This check is independent of 'with'; it runs for every non-empty irows.
                    if (!missing(keyby)) stop("When i is present, keyby := on a subset of rows doesn't make sense. Either change keyby to by, or remove i")
                }
                if (is.null(names(jsub))) {
                    # regular LHS:=RHS usage, or `:=`(...) with no named arguments (an error)
                    # `:=`(LHS,RHS) is valid though, but more because can't see how to detect that, than desire
                    if (length(jsub)!=3L) stop("In `:=`(col1=val1, col2=val2, ...) form, all arguments must be named.")
                    lhs = jsub[[2]]
                    jsub = jsub[[3]]
                    if (is.name(lhs)) {
                        lhs = as.character(lhs)
                    } else {
                        # e.g. (MyVar):= or get("MyVar"):=
                        lhs = eval(lhs, parent.frame(), parent.frame())
                    }
                } else {
                    # `:=`(c2=1L,c3=2L,...)
                    lhs = names(jsub)[-1]
                    if (any(lhs=="")) stop("In `:=`(col1=val1, col2=val2, ...) form, all arguments must be named.")
                    names(jsub)=""
                    jsub[[1]]=as.name("list")
                }
                av = all.vars(jsub,TRUE)
                if (!is.atomic(lhs)) stop("LHS of := must be a symbol, or an atomic vector (column names or positions).")
                if (is.character(lhs))
                    m = chmatch(lhs,names(x))
                else if (is.numeric(lhs)) {
                    m = as.integer(lhs)
                    if (any(m<1L | ncol(x)<m)) stop("LHS of := appears to be column positions but are outside [1,ncol] range. New columns can only be added by name.")
                    lhs = names(x)[m]
                } else
                    stop("LHS of := isn't column names ('character') or positions ('integer' or 'numeric')")
                if (all(!is.na(m))) {
                    # updates by reference to existing columns
                    cols = as.integer(m)
                    newnames=NULL
                } else {
                    # Adding new column(s). TO DO: move after the first eval in case the jsub has an error.
                    newnames=setdiff(lhs,names(x))
                    m[is.na(m)] = ncol(x)+seq_len(length(newnames))
                    cols = as.integer(m)
                    if ((ok<-selfrefok(x,verbose))==0L)   # ok==0 so no warning when loaded from disk (-1) [-1 considered TRUE by R]
                        warning("Invalid .internal.selfref detected and fixed by taking a (shallow) copy of the data.table so that := can add this new column by reference. At an earlier point, this data.table has been copied by R (or been created manually using structure() or similar). Avoid key<-, names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. Also, in R<=v3.0.2, list(DT1,DT2) copied the entire DT1 and DT2 (R's list() used to copy named objects); please upgrade to R>v3.0.2 if that is biting. If this message doesn't help, please report to datatable-help so the root cause can be fixed.")
                    if ((ok<1L) || (truelength(x) < ncol(x)+length(newnames))) {
                        n = max(ncol(x)+100, ncol(x)+2*length(newnames))
                        name = substitute(x)
                        if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO)    # ok here includes -1 (loaded from disk)
                            cat("Growing vector of column pointers from truelength ",truelength(x)," to ",n,". A shallow copy has been taken, see ?alloc.col. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could alloc.col() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n")
                            # Verbosity should not issue warnings, so cat rather than warning.
                            # TO DO: Add option 'datatable.pedantic' to turn on warnings like this.

                            # TO DO ... comments moved up from C ...
                            # Note that the NAMED(dt)>1 doesn't work because .Call
                            # always sets to 2 (see R-ints), it seems. Work around
                            # may be possible but not yet working. When the NAMED test works, we can drop allocwarn argument too
                            # because that's just passed in as FALSE from [<- where we know `*tmp*` isn't really NAMED=2.
                            # Note also that this growing will happen for missing columns assigned NULL, too. But so rare, we
                            # don't mind.
                        }
                        alloc.col(x, n, verbose=verbose)   # always assigns to calling scope; i.e. this scope
                        # Write the re-allocated x back to wherever the caller holds it. 'name' is the
                        # unevaluated expression that was passed as x (obtained via substitute(x) above).
                        if (is.name(name)) {
                            # x was passed as a plain symbol: rebind that symbol, searching enclosing frames.
                            assign(as.character(name),x,parent.frame(),inherits=TRUE)
                        } else if (is.call(name) && (name[[1L]] == "$" || name[[1L]] == "[[") && is.name(name[[2L]])) {
                            # x was passed as container$item or container[[item]], with container a symbol.
                            k = eval(name[[2L]], parent.frame(), parent.frame())
                            if (is.list(k)) {
                                # For `$` the item is the literal name; for `[[` the index expression is evaluated in the caller.
                                origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame())
                                if (is.character(j)) {
                                    # stop() pastes its arguments together and does no sprintf-style formatting,
                                    # so the length is passed as a separate argument rather than via a %d placeholder.
                                    if (length(j)!=1L) stop("L[[i]][,:=] syntax only valid when i is length 1, but it's length ", length(j))
                                    j = match(j, names(k))
                                    if (is.na(j)) stop("Item '",origj,"' not found in names of list")
                                }
                                # Replace the list element with x via the C routine.
                                .Call(Csetlistelt,k,as.integer(j), x)
                            } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) {
                                # Container is an environment that already has the item: rebind it there only.
                                assign(as.character(name[[3L]]), x, k, inherits=FALSE)
                            }
                        } # TO DO: else if env$<- or list$<-
                    }
                }
            }
        }
        
        if (length(ansvars)) {
            # Resolve every name in ansvars to a column position, either in x (xcols) or in i (icols).
            # xcolsAns / icolsAns hold the positions within ansvars that those columns fill.
            w = ansvals
            if (length(rightcols) && missing(by)) {
                # No 'by': blank out positions that are in rightcols (presumably x's join columns - TODO confirm)
                # so that those names are looked up in i further down.
                w[ w %in% rightcols ] = NA
            }
            # patch for #1615. Allow 'x.' syntax. Only useful during join op when x's join col needs to be used.
            # Note that I specifically have not implemented x[y, aa, on=c(aa="bb")] to refer to x's join column 
            # as well because x[i, col] == x[i][, col] will not be TRUE anymore..
            xjoincols = paste("x.",names(x),sep="")
            if ( any(xjoinvals <- ansvars %in% xjoincols))
                w[xjoinvals] = chmatch(ansvars[xjoinvals], xjoincols)
            if (!any(wna <- is.na(w))) {
                # Every name was found in x; nothing is taken from i.
                xcols = w
                xcolsAns = seq_along(ansvars)
                icols = icolsAns = integer()
            } else {
                if (!length(leftcols)) stop("column(s) not found: ", paste(ansvars[wna],collapse=", "))
                xcols = w[!wna]
                xcolsAns = which(!wna)
                # 1st attempt: i's names, with the leftcols entries renamed to the x names at rightcols.
                ivars = names(i)
                ivars[leftcols] = names(x)[rightcols]
                w2 = chmatch(ansvars[wna], ivars)
                if (any(w2na <- is.na(w2))) {
                    # 2nd attempt: "i." prefix on all names, except the leftcols entries which use i's own names.
                    ivars = paste("i.",ivars,sep="")
                    ivars[leftcols] = names(i)[leftcols]
                    w2[w2na] = chmatch(ansvars[wna][w2na], ivars)
                    if (any(w2na <- is.na(w2))) {
                        # 3rd attempt: "i." prefix on the leftcols entries as well.
                        ivars[leftcols] = paste("i.",ivars[leftcols],sep="")
                        w2[w2na] = chmatch(ansvars[wna][w2na], ivars)
                        # collapse= (not sep=) so that several missing names are joined into one
                        # comma-separated string, consistent with the error raised above.
                        if (any(w2na <- is.na(w2))) stop("column(s) not found: ", paste(ansvars[wna][w2na],collapse=", "))
                    }
                }
                icols = w2
                icolsAns = which(wna)
            }
        }
    }  # end of  if !missing(j)
    
    # Environment that j is later evaluated in; it inherits from the caller's frame.
    SDenv = new.env(parent=parent.frame())
    # taking care of warnings for posixlt type, #646
    # Masks strptime inside SDenv: always warns, then returns base::strptime()'s result as POSIXct.
    assign("strptime", function(x, ...) {
        warning("POSIXlt column type detected and converted to POSIXct. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
        parsed = base::strptime(x, ...)
        as.POSIXct(parsed)
    }, envir=SDenv)

    # hash=TRUE (the default) does seem better as expected using e.g. test 645.  TO DO experiment with 'size' argument
    if (missing(by) || (!byjoin && !length(byval))) {
        # No grouping: 'by' = missing | NULL | character() | "" | list()
        # Considered passing a one-group to dogroups but it doesn't do the recycling of i within group, that's done here
        if (length(ansvars)) {
            # TO DO: port more of this to C
            ans = vector("list", length(ansvars))
            if (length(i) && length(icols)) {
                if (allLen1 && allGrp1 && (is.na(nomatch) || !any(f__==0L))) {   # nomatch=0 should drop rows in i that have no match
                    for (s in seq_along(icols)) {
                        target = icolsAns[s]
                        source = icols[s]
                        ans[[target]] = i[[source]]
                        if (address(ans[[target]]) == address(i[[source]])) ans[[target]] = copy(ans[[target]])
                    }
                } else {
		    ii = rep.int(if(allGrp1) seq_len(nrow(i)) else indices__, len__)
                    for (s in seq_along(icols)) {
                        target = icolsAns[s]
                        source = icols[s]
                        ans[[target]] = .Call(CsubsetVector,i[[source]],ii)  # i.e. i[[source]][ii]
                    }
                }
            }
            if (is.null(irows)) {
                for (s in seq_along(xcols)) {  # xcols means non-join x columns, since join columns come from i
                    target = xcolsAns[s]
                    source = xcols[s]
                    ans[[target]] = x[[source]]
                    # Temp fix for #921 - skip COPY until after evaluating 'jval' (scroll down).
                    # Unless 'with=FALSE' - can not be expressions but just column names.
                    if (!with && address(ans[[target]]) == address(x[[source]])) 
                        ans[[target]] = copy(ans[[target]])
                    else ans[[target]] = ans[[target]]
                }
            } else {
                for (s in seq_along(xcols)) {
                    target = xcolsAns[s]
                    source = xcols[s]
                    ans[[target]] = .Call(CsubsetVector,x[[source]],irows)   # i.e. x[[source]][irows], but guaranteed new memory even for singleton logicals from R 3.1.0
                }
            }
            # the address==address is a temp fix for R >= 3.1.0. TO DO: allow shallow copy here, then copy only when user uses :=
            # or set* on the result by using NAMED/REFCNT on columns, with warning if they copy. Since then, even foo = DT$b
            # would cause the next set or := to copy that column (so the warning is needed). To tackle that, we could have our
            # own DT.NAMED attribute, perhaps.
            # Or keep the rule that [.data.table always returns new memory, and create view() or view= as well, maybe cleaner.
            
            setattr(ans, "names", ansvars)
            if (haskey(x)) {
                keylen = which.first(!key(x) %chin% ansvars)-1L
                if (is.na(keylen)) keylen = length(key(x))
                len = length(rightcols)
                if (keylen > len && !.Call(CisOrderedSubset, irows, nrow(x))) {
                    keylen = if (missing(on) || identical(names(on), head(key(x), len))) len else 0L # fix for #1268
                }
                if (keylen && ((is.data.table(i) && haskey(i)) || is.logical(i) || (.Call(CisOrderedSubset, irows, nrow(x)) && ((roll == FALSE) || length(irows) == 1L)))) # see #1010. don't set key when i has no key, but irows is ordered and roll != FALSE
                    setattr(ans,"sorted",head(key(x),keylen))
            }
            setattr(ans, "class", class(x)) # fix for #5296
            setattr(ans, "row.names", .set_row_names(nrow(ans)))
            
            if (!with || missing(j)) return(alloc.col(ans))

            SDenv$.SDall = ans
            SDenv$.SD = if (!length(othervars)) SDenv$.SDall else shallow(SDenv$.SDall, setdiff(ansvars, othervars))
            SDenv$.N = nrow(SDenv$.SD)

        } else {
            SDenv$.SDall = SDenv$.SD = null.data.table()   # no columns used by j so .SD can be empty. Only needs to exist so that we can rely on it being there when locking it below for example. If .SD were used by j, of course then xvars would be the columns and we wouldn't be in this leaf.
            SDenv$.N = if (is.null(irows)) nrow(x) else length(irows) * !identical(suppressWarnings(max(irows)), 0L)
            # Fix for #963.
            # When irows is integer(0), length(irows) = 0 will result in 0 (as expected).
            # Binary search can return all 0 irows when none of the input matches. Instead of doing all(irows==0L) (previous method), which has to allocate a logical vector the size of irows, we can make use of 'max'. If max is 0, we return 0. The condition where only some irows > 0 won't occur.
        }
        # Temp fix for #921. Allocate `.I` only if j-expression uses it.
        SDenv$.I = if (!missing(j) && use.I) seq_len(SDenv$.N) else 0L
        SDenv$.GRP = 1L
        setattr(SDenv$.SD,".data.table.locked",TRUE)   # used to stop := modifying .SD via j=f(.SD), bug#1727. The more common case of j=.SD[,subcol:=1] was already caught when jsub is inspected for :=.
        setattr(SDenv$.SDall,".data.table.locked",TRUE)
        lockBinding(".SD",SDenv)
        lockBinding(".SDall",SDenv)
        lockBinding(".N",SDenv)
        lockBinding(".I",SDenv)
        lockBinding(".GRP",SDenv)
        for (ii in ansvars) assign(ii, SDenv$.SDall[[ii]], SDenv)
        # Since .SD is inside SDenv, alongside its columns as variables, R finds .SD symbol more quickly, if used.
        # There isn't a copy of the columns here, the xvar symbols point to the SD columns (copy-on-write).

        # Temp fix for #921 - check address and copy *after* evaluating 'jval'
        jval = eval(jsub, SDenv, parent.frame())
        # copy 'jval' when required
        # More speedup - only check + copy if irows is NULL
        if (is.null(irows)) {
            if (is.atomic(jval)) {
                jcpy = address(jval) %in% sapply(SDenv$.SD, address) # %chin% errors when RHS is list()
                if (jcpy) jval = copy(jval)
            } else if (address(jval) == address(SDenv$.SD)) {
                jval = copy(jval)
            } else if ( length(jcpy <- which(sapply(jval, address) %in% sapply(SDenv, address))) ) {
                for (jidx in jcpy) jval[[jidx]] = copy(jval[[jidx]])
            } else if (is.call(jsub) && jsub[[1L]] == "get" && is.list(jval)) {
                jval = copy(jval) # fix for #1212
            }
        } else {
            if (is.data.table(jval)) {
                setattr(jval, '.data.table.locked', NULL) # fix for #1341
                if (!truelength(jval)) alloc.col(jval)
            }
        }

        if (!is.null(lhs)) {   # *** TO DO ***: use set() here now that it can add new column(s) and remove newnames and alloc logic above
            if (verbose) cat("Assigning to ",if (is.null(irows)) "all " else paste(length(irows),"row subset of "), nrow(x)," rows\n",sep="")
            .Call(Cassign,x,irows,cols,newnames,jval,verbose)
            return(suppPrint(x))
        }        
        if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missing(by)) {
            # is.call: selecting from a list column should return list
            # is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table

            # Fix for #813 and #758. Ex: DT[c(FALSE, FALSE), list(integer(0), y)] 
            # where DT = data.table(x=1:2, y=3:4) should return an empty data.table!!
            if (!is.null(irows) && (identical(irows, integer(0)) || all(irows %in% 0L))) ## TODO: any way to not check all 'irows' values?
                if (is.atomic(jval)) jval = jval[0L] else jval = lapply(jval, `[`, 0L)
            if (is.atomic(jval)) {
                setattr(jval,"names",NULL)
                jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead?
            } else {
                if (is.null(jvnames)) jvnames=names(jval)
                # avoid copy if all vectors are already of same lengths, use setDT
                lenjval = vapply(jval, length, 0L)
                if (any(lenjval != lenjval[1L])) {
                    jval = as.data.table.list(jval)   # does the vector expansion to create equal length vectors
                    jvnames = jvnames[lenjval != 0L]  # fix for #1477
                } else setDT(jval)
            }
            if (is.null(jvnames)) jvnames = character(length(jval)-length(bynames))
            ww = which(jvnames=="")
            if (any(ww)) jvnames[ww] = paste("V",ww,sep="")
            setnames(jval, jvnames)
        }

        # fix for bug #5114 from GSee's - .data.table.locked=TRUE.   # TO DO: more efficient way e.g. address==address (identical will do that but then proceed to deep compare if !=, whereas we want just to stop?)
        # Commented as it's taken care of above, along with #921 fix. Kept here for the bug fix info and TO DO.
        # if (identical(jval, SDenv$.SD)) return(copy(jval))
        
        if (is.data.table(jval)) {
            setattr(jval, 'class', class(x)) # fix for #5296
            if (haskey(x) && all(key(x) %chin% names(jval)) && suppressWarnings(is.sorted(jval, by=key(x))))  # TO DO: perhaps this usage of is.sorted should be allowed internally then (tidy up and make efficient)
                setattr(jval, 'sorted', key(x))
        }
        return(jval)
    }

    ###########################################################################
    # Grouping ...
    ###########################################################################
    
    o__ = integer()
    if (".N" %chin% ansvars) stop("The column '.N' can't be grouped because it conflicts with the special .N variable. Try setnames(DT,'.N','N') first.")
    if (".I" %chin% ansvars) stop("The column '.I' can't be grouped because it conflicts with the special .I variable. Try setnames(DT,'.I','I') first.")
    SDenv$.iSD = NULL  # null.data.table()
    SDenv$.xSD = NULL  # null.data.table() - introducing for FR #2693 and Gabor's post on fixing for FAQ 2.8

    assign("print", function(x,...){base::print(x,...);NULL}, SDenv)
    # Now ggplot2 returns data from print, we need a way to throw it away otherwise j accumulates the result

    SDenv$.SDall = SDenv$.SD = null.data.table()  # e.g. test 607. Grouping still proceeds even though no .SD e.g. grouping key only tables, or where j consists of .N only
    SDenv$.N = as.integer(0)     # not 0L for the reson on next line :
    SDenv$.GRP = as.integer(1)   # oddly using 1L doesn't work reliably here! Possible R bug? TO DO: create reproducible example and report. To reproduce change to 1L and run test.data.table, test 780 fails. The assign seems ineffective and a previous value for .GRP from a previous test is retained, despite just creating a new SDenv.

    if (byjoin) {
        # The groupings come instead from each row of the i data.table.
        # Much faster for a few known groups vs a 'by' for all followed by a subset
        # Sanity check: grouping by each row of i requires i to be a data.table.
        if (!is.data.table(i)) stop("logical error. i is not data.table, but mult='all' and 'by'=.EACHI")
        byval = i
        bynames = if (missing(on)) head(key(x),length(leftcols)) else names(on)
        allbyvars = NULL
        bysameorder = haskey(i) || (is.sorted(f__) && ((roll == FALSE) || length(f__) == 1L)) # Fix for #1010
        ##  'av' correct here ??  *** TO DO ***
        xjisvars = intersect(av, names(x)[rightcols])  # no "x." for xvars.
        # if 'get' is in 'av' use all cols in 'i', fix for bug #5443
        # added 'mget' - fix for #994
        jisvars = if (any(c("get", "mget") %chin% av)) names(i) else intersect(gsub("^i[.]","", setdiff(av, xjisvars)), names(i))
        # JIS (non join cols) but includes join columns too (as there are named in i)
        if (length(jisvars)) {
            tt = min(nrow(i),1L)
            SDenv$.iSD = i[tt,jisvars,with=FALSE]
            for (ii in jisvars) {
                assign(ii, SDenv$.iSD[[ii]], SDenv)
                assign(paste("i.",ii,sep=""), SDenv$.iSD[[ii]], SDenv)
            }
        }

    } else {
        # Find the groups, using 'byval' ...
        if (missing(by)) stop("Internal error, by is missing")
        if (verbose) {last.started.at=proc.time()[3];cat("Finding groups (bysameorder=",bysameorder,") ... ",sep="");flush.console()}
        if (length(byval) && length(byval[[1]])) {
            if (!bysameorder) {
                o__ = forderv(byval, sort=FALSE, retGrp=TRUE)   # returns integer() (not NULL) if already ordered, to save 1:xnrow for efficiency
                bysameorder = orderedirows && !length(o__)
                f__ = attr(o__, "starts")
                len__ = uniqlengths(f__, xnrow)
                if (!bysameorder) {    # TO DO: lower this into forder.c
                    firstofeachgroup = o__[f__]    
                    if (length(origorder <- forderv(firstofeachgroup))) {
                        f__ = f__[origorder]
                        len__ = len__[origorder]
                    }
                }
                if (!orderedirows && !length(o__)) o__ = 1:xnrow  # temp fix.  TO DO: revist orderedirows
            } else {
                f__ = uniqlist(byval)
                len__ = uniqlengths(f__, xnrow)
                # TO DO: combine uniqlist and uniquelengths into one call.  Or, just set len__ to NULL when dogroups infers that.
            }
        } else {
            f__=NULL
            len__=0L
            bysameorder=TRUE   # for test 724
        }
        # Report how long group finding took. flush.console must be called (with parentheses) to have any
        # effect; a bare reference to the function does nothing.
        if (verbose) {cat("done in ",round(proc.time()[3]-last.started.at,3),"secs. bysameorder=",bysameorder," and o__ is length ",length(o__),"\n",sep="");flush.console()}
        # TO DO: allow secondary keys to be stored, then we see if our by matches one, if so use it, and no need to sort again. TO DO: document multiple keys.
    }
    alloc = if (length(len__)) seq_len(max(len__)) else 0L
    SDenv$.I = alloc
    if (length(xcols)) {
        #  TODO add: if (length(alloc)==nrow(x)) stop("There is no need to deep copy x in this case")
        SDenv$.SDall = .Call(CsubsetDT,x,alloc,xcols)    # must be deep copy when largest group is a subset
        SDenv$.SD = if (!length(othervars)) SDenv$.SDall else shallow(SDenv$.SDall, setdiff(ansvars, othervars))
    }
    if (nrow(SDenv$.SDall)==0L) {
        setattr(SDenv$.SDall,"row.names",c(NA_integer_,0L))
        setattr(SDenv$.SD,"row.names",c(NA_integer_,0L))
    }
    # .set_row_names() basically other than not integer() for 0 length, otherwise dogroups has no [1] to modify to -.N
    setattr(SDenv$.SD,".data.table.locked",TRUE)   # used to stop := modifying .SD via j=f(.SD), bug#1727. The more common case of j=.SD[,subcol:=1] was already caught when jsub is inspected for :=.
    setattr(SDenv$.SDall,".data.table.locked",TRUE)
    lockBinding(".SD",SDenv)
    lockBinding(".SDall",SDenv)
    lockBinding(".N",SDenv)
    lockBinding(".GRP",SDenv)
    lockBinding(".I",SDenv)
    lockBinding(".iSD",SDenv)
    
    GForce = FALSE
    if ( getOption("datatable.optimize")>=1 && (is.call(jsub) || (is.name(jsub) && as.character(jsub) %chin% c(".SD",".N"))) ) {  # Ability to turn off if problems or to benchmark the benefit
        # Optimization to reduce overhead of calling lapply over and over for each group
        ansvarsnew = setdiff(ansvars, othervars)
        oldjsub = jsub
        funi = 1L # Fix for #985
        # convereted the lapply(.SD, ...) to a function and used below, easier to implement FR #2722 then.
        .massageSD <- function(jsub) {
            txt = as.list(jsub)[-1L]
            if (length(names(txt))>1L) .Call(Csetcharvec, names(txt), 2L, "")  # fixes bug #4839
            fun = txt[[2L]]
            if (is.call(fun) && fun[[1L]]=="function") {
                # Fix for #2381: added SDenv$.SD to 'eval' to take care of cases like: lapply(.SD, function(x) weighted.mean(x, bla)) where "bla" is a column in DT
                # http://stackoverflow.com/questions/13441868/data-table-and-stratified-means
                # adding this does not compromise in speed (that is, not any lesser than without SDenv$.SD)
                # replaced SDenv$.SD to SDenv to deal with Bug #5007 reported by Ricardo (Nice catch!)
                thisfun = paste("..FUN", funi, sep="") # Fix for #985
                assign(thisfun,eval(fun, SDenv, SDenv), SDenv)  # to avoid creating function() for each column of .SD
                lockBinding(thisfun,SDenv)
                txt[[1L]] = as.name(thisfun)
            } else {
                if (is.character(fun)) fun = as.name(fun)
                txt[[1L]] = fun
            }
            ans = vector("list",length(ansvarsnew)+1L)
            ans[[1L]] = as.name("list")
            for (ii in seq_along(ansvarsnew)) {
                txt[[2L]] = as.name(ansvarsnew[ii])
                ans[[ii+1L]] = as.call(txt)
            }
            jsub = as.call(ans)  # important no names here
            jvnames = ansvarsnew      # but here instead
            list(jsub, jvnames)
            # It may seem inefficient to constuct a potentially long expression. But, consider calling
            # lapply 100000 times. The C code inside lapply does the LCONS stuff anyway, every time it
            # is called, involving small memory allocations.
            # The R level lapply calls as.list which needs a shallow copy.
            # lapply also does a setAttib of names (duplicating the same names over and over again
            # for each group) which is terrible for our needs. We replace all that with a
            # (ok, long, but not huge in memory terms) list() which is primitive (so avoids symbol
            # lookup), and the eval() inside dogroups hardly has to do anything. All this results in
            # overhead minimised. We don't need to worry about the env passed to the eval in a possible
            # lapply replacement, or how to pass ... efficiently to it.
            # Plus we optimize lapply first, so that mean() can be optimized too as well, next.
        }
        if (is.name(jsub)) {
            if (jsub == ".SD") {
                jsub = as.call(c(quote(list), lapply(ansvarsnew, as.name)))
                jvnames = ansvarsnew
            }
        } else {
            if ( length(jsub) == 3L && (jsub[[1L]] == "[" || jsub[[1L]] == "head" || jsub[[1L]] == "tail") && jsub[[2L]] == ".SD" && (is.numeric(jsub[[3L]]) || jsub[[3L]] == ".N") ) {
                # optimise .SD[1] or .SD[2L]. Not sure how to test .SD[a] as to whether a is numeric/integer or a data.table, yet.
                jsub = as.call(c(quote(list), lapply(ansvarsnew, function(x) { jsub[[2L]] = as.name(x); jsub })))
                jvnames = ansvarsnew
            } else if (jsub[[1L]]=="lapply" && jsub[[2L]]==".SD" && length(xcols)) {
                deparse_ans = .massageSD(jsub)
                jsub = deparse_ans[[1L]]
                jvnames = deparse_ans[[2L]]
            } else if (jsub[[1L]] == "c" && length(jsub) > 1L) {
                # TODO, TO DO: raise the checks for 'jvnames' earlier (where jvnames is set by checking 'jsub') and set 'jvnames' already.
                # FR #2722 is just about optimisation of j=c(.N, lapply(.SD, .)) that is taken care of here.
                # FR #735 tries to optimise j-expressions of the form c(...) as long as ... contains
                # 1) lapply(.SD, ...), 2) simply .SD or .SD[..], 3) .N, 4) list(...) and 5) functions that normally return a single value*
                # On 5)* the IMPORTANT point to note is that things that are not wrapped within "list(...)" should *always* 
                # return length 1 output for us to optimise. Else, there's no equivalent to optimising c(...) to list(...) AFAICT.
                # One issue could be that these functions (e.g., mean) can be "re-defined" by the OP to produce a length > 1 output
                # Of course this is worrying too much though. If the issue comes up, we'll just remove the relevant optimisations.
                # For now, we optimise all functions mentioned in 'optfuns' below.
                optfuns = c("max", "min", "mean", "length", "sum", "median", "sd", "var")
                is_valid = TRUE
                any_SD = FALSE
                jsubl = as.list.default(jsub)
                oldjvnames = jvnames
                jvnames = NULL           # TODO: not let jvnames grow, maybe use (number of lapply(.SD, .))*lenght(ansvarsnew) + other jvars ?? not straightforward.
                # Fix for #744. Don't use 'i' in for-loops. It masks the 'i' from the input!!
                for (i_ in 2:length(jsubl)) {
                    this = jsub[[i_]]
                    if (is.name(this)) {
                        if (this == ".SD") { # optimise '.SD' alone
                            any_SD = TRUE
                            jsubl[[i_]] = lapply(ansvarsnew, as.name)
                            jvnames = c(jvnames, ansvarsnew)
                        } else if (this == ".N") {
                            # don't optimise .I in c(.SD, .I), it's length can be > 1 
                            # only c(.SD, list(.I)) should be optimised!! .N is always length 1.
                            jvnames = c(jvnames, gsub("^[.]([N])$", "\\1", this))   
                        } else {
                            # jvnames = c(jvnames, if (is.null(names(jsubl))) "" else names(jsubl)[i_])
                            is_valid=FALSE
                            break
                        }
                    } else if (is.call(this)) {
                        if (this[[1L]] == "lapply" && this[[2L]] == ".SD" && length(xcols)) {
                            any_SD = TRUE
                            deparse_ans = .massageSD(this)
                            funi = funi + 1L # Fix for #985
                            jsubl[[i_]] = as.list(deparse_ans[[1L]][-1L]) # just keep the '.' from list(.)
                            jvnames = c(jvnames, deparse_ans[[2L]])
                        } else if (this[[1]] == "list") {
                            # also handle c(lapply(.SD, sum), list()) - silly, yes, but can happen
                            if (length(this) > 1L) {
                                jl__ = as.list(jsubl[[i_]])[-1L] # just keep the '.' from list(.)
                                jn__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__)
                                idx  = unlist(lapply(jl__, function(x) is.name(x) && x == ".I"))
                                if (any(idx)) jn__[idx & (jn__ == "")] = "I"
                                jvnames = c(jvnames, jn__)
                                jsubl[[i_]] = jl__
                            }
                        } else if (is.call(this) && length(this) > 1L && as.character(this[[1L]]) %in% optfuns) {
                            jvnames = c(jvnames, if (is.null(names(jsubl))) "" else names(jsubl)[i_])
                        } else if ( length(this) == 3L && (this[[1L]] == "[" || this[[1L]] == "head") && 
                                        this[[2L]] == ".SD" && (is.numeric(this[[3L]]) || this[[3L]] == ".N") ) {
                            # optimise .SD[1] or .SD[2L]. Not sure how to test .SD[a] as to whether a is numeric/integer or a data.table, yet.
                            any_SD = TRUE
                            jsubl[[i_]] = lapply(ansvarsnew, function(x) { this[[2L]] = as.name(x); this })
                            jvnames = c(jvnames, ansvarsnew)
                        } else if (any(all.vars(this) == ".SD")) {
                            # TODO, TO DO: revisit complex cases (as illustrated below)
                            # complex cases like DT[, c(.SD[x>1], .SD[J(.)], c(.SD), a + .SD, lapply(.SD, sum)), by=grp]
                            # hard to optimise such cases (+ difficulty in counting exact columns and therefore names). revert back to no optimisation.
                            is_valid=FALSE
                            break
                        } else { # just to be sure that any other case (I've overlooked) runs smoothly, without optimisation
                            # TO DO, TODO: maybe a message/warning here so that we can catch the overlooked cases, if any?
                            is_valid=FALSE
                            break
                        }
                    } else {
                        is_valid = FALSE
                        break
                    }
                }
                if (!is_valid || !any_SD) { # restore if c(...) doesn't contain lapply(.SD, ..) or if it's just invalid
                    jvnames = oldjvnames           # reset jvnames
                    jsub = oldjsub                 # reset jsub
                    jsubl = as.list.default(jsubl) # reset jsubl
                } else {
                    setattr(jsubl, 'names', NULL)
                    jsub = as.call(unlist(jsubl, use.names=FALSE))
                    jsub[[1L]] = quote(list)
                }
            }
        }
        if (verbose) {
            if (!identical(oldjsub, jsub))
                cat("lapply optimization changed j from '",deparse(oldjsub),"' to '",deparse(jsub,width.cutoff=200L),"'\n",sep="")
            else
                cat("lapply optimization is on, j unchanged as '",deparse(jsub,width.cutoff=200L),"'\n",sep="")
        }
        dotN <- function(x) if (is.name(x) && x == ".N") TRUE else FALSE # For #5760
        # FR #971, GForce kicks in on all subsets, no joins yet. Although joins could work with 
        # nomatch=0L even now.. but not switching it on yet, will deal it separately.
        if (getOption("datatable.optimize")>=2 && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) {
            if (!length(ansvars) && !use.I) {
                GForce = FALSE
                if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && jsub[[1L]] == "list" && jsub[[2L]] == ".N") ) {
                    GForce = TRUE
                    if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200L),"'\n",sep="")
                }
            } else {
                # Apply GForce
                gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "tail", "[") # added .N for #5760
                .ok <- function(q) {
                    if (dotN(q)) return(TRUE) # For #5760
                    cond = is.call(q) && as.character(q[[1L]]) %chin% gfuns && !is.call(q[[2L]])
                    ans  = cond && (length(q)==2 || identical("na",substring(names(q)[3L],1,2)))
                    if (identical(ans, TRUE)) return(ans)
                    ans = cond && length(q)==3 && ( as.character(q[[1]]) %chin% c("head", "tail") && 
                                                         (identical(q[[3]], 1) || identical(q[[3]], 1L)) || 
                                                    as.character(q[[1]]) %chin% "[" && is.numeric(q[[3]]) && 
                                                        length(q[[3]])==1 && q[[3]]>0 )
                    if (is.na(ans)) ans=FALSE
                    ans
                }
                if (jsub[[1L]]=="list") {
                    GForce = TRUE
                    for (ii in seq_along(jsub)[-1L]) if (!.ok(jsub[[ii]])) GForce = FALSE
                } else GForce = .ok(jsub)
                if (GForce) {
                    if (jsub[[1L]]=="list")
                        for (ii in seq_along(jsub)[-1L]) { 
                            if (dotN(jsub[[ii]])) next; # For #5760
                            jsub[[ii]][[1L]] = as.name(paste("g", jsub[[ii]][[1L]], sep=""))
                            if (length(jsub[[ii]])==3) jsub[[ii]][[3]] = eval(jsub[[ii]][[3]], parent.frame())  # tests 1187.2 & 1187.4
                        }
                    else {
                        jsub[[1L]] = as.name(paste("g", jsub[[1L]], sep=""))
                        if (length(jsub)==3) jsub[[3]] = eval(jsub[[3]], parent.frame())   # tests 1187.3 & 1187.5
                    }
                    if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200),"'\n",sep="")
                } else if (verbose) cat("GForce is on, left j unchanged\n");
            }
        }
        if (!GForce && !is.name(jsub)) {
            # Still do the old speedup for mean, for now
            nomeanopt=FALSE  # to be set by .optmean() using <<- inside it
            oldjsub = jsub
            if (jsub[[1L]]=="list") {
                for (ii in seq_along(jsub)[-1L]) {
                    if (dotN(jsub[[ii]])) next; # For #5760
                    if (is.call(jsub[[ii]]) && jsub[[ii]][[1L]]=="mean")
                        jsub[[ii]] = .optmean(jsub[[ii]])
                }
            } else if (jsub[[1L]]=="mean") {
                jsub = .optmean(jsub)
            }
            if (nomeanopt) {
                warning("Unable to optimize call to mean() and could be very slow. You must name 'na.rm' like that otherwise if you do mean(x,TRUE) the TRUE is taken to mean 'trim' which is the 2nd argument of mean. 'trim' is not yet optimized.",immediate.=TRUE)
            }
            if (verbose) {
                if (!identical(oldjsub, jsub))
                    cat("Old mean optimization changed j from '",deparse(oldjsub),"' to '",deparse(jsub,width.cutoff=200),"'\n",sep="")
                else
                    cat("Old mean optimization is on, left j unchanged.\n")
            }
            assign("Cfastmean", Cfastmean, SDenv)
            assign("mean", base::mean.default, SDenv)
            # Old comments still here for now ...
            # Here in case nomeanopt=TRUE or some calls to mean weren't detected somehow. Better but still slow.
            # Maybe change to :
            #     assign("mean", fastmean, SDenv)  # neater than the hard work above, but slower
            # when fastmean can do trim.
        }
    } else if (verbose) {
        if (getOption("datatable.optimize")<1) cat("All optimizations are turned off\n")
        else cat("Optimization is on but left j unchanged (single plain symbol): '",deparse(jsub,width.cutoff=200),"'\n",sep="")
    }
    if (byjoin) {
        groups = i
        grpcols = leftcols # 'leftcols' are the columns in i involved in the join (either head of key(i) or head along i)
        jiscols = chmatch(jisvars,names(i))  # integer() if there are no jisvars (usually there aren't, advanced feature)
        xjiscols = chmatch(xjisvars, names(x))
        SDenv$.xSD = x[min(nrow(i), 1L), xjisvars, with=FALSE]
        if (!missing(on)) o__ = xo else o__ = integer(0)
    } else {
        groups = byval
        grpcols = seq_along(byval)
        jiscols = NULL   # NULL rather than integer() is used in C to know when using by
        xjiscols = NULL
    }
    lockBinding(".xSD", SDenv)
    grporder = o__
    # for #971, added !GForce. if (GForce) we do it much more (memory) efficiently than subset of order vector below.
    if (length(irows) && !isTRUE(irows) && !GForce) {
        # fix for bug #2758. TO DO: provide a better error message
        if (length(irows) > 1 && length(zo__ <- which(irows == 0)) > 0) stop("i[", zo__[1], "] is 0. While grouping, i=0 is allowed when it's the only value. When length(i) > 1, all i should be > 0.")
        if (length(o__) && length(irows)!=length(o__)) stop("Internal error: length(irows)!=length(o__)")
        o__ = if (length(o__)) irows[o__]  # better do this once up front (even though another alloc) than deep repeated branch in dogroups.c
              else irows
    } # else grporder is left bound to same o__ memory (no cost of copy)
    if (is.null(lhs)) cols=NULL
    if (!length(f__)) {
        # for consistency of empty case in test 184
        f__=len__=0L
    }
    if (GForce) {
        thisEnv = new.env()  # not parent=parent.frame() so that gsum is found
        for (ii in ansvars) assign(ii, x[[ii]], thisEnv)
        assign(".N", len__, thisEnv) # For #5760
        #fix for #1683
        if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
        gstart(o__, f__, len__, irows) # irows needed for #971.
        ans = eval(jsub, thisEnv)
        if (is.atomic(ans)) ans=list(ans)  # won't copy named argument in new version of R, good
        gend()
        gi = if (length(o__)) o__[f__] else f__
        g = lapply(grpcols, function(i) groups[[i]][gi])
        ans = c(g, ans)
    } else {
        if (verbose) {last.started.at=proc.time()[3];cat("Starting dogroups ... ");flush.console()}
        ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
        if (verbose) {cat("done dogroups in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()}
    }
    # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to
    # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x
    # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
    # TO DO: setkey could mark the key whether it is unique or not.

    if (!is.null(lhs)) {
        if (any(names(x)[cols] %chin% key(x)))
            setkey(x,NULL)
        # fixes #1479. Take care of secondary indices, TODO: cleaner way of doing this
        attrs = attr(x, 'index')
        skeys = names(attributes(attrs))
        if (!is.null(skeys)) {
            hits  = unlist(lapply(paste("__", names(x)[cols], sep=""), function(x) grep(x, skeys)))
            hits  = skeys[unique(hits)]
            for (i in seq_along(hits)) setattr(attrs, hits[i], NULL) # does by reference
        }
        if (!missing(keyby)) {
            cnames = as.character(bysubl)[-1]
            if (all(cnames %chin% names(x)))
                setkeyv(x,cnames)  # TO DO: setkey before grouping to get memcpy benefit.
            else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n")
        }
        return(suppPrint(x))
    }
    if (is.null(ans)) {
        ans = as.data.table.list(lapply(groups,"[",0L))  # side-effects only such as test 168
        setnames(ans,seq_along(bynames),bynames)   # TO DO: why doesn't groups have bynames in the first place?
        return(ans)
    }
    setattr(ans,"row.names",.set_row_names(length(ans[[1L]])))
    setattr(ans,"class",class(x)) # fix for #5296
    if (is.null(names(ans))) {
        # Efficiency gain of dropping names has been successful. Ordinarily this will run.
        if (is.null(jvnames)) jvnames = character(length(ans)-length(bynames))
        if (length(bynames)+length(jvnames)!=length(ans))
            stop("Internal error: jvnames is length ",length(jvnames), " but ans is ",length(ans)," and bynames is ", length(bynames))
        ww = which(jvnames=="")
        if (any(ww)) jvnames[ww] = paste("V",ww,sep="")
        setattr(ans, "names", c(bynames, jvnames))
    } else {
        setnames(ans,seq_along(bynames),bynames)   # TO DO: reinvestigate bynames flowing from dogroups here and simplify
    }
    if (!missing(keyby)) {
        setkeyv(ans,names(ans)[seq_along(byval)])
        # but if 'bykey' and 'bysameorder' then the setattr in branch above will run instead for
        # speed (because !missing(by) when bykey, too)
    } else if (haskey(x) && bysameorder) {
        setattr(ans,"sorted",names(ans)[seq_along(grpcols)])
    }
    alloc.col(ans)   # TO DO: overallocate in dogroups in the first place and remove this line
}

.optmean <- function(expr) {
    # Rewrites a captured call to mean() into a direct .External(Cfastmean, ...) call.
    # Used by the j optimization inside [.data.table; kept outside it for a small speed advantage.
    #   expr : an unevaluated call whose first element is the function being called.
    # Returns the rewritten call, or 'expr' untouched when the call shape is not handled;
    # in the latter case 'nomeanopt' is set to TRUE in the caller's frame.
    n_parts = length(expr)
    if (n_parts == 2L) {
        # Only the data argument was supplied, so trim=0 and na.rm=FALSE apply.
        # (call(".Internal",expr) would be slightly faster than .External, but R blocks .Internal
        #  in coerce.c from approx Sep 2012.)
        return(call(".External", quote(Cfastmean), expr[[2L]], FALSE))
    }
    if (n_parts == 3L) {
        # Exactly one extra argument: accept it only when its name starts with "na" (i.e. na.rm).
        arg_prefix = substring(names(expr)[3L], 1, 2)
        if (identical("na", arg_prefix)) {
            return(call(".External", quote(Cfastmean), expr[[2L]], expr[[3L]]))  # .External is faster than .Call
        }
    }
    # Anything else (e.g. trim) is not optimized; flag it for the caller and hand the call back.
    assign("nomeanopt", TRUE, parent.frame())
    expr
}

#  [[.data.frame is now dispatched due to inheritance.
#  The code below tried to avoid that but made things
#  very slow (462 times faster down to 1 in the timings test).
#  TO DO. Reintroduce below but dispatch straight to
#  .C("do_subset2") or better. Tests 604-608 test
#  that this doesn't regress.

#"[[.data.table" <- function(x,...) {
#    if (!cedta()) return(`[[.data.frame`(x,...))
#    .subset2(x,...)
#    #class(x)=NULL  # awful, copy
#    #x[[...]]
#}

#"[[<-.data.table" <- function(x,i,j,value) {
#    if (!cedta()) return(`[[<-.data.frame`(x,i,j,value))
#    if (!missing(j)) stop("[[i,j]] assignment not available in data.table, put assignment(s) in [i,{...}] instead, more powerful")
#    cl = oldClass(x)  # [[<-.data.frame uses oldClass rather than class, don't know why but we'll follow suit
#    class(x) = NULL
#    x[[i]] = value
#    class(x) = cl
#    x
#}


as.matrix.data.table <- function(x,...)
{
    # Convert a data.table to a matrix with NULL row names and column names taken from names(x).
    # Columns that are themselves multi-column objects contribute one matrix column per
    # sub-column, labelled "<name>.<subname>" (or "<name>.<position>" when unnamed).
    # The result is a list-matrix if any column is non-atomic, left as is if every column is
    # logical, character if any column is non-numeric, and otherwise whatever unlist() yields.
    dims <- dim(x)
    col_names <- names(x)
    if (any(dims == 0L)) {
        # No rows or no columns: an NA-filled array of the same shape.
        return(array(NA, dim = dims, dimnames = list(NULL, col_names)))
    }
    n_rows <- dims[1L]
    n_cols <- dims[2L]
    labels <- as.list(col_names)
    cols <- x
    class(cols) <- NULL
    has_non_numeric <- FALSE
    has_non_atomic <- FALSE
    only_logical <- TRUE

    # Pass 1: materialise/flatten special columns and classify the column types.
    for (k in seq_len(n_cols)) {
        if (is.ff(cols[[k]])) cols[[k]] <- cols[[k]][]   # bring the ff into memory, since the matrix is built in memory
        col <- cols[[k]]
        col_dim <- dim(col)
        if (length(col_dim) == 2L && col_dim[2L] > 1L) {
            if (inherits(col, "data.table")) {
                col <- cols[[k]] <- as.matrix(cols[[k]])
            }
            sub_names <- dimnames(col)[[2]]
            suffix <- if (length(sub_names) > 0L) sub_names else seq_len(col_dim[2L])
            labels[[k]] <- paste(labels[[k]], suffix, sep = ".")
        }
        if (!is.logical(col)) {
            only_logical <- FALSE
        }
        # Factors, non numeric/complex/logical types and date-time classes force a character matrix.
        if (length(levels(col)) > 0 ||
            (!is.numeric(col) && !is.complex(col) && !is.logical(col)) ||
            (!is.null(cls <- attr(col, "class")) && any(cls %chin% c("Date", "POSIXct", "POSIXlt")))) {
            has_non_numeric <- TRUE
        }
        if (!is.atomic(col)) {
            has_non_atomic <- TRUE
        }
    }

    # Pass 2: coerce columns to a common representation where needed.
    if (has_non_atomic) {
        # List-matrix: every non-recursive column becomes a list of its elements.
        for (k in seq_len(n_cols)) {
            col <- cols[[k]]
            if (!is.recursive(col)) cols[[k]] <- as.list(as.vector(col))
        }
    } else if (!only_logical && has_non_numeric) {
        # Character matrix: convert every non-character column, keeping NA where it was.
        for (k in seq_len(n_cols)) {
            if (!is.character(cols[[k]])) {
                col <- cols[[k]]
                was_na <- is.na(col)
                col <- if (length(levels(col))) as.vector(col) else format(col)
                is.na(col) <- was_na
                cols[[k]] <- col
            }
        }
    }

    cells <- unlist(cols, recursive = FALSE, use.names = FALSE)
    dim(cells) <- c(n_rows, length(cells)/n_rows)
    dimnames(cells) <- list(NULL, unlist(labels, use.names = FALSE))
    cells
}

# bug #2375. fixed. same as head.data.frame and tail.data.frame to deal with negative indices
head.data.table <- function(x, n=6, ...) {
    # First rows of a data.table.
    #   n >= 0 : the first min(n, nrow(x)) rows.
    #   n <  0 : all rows except the last -n (possibly zero rows).
    # Falls through to the next method when cedta() is FALSE.
    if (!cedta()) return(NextMethod())
    stopifnot(length(n) == 1L)
    nr = nrow(x)
    if (n < 0L) {
        keep = max(nr + n, 0L)
    } else {
        keep = min(n, nr)
    }
    # The row index is deliberately passed to [ as the plain symbol 'i'.
    i = seq_len(keep)
    x[i, , ]
}
tail.data.table <- function(x, n=6, ...) {
    # Last rows of a data.table.
    #   n >= 0 : the last min(n, nrow(x)) rows.
    #   n <  0 : all rows except the first -n (possibly zero rows).
    # Falls through to the next method when cedta() is FALSE.
    if (!cedta()) return(NextMethod())
    stopifnot(length(n) == 1L)
    nr = nrow(x)
    if (n < 0L) {
        n <- max(nr + n, 0L)
    } else {
        n <- min(n, nr)
    }
    # The row index is deliberately passed to [ as the plain symbol 'i'.
    i = seq.int(to=nr, length.out=n)
    x[i]
}

"[<-.data.table" <- function (x, i, j, value) {
    # Replacement method for DT[i, j] <- value.
    #   x     : the data.table being assigned into.
    #   i     : optional row selector; a matrix i is delegated to `[<-.data.frame`.
    #   j     : optional atomic vector of column names or positions; defaults to all columns.
    #   value : the replacement value(s), handed to the C-level assign.
    # Returns x invisibly.
    # [<- is provided for consistency, but := is preferred as it allows by group and by reference to subsets of columns
    # with no copy of the (very large, say 10GB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change.
    if (!cedta()) {
        # Presumably the caller is not data.table-aware here: behave like a data.frame.
        # nargs()<4 means j was not supplied, so the data.frame method is called without it.
        x = if (nargs()<4) `[<-.data.frame`(x, i, value=value)
            else `[<-.data.frame`(x, i, j, value)
        return(alloc.col(x))    # over-allocate (again).   Avoid all this by using :=.
    }
    # TO DO: warning("Please use DT[i,j:=value] syntax instead of DT[i,j]<-value, for efficiency. See ?':='")
    if (!missing(i)) {
        # Evaluate the i expression with x's columns visible, falling back to the caller's frame.
        isub=substitute(i)
        i = eval(.massagei(isub), x, parent.frame())
        if (is.matrix(i)) {
            if (!missing(j)) stop("When i is matrix in DT[i]<-value syntax, it doesn't make sense to provide j")
            x = `[<-.data.frame`(x, i, value=value)
            return(alloc.col(x))
        }
        # Resolve i through the regular [.data.table subset with which=TRUE (presumably yielding row numbers).
        i = x[i, which=TRUE]
        # Tried adding ... after value above, and passing ... in here (e.g. for mult="first") but R CMD check
        # then gives "The argument of a replacement function which corresponds to the right hand side must be
        # named 'value'".  So, users have to use := for that.
    } else i = NULL          # meaning (to C code) all rows, without allocating 1L:nrow(x) vector
    if (missing(j)) j=names(x)
    if (!is.atomic(j)) stop("j must be atomic vector, see ?is.atomic")
    if (any(is.na(j))) stop("NA in j")
    if (is.character(j)) {
        # Names not yet in x become new columns; cols indexes into existing names followed by the new ones.
        newnames = setdiff(j,names(x))
        cols = as.integer(chmatch(j, c(names(x),newnames)))
        # We can now mix existing columns and new columns
    } else {
        if (!is.numeric(j)) stop("j must be vector of column name or positions")
        if (any(j>ncol(x))) stop("Attempt to assign to column position greater than ncol(x). Create the column by name, instead. This logic intends to catch (most likely) user errors.")
        cols = as.integer(j)  # for convenience e.g. to convert 1 to 1L
        newnames = NULL
    }
    # Remember the key only in the `*tmp*` whole-row reassignment case described below, so it can be restored at the end.
    reinstatekey=NULL
    if (haskey(x) && identical(key(x),key(value)) &&
        identical(names(x),names(value)) &&
        is.sorted(i) &&
        identical(substitute(x),quote(`*tmp*`))) {
        # DT["a",]$y <- 1.1  winds up creating `*tmp*` subset of rows and assigning _all_ the columns into x and
        # over-writing the key columns with the same value (not just the single 'y' column).
        # That isn't good for speed; it's an R thing. Solution is to use := instead to avoid all this, but user
        # expects key to be retained in this case because _he_ didn't assign to a key column (the internal base R
        # code did).
        reinstatekey=key(x)
    }
    # Make sure there is room for the new columns before the C-level assign.
    if (!selfrefok(x) || truelength(x) < ncol(x)+length(newnames)) {
        x = alloc.col(x,length(x)+length(newnames)) # because [<- copies via *tmp* and main/duplicate.c copies at length but copies truelength over too
        # search for one other .Call to assign in [.data.table to see how it differs
    }
    verbose=getOption("datatable.verbose")
    if (!.R.subassignCopiesOthers) {   # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a, but the VECSXP is copied (i.e. a shallow copy).
        # Assign into an explicit copy so the caller's original columns are not modified.
        x = .Call(Cassign,copy(x),i,cols,newnames,value,verbose)
    } else {
        .Call(Cassign,x,i,cols,newnames,value,verbose)
    }
    alloc.col(x)  #  can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe.
    if (length(reinstatekey)) setkeyv(x,reinstatekey)
    invisible(x)
    # no copy at all if user calls directly; i.e. `[<-.data.table`(x,i,j,value)
    # or uses data.table := syntax; i.e. DT[i,j:=value]
    # but, there is one copy by R in [<- dispatch to `*tmp*`; i.e. DT[i,j]<-value. *Update: not from R > 3.0.2, yay*
    # That copy is via main/duplicate.c which preserves truelength but copies length amount. Hence alloc.col(x,length(x)).
    # No warn passed to assign here because we know it'll be copied via *tmp*.
    # := allows subassign to a column with no copy of the column at all,  and by group, etc.
}

"$<-.data.table" <- function(x, name, value) {
    # Replacement method for DT$name <- value.
    # When cedta() is TRUE, a copy of x is made and the column is assigned through
    # `[<-.data.table` with i left missing; otherwise the data.frame method does the work
    # and the result is over-allocated again.
    if (cedta()) {
        x = copy(x)
        `[<-.data.table`(x, j=name, value=value)  # important i is missing here
    } else {
        ans = `$<-.data.frame`(x, name, value)
        alloc.col(ans)           # over-allocate (again)
    }
}

as.data.frame.data.table <- function(x, ...)
{
    # Convert a data.table to a plain data.frame.
    #   x   : a data.table.
    #   ... : unused by this method.
    # Works on a copy of x, so x itself is left untouched. All data.table-specific attributes
    # are stripped from the copy so the result carries no state that could go stale.
    ans = copy(x)
    setattr(ans,"row.names",.set_row_names(nrow(x)))   # since R 2.4.0, data.frames can have non-character row names
    setattr(ans,"class","data.frame")
    setattr(ans,"sorted",NULL)  # remove so if you convert to df, do something, and convert back, it is not sorted
    setattr(ans,"index",NULL)   # same reasoning for secondary indices: they describe the current row order only
    setattr(ans,".internal.selfref",NULL)
    # leave tl intact, no harm,
    ans
}

as.list.data.table <- function(x, ...) {
    # Coerce a data.table to a raw list of its columns.
    #   x   : a data.table.
    #   ... : unused by this method.
    # Similar to as.list.data.frame in base. Although a data.table/frame is a list, too, it may be
    # being coerced to raw list type (by calling code) so that "[" and "[[" work in their raw list form,
    # such as lapply does for data.frame. So we do have to remove the class attributes (and thus shallow
    # copy is almost instant way to achieve that, without risking compatibility).
    #if (sys.call(-2L)[[1L]]=="lapply")
    #    return(x)
    ans = shallow(x)
    setattr(ans, "class", NULL)
    setattr(ans, "row.names", NULL)
    setattr(ans, "sorted", NULL)
    setattr(ans, "index", NULL)   # secondary indices are meaningless on a plain list, like the key above
    setattr(ans,".internal.selfref", NULL)   # needed to pass S4 tests for example
    ans
}


dimnames.data.table <- function(x) {
    # dimnames of a data.table: always NULL row names plus the column names.
    # When cedta() is FALSE the data.frame method is used instead, provided x really
    # inherits from data.frame; otherwise an error explains the likely cause.
    if (cedta()) return(list(NULL, names(x)))
    if (inherits(x, "data.frame")) return(`dimnames.data.frame`(x))
    stop("data.table inherits from data.frame (from v1.5), but this data.table does not. Has it been created manually (e.g. by using 'structure' rather than 'data.table') or saved to disk using a prior version of data.table?")
}

"dimnames<-.data.table" = function (x, value)   # so that can do  colnames(dt)=<..>  as well as names(dt)=<..>
{
    # Replacement method for dimnames(DT) <- value.
    #   x     : a data.table.
    #   value : a list of length 2; the first element must be NULL (no row names) and the
    #           second holds one name per column of x.
    # Returns x with its column names set via setnames().
    # Errors if value is not a length-2 list, if row names are supplied, or if the number of
    # names does not match ncol(x).
    if (!cedta()) return(`dimnames<-.data.frame`(x,value))  # won't maintain key column (if any). Revisit if ever causes a compatibility problem but don't think it's likely that packages change column names using dimnames<-. See names<-.data.table below.
    if (.R.assignNamesCopiesAll) warning("This is R<3.1.0 where dimnames(x)<-value syntax deep copies the entire table. Please upgrade to R>=3.1.0 and see ?setnames which allows you to change names by name with built-in checks and warnings.")
    if (!is.list(value) || length(value) != 2) stop("attempting to assign invalid object to dimnames of a data.table")
    if (!is.null(value[[1L]])) stop("data.tables do not have rownames")
    newcolnames = value[[2L]]
    # stop() concatenates its arguments with no separator, so the spaces must be part of the literals.
    if (ncol(x) != length(newcolnames)) stop("can't assign ",length(newcolnames)," colnames to a ",ncol(x)," column data.table")
    setnames(x,as.character(newcolnames))
    x  # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change.
}

"names<-.data.table" <- function(x,value)
{
    # Replacement method for names(DT) <- value (also reached through colnames(DT) <- value).
    # When non data.table aware packages change names, we'd like to maintain the key, too.
    # If call is names(DT)[2]="newname", R will call this names<-.data.table function (notice no i) with 'value' already prepared to be same length as ncol
    #   value : the new names, or NULL to remove the names attribute altogether.
    # Returns a shallow copy of x carrying the new names.
    caller = as.character(sys.call(-2L))[1L]
    viacolnames = identical(caller,"colnames<-")
    # Warn about the deep copy on old R; when arriving via colnames<- the awareness check uses cedta(3) first.
    if ( ((viacolnames && cedta(3)) || cedta()) && .R.assignNamesCopiesAll ) {
        prefix = if (viacolnames) "col"
        warning("This is R<3.1.0 where ",prefix,"names(x)<-value deep copies the entire table (several times). Please upgrade to R>=3.1.0 and see ?setnames which allows you to change names by name with built-in checks and warnings.")
    }
    x = shallow(x) # `names<-` should NOT modify by reference. Related to #1015, #476 and #825. Needed for R v3.1.0+.  TO DO: revisit
    if (!is.null(value)) {
        setnames(x,value)
    } else {
        setattr(x,"names",NULL)   # e.g. plyr::melt() calls base::unname()
    }
    x   # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change.
}

within.data.table <- function (data, expr, ...)
# Essentially within.list, except that the key (if any) is kept when its columns are untouched.
# Slower than := or a regular query (see ?within for further info).
{
    if (!cedta()) return(NextMethod())
    caller_env <- parent.frame()
    # environment whose variables are the columns of 'data' and whose enclosure is the caller
    e <- evalq(environment(), data, caller_env)
    eval(substitute(expr), e)  # might (and it's known that some user code does) contain rm()
    vals <- as.list(e)
    vals <- vals[!sapply(vals, is.null)]
    kept <- names(vals)
    dropped <- setdiff(names(data), kept)   # columns that no longer exist after evaluating expr
    ans <- copy(data)
    if (length(kept)) ans[,kept] <- vals
    if (length(dropped)) ans[,dropped] <- NULL
    if (haskey(data) && all(key(data) %chin% names(ans))) {
        # restore the key only when every key column is identical to the original
        unchanged <- TRUE
        for (k in key(data)) {
            if (!identical(data[[k]], ans[[k]])) {
                unchanged <- FALSE
                break
            }
        }
        if (unchanged) setattr(ans,"sorted",key(data))
    }
    ans
}


transform.data.table <- function (`_data`, ...)
# Mirrors transform.data.frame but builds a data.table, and keeps the key unless a key column is assigned to.
{
    if (!cedta()) return(NextMethod())
    new_vals <- eval(substitute(list(...)), `_data`, parent.frame())
    tags <- names(new_vals)
    pos <- chmatch(tags, names(`_data`))
    existing <- !is.na(pos)   # TRUE where the tag names a column that is already present
    if (any(existing)) {
        # fix for #1641: drop the lock attribute before assigning into existing columns
        if (isTRUE(attr(`_data`, ".data.table.locked", TRUE))) setattr(`_data`, ".data.table.locked", NULL)
        `_data`[,pos[existing]] <- new_vals[existing]
        `_data` <- data.table(`_data`)
    }
    # tags that matched nothing become additional columns
    ans <- if (all(existing)) `_data` else do.call("data.table", c(list(`_data`), new_vals[!existing]))
    key.cols <- key(`_data`)
    if (!any(tags %chin% key.cols)) setattr(ans, "sorted", key.cols)
    ans
}

subset.data.table <- function (x, subset, select, ...)
{
    # subset() method for data.table.
    #   subset - expression evaluated with the columns of x in scope; must be logical, NA counts as FALSE
    #   select - expression evaluated with each column name bound to its position, e.g. select=colF:colP
    # The key is kept on the result when the selected key columns form a prefix of the original key.
    key.cols <- key(x)
    has_select <- !missing(select)

    if (missing(subset)) {
        r <- TRUE
    } else {
        r <- eval(substitute(subset), x, parent.frame())
        if (!is.logical(r))
            stop("'subset' must evaluate to logical")
        r <- r & !is.na(r)
    }

    if (has_select) {
        col_index <- as.list(seq_len(ncol(x)))
        setattr(col_index,"names",names(x))
        vars <- eval(substitute(select), col_index, parent.frame())
        # #891 fix - don't convert numeric vars to column names - will break when there are duplicate columns
        key.cols <- intersect(key.cols, names(x)[vars]) ## Only keep key.columns found in the select clause
    } else {
        vars <- seq_len(ncol(x))
    }

    ans <- x[r, vars, with = FALSE]

    if (nrow(ans) == 0L) {
        setkey(ans,NULL)
    } else if (has_select && length(key.cols)) {
        ## Re-set the key only when the key columns that "remain" are the
        ## original key or a leading prefix of it.
        is.prefix <- all(key(x)[seq_along(key.cols)] == key.cols)
        if (is.prefix) setattr(ans, "sorted", key.cols)
    }
    ans
}

# Equivalent of 'rowSums(is.na(dt) > 0L)' but much faster and memory efficient.
# Also called "complete.cases" in base. Unfortunately it's not a S3 generic.
# Also handles bit64::integer64. TODO: export this?
# For internal use only. 'by' requires integer input. No argument checks here yet.
is_na <- function(x, by=seq_along(x)) {
    # Internal: forwards x and the integer column positions 'by' to the C routine Cdt_na.
    .Call(Cdt_na, x, by)
}
any_na <- function(x, by=seq_along(x)) {
    # Internal: forwards x and the integer column positions 'by' to the C routine CanyNA.
    .Call(CanyNA, x, by)
}

na.omit.data.table <- function (object, cols = seq_along(object), invert = FALSE, ...) {
    # na.omit() method for data.table; compare with stats:::na.omit.data.frame.
    #   cols   - columns to inspect for NA, as positions or names (default: all columns)
    #   invert - forwarded to which_() as 'bool' to choose which rows are returned
    if (!cedta()) return(NextMethod())
    if ( !missing(invert) && is.na(as.logical(invert)) )
        stop("Argument 'invert' must be logical TRUE/FALSE")
    if (is.character(cols)) {
        requested <- cols
        cols <- chmatch(requested, names(object), nomatch=0L)
        unknown <- cols == 0L
        if (any(unknown))
            stop("Columns ", paste(requested[unknown], collapse=","),
              " doesn't exist in the input data.table")
    }
    na_rows <- .Call(Cdt_na, object, as.integer(cols))
    keep <- which_(na_rows, bool = invert)
    .Call(CsubsetDT, object, keep, seq_along(object))
}

which_ <- function(x, bool = TRUE)
{
    # Thin wrapper around the C routine Cwhichwrapper, receiving x and bool unchanged.
    # The symbol is passed unquoted: fix for #1467, quotes result in "not resolved in current namespace" error
    .Call(Cwhichwrapper, x, bool)
}

is.na.data.table <- function (x) {
    # is.na() method for data.table: applies is.na to each column and cbinds the results.
    # Callers for which cedta() is FALSE get is.na.data.frame instead.
    if (cedta()) {
        na_cols <- lapply(x, "is.na")
        return(do.call("cbind", na_cols))
    }
    `is.na.data.frame`(x)
}

# no longer needed as data.table inherits these from data.frame ...
#    t.data.table <- t.data.frame
#    Math.data.table <- Math.data.frame
#    summary.data.table <- summary.data.frame

Ops.data.table <- function(e1, e2 = NULL)
{
    # Ops group method: let the next method compute the result, then, when cedta() is TRUE
    # and that result is a data.frame, convert it to data.table.
    ans <- NextMethod()
    if (!cedta() || !is.data.frame(ans)) return(ans)
    ans <- as.data.table(ans)
    ans
}

split.data.table <- function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TRUE, flatten = TRUE, ..., verbose = getOption("datatable.verbose")) {
    # split() method for data.table. Returns a list of data.tables (or, when flatten=FALSE and
    # several 'by' columns are given, a nested list produced by recursing on by[-1L]).
    #   x       - the data.table to split; anything else is an error
    #   f       - external grouping object; when supplied the row indices are split with split() and x is subset per group
    #   drop    - passed to split() in the 'f' route; in the 'by' route, FALSE together with factor columns triggers a join on make.levels()
    #   by      - character vector of column names of x to split on; cannot be combined with 'f'
    #   sorted  - chooses 'keyby' over 'by' when no join is used, and is passed to CJ() inside make.levels()
    #   keep.by - when FALSE the 'by' column(s) are excluded from .SDcols, i.e. from the returned tables
    #   flatten - TRUE: one flat list over all 'by' columns; FALSE: split on by[1L] then recurse
    #   verbose - when TRUE the constructed data.table call is printed before it is evaluated
    if (!is.data.table(x)) stop("x argument must be a data.table")
    stopifnot(is.logical(drop), is.logical(sorted), is.logical(keep.by),  is.logical(flatten))
    # split data.frame way, using `f` and not `by` argument
    if (!missing(f)) {
        if (!length(f) && nrow(x))
            stop("group length is 0 but data nrow > 0")
        if (!missing(by))
            stop("passing 'f' argument together with 'by' is not allowed, use 'by' when split by column in data.table and 'f' when split by external factor")
        # same as split.data.frame - handling all exceptions, factor orders etc, in a single stream of processing was a nightmare in factor and drop consistency
        return(lapply(split(x = seq_len(nrow(x)), f = f, drop = drop, ...), function(ind) x[ind]))
    }
    if (missing(by)) stop("you must provide 'by' or 'f' arguments")
    # check reserved column names during processing
    if (".ll.tech.split" %in% names(x)) stop("column '.ll.tech.split' is reserved for split.data.table processing")
    if (".nm.tech.split" %in% by) stop("column '.nm.tech.split' is reserved for split.data.table processing")
    if (!all(by %in% names(x))) stop("argument 'by' must refer to data.table column names")
    if (!all(by.atomic <- sapply(by, function(.by) is.atomic(x[[.by]])))) stop(sprintf("argument 'by' must refer only to atomic type columns, classes of '%s' columns are not atomic type", paste(by[!by.atomic], collapse=", ")))
    # list of data.tables (flatten) or list of lists of ... data.tables
    # make.levels: cross join (CJ) of the unique values / factor levels of 'cols'; used as 'i' so that unused factor levels still produce a group
    make.levels = function(x, cols, sorted) {
        by.order = if (!sorted) x[, funique(.SD), .SDcols=cols] # remember order of data, only when sorted=FALSE
        ul = lapply(setNames(cols, nm=cols), function(col) if (!is.factor(x[[col]])) unique(x[[col]]) else levels(x[[col]]))
        r = do.call("CJ", c(ul, sorted=sorted, unique=TRUE))
        if (!sorted && nrow(by.order)) {
            ii = r[by.order, on=cols, which=TRUE]
            r = rbindlist(list(
                r[ii], # original order from data
                r[-ii] # empty levels at the end
            ))
        }
        r
    }
    .by = by[1L]   # the column split on at this level when flatten=FALSE
    # this builds data.table call - is much more cleaner than handling each case one by one
    dtq = as.list(call("[", as.name("x")))
    join = FALSE
    flatten_any = flatten && any(sapply(by, function(col) is.factor(x[[col]])))
    nested_current = !flatten && is.factor(x[[.by]])
    if (!drop && (flatten_any || nested_current)) {
        # keep empty factor levels: join x onto every level combination
        dtq[["i"]] = substitute(make.levels(x, cols=.cols, sorted=.sorted), list(.cols=if (flatten) by else .by, .sorted=sorted))
        join = TRUE
    }
    # j wraps each group's .SD in a list column named .ll.tech.split
    dtq[["j"]] = substitute(
        list(.ll.tech.split=list(.expr)),
        list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD")) # simplify when `nomatch` accept NULL #857 ?
    )
    by.or.keyby = if (join) "by" else c("by"[!sorted], "keyby"[sorted])[1L]
    dtq[[by.or.keyby]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`.
        .expr,
        list(.expr = if(join) as.name(".EACHI") else if (flatten) by else .by)
    )
    dtq[[".SDcols"]] = if (keep.by) names(x) else setdiff(names(x), if (flatten) by else .by)
    if (join) dtq[["on"]] = if (flatten) by else .by
    dtq = as.call(dtq)
    if (isTRUE(verbose)) cat("Processing split.data.table with: ", deparse(dtq, width.cutoff=500L), "\n", sep="")
    tmp = eval(dtq)
    # add names on list
    # flatten: names are the 'by' values of each group pasted with "."; otherwise the values of the current 'by' column
    setattr(ll <- tmp$.ll.tech.split,
            "names", 
            as.character(
                if (!flatten) tmp[[.by]] else tmp[, list(.nm.tech.split=paste(unlist(lapply(.SD, as.character)), collapse = ".")), by=by, .SDcols=by]$.nm.tech.split
            ))
    # handle nested split
    # final level: remove the '.data.table.locked' attribute from each element; otherwise recurse on the remaining 'by' columns
    if (flatten || length(by) == 1L) return(
        lapply(ll, setattr, '.data.table.locked', NULL)
    ) else if (length(by) > 1L) return(
        lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)
    )
}

# TO DO, add more warnings e.g. for by.data.table(), telling user what the data.table syntax is but letting them dispatch to data.frame if they want

copy <- function(x) {
    # Copies x through the C routine Ccopy.
    # For a data.table the '.data.table.locked' attribute is cleared on the copy and alloc.col()
    # is applied. For any other list, the same treatment is given to each element that is a
    # data.table. Everything else is returned as copied.
    newx <- .Call(Ccopy,x)  # copies at length but R's duplicate() also copies truelength over.
                            # TO DO: inside Ccopy it could reset tl to 0 or length, but no matter as selfrefok detects it
                            # TO DO: revisit duplicate.c in R 3.0.3 and see where it's at
    if (is.data.table(x)) {
        setattr(newx,".data.table.locked",NULL)
        return(alloc.col(newx))
    }
    # fix for #1476. TODO: find if a cleaner fix is possible..
    if (is.list(x)) {
        anydt <- vapply(x, is.data.table, TRUE, USE.NAMES=FALSE)
        if (sum(anydt)) {
            newx[anydt] <- lapply(newx[anydt], function(x) {
                setattr(x, ".data.table.locked", NULL)
                alloc.col(x)
            })
        }
    }
    newx   # e.g. in as.data.table.list() the list is copied before changing to data.table
}

copyattr <- function(from, to)
{
    # Thin wrapper: hands 'from' and 'to' to the C routine Ccopyattr and returns its result.
    .Call(Ccopyattr, from, to)
}

point <- function(to, to_idx, from, from_idx)
{
    # Thin wrapper: passes all four arguments, in order, to the C routine CpointWrapper.
    .Call(CpointWrapper, to, to_idx, from, from_idx)
}

.shallow <- function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) {
    # Internal shallow copy via Cshallowwrapper (copies VECSXP only).
    #   cols       - columns to keep, checked by validate(); NULL (default) means all columns
    #   retain.key - keep the 'sorted' attribute when the selected columns are a leading part of the key
    #   unlock     - when TRUE the '.data.table.locked' attribute is removed from the result
    all_cols <- is.null(cols)
    if (!all_cols) cols <- validate(cols, x)
    ans <- .Call(Cshallowwrapper, x, cols)
    # most frequent case first: every column kept and the key retained as is
    if (retain.key && all_cols) return(ans)
    # rest of the cases
    col_names <- names(x)[cols]
    keep_key <- retain.key && identical(col_names, head(key(x), length(col_names)))
    setattr(ans, 'sorted', if (haskey(x) && keep_key) col_names else NULL)
    if (unlock) setattr(ans, '.data.table.locked', NULL)
    ans
    # TODO: check/remove attributes for secondary keys?
}

shallow <- function(x, cols=NULL)
{
    # Shallow copy of a data.table: delegates to .shallow() with retain.key=TRUE.
    # Anything that is not a data.table is rejected.
    if (!is.data.table(x)) {
        stop("x is not a data.table. Shallow copy is a copy of the vector of column pointers (only), so is only meaningful for data.table")
    }
    .shallow(x, cols=cols, retain.key = TRUE)
}

alloc.col <- function(DT, n=getOption("datatable.alloccol"), verbose=getOption("datatable.verbose"))
{
    # Calls the C routine Calloccolwrapper on DT and, when DT was supplied as a plain symbol,
    # rebinds that symbol to the result.
    #   DT      - object handed to Calloccolwrapper
    #   n       - passed through eval() and as.integer() before going to C; defaults to option "datatable.alloccol"
    #   verbose - handed to C unchanged; defaults to option "datatable.verbose"
    # Returns the result of Csetnamed(ans, 0L).
    name = substitute(DT)   # unevaluated argument expression, used below to rebind the caller's variable
    if (identical(name,quote(`*tmp*`))) stop("alloc.col attempting to modify `*tmp*`")   # refuse to operate on the `*tmp*` symbol
    ans = .Call(Calloccolwrapper,DT,as.integer(eval(n)),verbose)

    if (is.name(name)) {
        # assign the result to the same name, starting the search in the calling frame and
        # walking up enclosing environments (inherits=TRUE)
        name = as.character(name)
        assign(name,ans,parent.frame(),inherits=TRUE)
    }
    .Call(Csetnamed,ans,0L)
}

selfrefok <- function(DT,verbose=getOption("datatable.verbose"))
{
    # Thin wrapper: passes DT and verbose to the C routine Cselfrefokwrapper and returns its result.
    .Call(Cselfrefokwrapper,DT,verbose)
}

truelength <- function(x) {
    # Thin wrapper around the C routine Ctruelength.
    .Call(Ctruelength,x)
}
# deliberately no "truelength<-" method.  alloc.col is the mechanism for that.
# settruelength() is no longer needed (and so was removed) now that data.table depends on R 2.14.0
# which initializes tl to zero rather than leaving uninitialized.

setattr <- function(x,name,value) {
    # Wrapper for R's internal setAttrib: sets an attribute by reference (no copy).
    # Called setattr (not setattrib) at R level to sit closer to attr<-; exported in NAMESPACE
    # from 1.7.8 so users can set their own attributes.
    # `attr<-` can be called directly as well, but that copies (maybe just when NAMED>0, which is always for data.frame, I think).  See "Confused by NAMED" thread on r-devel 24 Nov 2011.
    # Internally setattr() is used a lot in data.table.R because a data.table under construction
    # often has no names yet. setnames() is the user interface: it checks integrity and, for
    # example, does not let you drop names.
    rename_existing <- name=="names" && is.data.table(x) && length(attr(x,"names")) && !is.null(value)
    if (rename_existing) {
        # Go through setnames so the truelength of names is retained, integrity checks run
        # (e.g. no more names than columns of x) and the key is updated too.
        # For convenience so that setattr(DT,"names",allnames) works as expected without requiring a switch to setnames.
        setnames(x,value)
    } else {
        # fix for R's global TRUE value input, #1281
        ans <- .Call(Csetattrib, x, name, value)
        # If name=="names" and this is the first time names are assigned (e.g. in data.table()), this will be grown by alloc.col very shortly afterwards in the caller.
        if (!is.null(ans)) {
            warning("Input is a length=1 logical that points to the same address as R's global TRUE value. Therefore the attribute has not been set by reference, rather on a copy. You will need to assign the result back to a variable. See https://github.com/Rdatatable/data.table/issues/1281 for more.")
            x <- ans
        }
    }
    # fix for #1142 - duplicated levels for factors
    if (name == "levels" && is.factor(x) && anyDuplicated(value)) {
        value <- as.character(value)
        .Call(Csetlevels, x, value, unique(value))
    }
    invisible(x)
}

setnames <- function(x,old,new) {
    # Sets column names by reference, maintains truelength, no copy of table at all.
    # But also more convenient than names(DT)[i]="newname"  because we can also do setnames(DT,"oldname","newname")
    # without an onerous match() ourselves. old can be positions, too, but we encourage by name for robustness.
    #   x   - a data.table or data.frame that already has column names
    #   old - with 'new' missing: the complete character vector of new names (one per column);
    #         otherwise the columns to rename, given as names or as numeric positions
    #   new - character vector of replacement names, same length as 'old'
    # The key ("sorted" attribute) and the names of secondary indices ("index" attribute) are
    # updated to follow the renamed columns. Returns x invisibly.
    if (!is.data.frame(x)) stop("x is not a data.table or data.frame")
    if (!length(attr(x,"names"))) stop("x has no column names")  # because setnames is for the user. Internally, use setattr(x,"names",...)
    # report the length of x itself; a bare 'dt' here would resolve to stats::dt and always print 1
    if (length(names(x)) != length(x)) stop("x is length ",length(x)," but its names are length ",length(names(x)))
    if (missing(new)) {
        # for setnames(DT,new); e.g., setnames(DT,c("A","B")) where ncol(DT)==2
        if (!is.character(old)) stop("Passed a vector of type '",typeof(old),"'. Needs to be type 'character'.")
        if (length(old) != ncol(x)) stop("Can't assign ",length(old)," names to a ",ncol(x)," column data.table")
        # note that duplicate names are permitted to be created in this usage only
        w = which(names(x) != old)
        if (!length(w)) return(invisible(x))  # no changes
        new = old[w]
        i = w
    } else {
        if (missing(old)) stop("When 'new' is provided, 'old' must be provided too")
        if (!is.character(new)) stop("'new' is not a character vector")
        if (is.numeric(old)) {
            if (length(sgn <- unique(sign(old))) != 1L) 
                stop("Items of 'old' is numeric but has both +ve and -ve indices.")
            tt = abs(old)<1L | abs(old)>length(x) | is.na(old)
            if (any(tt)) stop("Items of 'old' either NA or outside range [1,",length(x),"]: ",paste(old[tt],collapse=","))
            # negative positions mean "all columns except these"
            i = if (sgn == 1L) as.integer(old) else seq_along(x)[as.integer(old)]
            if (any(duplicated(i))) stop("Some duplicates exist in 'old': ",paste(i[duplicated(i)],collapse=","))
        } else {
            if (!is.character(old)) stop("'old' is type ",typeof(old)," but should be integer, double or character")
            if (any(duplicated(old))) stop("Some duplicates exist in 'old': ", paste(old[duplicated(old)],collapse=","))
            i = chmatch(old,names(x))
            if (any(is.na(i))) stop("Items of 'old' not found in column names: ",paste(old[is.na(i)],collapse=","))
            # a name in 'old' that also matches a column other than the one found is ambiguous
            if (any(tt<-!is.na(chmatch(old,names(x)[-i])))) stop("Some items of 'old' are duplicated (ambiguous) in column names: ",paste(old[tt],collapse=","))
        }
        if (length(new)!=length(i)) stop("'old' is length ",length(i)," but 'new' is length ",length(new))
    }
    # update the key if the column name being change is in the key
    m = chmatch(names(x)[i], key(x))
    w = which(!is.na(m))
    if (length(w))
        .Call(Csetcharvec, attr(x,"sorted"), m[w], new[w])
    
    # update secondary keys
    # each index attribute is named "__col1__col2..."; rebuild the name with the renamed columns
    idx = attr(x,"index")
    for (k in names(attributes(idx))) {
        tt = strsplit(k,split="__")[[1L]][-1L]
        m = chmatch(names(x)[i], tt)
        w = which(!is.na(m))
        if (length(w)) {
            tt[m[w]] = new[w]
            newk = paste("__",paste(tt,collapse="__"),sep="")
            setattr(idx, newk, attr(idx, k))
            setattr(idx, k, NULL)
        }   
    }

    .Call(Csetcharvec, attr(x,"names"), as.integer(i), new)
    invisible(x)
}

setcolorder <- function(x,neworder)
{
    # Reorders the columns of x through the C routine Csetcolorder and returns x invisibly.
    #   neworder - the complete new order, given either as column names or as column numbers
    # if (!is.data.table(x)) stop("x is not a data.table")
    if (length(neworder)!=length(x)) stop("neworder is length ",length(neworder)," but x has ",length(x)," columns.")
    if (is.character(neworder)) {
        if (any(duplicated(neworder))) stop("neworder contains duplicate column names")
        dup_names <- duplicated(names(x))
        if (any(dup_names)) stop("x has some duplicated column name(s): ",paste(names(x)[dup_names],collapse=","),". Please remove or rename the duplicate(s) and try again.")
        o <- as.integer(chmatch(neworder,names(x)))
        not_found <- is.na(o)
        if (any(not_found)) stop("Names in neworder not found in x: ",paste(neworder[not_found],collapse=","))
    } else {
        if (!is.numeric(neworder)) stop("neworder is not a character or numeric vector")
        o <- as.integer(neworder)
        out_of_bounds <- !(o %in% seq_len(length(x)))
        if (any(out_of_bounds)) stop("Column numbers in neworder out of bounds: ",paste(o[out_of_bounds],collapse=","))
    }
    .Call(Csetcolorder,x,o)
    invisible(x)
}

set <- function(x,i=NULL,j,value)  # low overhead, loopable
{
    # Assigns 'value' into x at rows i / columns j through the C routine Cassign; returns x invisibly.
    if (is.atomic(value)) {
        # protect NAMED of atomic value from .Call's NAMED=2 by wrapping it in a one-element list;
        # Csetlistelt is used instead of list() to avoid the copy by list() in R < 3.1.0
        wrapped <- vector("list", 1L)
        .Call(Csetlistelt, wrapped, 1L, value)
        value <- wrapped
    }
    # verbose=FALSE for speed to avoid getOption()  TO DO: somehow read getOption("datatable.verbose") from C level
    .Call(Cassign,x,i,j,NULL,value,FALSE)
    invisible(x)
}

chmatch <- function(x,table,nomatch=NA_integer_) {
    # Forwards to Cchmatchwrapper with its last argument FALSE (cf. %chin%, which passes TRUE).
    # Only the first element of nomatch is used, coerced to integer: [1L] to fix #1672
    .Call(Cchmatchwrapper,x,table,as.integer(nomatch[1L]),FALSE)
}

"%chin%" <- function(x,table)
{
    # Forwards to Cchmatchwrapper with nomatch NA_integer_ and its last argument TRUE.
    # TO DO  if table has 'ul' then match to that
    .Call(Cchmatchwrapper,x,table,NA_integer_,TRUE)
}

chorder <- function(x) {
    # Order of x as computed by forderv(sort=TRUE); when forderv returns a zero-length
    # result, the identity order seq_along(x) is returned instead.
    o <- forderv(x, sort=TRUE, retGrp=FALSE)
    if (!length(o)) return(seq_along(x))
    o
}

chgroup <- function(x) {
    # TO DO: deprecate and remove this. It's exported but doubt anyone uses it. Think the plan was to use it internally, but forderv superceded.
    # Grouping order from forderv(sort=FALSE, retGrp=TRUE) with attributes stripped;
    # seq_along(x) when forderv returns a zero-length result.
    o <- forderv(x, sort=FALSE, retGrp=TRUE)
    if (!length(o)) return(seq_along(x))
    as.vector(o)  # as.vector removes the attributes
}


.rbind.data.table <- function(..., use.names=TRUE, fill=FALSE, idcol=NULL) {
    # See FAQ 2.23
    # Called from base::rbind.data.frame
    # Gathers the ... arguments into a list and passes them to rbindlist().
    tables <- list(...)
    # if (missing(use.names)) message("Columns will be bound by name for consistency with base. You can supply unnamed lists and the columns will then be joined by position, or set use.names=FALSE. Alternatively, explicitly setting use.names to TRUE will remove this message.")
    rbindlist(tables, use.names, fill, idcol)
}

rbindlist <- function(l, use.names=fill, fill=FALSE, idcol=NULL) {
    # Binds the elements of l through the C routine Crbindlist and returns a data.table.
    #   idcol - FALSE/NULL: no id column; TRUE: id column named ".id"; character: its first element is the name
    if (identical(idcol, FALSE)) {
        idcol <- NULL
    } else if (!is.null(idcol)) {
        if (isTRUE(idcol)) idcol <- ".id"
        if (!is.character(idcol)) stop("idcol must be a logical or character vector of length 1. If logical TRUE the id column will named '.id'.")
        idcol <- idcol[1L]
    }
    # symbol passed unquoted: fix for #1467, quotes result in "not resolved in current namespace" error
    ans <- .Call(Crbindlist, l, use.names, fill, idcol)
    if (length(ans)) setDT(ans)[] else null.data.table()
}

vecseq <- function(x,y,clamp) {
    # Thin wrapper around the C routine Cvecseq.
    .Call(Cvecseq,x,y,clamp)
}

# .Call(Caddress, x) increments NAM() when x is vector with NAM(1). Referring object within non-primitive function is enough to increment reference.
address <- function(x) {
    # The argument expression is evaluated in the caller's frame and the result handed
    # straight to Caddress, without being bound to a local variable here.
    .Call(Caddress, eval(substitute(x), parent.frame()))
}

":=" <- function(...) {
    # := only has meaning inside j of [.data.table; calling it anywhere else is always an error.
    stop('Check that is.data.table(DT) == TRUE. Otherwise, := and `:=`(...) are defined for use in j, once only and in particular ways. See help(":=").')
}

setDF <- function(x, rownames=NULL) {
  # Converts x to a data.frame by setting attributes by reference; returns x invisibly.
  #   x        - a data.table, a data.frame, or a list whose elements all have the same length
  #   rownames - optional row names; must be free of duplicates and match the number of rows
  if (!is.list(x)) stop("setDF only accepts data.table, data.frame or list of equal length as input")
  if (any(duplicated(rownames))) stop("rownames contains duplicates")
  if (is.data.table(x)) {
    # copied from as.data.frame.data.table
    if (!is.null(rownames)) {
      if (length(rownames) != nrow(x))
        stop("rownames incorrect length; expected ", nrow(x), " names, got ", length(rownames))
      rn <- rownames
    } else {
      rn <- .set_row_names(nrow(x))
    }
    setattr(x, "row.names", rn)
    setattr(x, "class", "data.frame")
    setattr(x, "sorted", NULL)
    setattr(x, ".internal.selfref", NULL)
  } else if (is.data.frame(x)) {
    # already a data.frame: only the row names may need replacing
    if (!is.null(rownames)) {
      if (length(rownames) != nrow(x))
        stop("rownames incorrect length; expected ", nrow(x), " names, got ", length(rownames))
      setattr(x, "row.names", rownames)
    }
    x
  } else {
    # plain list: check equal lengths, fill in missing names, then add row.names and class
    lens <- vapply(x, length, 0L)
    nr <- max(lens)
    if (any(lens<nr))
      stop("All elements in argument 'x' to 'setDF' must be of same length")
    nms <- names(x)
    if (is.null(nms)) {
      setattr(x, "names", paste("V",seq_len(length(x)),sep=""))
    } else {
      blank <- nms %chin% ""
      if (any(blank)) {
        nms[blank] <- paste("V", seq_len(sum(blank)), sep="")
        setattr(x, "names", nms)
      }
    }
    if (!is.null(rownames)) {
      if (length(rownames) != nr)
        stop("rownames incorrect length; expected ", nr, " names, got ", length(rownames))
      rn <- rownames
    } else {
      rn <- .set_row_names(nr)
    }
    setattr(x,"row.names", rn)
    setattr(x,"class","data.frame")
  }
  invisible(x)
}

setDT <- function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) {
    # Converts x to a data.table by setting attributes by reference; returns x invisibly.
    #   x             - a data.table, data.frame, list of equal-length elements, or NULL
    #   keep.rownames - for data.frame input: anything other than FALSE keeps the row names as a
    #                   first column, named by keep.rownames[1L] when it is character, else "rn"
    #   key           - passed to setkeyv() when not NULL
    #   check.names   - when TRUE the column names are passed through make.names(unique=TRUE)
    # When x was supplied as a symbol, or as list$name / list[[name]] on a symbol, the result is
    # also written back to that location in the caller.
    name = substitute(x)
    if (is.name(name)) {
        # find the environment that holds the symbol so a locked binding is reported up front
        home <- function(x, env) {
            if (identical(env, emptyenv()))
                stop("Can not find symbol ", cname, call. = FALSE)
            else if (exists(x, env, inherits=FALSE)) env
            else home(x, parent.env(env))
        }
        cname = as.character(name)
        envir = home(cname, parent.frame())
        if (bindingIsLocked(cname, envir)) {
            stop("Can not convert '", cname, "' to data.table by reference because binding is locked. It is very likely that '", cname, "' resides within a package (or an environment) that is locked to prevent modifying its variable bindings. Try copying the object to your current environment, ex: var <- copy(var) and then using setDT again.")
        }
    }
    if (is.data.table(x)) {
        # fix for #1078 and #1128, see .resetclass() for explanation.
        setattr(x, 'class', .resetclass(x, 'data.table'))
        if (!missing(key)) setkeyv(x, key) # fix for #1169
        if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
        if (selfrefok(x) > 0) return(invisible(x)) else alloc.col(x)
    } else if (is.data.frame(x)) {
        rn = if (!identical(keep.rownames, FALSE)) rownames(x) else NULL   # grab row names before they are reset below
        setattr(x, "row.names", .set_row_names(nrow(x)))
        if (check.names) setattr(x, "names", make.names(names(x), unique=TRUE))
        # fix for #1078 and #1128, see .resetclass() for explanation.
        setattr(x, "class", .resetclass(x, 'data.frame'))
        alloc.col(x)
        if (!is.null(rn)) {
            # add the saved row names as a column and move it to the front
            nm = c(if (is.character(keep.rownames)) keep.rownames[1L] else "rn", names(x))
            x[, (nm[1L]) := rn]
            setcolorder(x, nm)
        }
    } else if (is.null(x) || (is.list(x) && !length(x))) {
        x = null.data.table()
    } else if (is.list(x)) {
        # copied from as.data.table.list - except removed the copy
	for (i in seq_along(x)) {
	    if (inherits(x[[i]], "POSIXlt"))
		stop("Column ", i, " is of POSIXlt type. Please convert it to POSIXct using as.POSIXct and run setDT again. We do not recommend use of POSIXlt at all because it uses 40 bytes to store one date.")
	}
        n = vapply(x, length, 0L)
        mn = max(n)
        if (any(n<mn))
            stop("All elements in argument 'x' to 'setDT' must be of same length")
        xn = names(x)
        if (is.null(xn)) {
            setattr(x, "names", paste("V",seq_len(length(x)),sep=""))
        } else {
            idx = xn %chin% "" # names can be NA - test 1006 caught that!
            if (any(idx)) {
                xn[idx] = paste("V", seq_along(which(idx)), sep="")
                setattr(x, "names", xn)
            }
            if (check.names) setattr(x, "names", make.names(xn, unique=TRUE))
        }
        setattr(x,"row.names",.set_row_names(max(n)))
        setattr(x,"class",c("data.table","data.frame"))
        alloc.col(x)
    } else {
        stop("Argument 'x' to 'setDT' should be a 'list', 'data.frame' or 'data.table'")
    }
    if (!is.null(key)) setkeyv(x, key)
    if (is.name(name)) {
        # write the converted object back to the caller's variable
        name = as.character(name)
        assign(name, x, parent.frame(), inherits=TRUE)
    } else if (is.call(name) && (name[[1L]] == "$" || name[[1L]] == "[[") && is.name(name[[2L]])) {
        # common case is call from 'lapply()'
        k = eval(name[[2L]], parent.frame(), parent.frame())   # the container the element was extracted from
        if (is.list(k)) {
            origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame())
            if (length(j) == 1L) {
                if (is.character(j)) {
                    j = match(j, names(k))
                    if (is.na(j))
                        stop("Item '", origj, "' not found in names of input list")
                }
            }
            .Call(Csetlistelt,k,as.integer(j), x)   # place x into element j of the container via C
        } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) {
            assign(as.character(name[[3L]]), x, k, inherits=FALSE)
        }
    }
    invisible(x)
}

as_list <- function(x) {
    # Wraps x in a one-element list; the element is set through the C routine Csetlistelt.
    wrapped <- vector("list", 1L)
    .Call(Csetlistelt, wrapped, 1L, x)
    wrapped
}

# FR #1353
rowid <- function(..., prefix=NULL)
{
    # Convenience front end to rowidv(): the ... arguments are collected into a list.
    rowidv(list(...), prefix=prefix)
}

rowidv <- function(x, cols=seq_along(x), prefix=NULL) {
    # Computes ids from forderv()'s grouping of x on 'cols' by calling Cfrank with "sequence".
    #   x      - an atomic vector, or a list (e.g. data.table) of columns
    #   cols   - columns of x to group on, as names or positions; must not be given when x is atomic
    #   prefix - NULL, or a single string that is pasted in front of every id
    # Returns the ids from Cfrank, or a character vector when 'prefix' is supplied.
    # Enforce exactly what the message promises: previously only character vectors longer than 1
    # were rejected, so non-character or zero-length prefixes slipped through.
    if (!is.null(prefix) && (!is.character(prefix) || length(prefix) != 1L))
        stop("prefix must be NULL or a character vector of length=1.")
    if (is.atomic(x)) {
        if (!missing(cols) && !is.null(cols))
            stop("x is a single vector, non-NULL 'cols' doesn't make sense.")
        cols = 1L
        x = as_list(x)
    } else {
        if (!length(cols))
            stop("x is a list, 'cols' can not be 0-length.")
        if (is.character(cols))
            cols = chmatch(cols, names(x))
        cols = as.integer(cols)
    }
    xorder = forderv(x, by=cols, sort=FALSE, retGrp=TRUE) # speedup on char with sort=FALSE
    xstart = attr(xorder, 'start')
    # a zero-length order from forderv is replaced by the identity order
    if (!length(xorder)) xorder = seq_along(x[[1L]])
    ids = .Call(Cfrank, xorder, xstart, uniqlengths(xstart, length(xorder)), "sequence")
    if (!is.null(prefix))
        ids = paste(prefix, ids, sep="")
    ids
}

# FR #686
rleid <- function(...)
{
    # Convenience front end to rleidv(): the ... arguments are collected into a list.
    rleidv(list(...))
}

rleidv <- function(x, cols=seq_along(x)) {
    # Computes ids with the C routine Crleid over the columns of x selected by 'cols'.
    #   x    - an atomic vector, or a list (e.g. data.table) of columns
    #   cols - columns of x to use, as names or positions; must not be given when x is atomic
    if (is.atomic(x)) {
        if (!missing(cols) && !is.null(cols)) 
            stop("x is a single vector, non-NULL 'cols' doesn't make sense.")
        cols = 1L
        x = as_list(x)
    } else {
        if (!length(cols))
            stop("x is a list, 'cols' can not be 0-length.")
        if (is.character(cols)) 
            cols = chmatch(cols, names(x))
        cols = as.integer(cols)
    }
    # Restrict x to the requested columns before calling C. Without this step 'cols' was
    # validated but never used, so every column of x took part in the ids.
    x = .shallow(x, cols) # shallow copy even if list..
    .Call(Crleid, x, -1L)
}

# GForce functions
# Each function below is a one-line R wrapper that forwards its arguments to a C routine.
# Where present, 'n' is coerced with as.integer() and 'na.rm' is passed through unchanged.
`g[` <- function(x, n) .Call(Cgnthvalue, x, as.integer(n)) # n is of length=1 here.
ghead <- function(x, n) .Call(Cghead, x, as.integer(n)) # n is not used at the moment
gtail <- function(x, n) .Call(Cgtail, x, as.integer(n)) # n is not used at the moment
gfirst <- function(x) .Call(Cgfirst, x)
glast <- function(x) .Call(Cglast, x)
gsum <- function(x, na.rm=FALSE) .Call(Cgsum, x, na.rm)
gmean <- function(x, na.rm=FALSE) .Call(Cgmean, x, na.rm)
gprod <- function(x, na.rm=FALSE) .Call(Cgprod, x, na.rm)
gmedian <- function(x, na.rm=FALSE) .Call(Cgmedian, x, na.rm)
gmin <- function(x, na.rm=FALSE) .Call(Cgmin, x, na.rm)
gmax <- function(x, na.rm=FALSE) .Call(Cgmax, x, na.rm)
gvar <- function(x, na.rm=FALSE) .Call(Cgvar, x, na.rm)
gsd <- function(x, na.rm=FALSE) .Call(Cgsd, x, na.rm)
# gstart passes o, f, l and rows to Cgstart; gend calls Cgend with no arguments.
gstart <- function(o, f, l, rows) .Call(Cgstart, o, f, l, rows)
gend <- function() .Call(Cgend)

isReallyReal <- function(x)
{
    # Thin wrapper around the C routine CisReallyReal.
    .Call(CisReallyReal, x)
}