# inst/tests/tests.Rraw


# This file should be clean of non-ASCII characters; e.g. CRAN Solaris
# Use:  grep --color='auto' -P -n "[\x80-\xFF]" tests.Rraw

# Environment setup. Two cases, distinguished by whether an object named
# test.data.table exists directly in .GlobalEnv (inherits=FALSE, so attached
# packages are not searched):
#   - it does not exist: attach data.table and a set of other packages with
#     require(), put reshape2/reshape back on the search path, .devtesting=FALSE.
#   - it exists: only re-attach reshape2/reshape if they are already attached,
#     and set .devtesting=TRUE.
# NOTE(review): the other packages are presumably exercised by tests later in the
# file; only ggplot2 and hexbin are used in the part of the file visible here.
if (!exists("test.data.table",.GlobalEnv,inherits=FALSE)) {
    require(data.table)   # in dev the package should not be loaded
    options(warn=0)  # use require() so it warns but doesn't halt if not available
    require(plyr)
    require(ggplot2)
    require(hexbin)
    require(nlme)
    require(xts)
    require(bit64)
    require(gdata)
    require(GenomicRanges)
    require(caret)
    require(knitr)
    require(plm)
    # reshape2 ahead of reshape ...
    # try(..., silent=TRUE): ignore the error raised when the package is not attached
    try(detach(package:reshape2),silent=TRUE)
    try(detach(package:reshape),silent=TRUE)
    # logical.return=TRUE makes library() return TRUE/FALSE instead of stopping
    # when the package cannot be loaded
    library(reshape2, pos="package:base", logical.return=TRUE)
    library(reshape, pos="package:base", logical.return=TRUE)
    .devtesting=FALSE
} else {
    # Matt has suppressMessages(require(bit64)) in .Rprofile
    # Only packages already on the search path are detached and re-attached here.
    if ("package:reshape2" %in% search()) {
      detach(package:reshape2)
      library(reshape2, pos="package:base", logical.return=TRUE)
    }
    if ("package:reshape" %in% search()) {
      detach(package:reshape)
      library(reshape, pos="package:base", logical.return=TRUE)
    }
    .devtesting=TRUE
}
# From here on every warning is turned into an error (warn=2), so an unexpected
# warning stops the test run.
options(warn=2)

# Bookkeeping variables, all starting from zero / empty.
# NOTE(review): presumably read and updated by test(); its definition is not
# visible in this part of the file - confirm.
nfail = ntest = lastnum = 0
whichfail = NULL

.timingtests = FALSE
started.at = Sys.time()   # wall-clock time at which the run began
# options() returns the previous value, so oldbwb keeps whatever was set before.
# NOTE(review): presumably restored at the end of the file - confirm.
oldbwb = options(datatable.old.bywithoutby=FALSE)  # in case user set it, or set in dev

# When testing the installed package (.devtesting is FALSE), bind local names to
# functions taken from the data.table namespace - mostly non-exported internals
# reached with `:::` - so that the tests below can call them unqualified.
# NOTE(review): in dev mode these names are presumably already defined in the
# global environment by sourcing the package code - confirm.
if (!.devtesting) {
    test = data.table:::test
    compactprint = data.table:::compactprint
    is.sorted = data.table:::is.sorted
    forderv = data.table:::forderv
    forder = data.table:::forder
    null.data.table = data.table:::null.data.table
    ordernumtol = data.table:::ordernumtol   # TO DO: deprecated, remove
    iradixorder = data.table:::iradixorder   # TO DO: deprecated, remove
    dradixorder = data.table:::dradixorder   # TO DO: deprecated, remove
    uniqlist = data.table:::uniqlist
    uniqlengths = data.table:::uniqlengths
    setrev = data.table:::setrev
    setreordervec = data.table:::setreordervec
    selfrefok = data.table:::selfrefok
    setattr = data.table::setattr     # so as not to use bit::setattr
    .R.listCopiesNamed = data.table:::.R.listCopiesNamed
    .R.assignNamesCopiesAll = data.table:::.R.assignNamesCopiesAll
    .R.subassignCopiesOthers = data.table:::.R.subassignCopiesOthers
    .R.subassignCopiesVecsxp = data.table:::.R.subassignCopiesVecsxp
    setdiff_ = data.table:::setdiff_
    frankv = data.table:::frankv
    is_na = data.table:::is_na
    shallow = data.table:::shallow # until exported
    chmatch2 = data.table:::chmatch2
    which_ = data.table:::which_
    shift = data.table:::shift
    any_na = data.table:::any_na
    replace_dot = data.table:::replace_dot
    isReallyReal = data.table:::isReallyReal
    between = data.table::between
    which.first = data.table:::which.first
    which.last = data.table:::which.last
    trim = data.table:::trim
    `%+%.default` = data.table:::`%+%.default`
    .shallow = data.table:::.shallow
    getdots = data.table:::getdots
}

# To cover tables.R 100%, tables() must be run before any data.table has been created, so that it returns the null data.table
test(0, tables(), null.data.table(), output = "No objects of class")

# Shared fixture for the join tests that follow: 7 rows, integer columns a, b and v,
# keyed by (a,b). The resulting layout is shown in the comment immediately below.
TESTDT = data.table(a=as.integer(c(1,3,4,4,4,4,7)), b=as.integer(c(5,5,6,6,9,9,2)), v=1:7)
setkey(TESTDT,a,b)
# i.e.       a b v
#       [1,] 1 5 1
#       [2,] 3 5 2
#       [3,] 4 6 3
#       [4,] 4 6 4
#       [5,] 4 9 5
#       [6,] 4 9 6
#       [7,] 7 2 7
# INT(...): shorthand used to spell expected results. All arguments are first
# concatenated into a single vector with c(), and that vector is then coerced to
# integer, e.g. INT(1,3:4,NA) gives c(1L,3L,4L,NA).
INT = function(...) {
    combined = c(...)
    as.integer(combined)
}
##########################

# Tests 1-24 (with 185-191 interleaved): join TESTDT, keyed by (a,b), to i tables
# built with SJ() and extract column v, varying mult ("first"/"last"/"all"), roll,
# rollends and nomatch (0 vs NA). Expected values are integer vectors built with INT().
test(1, TESTDT[SJ(4,6),v,mult="first"], 3L)
test(2, TESTDT[SJ(4,6),v,mult="last"], 4L)
test(3, TESTDT[SJ(c(4,4,4),c(6,6,7)),v,mult="last",roll=TRUE], INT(4,4,4))
test(4, TESTDT[SJ(c(4,4,4),c(9,9,10)),v,mult="last",roll=TRUE], INT(6,6,6))
test(5, TESTDT[SJ(c(4,4,4),c(6,6,7)),v,mult="last",roll=TRUE,rollends=FALSE], INT(4,4,4))
test(6, TESTDT[SJ(c(4,4,4),c(9,9,10)),v,mult="last",roll=TRUE,rollends=FALSE], INT(6,6,NA))
test(7, TESTDT[SJ(c(4,4,4),c(9,9,10)),v,mult="first",roll=TRUE,rollends=FALSE], INT(5,5,NA))
test(8, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v], INT(NA,NA,NA,NA,NA))
test(9, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v,roll=TRUE], INT(NA,NA,NA,6,NA))
test(10, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v,roll=TRUE,rollends=FALSE], INT(NA,NA,NA,NA,NA))
# Single-column i from here: only the first key column (a) is joined on.
test(11, TESTDT[SJ(c(-3,2,4,4,5,7,8)),v,mult="first"], INT(NA,NA,3,3,NA,7,NA))
test(12, TESTDT[SJ(c(-3,2,4,4,5,7,8)),v,mult="first",roll=TRUE], INT(NA,1,3,3,6,7,7))
test(13, TESTDT[SJ(c(-3,2,4,4,5,7,8)),v,mult="last"], INT(NA,NA,6,6,NA,7,NA))
test(14, TESTDT[SJ(c(-3,2,4,4,5,7,8)),v,mult="last",roll=TRUE], INT(NA,1,6,6,6,7,7))
test(15, TESTDT[SJ(c(-3,2,4,4,5,7,8)),v,mult="last",nomatch=0], INT(6,6,7))
test(16, TESTDT[SJ(c(4)),v], INT(3,4,5,6))
#test(17, suppressWarnings(TESTDT[SJ(c(4,4)),v,mult="all",incbycols=FALSE][[1]]), INT(3:6,3:6))
# by=.EACHI returns a table; [[2]] picks its second column for the comparison
test(18, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",nomatch=0,by=.EACHI][[2]], INT(3:6))
test(185, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",nomatch=NA], INT(NA,NA,3:6,NA))
test(19, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,nomatch=0], INT(1,3:6,7))
test(186, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,nomatch=NA], INT(NA,1,3:6,7))
test(20, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=0], INT(1,3:6))
test(187, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=NA], INT(NA,1,3:6,NA))
# Two-column i again, now with mult="all".
test(21, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=0], INT(1,3:4))
test(188, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=NA, allow.cartesian=TRUE], INT(NA,1,NA,3:4,NA,NA,NA))
test(22, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=0], INT(1,3:4,4,6))
test(189, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=NA, allow.cartesian=TRUE], INT(NA,1,NA,3:4,4,6,NA))
test(23, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=0], INT(1,3:4,4))
test(190, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=NA,allow.cartesian=TRUE], INT(NA,1,NA,3:4,4,NA,NA))
# NA values in the first join column of i.
test(24, TESTDT[SJ(c(1,NA,4,NA,NA,4,4),c(5,5,6,6,7,9,10)),v,mult="all",roll=TRUE,nomatch=0], INT(1,3:4,5:6,6))
test(191, TESTDT[SJ(c(1,NA,4,NA,NA,4,4),c(5,5,6,6,7,9,10)),v,mult="all",roll=TRUE,nomatch=NA,allow.cartesian=TRUE], INT(NA,NA,NA,1,3:4,5:6,6))
# Note that the NAs get sorted to the beginning by the SJ().

# i.e.       a b v      (same test matrix, repeating here for easier reading of the test cases below)
#       [1,] 1 5 1
#       [2,] 3 5 2
#       [3,] 4 6 3
#       [4,] 4 6 4
#       [5,] 4 9 5
#       [6,] 4 9 6
#       [7,] 7 2 7
# Tests 25-50 (with 192-199 interleaved): the same kind of joins as tests 1-24, but
# from test 27 on the i table is built with J() and its rows are given in unsorted
# order, so the expected results follow the order of i as written.
test(25, TESTDT[SJ(4,6),v,mult="first"], 3L)
test(26, TESTDT[SJ(4,6),v,mult="last"], 4L)
test(27, TESTDT[J(c(4,4,4),c(7,6,6)),v,mult="last",roll=TRUE], INT(4,4,4))
test(28, TESTDT[J(c(4,4,4),c(10,9,9)),v,mult="last",roll=TRUE], INT(6,6,6))
test(29, TESTDT[J(c(4,4,4),c(7,6,6)),v,mult="last",roll=TRUE,rollends=FALSE], INT(4,4,4))
test(30, TESTDT[J(c(4,4,4),c(10,9,9)),v,mult="last",roll=TRUE,rollends=FALSE], INT(NA,6,6))
test(31, TESTDT[J(c(4,4,4),c(10,9,9)),v,mult="first",roll=TRUE,rollends=FALSE], INT(NA,5,5))
test(32, TESTDT[J(c(8,1,4,4,-9),c(1,4,4,10,1)),v], INT(NA,NA,NA,NA,NA))
test(33, TESTDT[J(c(8,1,4,4,-9),c(1,4,4,10,1)),v,roll=TRUE], INT(NA,NA,NA,6,NA))
test(34, TESTDT[J(c(8,1,4,4,-9),c(1,4,7,10,1)),v,roll=TRUE,rollends=FALSE], INT(NA,NA,4,NA,NA))
# Single-column i from here: only the first key column (a) is joined on.
test(35, TESTDT[J(c(5,4,-3,8,4,7,2)),v,mult="first"], INT(NA,3,NA,NA,3,7,NA))
test(36, TESTDT[J(c(5,4,-3,8,4,7,2)),v,mult="first",roll=TRUE], INT(6,3,NA,7,3,7,1))
test(37, TESTDT[J(c(5,4,-3,8,4,7,2)),v,mult="last"], INT(NA,6,NA,NA,6,7,NA))
test(38, TESTDT[J(c(5,4,-3,8,4,7,2)),v,mult="last",roll=TRUE], INT(6,6,NA,7,6,7,1))
test(39, TESTDT[J(c(5,4,-3,8,4,7,2)),v,mult="last",nomatch=0], INT(6,6,7))
test(40, TESTDT[J(c(4)),v,mult="all"], INT(3,4,5,6))
test(41, TESTDT[J(c(4,4)),v,mult="all", allow.cartesian=TRUE], INT(3:6,3:6))
test(42, TESTDT[J(c(8,2,4,-3)),v,mult="all",nomatch=0], INT(3:6))
test(192, TESTDT[J(c(8,2,4,-3)),v,mult="all",nomatch=NA], INT(NA,NA,3:6,NA))
test(43, TESTDT[J(c(8,2,4,-3)),v,mult="all",roll=TRUE,nomatch=0], INT(7,1,3:6))
test(193, TESTDT[J(c(8,2,4,-3)),v,mult="all",roll=TRUE,nomatch=NA], INT(7,1,3:6,NA))
#test(44, suppressWarnings(TESTDT[J(c(8,4,2,-3)),v,mult="all",roll=TRUE,rollends=FALSE,incbycols=FALSE]), INT(3:6,1))
# Two-column i again, now with mult="all".
test(45, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=0], INT(1,3:4))
test(194, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=NA,allow.cartesian=TRUE], INT(NA,1,NA,3:4,NA,NA,NA))
test(46, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=0], INT(1,3:4,4,6))
test(195, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=NA,allow.cartesian=TRUE], INT(NA,1,NA,3:4,4,6,NA))
test(47, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=0], INT(1,3:4,4))
test(196, TESTDT[J(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=NA,allow.cartesian=TRUE], INT(NA,1,NA,3:4,4,NA,NA))
# NA values in the join columns of i.
test(48, TESTDT[J(c(-9,NA,4,NA,1,4,4),c(1,5,9,6,5,9,10)),v,mult="all",roll=TRUE,nomatch=0], INT(5:6,1,5:6,6))  # this time the NAs stay where they are. Compare to test 24 above.
test(197, TESTDT[J(c(-9,NA,4,NA,1,4,4),c(1,5,9,6,5,9,10)),v,mult="all",roll=TRUE,nomatch=NA,allow.cartesian=TRUE], INT(NA,NA,5:6,NA,1,5:6,6))
test(49, TESTDT[J(c(4,1,0,5,3,7,NA,4,1),c(6,5,1,10,5,2,1,6,NA)),v,nomatch=0], INT(3,4,1,2,7,3,4))
test(198, TESTDT[J(c(4,1,0,5,3,7,NA,4,1),c(6,5,1,10,5,2,1,6,NA)),v,nomatch=NA,allow.cartesian=TRUE], INT(3,4,1,NA,NA,2,7,NA,3,4,NA))
test(50, TESTDT[J(c(4,1,0,5,3,7,NA,4,1),c(6,5,1,10,5,2,1,6,NA)),v,mult="last",nomatch=0], INT(4,1,2,7,4))
test(199, TESTDT[J(c(4,1,0,5,3,7,NA,4,1),c(6,5,1,10,5,2,1,6,NA)),v,mult="last",nomatch=NA], INT(4,1,NA,NA,2,7,NA,4,NA))

TESTDT[, a:=letters[a]]
setkey(TESTDT,a,b)
# i.e.       a b v
#       [1,] a 5 1
#       [2,] c 5 2
#       [3,] d 6 3
#       [4,] d 6 4
#       [5,] d 9 5
#       [6,] d 9 6
#       [7,] g 2 7
test(51, TESTDT[SJ(c("d","d","e","g"),c(6,7,1,2)),v,mult="all",roll=TRUE,nomatch=0], INT(3:4,4,7))
test(200, TESTDT[SJ(c("d","d","e","g"),c(6,7,1,2)),v,mult="all",roll=TRUE,nomatch=NA], INT(3:4,4,NA,7))
test(52, TESTDT[J(c("g","d","e","d"),c(6,6,1,2)),v,mult="all",roll=TRUE,nomatch=0], INT(7,3:4))
test(201, TESTDT[J(c("g","d","e","d"),c(6,6,1,2)),v,mult="all",roll=TRUE,nomatch=NA], INT(7,3:4,NA,NA))

TESTDT[, b:=letters[b]]
setkey(TESTDT,a,b)
# i.e.
#         a b v
#    [1,] a e 1
#    [2,] c e 2
#    [3,] d f 3
#    [4,] d f 4
#    [5,] d i 5
#    [6,] d i 6
#    [7,] g b 7
test(53, TESTDT[SJ(c("d","d","e","g"),c("f","g","a","b")),v,mult="last"], INT(4,NA,NA,7))
test(54, TESTDT[J(c("g","d","e","d"),c("b","g","a","f")),v,mult="last"], INT(7,NA,NA,4))  # this tests (d,g) ok even though there is an NA in last match in the roll.
test(55, TESTDT[SJ(c("d","d","e","g"),c("f","g","a","b")),v,mult="first"], INT(3,NA,NA,7))
test(56, TESTDT[J(c("g","d","e","d"),c("b","g","a","f")),v,mult="first"], INT(7,NA,NA,3))
test(57, TESTDT[J(c("g","d","d","d","e","d"),c("b","g","k","b","a","f")),v,roll=TRUE], INT(7,4,6,NA,NA,3,4))
# test 58 removed. Tested this failed (rolling join on factors) pre character columns, now works.
test(59, TESTDT[J(c("g","d","d","d","e","d"),c("b","g","k","b","a","f")),v,roll=TRUE,rollends=FALSE], INT(7,4,NA,NA,NA,3,4))
# test 60 removed. Tested this failed (rolling join on factors) pre character columns, now works.

# Tests 61-66 were testing sortedmatch which is now replaced by chmatch for characters, and removed
# for integers until needed.

# Test 67 removed. No longer use factors so debate/problem avoided.
# [.factor and c.factor are no longer present in data.table, not even hidden away
# X = factor(letters[1:10])
# test(67, levels(X[4:6]), letters[4:6])

test(68, "TESTDT" %in% tables(silent=TRUE)[,NAME])  # NAME is returned as a column in which we look for the string
test(69, "TESTDT" %in% tables(silent=TRUE)[,as.character(NAME)]) # an old test (from when NAME was factor) but no harm in keeping it
test(69.1, names(tables(silent=TRUE)), c("NAME","NROW","NCOL","MB","COLS","KEY"))
test(69.2, names(tables(silent=TRUE,mb=FALSE)), c("NAME","NROW","NCOL","COLS","KEY"))

a = "d"
# Variable Twister.  a in this scope has same name as a inside DT scope.
# Aug 2010 : As a result of bug 1005, and consistency with 'j' and 'by' we now allow self joins (test 183) in 'i'.
test(70, TESTDT[eval(J(a)),v,by=.EACHI], data.table(a="d",v=3:6,key="a"))   # the eval() enabled you to use the 'a' in the calling scope, not 'a' in the TESTDT.  TO DO: document this.
test(71, TESTDT[eval(SJ(a)),v,by=.EACHI], data.table(a="d",v=3:6,key="a"))
test(72, TESTDT[eval(CJ(a)),v,by=.EACHI], data.table(a="d",v=3:6,key="a"))

test(73, TESTDT[,v], 1:7)
test(74, TESTDT[,3], 3)
test(74.5, TESTDT[,3L], 3L)
test(75, TESTDT[,"v"], "v")
test(76, TESTDT[,2:3], 2:3)  # See ?[.data.table that with=FALSE is required for the likely intended result
test(77, TESTDT[,2:3,with=FALSE], data.table(b=c("e","e","f","f","i","i","b"),v=1:7))
test(78, TESTDT[,c("b","v"),with=FALSE], data.table(b=c("e","e","f","f","i","i","b"),v=1:7))
colsVar = c("b","v")
test(79, TESTDT[,colsVar], colsVar)
test(80, TESTDT[,colsVar,with=FALSE], data.table(b=c("e","e","f","f","i","i","b"),v=1:7))

# works in test.data.table, but not eval(body(test.data.table)) when in R CMD check ... test(81, TESTDT[1:2,c(a,b)], factor(c("a","c","e","e")))
# The above is expected to be a common source of confusion. c(a,b) is evaluated within
# the frame of TESTDT, and c() creates one vector, not a 2-column subset as with a data.frame.
# If 2 columns are required, use list(a,b).  c() can be useful too, but is different.

test(82, TESTDT[,c("a","b")], c("a","b"))
test(83, TESTDT[,list("a","b")], data.table(V1="a",V2="b"))
test(83.1, TESTDT[,list("sum(a),sum(b)")], data.table("sum(a),sum(b)"))
test(83.2, TESTDT[,list("sum(a),sum(b)"),by=a], {tt=data.table(a=c("a","c","d","g"),V1="sum(a),sum(b)",key="a");tt$V1=as.character(tt$V1);tt})
test(84, TESTDT[1:2,list(a,b)], data.table(a=c("a","c"), b=c("e","e"), key = 'a,b'))
# test(85, TESTDT[1:2,DT(a,b)], data.table(a=c("a","c"), b=c("e","e")))  #DT() now deprecated

test(86, TESTDT[,sum(v),by="b"], data.table(b=c("e","f","i","b"),V1=INT(3,7,11,7)))  # TESTDT is key'd by a,b, so correct that grouping by b should not be key'd in the result by default
test(87, TESTDT[,list(MySum=sum(v)),by="b"], data.table(b=c("e","f","i","b"),MySum=INT(3,7,11,7)))
test(88, TESTDT[,list(MySum=sum(v),Sq=v*v),by="b"][1:3], data.table(b=c("e","e","f"),MySum=INT(3,3,7),Sq=INT(1,4,9))) # silent repetition of MySum to match the v*v vector
# Test 89 dropped. Simplify argument no longer exists. by is now fast and always returns a data.table  ... test(89, TESTDT[,sum(v),by="b",simplify=FALSE], list(7L,3L,7L,11L))

# Test 88.5 contributed by Johann Hibschman (for bug fix #1294) :
test(88.5, TESTDT[a=="d",list(MySum=sum(v)),by=list(b)], data.table(b=c("f","i"), MySum=INT(7,11), key="b"))

setkey(TESTDT,b)
test(90, TESTDT[J(c("f","i")),sum(v),by=.EACHI], data.table(b=c("f","i"),V1=c(7L,11L),key="b"))
test(90.5, TESTDT[J(c("i","f")),sum(v),by=.EACHI], data.table(b=c("i","f"),V1=c(11L,7L)))  # test not keyed
test(91, TESTDT[SJ(c("f","i")),sum(v),by=.EACHI], data.table(b=c("f","i"),V1=c(7L,11L),key="b"))
# Test 92 dropped for the same reason as 89 ... test(92, TESTDT[J(c("f","i")),sum(v),mult="all",simplify=FALSE], list(7L,11L))

test(93, TESTDT[c("f","i"), which=TRUE], 4:7)
test(94, TESTDT[c("i","f"), mult="last", which=TRUE], INT(7,5))

test(95, TESTDT["f",v], 3:4)
test(96, TESTDT["f",v,by=.EACHI], data.table(b="f",v=3:4,key="b"))
test(97, TESTDT[c("f","i","b"),list(GroupSum=sum(v)),by=.EACHI], data.table(b=c("f","i","b"), GroupSum=c(7L,11L,7L)))
# that line above doesn't create a key on the result so that the order fib is preserved.
test(98, TESTDT[SJ(c("f","i","b")),list(GroupSum=sum(v)),by=.EACHI], data.table(b=c("b","f","i"), GroupSum=c(7L,7L,11L), key="b"))
# line above is the way to group, sort by group and setkey on the result by group.

dt <- data.table(A = rep(1:3, each=4), B = rep(11:14, each=3), C = rep(21:22, 6), key = "A,B")
test(99, unique(dt), data.table(dt[c(1L, 4L, 5L, 7L, 9L, 10L)], key="A,B"))

# test [<- for column assignment
dt1 <- dt2 <- dt
test(100, {dt1[,"A"] <- 3L; dt1}, {dt2$A <- 3L; dt2})

# test transform and within
test(101, within(dt, {D <- B^2}), transform(dt, D = B^2))
test(102, within(dt, {A <- B^2}), transform(dt, A = B^2))

# test .SD object
test(103, dt[, sum(.SD$B), by = "A"], dt[, sum(B), by = "A"])
test(104, dt[, transform(.SD, D = min(B)), by = "A"], dt[, list(B,C,D=min(B)), by = "A"])

# test numeric and comparison operations on a data table
test(105, all(dt + dt > dt))
test(106, all(dt + dt > 1))
test(107, dt + dt, dt * 2L)

# test a few other generics:
test(108, dt, data.table(t(t(dt)),key="A,B"))
test(109, all(!is.na(dt)))
dt2 <- dt
dt2$A[1] <- NA   # removes key
test(110, sum(is.na(dt2)), 1L)
test(111, {setkey(dt,NULL);dt}, na.omit(dt))
test(112, dt2[2:nrow(dt2),A], na.omit(dt2)$A)

# test [<- assignment:
dt2[is.na(dt2)] <- 1L
test(113, {setkey(dt,NULL);dt}, dt2)   # key should be dropped because we assigned to a key column
# want to discourage this going forward (inefficient to create RHS like this)
# dt2[, c("A", "B")] <- dt1[, c("A", "B"), with = FALSE]
# test(114, dt1, dt2)
## doesn't work, yet:
##     dt2[rep(TRUE, nrow(dt)), c("A", "B")] <- dt1[, c("A", "B"), with = FALSE]
##     dt2[rep(TRUE, nrow(dt)), c("A")] <- dt1[, c("A"), with = FALSE]
##     test(dt, dt2))  stop("Test 112 failed")

# test the alternate form of setkey:
dt1 = copy(dt)
dt2 = copy(dt)
setkeyv(dt1, "A")
setkey(dt2, A)
test(115, dt1, dt2)

# Test dogroups works correctly for character/factor columns
test(116, TESTDT[,a[1],by="b"], data.table(b=c("b","e","f","i"), V1=c("g","a","d","d"), key="b"))
test(117, TESTDT[,list(a[1],v[1]),by="b"], data.table(b=c("b","e","f","i"), V1=c("g","a","d","d"), V2=INT(7,1,3,5), key="b"))

# We no longer check i for out of bounds, for consistency with data.frame and e.g. cbind(DT[w],DT[w+1]). NA rows should be returned for i>nrow
test(118, TESTDT[8], data.table(a=as.character(NA), b=as.character(NA), v=as.integer(NA), key="b"))
test(119, TESTDT[6:9], data.table(a=c("d","d",NA,NA), b=c("i","i",NA,NA), v=c(5L,6L,NA,NA)))

# Grouped aggregation on random data: n rows, two grouping columns each drawn from
# 1:50, and two normal-random value columns. The data is not seeded, so the check
# below compares data.table against itself rather than against fixed numbers.
n=10000
grp1=sample(1:50,n,replace=TRUE)
grp2=sample(1:50,n,replace=TRUE)
dt=data.table(x=rnorm(n),y=rnorm(n),grp1=grp1,grp2=grp2)
# ans holds the per-(grp1,grp2) means as V1 and V2. tt (the timing) is only
# referenced by the commented-out test 120 below.
tt = system.time(ans <- dt[,list(.Internal(mean(x)),.Internal(mean(y))),by="grp1,grp2"])
# test(120, tt[1] < 0.5)   # actually takes more like 0.068 << 0.5, but the micro EC2 instance can be slow sometimes.
# TO DO: incorporate performance testing into R CMD check (using testthat?), that somehow copes with running on slow machines.
# Spot check: pick one result row at random and recompute its two means directly
# from dt by filtering on that row's grp1 and grp2.
i = sample(nrow(ans),1)
test(121, all.equal(ans[i,c(V1,V2)], dt[grp1==ans[i,grp1] & grp2==ans[i,grp2], c(mean(x),mean(y))]))
# TO DO: add a data.frame aggregate method here and check data.table is faster

# Tests of 0 and 1 row tables
TESTDT = data.table(NULL)
test(122, TESTDT[1], TESTDT)
test(123, TESTDT[0], TESTDT)
test(124, TESTDT[1:10], TESTDT)
test(125, TESTDT["k"], error="the columns to join by must be specified either using")
# test 126 no longer needed now that test() has 'error' argument

TESTDT = data.table(a=3L,v=2L,key="a")  # testing 1-row table
test(127, TESTDT[J(3)], TESTDT)
test(128, TESTDT[J(4)], data.table(a=4L,v=NA_integer_,key="a"))   # see tests 206-207 too re the [NA]
test(129, TESTDT[J(4),roll=TRUE], data.table(a=4L,v=2L,key="a"))  # the i values are in the result now (which make more sense for rolling joins, the x.a can still be accessed if need be)
test(130, TESTDT[J(4),roll=TRUE,rollends=FALSE], data.table(a=4L,v=NA_integer_,key="a"))
test(131, TESTDT[J(-4),roll=TRUE], data.table(a=-4L,v=NA_integer_,key="a"))

test(132, ncol(TESTDT[0]), 2L)
test(133, TESTDT[0][J(3)], data.table(a=3L,v=NA_integer_,key="a")) # These need to retain key for consistency (edge cases of larger sorted i)

# tests on data table names, make.names is now FALSE by default from v1.8.0
x = 2L; `1x` = 4L
dt = data.table(a.1 = 1L, b_1 = 2L, "1b" = 3L, `a 1` = 4L, x, `1x`, 2*x)
test(134, names(dt), c("a.1", "b_1", "1b", "a 1", "x", "V6", "V7"))
dt = data.table(a.1 = 1L, b_1 = 2L, "1b" = 3L, `a 1` = 4L, x, `1x`, 2*x, check.names=TRUE)
test(134.5, names(dt), c("a.1", "b_1", "X1b", "a.1.1", "x", "V6", "V7"))

dt = data.table(a.1 = 1L, b_1 = 2L, "1b" = 3L, `a 1` = 4L, x, `1x`, 2*x, check.names = FALSE)
test(135, names(dt), c("a.1", "b_1", "1b", "a 1", "x", "V6", "V7")) # the last two terms differ from data.frame()

test(136, dt[,b_1, by="a.1"], data.table(a.1=1L,"b_1"=2L))
test(137, dt[,`a 1`, by="a.1"], data.table(a.1=1L,"a 1"=4L, check.names=FALSE))
test(138, dt[,a.1, by="`a 1`"], data.table(`a 1`=4L,a.1=1L, check.names=FALSE))

# tests with NA's in factors
dt = data.table(a = c(NA, letters[1:5]), b = 1:6)
test(139, dt[,sum(b), by="a"], data.table(a = c(NA, letters[1:5]), V1 = 1:6))

# tests to make sure rbind and grouping keep classes
dt = data.table(a = rep(as.Date("2010-01-01"), 4), b = rep("a",4))
test(140, rbind(dt,dt), data.table(a = rep(as.Date("2010-01-01"), 8), b = rep("a",8)))
test(141, dt[,list(a=a), by="b"], dt[,2:1, with = FALSE])

dt$a <- structure(as.integer(dt$a), class = "Date")
test(142, dt[,list(b=b), by="a"], dt)

dt = data.table(x=1:5,y=6:10)
test(143, tail(dt), dt)  # tail was failing if a column name was called x.

dt <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], b2 = LETTERS[1:4])
test(144, dt[, .SD[3,], by=b], data.table(b=LETTERS[1:4],a=3L,b2=LETTERS[1:4]))

DT = data.table(x=rep(c("a","b"),c(2,3)),y=1:5)
xx = capture.output(ans <- DT[,{print(x);sum(y)},by=x,verbose=FALSE])
test(145, xx, c("[1] \"a\"","[1] \"b\""))
test(146, ans, data.table(x=c("a","b"),V1=c(3L,12L)))

test(147, DT[,MySum=sum(v)], error="unused argument")  # user meant DT[,list(MySum=sum(v))]. FR#204 done.

dt = data.table(a=c(1L,4L,5L), b=1:3, key="a")
test(148, dt[CJ(2:3),roll=TRUE], data.table(a=c(2L,3L),b=c(1L,1L),key="a"))
test(149, dt[J(2:3),roll=TRUE], data.table(a=c(2L,3L),b=c(1L,1L)))  # in future this will detect the subset is ordered and retain the key

# 150:158 test out of order factor levels in key columns (now allowed from v1.8.0)
dt = data.table(x=factor(c("c","b","a"),levels=c("b","a","c")),y=1:3)
setkey(dt,x)
test(150.1, dt["b",y,verbose=TRUE], output="Coercing character column i.'V1' to factor") # changed i.V1 to i.x as per FR #2693
test(150.2, dt["b",y], 2L)
# from Tom's post :
a = data.table(a=rep(1:5, 2), b=factor(letters[rep(1:5, each =2)], levels=letters[5:1]), key="b")
test(151.1, a[J("b"),a,verbose=TRUE], output="Coercing character column i.'V1' to factor") # message back to `i.V1` now. 'b' still accessible to satisfy FR #2693, checked on next line 
test(151.2, a[J("b"),a], 3:4)
# stretch tests further, two out of order levels, one gets key'd the other not :
a = data.table(x=factor(letters[rep(1:5, each =2)], levels=letters[5:1]),
               y=factor(letters[rep(c(6,9,7,10,8), each =2)], levels=letters[10:6]),
               z=1:10)
test(152, is.sorted(levels(a$x)), FALSE)
test(153, is.sorted(levels(a$y)), FALSE)
test(154, a[,sum(z),by=x][1,paste(x,V1)], "a 3")  # ad hoc by doesn't sort the groups so 'a' (5th level) should be first
setkey(a,x)    # 'e' (level 1) should come first now.
test(155, is.sorted(levels(a$x)), FALSE)
test(156, is.sorted(levels(a$y)), FALSE)
test(157, a[,sum(z),by=x][1,paste(x,V1)], "e 19")  # 1st level is now first
test(158, a[,sum(z),by=y][1,paste(y,V1)], "h 19")  # not 'f'
test(158.5, a[,sum(z),keyby=y][1,paste(y,V1)], "j 15")  # not 'f' either


# tests of by expression variables
DT = data.table( a=1:5, b=11:50, d=c("A","B","C","D"), f=1:5, grp=1:5 )
f = quote( list(d) )
test(159, DT[,mean(b),by=eval(f)], DT[,mean(b),by=list(d)])  # column f doesn't get in the way of expression f
# foo(grp): group the DT found in the calling scope by the quoted expression passed
# in `grp` (e.g. quote(list(d))) and return mean(b) per group.
# The parameter deliberately shares its name with DT's column `grp`: test 163 checks
# that the local variable does not conflict with the column.
foo = function( grp ) {
   DT[,mean(b),by=eval(grp)]
}
test(160, foo(quote(list(d))), DT[,mean(b),by=list(d)])
test(161, foo(quote(list(d,a))), DT[,mean(b),by=list(d,a)])
test(162, foo(quote(list(f))), DT[,mean(b),by=list(f)])
test(163, foo(quote(list(grp))), DT[,mean(b),by=list(grp)])  # grp local variable in foo doesn't conflict with column grp
test(164, foo(f), DT[,mean(b),by=d])

# checks that data.table inherits methods from data.frame in base ok
test(165, subset(DT,a>2), DT[a>2])
test(166, suppressWarnings(split(DT,DT$grp)[[2]]), DT[grp==2])

# Plotting tests 167-168. They only run when ggplot2 is attached (test 167.2
# additionally needs hexbin); otherwise a message saying they were skipped is printed.
if ("package:ggplot2" %in% search()) {
    test(167,names(print(ggplot(DT,aes(b,f))+geom_point())),c("data","panel","plot"))
    # The names() is a stronger test that it has actually plotted, but also because test() sees the invisible result
    test(167.1,DT[,print(ggplot(.SD,aes(b,f))+geom_point()),by=list(grp%%2L)],data.table(grp=integer()))  # %%2 because there are 5 groups in DT data at this stage, just need 2 to test
    # New test reported by C Neff on 11 Oct 2011
    if ("package:hexbin" %in% search())
       test(167.2, names(print(ggplot(DT) + geom_hex(aes(b, f)) + facet_wrap(~grp))), c("data","panel","plot"))
    else
       cat("Test 167.2 not run. If required call library(hexbin) first.\n")

    # Test plotting ITime with ggplot2 which seems to require an as.data.frame method for ITime, #1713
    datetimes = c("2011 NOV18 09:29:16", "2011 NOV18 10:42:40", "2011 NOV18 23:47:12",
              "2011 NOV19 01:06:01", "2011 NOV19 11:35:34", "2011 NOV19 11:51:09")
    # DT is replaced here by the table IDateTime() returns; the tests below use its
    # idate and itime columns.
    DT = IDateTime(strptime(datetimes,"%Y %b%d %H:%M:%S"))
    # Both tests compare the $ranges of a plot built inside DT[...] with the same
    # plot built from the columns directly.
    test(168, print(DT[,qplot(idate,itime)])$ranges, print(qplot(DT$idate,DT$itime))$ranges)
    test(168.1, print(DT[,qplot(idate,as.POSIXct(itime,tzone=""))])$ranges, print(qplot(idate,as.POSIXct(itime,tzone=""),data=DT))$ranges)

    # Close any graphics devices opened by the print() calls above; errors are ignored.
    try(graphics.off(),silent=TRUE)
    #try(graphics.off(),silent=TRUE) # R CMD check doesn't like graphics it seems, even when inside try()
} else {
    cat("Tests 167-168 not run. If required call library(ggplot2) first.\n")
    # ggplot takes a long time so we don't include these by default
    # From examples, the library(ggplot2) is done first, so that 'R CMD check' does include tests 167-168
}

# test of . in formula, using inheritance
DT = data.table(y=1:100,x=101:200,y=201:300,grp=1:5)
test(169,DT[,as.list(lm(y~0+.,.SD)$coef),by=grp][2,x]-2<1e-10, TRUE)

DT <- data.table( a=1:4, d=c("A","B","C","D") )
g <- quote( list( d ) )
test(170, DT[,list(d)], DT[,eval(g)])

DT = data.table(A=c(25L,85L,25L,25L,85L), B=c("a","a","b","c","c"), C=c(2,65,9,82,823))
test(171.1, DT[B=="b"][A==85], output="Empty data.table (0 rows) of 3 cols: A,B,C")
test(171.2, DT[B=="b"][A==85,C], numeric())
test(171.3, DT[ , data.table( A, C )[ A==25, C ] + data.table( A, C )[ A==85, C ], by=B ], data.table(B=c("a","c"),V1=c(67,905)))
test(172, DT[ , list(3,data.table( A, C )[ A==25, C ] + data.table( A, C )[ A==85, C ]), by=B ], data.table(B=c("a","b","c"),V1=3,V2=c(67,NA,905)))

# Test growing result in memory. Usually the guess is good though.
# This example returns no rows for first group so guess for up-front allocate needs a reallocate
DT = data.table(A=c(1L,1L,2L,2L,3L,3L), B=1:6)
test(173, DT[,B[B>3],by=A][,V1], c(4L,5L,6L))

# Example taken from Harish post to datatable-help on 11 July
DT <- data.table(
     A=c("a","a","b","b","d","c","a","d"),
     B=c("x1","x2","x2","x1","x2","x1","x1","x2"),
     C=c(5,2,3,4,9,5,1,9)
     )
test(174, DT[,C[C-min(C)<3],by=list(A,B)][,V1], c(1,2,3,4,9,9,5))
test(175, DT[,C[C-min(C)<5],by=list(A,B)][,V1], c(5,1,2,3,4,9,9,5))

# Tests of data.table sub-assignments: $<-.data.table & [<-.data.table
# Sub-assignment through `[<-` on a keyed table: first by join (J("A")) with the
# column given by number and then by name, then by row numbers.
DT = data.table(a = c("A", "Z"), b = 1:10, key = "a")
DT[J("A"),2] <- 100L  # without L generates nice warning :-)
DT[J("A"),"b"] <- 1:5
DT[1:3,"b"] <- 33L
# Only column b was assigned to, so the expected table is still keyed by a.
test(176, DT,  data.table(a = rep(c("A", "Z"), each = 5),
                          b = as.integer(c(rep(33, 3), 4:5, seq(2, 10, by = 2))),
                          key = "a"))
# Now assign to the key column itself.
DT[J("A"),"a"] <- "Z"
test(177, DT, data.table(a="Z", b=as.integer(c(rep(33, 3), 4:5, seq(2, 10, by = 2)))))  # i.e. key dropped and column a still character (it is built from character vectors here, not factors)

DT <- data.table(a = c("A", "Z"), b = 1:10, key = "a")
DT$b[1:5] <- 1:5
DT$b[1:3] <- 33
test(178, DT,  data.table(a = rep(c("A", "Z"), each = 5),
                          b = c(rep(33, 3), 4:5, seq(2, 10, by = 2)),
                          key = "a"))
DT$a <- 10:1
test(179, key(DT), NULL )

# Test logical in a key
DT = data.table(a=rep(1:3,each=2),b=c(TRUE,FALSE),v=1:6)
setkey(DT,a,b)
test(180, DT[J(2,FALSE),v], 4L)
test(181, DT[,sum(v),by=b][,V1], c(12L,9L))

# Test fix for bug 1026 reported by Harish V
# this test needed a unique var name to generate error 'object 'b' not found'.
# Otherwise it finds 'b' in local scope.
setnames(DT,2,"buniquename314")
# bar(data, fcn): regression helper for bug 1026. `fcn` is an unevaluated expression
# (e.g. sum(buniquename314)) to be evaluated per group of column a in `data`.
# The expression is evaluated in j in two ways:
#   xx - captured once with substitute() into q, then j = eval(q)
#   yy - substitute(fcn) called directly inside j
# Returns TRUE when both forms give identical results.
bar = function( data, fcn ) {
    q = substitute( fcn )
    xx = data[,eval(q),by=a]
    yy = data[,eval(substitute(fcn)),by=a]
    identical(xx,yy)
}
test(182, bar( DT, sum(buniquename314) ), TRUE)

# Test bug 1005 reported by Branson Owen
DT = data.table(A = c("o", "x"), B = 1:10, key = "A")
test(183, DT[J(unique(A)), B], DT$B)

# Test bug 709 which returned an error here. And return type now empty table, #1945 in 1.8.1.
xx = data.table(a=1:5,b=6:10)
test(184, xx[a>6,sum(b),by=a], data.table(a=integer(),V1=integer()))

# Tests of bug 1015 highlighted by Harish
# See thread "'by without by' now heeds nomatch=NA"
# Tests 185-201 were added in above next to originals
x <- data.table(a=c("a","b","d","e"),b=c("A","A","B","B"),d=c(1,2,3,4), key="a,b")
y <- data.table(g=c("a","b","c","d"),h=c("A","A","A","A"))
test(202, x[y], x[y,mult="all"])
test(203, x[y,d], c(1,2,NA,NA))
test(204, x[y,list(d)]$d, x[y,d])
test(205, x[y,list(d),mult="all"][,d], c(1,2,NA,NA))

# Test [NA] returns one NA row. NA is type *logical* so prior to
# change in v1.5, NA would get silently recycled and the whole table would
# be returned all NA (rarely useful and often confusing, but consistent
# with data.frame).
TESTDT = data.table(a=1:3,v=1:3,key="a")
test(206, TESTDT[NA], data.table(a=NA_integer_,v=NA_integer_,key="a"))  # NA are now allowed in keys, so retains key
setkey(TESTDT,NULL)
test(207, TESTDT[NA], data.table(a=NA_integer_,v=NA_integer_))

# With inheritance, NROW and NCOL in base work nicely. No need for them in data.table.
test(208, NROW(TESTDT), 3L)
test(209, nrow(TESTDT), 3L)
test(210, NCOL(TESTDT), 2L)
test(211, ncol(TESTDT), 2L)

# Test infinite recursion error is trapped when a pre-1.5 data.table
# is used with 1.5 (bug #1008)
# The same keyed lookup is run twice: once with the class as created by data.table(),
# and once after the class attribute has been overwritten with "data.table" alone.
DT = data.table(a=1:6,key="a")
test(212, DT[J(3)]$a, 3L) # correct class c("data.table","data.frame")
class(DT) = "data.table"  # incorrect class, but as from 1.8.1 it works. By accident when moving from colnames() to names(), it was dimnames() doing the check, but rather than add a check that identical(class(DT),c("data.table","data.frame")) at the top of [.data.table, we'll leave it flexible to user (user might not want to inherit from data.frame for some reason).
test(213, DT[J(3)]$a, 3L)

# setkey now auto coerces double and character for convenience, and
# to solve bug #953
DF = data.frame(a=LETTERS[1:10], b=1:10, stringsAsFactors=FALSE)
DT = data.table(DF)
setkey(DT,a)    # used to complain about character
test(215, DT["C",b], 3L)
DT = data.table(DF,key="a")
test(216, DT["C",b], 3L)
DT = data.table(a=c(1,2,3),v=1:3,key="a")
test(217, DT[J(2),v], 2L)
DT = data.table(a=c(1,2.1,3),v=1:3,key="a")
test(218, DT[J(2.1),v], 2L)

# tests of quote()-ed expressions in i. Bug #1058
DT = data.table(a=1:5,b=6:10,key="a")
q = quote(a>3)
test(220, DT[eval(q),b], 9:10)
test(221, DT[eval(parse(text="a>4")),b], 10L)
test(222, DT[eval(parse(text="J(2)")),b], 7L)

# lists in calling scope should be ok as single names passed to by, bug #1060
DT = data.table(a=1:2,b=rnorm(10))
byfact = DT[,a]   # vector, ok before fix but check anyway
test(223, DT[,mean(b),by=byfact], DT[,mean(b),by=list(byfact)])
byfact = DT[,list(a)]  # this caused next line to fail before fix
test(224, DT[,mean(b),by=byfact], DT[,mean(b),by=as.list(byfact)])
test(225, DT[,mean(b),by=byfact], DT[,mean(b),by={byfact}])

# tests for building expressions via parse, bug #1243
dt1key<-data.table(A1=1:100,onekey=rep(1:2,each=50))
setkey(dt1key,onekey)
ASumExpr<-parse(text="quote(sum(A1))") # no need for quote but we test it anyway because that was work around when test 227 failed
ASumExprNoQ<-parse(text="sum(A1)")
ans = dt1key[,sum(A1),by=onekey]
test(226,ans,dt1key[,eval(eval(ASumExpr)),by=onekey])
test(227,ans,dt1key[,eval(ASumExprNoQ),by=onekey])

# test for uncommon grouping pattern on 1-row data.table, bug #1245
DT = data.table(a=1L,b=2L)
test(228,DT[,list(1:2),by=a],data.table(a=c(1L,1L),V1=1:2))

# special case j=.SD, bug #1247
DT = data.table(a=rep(1:2,each=2),b=1:4)
test(229,DT[,.SD,by=a],DT)
setkey(DT,a)
test(229.1,DT[,.SD,by=key(DT)],DT)

# merge bug with column 'x', bug #1229
d1 <- data.table(x=c(1,3,8),y1=rnorm(3), key="x")
d2 <- data.table(x=c(3,8,10),y2=rnorm(3), key="x")
ans1=merge(d1, d2, by="x")
ans2=cbind(d1[2:3],y2=d2[1:2]$y2);setkey(ans2,x)
test(230, ans1, ans2)

# one column merge, bug #1241
DT = data.table(a=rep(1:2,each=3),b=1:6,key="a")
y = data.table(a=c(0,1),bb=c(10,11),key="a")
test(231,merge(y,DT),data.table(a=1L,bb=11,b=1:3,key="a"))
test(232,merge(y,DT,all=TRUE),data.table(a=rep(c(0L,1L,2L),c(1,3,3)),bb=rep(c(10,11,NA_real_),c(1,3,3)),b=c(NA_integer_,1:6),key="a"))
y = data.table(a=c(0,1),key="a") # y with only a key column
test(233,merge(y,DT),data.table(a=1L,b=1:3,key="a"))
test(234,merge(y,DT,all=TRUE),data.table(a=rep(c(0L,1L,2L),c(1,3,3)),b=c(NA_integer_,1:6),key="a"))

# 'by' when DT contains list columns
DT = data.table(a=c(1,1,2,3,3),key="a")
DT$b=list(1:2,1:3,1:4,1:5,1:6)
test(235,DT[,mean(unlist(b)),by=a],data.table(a=c(1,2,3),V1=c(1.8,2.5,mean(c(1:5,1:6))),key="a"))
test(236,DT[,sapply(b,mean),by=a],data.table(a=c(1,1,2,3,3),V1=c(1.5,2.0,2.5,3.0,3.5),key="a"))

# when i is a single name, it no longer evaluates within data.table scope
DT = data.table(a=1:5,b=rnorm(5),key="a")
a = list(4)
test(237,DT[a],DT[J(4)])

# repeat earlier test with xkey instead of x. xkey is internal to merge; the bigger problem Tom mentioned.
d1 <- data.table(xkey=c(1,3,8),y1=rnorm(3), key="xkey")
d2 <- data.table(xkey=c(3,8,10),y2=rnorm(3), key="xkey")
ans2=cbind(d1[2:3],y2=d2[1:2]$y2);setkey(ans2,xkey)
test(238, merge(d1, d2, by="xkey"), ans2)

# Join Inherited Scope, and X[Y] including Y's non-join columns
# X has foo=1:3 for a==1, foo=4:6 for a==2 and foo=7:8 for a==3. Y only
# contains a=2:3, so a==1 never appears in the results below.
X=data.table(a=rep(1:3,c(3,3,2)),foo=1:8,key="a")
Y=data.table(a=2:3,bar=6:7)
# One result row per row of Y: 4+5+6 = 15 and 7+8 = 15.
test(239, X[Y,sum(foo),by=.EACHI], data.table(a=2:3,V1=c(15L,15L),key="a"))
# j can use Y's non-join column bar alongside X's foo: 15*6 = 90 and 15*7 = 105.
test(240, X[Y,sum(foo*bar),by=.EACHI], data.table(a=2:3,V1=c(90L,105L),key="a"))
test(241, X[Y], data.table(a=rep(2:3,3:2),foo=4:8,bar=rep(6:7,3:2),key="a"))
# Both forms must agree on the grand total: 90 + 105 = 195.
test(242, X[Y,list(foo,bar),by=.EACHI][,sum(foo*bar)], 195L)
test(243, X[Y][,sum(foo*bar)], 195L)
# not sure about these yet :
# test(244, X[Y,sum(foo*bar),mult="first"], data.table(a=2:3,V1=c(24L,49L)))
# test(245, X[Y,sum(foo*bar),mult="last"], data.table(a=2:3,V1=c(36L,56L)))

# joining to less than all X's key columns (in examples but can't see formal test)
X=data.table(a=rep(LETTERS[1:2],2:3),b=1:5,v=10:14,key="a,b")
test(246.1, X["A"], X[1:2])   # checks that X[1:2] retains key, too
test(246.2, key(X["A"]), c("a","b"))
test(247, X["C"]$v, NA_integer_)
test(248, nrow(X["C",nomatch=0]), 0L)

x=data.table( a=c("a","b","c"), b=1:3, key="a" )
y=data.table( a=c("b","d","e"), d=c(8,9,10) )
test(249, x[y], data.table(a=c("b","d","e"),b=c(2L,NA,NA),d=c(8,9,10)))  # keeps i join cols
test(250, x[y,mult="first"], data.table(a=c("b","d","e"),b=c(2L,NA,NA),d=c(8,9,10))) # same

x=data.table( a=c("a","b","b","c"), b=1:4, key="a" )
y=data.table(a=c("b","d","b"), d=c(8,9,10))
test(251, x[y, allow.cartesian=TRUE], data.table(a=c("b","b","d","b","b"),b=c(2:3,NA,2:3),d=c(8,8,9,10,10)))

# auto coerce float to int in ad hoc by (just like setkey), FR#1051
DT = data.table(a=INT(1,1,1,2,2),v=1:5)
test(252, DT[,sum(v),by=a], data.table(a=1:2,V1=c(6L,9L)))

# check that by retains factor columns, since character is now default
DT = data.table(a=factor(c("A","A","A","B","B")),v=1:5)
test(253, DT[,sum(v),by=a], data.table(a=factor(c("A","B")),V1=c(6L,9L)))

# fix for bug #1298 with by=key(DT) and divisibility error.
DT=data.table(a=c(1,1,1,2,2),b=1:5,key="a")
test(254, DT[,sum(b),by=key(DT)]$V1, c(6L,9L))

# fix for bug #1294 (combining scanning i and by)
# also see test 88.5 contributed by Johann Hibschman above.
DT = data.table(a=1:12,b=1:2,c=1:4)
test(255, DT[a>5,sum(c),by=b]$V1, c(12L, 7L))

# fix for bug #1301 (all.vars() doesn't appear to find fn in fns[[fn]] usage)
# fns is an ordinary variable of this script (not a column of DT) and is
# subscripted inside j by a column value.
DT = data.table(a=1:6,b=1:2,c=letters[1:2],d=1:6)
fns = list(a=max,b=min)
# Test 256 subscripts fns by position (integer column b); test 257 subscripts
# it by name (character by-column c). Both pick max for group "a" and min for
# group "b".
test(256, DT[,fns[[b[1]]](d),by=c]$V1, c(5L,2L))
test(257, DT[,fns[[c[1]]](d),by=c]$V1, c(5L,2L))
# Rebind fns without names: tests 258-260 subscript it with the numeric column fn.
fns=c(max,min)

# Every ID is unique, so each group is one row and fn is length 1 inside j.
# IDs 1-5 have fn==1 (max, which is SCORE_3 = 30:26); IDs 6-10 have fn==2
# (min, which is SCORE_1 = 6:10).
DT = data.table(ID=1:10, SCORE_1=1:10, SCORE_2=11:20, SCORE_3=30:21, fn=c(rep(1, 5), rep(2, 5)))
test(258, DT[,fns[[fn]](SCORE_1,SCORE_2,SCORE_3),by=ID]$V1, c(30:26,6:10))
test(259, DT[,as.list(fns[[fn]](SCORE_1,SCORE_2,SCORE_3)),by=ID]$V1, c(30:26,6:10))
test(260, DT[,list(fns[[fn]](SCORE_1,SCORE_2,SCORE_3)),by=ID]$V1, c(30:26,6:10))

# fix for bug #1340 - Duplicate column names in self-joins (but print ok)
DT <- data.table(id=1:4, x1=c("a","a","b","c"), x2=c(1L,2L,3L,3L), key="x1")
test(261, DT[DT, allow.cartesian=TRUE][id < i.id]$i.x2, 2L)

# "<-" within j now assigns in the same environment for 1st group, as the rest
# Thanks to Andreas Borg for highlighting on 11 May

# Six rows forming four (x,y) groups; in order of first appearance the groups
# hold rows {1}, {2,4}, {3,5} and {6}.
dt <- data.table(x=c(0,0,1,0,1,1), y=c(0,1,0,1,0,1), z=1:6)
groupInd = 0
# With "<-" in j the counter increases from one group to the next (1,2,3,4) ...
test(262, dt[,list(z,groupInd<-groupInd+1),by=list(x,y)]$V2, c(1,2,2,3,3,4))
# ... but the groupInd defined in this script is left untouched.
test(263, groupInd, 0)
# With "<<-" the per-group values are the same, and this time the script's
# groupInd is updated: it ends at 4, the number of groups.
test(264, dt[,list(z,groupInd<<-groupInd+1),by=list(x,y)]$V2, c(1,2,2,3,3,4))
test(265, groupInd, 4)

# Tests for passing 'by' expressions that evaluate to character column
# names in the edge case of 1 row; the character 'by' vector could
# feasibly be intended to be grouping values. Bug 1404; thanks to Andreas Borg
# for the detailed report, suggested fix and tests.

DT = data.frame(x=1,y="a",stringsAsFactors=FALSE)
DT = as.data.table(DT)
test(266,class(DT$y),"character") # just to check we setup the test correctly
test(267,DT[,sum(x),by=y]$V1,1)
test(268,DT[,sum(x),by="y"]$V1,1)
colvars="y"
test(269,DT[,sum(x),by=colvars]$V1,1)
setkey(DT,y)
test(270,DT[,sum(x),by=key(DT)]$V1,1)

DT = data.table(x=1,y=2)
setkeyv(DT,names(DT))
test(271, DT[,length(x),by=key(DT)]$V1, 1L)

DT = data.table(x=c(1,2,1), y=c(2,3,2), z=1:3)
setkeyv(DT,names(DT))
test(272, DT[,sum(z),by=key(DT)]$V1, c(1L,3L,2L))


# Tests for .BY and implicit .BY
# .BY is a single row, and by variables are now, too. FAQ 2.10 has been changed accordingly.
DT = data.table(a=1:6,b=1:2)
test(273, DT[,sum(a)*b,by=b]$V1, c(9L,24L))
test(274, DT[,sum(a)*.BY[[1]],by=b], data.table(b=1:2,V1=c(9L,24L)))
test(275, DT[,sum(a)*bcalc,by=list(bcalc=b+1L)], data.table(bcalc=2:3,V1=c(18L,36L)))
test(276, DT[,sapply(.SD,sum)*b,by=b], data.table(b=1:2,V1=c(9L,24L)))  # .SD should no longer include b, unlike v1.6 and before
test(277, DT[,sapply(.SD,sum)*bcalc,by=list(bcalc=b+1L)], data.table(bcalc=2:3,V1=c(18L,36L)))  # cols used in by expressions are excluded from .SD, but can still be used in j (by name only and may vary within the group e.g. DT[,max(diff(date)),by=month(date)]
test(278, DT[,sum(a*b),by=list(bcalc=b+1L)], data.table(bcalc=2:3,V1=c(9L,24L)))


# Test x==y where either column contain NA.
DT = data.table(x=c(1,2,NA,3,4),y=c(0,2,3,NA,4),z=1:5)
test(279, DT[x==y,sum(z)], 7L)
# In data.frame the equivalent is :
# > DF = as.data.frame(DT)
# > DF[DF$x==DF$y,]
#       x  y  z
# 2     2  2  2
# NA   NA NA NA
# NA.1 NA NA NA
# 5     4  4  5
# > DF[!is.na(DF$x) & !is.na(DF$y) & DF$x==DF$y,]
#   x y z
# 2 2 2 2
# 5 4 4 5


# Test that 0 length columns are expanded with NA to match non-0 length columns, bug fix #1431
DT = data.table(pool = c(1L, 1L, 2L), bal = c(10, 20, 30))
test(280, DT[, list(bal[0], bal[1]), by=pool], data.table(pool=1:2, V1=NA_real_, V2=c(10,30)))
test(281, DT[, list(bal[1], bal[0]), by=pool], data.table(pool=1:2, V1=c(10,30), V2=NA_real_))
# Test 2nd group too (the 1st is special) ...
test(282, DT[, list(bal[ifelse(pool==1,1,0)], bal[1]), by=pool], data.table(pool=1:2, V1=c(10,NA), V2=c(10,30)))

# More tests based on Andreas Borg's post of 11 May 2011.
DT = data.table(x=INT(0,0,1,0,1,1), y=INT(1,1,0,1,1,1), z=1:6)
ans = data.table(x=c(0L,1L,1L),y=c(1L,0L,1L),V1=c(1L,1L,2L),V2=c(7L,3L,11L))
test(283, DT[,list(sum(x[1], y[1]),sum(z)), by=list(x,y)], ans)
test(284, DT[,list(sum(unlist(.BY)),sum(z)),by=list(x,y)], ans)
groupCols = c("x", "y")
test(285, DT[,list(sum(unlist(.BY)),sum(z)),by=groupCols], ans)
groupExpr = quote(list(x,y))
test(286, DT[,list(sum(unlist(.BY)),sum(z)),by=groupExpr], ans)

# Bug fix from Damian B on 25 June 2011 :
DT = data.table(X=c(NA,1,2,3), Y=c(NA,2,1,3))
setkeyv(DT,c("X","Y"))
test(287, unique(DT), DT)

# Bug fix #1421: using vars in calling scope in j when i is logical or integer.
DT = data.table(A=c("a","b","b"),B=c(4,5,NA))
myvar = 6
test(288, DT[A=="b",B*myvar], c(30,NA))

# Test new feature in 1.6.1 that i can be plain list (such as .BY)
# Inside a grouped j, .BY is used as i to look up the current group's row in
# a small keyed lookup table.
DT = data.table(grp=c("a","a","a","a","b","b","b"),v=1:7)
mysinglelookup = data.table(grp=c("a","b"),s=c(42,84),grpname=c("California","New York"),key="grp")
# NOTE(review): the table is already created with key="grp" on the line above,
# so this setkey() looks redundant - confirm before removing.
setkey(mysinglelookup,grp)
# Group "a": (1+2+3+4)*42 = 420. Group "b": (5+6+7)*84 = 1512.
test(289, DT[,sum(v*mysinglelookup[.BY]$s),by=grp], data.table(grp=c("a","b"),V1=c(420,1512)))
# In v1.6.2 we will change so that single name j returns a vector, regardless of grouping
test(290, DT[,list(mysinglelookup[.BY]$grpname,sum(v)),by=grp], data.table(grp=c("a","b"),V1=c("California","New York"),V2=c(10L,18L)))

# Test user defined attributes are retained, see comment in FR#1006
DT = data.table(a=as.numeric(1:2),b=3:4)
setattr(DT,"myuserattr",42)
setkey(DT,a)   # a is numeric so a change of type to integer occurs, too, via := which checks selfref is ok
test(291, attr(DT,"myuserattr"), 42)

# Test new .N symbol
DT = data.table(a=INT(1,1,1,1,2,2,2),b=INT(3,3,3,4,4,4,4))
test(292, DT[,.N,by=list(a,b)], data.table(a=c(1L,1L,2L),b=c(3L,4L,4L),N=c(3L,1L,3L)))
test(293, DT[,list(a+b,.N),by=list(a,b)],  data.table(a=c(1L,1L,2L),b=c(3L,4L,4L),V1=4:6,N=c(3L,1L,3L)))

# Test that setkey and := syntax really are by reference, even within functions. You
# really do need to take a copy first to a new name; force(x) isn't enough.

# Case 1: the function only force()s its argument and then calls setkey().
# The caller's DT ends up keyed.
DT = data.table(a=1:3,b=4:6)
f = function(x){ force(x)
                 setkey(x) }
f(DT)
test(294,key(DT),c("a","b"))  # The setkey didn't copy to a local variable. Need to copy first to local variable (with a new name) if required.

# Case 2: same shape of function, but using := on the key column a.
# The caller's DT is modified.
f = function(x){ force(x)
                 x[,a:=42L] }
f(DT)
test(295,DT,data.table(a=42L,b=4:6))  # := was by reference (fast) and dropped the key, too, because assigned to key column

# Case 3: the function takes an explicit copy() first, so the caller's DT
# keeps having no key.
DT = data.table(a=1:3,b=4:6)
f = function(x){ x = copy(x)
                 setkey(x) }
f(DT)
test(295.1,key(DT),NULL)
# Case 4: key the caller's DT, then use := and a keyed lookup on a copy of it.
# The J(2) lookup working on the copy shows the key was carried over.
setkey(DT,a)
f = function(x){ x = copy(x)
                 x[,b:=10:12][J(2),b] }   # test copy retains key
test(295.2,f(DT),11L)
test(295.3,DT,data.table(a=1:3,b=4:6,key="a"))  # The := was on the local copy


# new feature added 1.6.3, that key can be vector.
test(296,data.table(a=1:3,b=4:6,key="a,b"),data.table(a=1:3,b=4:6,key=c("a","b")))

# test .SDcols (not speed, just operation)
DT = data.table(grp=1:3,A1=1:9,A2=10:18,A3=19:27,B1=101:109,B2=110:118,B3=119:127,key="grp")
test(297,DT[,list(A1=sum(A1),A2=sum(A2),A3=sum(A3)),by=grp], DT[,lapply(.SD,sum),by=grp,.SDcols=2:4])

DT = data.table(a=1:3,b=4:6)
test(298, {DT$b<-NULL;DT}, data.table(a=1:3))  # delete column
test(299, DT$c <- as.character(DT$c), error="zero length")  # to simulate RHS which could (due to user error) be non NULL but zero length. This copies DT too, so the next test checks that a subsequent := detects and fixes that.
test(299.1, DT[,c:=42L], data.table(a=1:3,c=42L), warning="Invalid .internal.selfref detected and fixed")
test(299.2, truelength(DT)>length(DT))   # the := over-allocated, by 100 by default, but user may have changed default so just check '>'
# FR #2551 - old 299.3 and 299.5 are changed to include length(RHS) > 1 to issue the warning
test(299.3, DT[2:3,c:=c(42, 42)], data.table(a=1:3,c=42L), warning="Coerced 'double' RHS to 'integer' to match the column's type.*length 3 (nrows of entire table)")
# FR #2551 - length(RHS) = 1 - no warning for type conversion
test(299.7, DT[2,c:=42], data.table(a=1:3,c=42L))
# also see tests 302 and 303.  (Ok, new test file for fast assign would be tidier).
test(299.4, DT[,c:=rep(FALSE,nrow(DT))], data.table(a=1:3,c=FALSE))  # replace c column with logical
test(299.5, DT[2:3,c:=c(42,0)], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced 'double' RHS to 'logical' to match the column's type.*length 3 (nrows of entire table)")
# FR #2551 is now changed to fit in / fix bug #5442. Stricter warnings are in place now. Check tests 1294.1-34 below.
test(299.8, DT[2,c:=42], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced 'double' RHS to 'logical' to match")
test(299.9, DT[2,c:=42L], data.table(a=1:3,c=c(FALSE,TRUE,FALSE)), warning="Coerced 'integer' RHS to 'logical' to match")
test(299.6, DT[2:3,c:=c(0L, 0L)], data.table(a=1:3,c=FALSE), warning="Coerced 'integer' RHS to 'logical' to match the column's type.*length 3 (nrows of entire table)")


# Test bug fix #1468, combining i and by.
DT = data.table(a=1:3,b=1:9,v=1:9,key="a,b")
test(300, DT[J(1),sum(v),by=b], data.table(b=c(1L,4L,7L),V1=c(1L,4L,7L),key="b"))
test(300.1, DT[J(1:2),sum(v),by=b], data.table(b=c(1L,4L,7L,2L,5L,8L),V1=c(1L,4L,7L,2L,5L,8L)))

# Test ad hoc by of more than 100,000 levels, see 2nd part of bug #1387 (100,000 from the limit of base::sort.list radix)
# This does need to be this large, like this in CRAN checks, because sort.list(method="radix") has this limit, which
# this tests. But it's well under 10 seconds.
DT = data.table(A=1:10,B=rnorm(10),C=factor(paste("a",1:100010,sep="")))
test(301, nrow(DT[,sum(B),by=C])==100010)
DT = data.table(A=1:10,B=rnorm(10),C=paste("a",1:100010,sep=""))
test(301.1, nrow(DT[,sum(B),by=C])==100010)

# Test fast assign
DT = data.table(a=c(1L,2L,2L,3L),b=4:7,key="a")
DT[2,b:=42L]  # needs to be on its own line to test DT symbol is changed by reference
test(302, DT, data.table(a=c(1L,2L,2L,3L),b=c(4L,42L,6L,7L),key="a"))
DT[J(2),b:=84L]
test(303, DT, data.table(a=c(1L,2L,2L,3L),b=c(4L,84L,84L,7L),key="a"))

# Test key is dropped when non-dt-aware packages reorder rows of data.table (for example)
if ("package:plyr" %in% search()) {
    DT = data.table(a=1:10,b=1:2,key="a")
    test(304, arrange(DT,b), data.table(a=INT(1,3,5,7,9,2,4,6,8,10),b=INT(1,1,1,1,1,2,2,2,2,2)))  # testing no key here, too
} else {
    cat("Test 304 not run. If required call library(plyr) first.\n")
}

# Test that changing colnames keep key in sync.
# TO DO: will have to do this for secondary keys, too, when implemented.
DT = data.table(x=1:10,y=1:10,key="x")
setnames(DT,c("a","b"))
test(305, key(DT), "a")
setnames(DT,"a","R")
test(306, key(DT), "R")

setnames(DT,"b","S")
test(307, key(DT), "R")
setnames(DT,c("a","b"))
test(308, key(DT), "a")
setnames(DT,1,"R")
test(309, key(DT), "R")

# Test :=NULL
DT = data.table(x=1:5,y=6:10,z=11:15,key="y")
test(310, DT[,x:=NULL], data.table(y=6:10,z=11:15,key="y"))  # delete first
test(311, DT[,y:=NULL], data.table(z=11:15))    # deleting key column also removes key
test(312, DT[,z:=NULL], data.table(NULL))      # deleting all
test(313, DT[,a:=1:3], error="") # cannot := a new column to NULL data.table, currently. Must use data.table()
DT = data.table(a=20:22)
test(314, {DT[,b:=23:25];DT[,c:=26:28]}, data.table(a=20:22,b=23:25,c=26:28))   # add in series
test(315, DT[,c:=NULL], data.table(a=20:22,b=23:25))   # delete last
test(316, DT[,c:=NULL], data.table(a=20:22,b=23:25), warning="Adding new column 'c' then assigning NULL")


# Test adding, removing and updating columns via [<- in one step
DT = data.table(a=1:6,b=1:6,c=1:6)
DT[,c("a","c","d","e")] <- list(NULL,11:16,42L,21:26)
test(317, DT, data.table(b=1:6,c=11:16,d=42L,e=21:26))

# Other assignments (covers DT[x==2, y:=5] too, #1502)
DT[e<24,"b"] <- 99L
test(318, DT, data.table(b=c(99L,99L,99L,4L,5L,6L),c=11:16,d=42L,e=21:26))
test(319, DT[b!=99L,b:=99L], data.table(b=99L,c=11:16,d=42L,e=21:26))

# previous within functionality restored, #1498
# within() and transform() are called on a data.table and the results are
# compared with data.tables built directly.
DT = data.table(a=1:10)
# length.out is spelled out in full rather than relying on partial argument
# matching of 'length'; the expected column is the integers 2,4,...,20.
test(320, within(DT, {b <- 1:10; c <- a + b})[,list(a,b,c)], data.table(a=1:10,b=1:10,c=as.integer(seq(2,20,length.out=10))))
# not sure why within makes columns in order a,c,b, but it seems to be a data.frame thing, too.
test(321, transform(DT,b=42L,e=a), data.table(a=1:10,b=42L,e=1:10))
# This two-column DT is also the input to the cbind/rbind tests that follow.
DT = data.table(a=1:5, b=1:5)
test(322, within(DT, rm(b)), data.table(a=1:5))

# check that cbind dispatches on first argument as expected
test(323, cbind(DT,DT), data.table(a=1:5,b=1:5,a=1:5,b=1:5))   # no check.names as from v1.8.0 (now we have :=, cbind is used far less anyway)
test(324, cbind(DT,data.frame(c=1:5)), data.table(a=1:5,b=1:5,c=1:5))
test(325, rbind(DT,DT), data.table(a=c(1:5,1:5),b=1:5))
test(326, rbind(DT,data.frame(a=6:10,b=6:10)), data.table(a=1:10,b=1:10))

# test removing multiple columns, and non-existing ones, #1510
DT = data.table(a=1:5, b=6:10, c=11:15)
test(327, within(DT,rm(a,b)), data.table(c=11:15))
test(328, within(DT,rm(b,c)), data.table(a=1:5))
test(329, within(DT,rm(b,a)), data.table(c=11:15))
test(330, within(DT,rm(b,c,d)), data.table(a=1:5), warning="object 'd' not found")
DT[,c("b","a")]=NULL
test(332, DT, data.table(c=11:15))
test(333, within(DT,rm(c)), data.table(NULL))
DT = data.table(a=1:5, b=6:10, c=11:15)
DT[,2:1]=NULL
test(334, DT, data.table(c=11:15))
test(335, DT[,2:1]<-NULL, error="Attempt to assign to column")

DT = data.table(a=1:2, b=1:6)
test(336, DT[,z:=a/b], data.table(a=1:2,b=1:6,z=(1:2)/(1:6)))
test(337, DT[3:4,z:=a*b], data.table(a=1:2,b=1:6,z=c(1,1,3,8,1/5,2/6)), warning="Coerced 'integer' RHS to 'double' to match the colum")


# test eval of LHS of := (using with=FALSE gives a warning here from v1.9.3)
DT = data.table(a=1:3, b=4:6)
test(338, DT[,2:=42L], data.table(a=1:3,b=42L))
test(339, DT[,2:1:=list(10:12,3L)], data.table(a=3L,b=10:12))
test(340, DT[,"a":=7:9], data.table(a=7:9,b=10:12))
test(341, DT[,c("a","b"):=1:3], data.table(a=1:3,b=1:3))
mycols = "a"
test(342, DT[,(mycols):=NULL], data.table(b=1:3))
mynewcol = "newname"
test(343, DT[,(mynewcol):=21L], data.table(b=1:3,newname=21L))
mycols = 1:2
test(344, DT[,(mycols):=NULL], data.table(NULL))


# It seems that the .Internal rbind of two data.frame coerces IDate to numeric. Tried defining
# "[<-.IDate" as per Tom's suggestion, and c.IDate to no avail (maybe because the .Internal code
# in bind.c doesn't look up package methods?). Anyway, as from 1.8.1, double are allowed in keys, so
# these still work but for a different reason than before 1.8.1: the results are IDate stored as double,
# rather than before when it worked because by and setkey coerced double to integer.
DF = data.frame(x=as.IDate(c("2010-01-01","2010-01-02")), y=1:6)
DT = as.data.table(rbind(DF,DF))
test(345, DT[,sum(y),by=x], {.x=as.IDate(c("2010-01-01","2010-01-02"));mode(.x)="double";data.table(x=.x,V1=c(18L,24L))})
test(346, setkey(DT,x)[J(as.IDate("2010-01-02"))], {.x=as.IDate(rep("2010-01-02",6L));mode(.x)="double";data.table(x=.x,y=rep(c(2L,4L,6L),2),key="x")})

# Test that invalid keys are reset, without user needing to remove key using key(DT)=NULL first
DT = data.table(a=letters[1:3],b=letters[6:4],key="a")
attr(DT,"sorted")="b"  # user can go under the hood
test(347, setkey(DT,b), data.table(a=letters[3:1],b=letters[4:6],key="b"),
          warning="Already keyed by this key but had invalid row order, key rebuilt")

# Test .N==0 with nomatch=NA|0, # tests for #963 added as well
# DT has three rows with a==1 and three rows with a==2; no other value of a exists.
DT = data.table(a=1:2,b=1:6,key="a")
# Per row of i: a==2 matches 3 rows, a==3 matches none.
test(349, DT[J(2:3),.N,nomatch=NA,by=.EACHI]$N, c(3L,0L))
# Without by=.EACHI, .N is a single count over the whole join result.
# nomatch=0 drops the unmatched a==3, leaving the 3 rows for a==2.
test(350, DT[J(2:3),.N,nomatch=0], c(3L))
# Test first .N==0 with nomatch=NA|0
# When nomatch=0 is not given, an unmatched i row still contributes one result
# row, hence 3+1 = 4 here and 1 in test 350.2.
test(350.1, DT[J(2:3),.N], c(4L))
test(350.2, DT[J(4),.N], 1L)
test(350.3, DT[J(4),.N,nomatch=0L], 0L)
test(350.4, DT[J(4:5),.N,nomatch=0L], 0L)
test(350.5, DT[J(0:4),.N,by=.EACHI]$N, c(0L,3L,3L,0L,0L))
# A numeric i made only of zeros selects no rows.
test(350.6, DT[c(0,0,0), .N], 0L)

# Test recycling list() on RHS of :=
DT = data.table(a=1:3,b=4:6,c=7:9,d=10:12)
test(351, DT[,c("a","b"):=list(13:15)], data.table(a=13:15,b=13:15,c=7:9,d=10:12))
test(352, DT[,letters[1:4]:=list(1L,NULL)], data.table(a=c(1L,1L,1L),c=c(1L,1L,1L)))

# Test assigning new levels into factor columns
DT = data.table(f=factor(c("a","b")),x=1:4)
test(353, DT[2,f:="c"], data.table(f=factor(c("a","c","a","b")),x=1:4))
test(354, DT[3,f:=factor("foo")], data.table(f=factor(c("a","c","foo","b")),x=1:4))

# Test growVector logic when adding levels  (don't need to grow levels for character cols)
newlevels = as.character(as.hexmode(1:2000))
DT = data.table(f=factor("000"),x=1:2010)
test(355, DT[11:2010,f:=newlevels], data.table(f=factor(c(rep("000",10),newlevels)),x=1:2010))

DT = data.table(f=c("a","b"),x=1:4)
# Test coercing factor to character column
test(355.5, DT[3,f:=factor("foo")], data.table(f=c("a","b","foo","b"),x=1:4))
test(355.6, DT[4,f:=factor("bar"),verbose=TRUE], data.table(f=c("a","b","foo","bar"),x=1:4), output="Coerced factor to character to match the column")


# See datatable-help post and NEWS item for 1.6.7
DT = data.table(X=factor(letters[1:10]), Y=1:10)
DT$X = "Something Different"
test(356, DT, data.table(X=factor("Something Different",levels=c(letters[1:10],"Something Different")), Y=1:10))

DT = data.table(X=letters[1:10], Y=1:10)
DT$X = "Something Different"
test(356.5, DT, data.table(X="Something Different", Y=1:10))

# Bug fix 1570
DT = data.table(x=1:5,y=1:5)
test(357, DT[x==0, y:=5L], data.table(x=1:5,y=1:5))
test(358, DT[FALSE, y:=5L], data.table(x=1:5,y=1:5))

# Bug fix 1599
DT = data.table(a=1:2,b=1:6)
test(359, DT[,sum(b),by=NULL], data.table(V1=21L))
test(360, DT[,sum(b),by=character(0)], data.table(V1=21L))

# Bug fix 1576 : NULL j results in 'inconsistent types' error
DT = data.table(a=1:3,b=1:9)
ans = data.table(a=c(1L,3L),V1=c(12L,18L))
test(361, DT[,if (a==2) NULL else sum(b),by=a], ans)
test(362, DT[,if (a==2) data.table(NULL) else sum(b),by=a], ans)
test(363, DT[,if (a==2) as.list(NULL) else sum(b),by=a], ans)
test(364, DT[,if (a==2) integer(0) else sum(b),by=a], ans)

# Test that data.table() can create list() columns directly
# NB: test 235 above ('by' when DT contains list columns) created the list column in two steps, no longer necessary
DT = data.table(a=1:2,b=list("h",7:8))
test(365, DT[1,b], list("h"))   # should it be a special case for 1-item results to unlist? Don't think so: in keeping with no drop=TRUE principle
test(366, DT[2,b], list(7:8))
DT = data.table(a=1:4,b=list("h",7:8),c=list(matrix(1:12,3),data.table(a=letters[1:3],b=list(1:2,3.4,"k"),key="a")))
test(367, DT[3,b], list("h"))
test(368, DT[4,b], list(7:8))
test(369, DT[3,c[[1]][2,3]], 8L)
test(370, DT[4,c[[1]]["b",b]][[1]], 3.4)

# Test returning a list() column via grouping
DT = data.table(x=INT(1,1,2,2,2),y=1:5)
test(371, DT[,list(list(unique(y))),by=x], data.table(x=1:2,V1=list(1:2,3:5)))

# Test matrix i is an error
test(372, DT[matrix(1:2,ncol=2)], error="i is invalid type (matrix)")

# Tests from bug fix #1593
DT = data.table(x=letters[1:2], y=1:4)
DT[x == "a", ]$y <- 0L
test(373, DT, data.table(x=letters[1:2], y=c(0L,2L,0L,4L)))
DT = data.table(x=letters[1:2], y=1:4, key="x")
DT["a", ]$y <- 0L
test(374, DT, data.table(x=letters[1:2], y=c(0L,2L,0L,4L), key="x"))
DT = data.table(x=letters[1:2], y=1:4)
DT[c(1,3), ]$y <- 0L
test(375, DT, data.table(x=letters[1:2], y=c(0L,2L,0L,4L)))

# Test unique on unsorted tables (and tolerance on numeric columns, too)
DT = data.table(a=c(2,1,2),b=c(1,2,1))
test(376, unique(DT), data.table(a=c(2,1),b=c(1,2)))
# From the SO thread :
M = matrix(sample(2, 120, replace = TRUE), ncol = 3)
DF = as.data.frame(M)
DT = as.data.table(M)
test(377, as.data.table(unique(DF)), unique(DT))

# Test compatibility with sqldf. sqldf() does a do.call("rbind" with empty input,
# so this tests ..1 when NULL (which was insufficiently list(...)[[1]] in 1.6.6).
# We now test this directly rather than using sqldf, because we couldn't get 'R CMD check'
# past "(converted from warning) closing unused connection 3 (/tmp/RtmpYllyW2/file55822c52)"
test(378, cbind(), NULL)
test(379, rbind(), NULL)

DT = data.table(a=rep(1:3,1:3),b=1:6)
test(380, DT[,{.SD$b[1]=10L;.SD}, by=a], error="locked binding")  # .SD locked for 1st group
test(381, DT[,{if (a==2) {.SD$b[1]=10L;.SD} else .SD}, by=a], error="locked binding") # .SD locked in 2nd group onwards too

# test that direct := is trapped, but := within a copy of .SD is allowed (FAQ 4.5). See also tests 556-557.
test(382, DT[,b:=.N*2L,by=a], data.table(a=rep(1:3,1:3),b=rep(2L*(1:3),1:3)))
test(383, DT[,{z=10L;b:=z},by=a], error=":= and `:=`(...) are defined for use in j, once only and in particular ways")
test(384, DT[,{mySD=copy(.SD);mySD[1,b:=99L];mySD},by=a], data.table(a=rep(1:3,1:3),b=c(99L,99L,4L,99L,6L,6L)))

# somehow missed testing := on logical subset with mixed TRUE/FALSE, reported by Muhammad Waliji
DT = data.table(x=1:2, y=1:6)
test(385, DT[x==1, y := x], data.table(x=1:2,y=c(1L,2L,1L,4L,1L,6L)))
test(386, DT[c(FALSE,TRUE),y:=99L], data.table(x=1:2,y=c(1L,99L,1L,99L,1L,99L)))

# test that column names have the appearance of being local in j (can assign to them ok), bug #1624
DT = data.table(name=c(rep('a', 3), rep('b', 2), rep('c', 5)), flag=FALSE)
test(387, DT[,{flag[1]<-TRUE;list(flag=flag)}, by=name], DT[c(1,4,6),flag:=TRUE])
DT = data.table(score=1:10, name=c(rep('a', 4), rep('b',2), rep('c', 3), 'd'))
test(388, DT[,{ans = score[1]
               score[1] <- -score[1]
               ans
               },by=name],
           data.table(name=letters[1:4],V1=c(1L,5L,7L,10L)))

# Tests 389-394 (character grouping and sorting) now at the start of this file, so that any
# errors elsewhere show up in the last 13 lines displayed by CRAN checks.

# Test unique.data.table for numeric columns within tolerance, for consistency
# with unique.data.frame which does this using paste.
# Column a holds ten values that are all.equal() to 1 but, per test 396, are
# not all exactly identical.
DT = data.table(a=tan(pi*(1/4 + 1:10)),b=42L)
# tan(...) from example in ?all.equal.
test(395, all.equal(DT$a, rep(1,10)))
test(396, length(unique(DT$a))>1)  # 10 unique values on all CRAN machines (as of Nov 2011) other than mac (5 unique)
test(397, unique(DT), DT[1])  # before v1.7.2 unique would return all 10 rows. For stability within tolerance, data.table has its own modified numeric sort.
test(398, duplicated(DT), c(FALSE,rep(TRUE,9)))

DT = data.table(a=c(3.142, 4.2, 4.2, 3.142, 1.223, 1.223), b=rep(1,6))
test(399, unique(DT), DT[c(1,2,5)])
test(400, duplicated(DT), c(FALSE,FALSE,TRUE,TRUE,FALSE,TRUE))

DT[c(2,4,5),a:=NA]
test(401, unique(DT), DT[c(1,2,3,6)])
test(402, duplicated(DT), c(FALSE,FALSE,FALSE,TRUE,TRUE,FALSE))

# Test NULL columns next to non-NULL, #1633
DT = data.table(a=1:3,b=4:6)
test(403, DT[,list(3,if(a==2)NULL else b),by=a], data.table(a=1:3,V1=3,V2=c(4L,NA_integer_,6L)))
test(404, DT[,list(3,if(a==1)NULL else b),by=a], error="Please use a typed empty vector instead.*such as integer.*or numeric")
test(405, DT[,list(3,if(a==1)numeric() else b),by=a], error="Column 2 of result for group.*integer.*double.*types must be consistent for each group")
test(406, DT[,list(3,if(a==1)integer() else b),by=a], data.table(a=1:3,V1=3,V2=c(NA_integer_,5:6)))

# Test that first column can be list, #1640
test(407, data.table(list(1:2,3:5)), as.data.table(list(list(1:2,3:5))))

# With over-allocation, null data.table has truelength 100. Replaced the calls to structure() in the
# code to new null.data.table(), so test internal function. User may have changed default, so this
# doesn't test "100" explicitly.
test(408, null.data.table(), data.table(NULL))
test(408.5, data.table(), data.table(NULL))

# Test that adding a column using := is fully by reference rather than a shallow copy, #1646
DT = data.table(1:2,3:4)  # list vector truelength 100
DT2 = DT
DT2[,y:=10L]
test(409, DT, DT2)
test(410, DT, data.table(1:2,3:4,y=10L))
DT2[1,V1:=99L]
test(411, DT, DT2)
test(412, DT, data.table(c(99L,2L),3:4,y=10L))

# Test that cbind dispatched to data.table() and retains keys
DT = data.table(x=c("a","b"),y=1:4,key="x")
test(413.1, key(cbind(DT,DT)), NULL)  # key dropped because name "x" ambiguous
DT1 = data.table(z = c(1,2), w = 1:4, key = "z")
test(413.2, key(cbind(DT,DT1)), c("x", "z"))
test(413.3, key(cbind(colA=10:13, DT)), "x")   # data.table() dispatched even though 1st argument isn't data.table
test(413.4, key(cbind(colA=10:17, DT)), NULL)  # DT recycled so key is dropped
test(413.5, key(cbind(colA=1, DT)), "x")       # DT not recycled so key retained
test(414.1, key(cbind(DT,as.data.frame(DT1))), "x")

test(414.2, cbind(as.data.frame(DT),DT1), data.frame(DT,DT1))
# cbind(DF,...) should return a data.frame for consistency with base. Package treemap (at least) depends
# on this in the return() in treepalette().
# Use data.table(DF,DT) if a data.table result is required.
 

# Test friendly error when := is used in wrong place
test(415, x:=1, error="defined for use in j, once only and in particular ways")

# Somehow never tested that X[Y] is error if X is unkeyed.
DT = data.table(a=1:3,b=4:6)
test(416, DT[J(2)], error="the columns to join by must be specified either using")

# Test shallow copy warning from := adding a column, and (TO DO) only when X is NAMED.
DT = data.table(a=1:3,b=4:6)
# Asking for a smaller allocation is ignored with a warning; DT is returned as is.
test(417, alloc.col(DT,3), DT, warning="Attempt to reduce allocation from.*to 3 ignored. Can only increase allocation via shallow copy")
# Temporarily set datatable.alloccol to 3 so that the next data.table() gets
# exactly one spare column slot (checked by test 418), then put the user's
# previous value back straight away.
old = getOption("datatable.alloccol")   # search for "r-devel" note in this file why not in one step here
options(datatable.alloccol=3L)
DT = data.table(a=1:3,b=4:6)
options(datatable.alloccol=old)
# DT2 is a second name for DT; tests 419-420 check that the column added via
# DT is seen through DT2 as well.
DT2 = DT
test(418, length(DT)==2 && truelength(DT)==3)
DT[,c:=7L]   # uses final slot
test(419, DT, DT2)
test(420, length(DT)==3 && truelength(DT)==3 && length(DT2)==3 && truelength(DT2)==3)
# No spare slot is left now, so adding a fourth column has to grow the
# allocation; verbose=TRUE makes that visible in the output.
test(421, DT[,d:=8L,verbose=TRUE], output="Growing vector of column pointers from")
test(422, length(DT)==4)
test(423, truelength(DT)>=4)  # with default alloccol, new tl will be 103. But user might have set that higher and then be running test.data.table(), or user might have set alloccol to just ncol(DT)+1. Hence just >=4.

# Test crash bug fixed, #1656, introduced with the 1.7.0 feature
# Column a is a factor with levels "A" and "Z", recycled to A,Z,A,Z. Each step
# below overwrites row 1 of that column, alternating between [<- and :=.
DT <- data.table(a = factor(c("A", "Z")), b = 1:4)
# A character RHS that is an existing level.
DT[1,1] <- "Z"
test(424, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4))
# A numeric RHS is coerced to integer (with a warning) and used as a level
# number: 1 gives "A", restoring the original column. The value of the
# assignment expression itself is the RHS, hence the expected 1.
test(425, DT[1,1] <- 1, 1, warning="Coerced 'double' RHS to 'integer'")
test(426, DT, data.table(a=factor(c("A","Z")),b=1:4))
# Integer 2L is level number 2, i.e. "Z".
DT[1,1] <- 2L
test(427, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4))
# The same two kinds of RHS again, this time through :=.
DT[1,a:="A"]
test(428, DT, data.table(a=factor(c("A","Z","A","Z")),b=1:4))
DT[1,a:=2L]
test(429, DT, data.table(a=factor(c("Z","Z","A","Z")),b=1:4))
# Level numbers outside 1..2 warn and produce NA.
test(430, DT[1,1]<- 3L, NA_integer_, warning="RHS contains 3 which is outside the levels range.*1,2.*of column 1, NAs generated")
test(431, DT[1,1:=4L], data.table(a=factor(c(NA,"Z","A","Z")),b=1:4), warning="RHS contains 4 which is outside the levels range.*1,2.*of column 1, NAs generated")

# simple realloc test
# Only run when the user has not set datatable.alloccol, because test 432
# expects the truelength of a fresh data.table to be exactly 100.
if (is.null(getOption("datatable.alloccol"))) {
    DT = data.table(a=1:3,b=4:6)
    test(432, truelength(DT), 100L)
    # Called for its side effect only: DT is re-allocated without assigning the result.
    alloc.col(DT,200)
    test(433, truelength(DT), 200L)
    DT = alloc.col(DT,300)  # superfluous in this example, but shouldn't fail
    test(434, truelength(DT), 300L)
    # Assigning the result to a new name: both DT and DT2 report the new allocation.
    DT2 = alloc.col(DT,400)
    test(435, truelength(DT), 400L)
    test(436, truelength(DT2), 400L)
}

# test that alloc.col assigns to wherever object is
# DT is created in the script's scope and re-allocated from inside f(), which
# has no local variable or argument called DT.
DT = data.table(a=1:3,b=4:6)
f = function() {
    alloc.col(DT,200)  # DT isn't local so (via inherits=TRUE) it finds in frame above
    invisible()
}
f()
# The re-allocation made inside f() is visible on the script's DT.
test(437, truelength(DT), 200L)

# Quick check that `$<-` and `[<-` over allocate (again) after the copy of length via *tmp*
DT <- data.table(a = 1:3, b = 4:6)
tl <- truelength(DT)
DT$foo <- 7L
test(438, truelength(DT), tl)
DT[, "bar"] <- 8L
test(439, truelength(DT), tl)
test(440, DT, data.table(a = 1:3, b = 4:6, foo = 7L, bar = 8L))

# rbind works by column name now, for consistency with base, FR#1634
DT <- data.table(a = 1:3, b = 4:6)
test(441, rbind(DT, list(a = 4L, b = 7L)), data.table(a = 1:4, b = 4:7))
test(442, rbind(DT, data.frame(a = 4L, b = 7L)), data.table(a = 1:4, b = 4:7))
test(443, rbind(DT, data.table(a = 4L, b = 7L)), data.table(a = 1:4, b = 4:7))
# Same names in a different order: rbind checks column names by default.
# Don't warn here. Add clear documentation instead.
test(444, rbind(DT, list(b = 7L, a = 4L)), data.table(a = 1:4, b = 4:7))
test(445, rbind(DT, data.frame(b = 7L, a = 4L)), data.table(a = 1:4, b = 4:7))
test(446, rbind(DT, data.table(b = 7L, a = 4L)), data.table(a = 1:4, b = 4:7))
# A name that is not present in DT is an error
test(450, rbind(DT, list(c = 4L, a = 7L)), error = "This could be because the items in the list may not ")
test(451, rbind(DT, data.frame(c = 4L, a = 7L)), error = "This could be because the items in the list may not ")
test(452, rbind(DT, data.table(c = 4L, a = 7L)), error = "This could be because the items in the list may not ")
# Unnamed list items
test(453, rbind(DT, list(4L, 7L)), data.table(a = 1:4, b = 4:7))

# New use.names argument in 1.8.0
test(453.1, rbind(DT, list(FOO = 4L, BAR = 7L), use.names = FALSE), data.table(a = 1:4, b = 4:7))
test(453.2, rbind(DT, data.table(b = 4:5, a = 7:8), use.names = FALSE), data.table(a = 1:5, b = 4:8))

# The linked reported bug, #1645: merge(all.y=TRUE) with either column order in the first table
A1 <- data.table(b = "hello", a = "foo", key = "a")
A2 <- data.table(a = c("foo", "bar"), key = "a")
test(454, merge(A1, A2, all.y = TRUE, by = "a"), data.table(a = c("bar", "foo"), b = c(NA, "hello"), key = "a"))
A1 <- data.table(a = "foo", b = "hello", key = "a")
test(455, merge(A1, A2, all.y = TRUE, by = "a"), data.table(a = c("bar", "foo"), b = c(NA, "hello"), key = "a"))

# Mixing nomatch=0 and mult="last", bug #1661
DT <- data.table(id = c(1L, 2L, 2L, 3L), val = 1:4, key = "id")
test(456, DT[J(c(1, 2, 4)), mult = "last", nomatch = 0], data.table(id = 1:2, val = c(1L, 3L), key = "id"))

# Join inherited scope respects nomatch=0, #1663
DT2 <- data.table(id = c(1L, 2L, 4L), val2 = c(11, 12, 14), key = "id")
test(457, DT[DT2, list(val, val2), nomatch = 0, by = .EACHI], data.table(id = c(1L, 2L, 2L), val = 1:3, val2 = c(11, 12, 12), key = "id"))

# bysameorder edge cases, #1631
DT <- data.table(a = 1:3, v = 4:9, key = "a")
test(458, DT[, sum(v), by = list(a %% 2L)], data.table(a = c(1L, 0L), V1 = c(26L, 13L)))
test(459, DT[, list(sum(v)), list(ifelse(a == 2, NA, 1L))], data.table(ifelse = c(1L, NA_integer_), V1 = c(26L, 13L)))
test(460, DT[, list(sum(v)), list(ifelse(a == 2, 1, NA))], data.table(ifelse = c(NA_real_, 1), V1 = c(26L, 13L)))
test(461, DT[, sum(v), by = a], data.table(a = 1:3, V1 = c(11L, 13L, 15L), key = "a"))

# Loading from file (which resets tl to 0 in R 2.14.0+, and leaves an uninitialized random number in 2.13.2-)
f <- tempfile()
save(list = "DT", file = f)
load(f)
test(462, DT[, foo := 10L], data.table(a = 1:3, v = 4:9, foo = 10L, key = "a"))
unlink(f)

# CJ problems with v1.7.4, #1689
test(463, all(sapply(CJ(1:2, 1:3), length) == 6L))
DT <- data.table(x = 1:4, y = 1:2, cnt = 1L, key = "x,y")
test(464, DT[CJ(1:4, 1:4)]$cnt, INT(1, rep(NA, 4), 1, NA, NA, 1, rep(NA, 4), 1, NA, NA))
test(465, DT[CJ(1:4, 1:4), sum(cnt > 0), by = .EACHI]$y, rep(1:4, 4))
# CJ of two factors, the second one having unused levels
f1 <- factor(c("READING", "MATHEMATICS"))
f2 <- factor(c("2010_2011", "2009_2010", "2008_2009"), levels = paste(2006:2010, 2007:2011, sep = "_"))
test(466, all(sapply(CJ(f1, f2), length) == 6L))

# list(.SD, newcol=..) in j gives an error that carries guidance
DT <- data.table(a = 1:2, v = 3:6)
test(467, DT[, list(newcol = 7L, .SD), by = a], error = "Error.*use := by group instead")

# Empty list column
DT <- data.table(a = 1:3, b = 4:6)
test(468, DT[, foo := list()], data.table(a = 1:3, b = 4:6, foo = list()))
# Plonk a list
test(469, DT[, bar := list(1, "a", 3.14)], data.table(a = 1:3, b = 4:6, foo = list(), bar = list(1, "a", 3.14)))
# Plonk a list held in a variable (to catch deparse treating j=list() specially)
x <- list(2, "b", 2.718)
test(470, DT[, baz := x], data.table(a = 1:3, b = 4:6, foo = list(), bar = list(1, "a", 3.14), baz = list(2, "b", 2.718)))
# Recycling a list
DT <- data.table(a = 1:4, b = 5:8)
test(471, DT[, foo := list("a", 2:3)], data.table(a = 1:4, b = 5:8, foo = list("a", 2:3, "a", 2:3)))
# Recycling a singleton list
DT[, foo := NULL]
test(472, DT[, foo := list(list(2:3))], data.table(a = 1:4, b = 5:8, foo = list(2:3, 2:3, 2:3, 2:3)))

# Adding a new column with a recycled factor, #1691
DT <- data.table(a = 1:4, b = 5:8)
DT[, c := factor("a")]
test(473, DT, data.table(a = 1:4, b = 5:8, c = factor(c("a", "a", "a", "a"))))
DT[, d := factor(c("a", "b"))]
test(474, DT, data.table(a = 1:4, b = 5:8, c = factor(c("a", "a", "a", "a")), d = factor(c("a", "b", "a", "b"))))

# Scoping error introduced at 1.6.1: unique(DT) when the key column is 'x'
DT <- data.table(x = c("a", "a", "b", "b"), y = c("a", "a", "b", "b"), key = "x")
test(475, unique(DT), data.table(x = c("a", "b"), y = c("a", "b"), key = "x"))

# Character and list columns in tables with many small groups
N <- if (.devtesting) 1000L else 100L
DT <- data.table(grp = 1:(2 * N), char = sample(as.hexmode(1:N), 4 * N, replace = TRUE), int = sample(1:N, 4 * N, replace = TRUE))
ans <- DT[, list(p = paste(unique(char), collapse = ","),
                 i = list(unique(int))),
          by = grp]
# The as.matrix triggers the "'getCharCE' must be called on a CHARSXP", or similar
# symptom of earlier corruption, before fix in dogroups.c.
test(476, nrow(as.matrix(ans)), 2L * N)

# Plonking from calling scope works, even after removing the source variable,
# and column copy via := is ok too.
DT <- data.table(a = 1:3)
foo <- 4:6
DT[, foo := foo]
rm(foo)
gc()
DT[, foo2 := foo]
DT[2, foo := 10L]
DT[3, foo2 := 11L]
gc()
test(477, DT, data.table(a = 1:3, foo = c(4L, 10L, 6L), foo2 = c(4L, 5L, 11L)))
test(478, DT[, foo := foo], DT)  # does nothing, with no warning, consistent with base R `a<-a`.

# Recycling now works with oversized inputs and % != 0 length, both with warnings.
DT <- data.table(x = 1:4)
test(479, DT[, a := 5:7], data.table(x = 1:4, a = c(5:7, 5L)), warning = "Supplied 3 items to be assigned to 4 items of column 'a' (recycled leaving remainder of 1 items)")

# Multiple columns can be added in one call
DT <- data.table(x = 1:4)
test(481, DT[, c("foo", "bar") := list(10L, 11:14)], data.table(x = 1:4, foo = 10L, bar = 11:14))

# ... and an update can be combined with an add in one step
test(482, DT[, c("foo", "baz") := list(12L, 15:18)], data.table(x = 1:4, foo = 12L, bar = 11:14, baz = 15:18))

# Errors in := do not leave DT in a bad state, #1711
DT <- data.table(x = 1:4)
test(483, DT[, c("foo", "bar") := list(20L, numeric())], error = "RHS of assignment to new column.*bar.*is zero length but not empty list")
test(484, DT, data.table(x = 1:4))  # i.e. DT as it was before, without foo being added as it did in v1.7.7-

# i's key is longer than x's
d1 <- data.table(a = 1:2, b = 11:14, key = "a,b")
d2 <- data.table(A = 0:1, B = 1:4, key = "A")
test(485, d2[d1, allow.cartesian = TRUE], data.table(A = INT(1, 1, 1, 1, 2, 2), B = INT(2, 4, 2, 4, NA, NA), b = INT(11, 11, 13, 13, 12, 14), key = "A"))
test(486, d2[d1, sum(B), by = .EACHI], data.table(A = INT(1, 1, 2, 2), V1 = INT(6, 6, NA, NA), key = "A"))  # no allow.cartesian needed due to by-without-by

# reshape() to wide format on a data.table; only run when package:reshape is attached.
if ("package:reshape" %in% search()) {
    # The data.table() around reshape is to drop reshape's attributes.
    DT <- data.table(ID = rep(1:3, each = 3), TIME = rep(1:3, 3), X = 1:9)
    test(487, data.table(reshape(DT, idvar = "ID", timevar = "TIME", direction = "wide")),
              data.table(ID = 1:3, X.1 = INT(1, 4, 7), X.2 = INT(2, 5, 8), X.3 = INT(3, 6, 9)))
    DT <- data.table(ID = rep(1:3, each = 3), TIME = rep(1:3, 3), X = 1:9, Y = 10:18)
    test(488, data.table(reshape(DT, idvar = "ID", timevar = "TIME", direction = "wide")),
              data.table(ID = 1:3, X.1 = INT(1, 4, 7), Y.1 = INT(10, 13, 16), X.2 = INT(2, 5, 8), Y.2 = INT(11, 14, 17), X.3 = INT(3, 6, 9), Y.3 = INT(12, 15, 18)))
} else {
    cat("Tests 487 and 488 not run. If required call library(reshape) first.\n")
}

# Warnings for names<- and colnames<-, but only warnings when caller is data.table aware.
# No warning is expected from R 3.1.0 onwards; before that the message asks to upgrade.
DT <- data.table(a = 1:3, b = 4:6)
test(489, names(DT)[1] <- "A", "A", warning = if (base::getRversion() >= "3.1.0") NULL else "Please upgrade")
test(490, names(DT), c("A", "b"))
test(491, colnames(DT)[2] <- "B", "B", warning = if (base::getRversion() >= "3.1.0") NULL else "Please upgrade")
test(492, names(DT), c("A", "B"))

# setnames out of bounds errors
test(493, setnames(DT, "foo", "bar"), error = "not found.*foo")
test(494, setnames(DT, 3, "bar"), error = "outside range.*3")

# New function setcolorder(): reorder by position, by name, and the error cases
DT <- data.table(a = 1:2, b = 3:4, c = 5:6)
test(495, setcolorder(DT, c(2, 1, 3)), data.table(b = 3:4, a = 1:2, c = 5:6))
test(496, setcolorder(DT, c(2, 1, 3)), data.table(a = 1:2, b = 3:4, c = 5:6))
test(497, setcolorder(DT, c("c", "a", "b")), data.table(c = 5:6, a = 1:2, b = 3:4))
test(498, setcolorder(DT, "a"), error = "neworder is length")
test(498.1, setcolorder(DT, c("d", "a", "b")), error = "Names in neworder not found in x: d")


# The first group listens to nomatch when j uses join inherited scope.
x <- data.table(x = c(1, 3, 8), x1 = 10:12, key = "x")
y <- data.table(x = c(3, 8, 10), y1 = 10:12, key = "x")
test(499, y[x, x1, nomatch = 0, by = .EACHI], data.table(x = c(3, 8), x1 = 11:12, key = "x"))
test(500, y[x, x1, nomatch = NA, by = .EACHI], data.table(x = c(1, 3, 8), x1 = 10:12, key = "x"))

# Merge bug of unkeyed tables introduced in 1.6.8 and 1.6.9 reported by Eric, and ...
# (factor join column first, compared against merge on the equivalent data.frames)
dt1 <- data.table(l = factor(c("a", "b", "a", "b")))
dt2 <- data.table(l = factor(c("a", "b")), L = factor(c("A", "B")))
test(501, setkey(merge(dt1, dt2, by = "l"), NULL), as.data.table(merge(as.data.frame(dt1), as.data.frame(dt2), by = "l")))

# the same again with character columns
dt1 <- data.table(l = c("a", "b", "a", "b"))
dt2 <- data.table(l = c("a", "b"), L = c("A", "B"))
test(501.5, setkey(merge(dt1, dt2, by = "l"), NULL), as.data.table(merge(as.data.frame(dt1), as.data.frame(dt2), by = "l")))

# ... similar example from DM
dtA <- data.table(i = 1:8, j = rep(1:2, 4), k = rep(1:4, 2), A = 10:17)
dtB <- data.table(j = rep(1:2, 2), k = 1:4, B = 18:21)
test(502, merge(dtA, dtB, by = c("j", "k"), all.x = TRUE),
          data.table(j = rep(1:2, each = 4), k = rep(INT(1, 3, 2, 4), each = 2), i = INT(1, 5, 3, 7, 2, 6, 4, 8),
                     A = INT(10, 14, 12, 16, 11, 15, 13, 17), B = rep(INT(18, 20, 19, 21), each = 2), key = "j,k"))
test(503, dtA$i, 1:8)  # check that merge didn't change the order of dtA by reference
test(504, dtB$k, 1:4)  # or dtB

# New i. JIS prefix in 1.7.10
DT <- data.table(a = 1:2, b = 1:4, key = "a")
test(505, DT[J(a = 1, b = 6), sum(i.b * b), by = .EACHI]$V1, 24)  # 24 now 'double' because i.b is 'double'

# := after a key<-
DT <- data.table(a = 3:1, b = 4:6)
test(506, key(DT) <- "a", "a", warning = "can copy the whole table")
test(508, DT, data.table(a = 1:3, b = 6:4, key = "a"))
test(509, DT[, b := 10L], data.table(a = 1:3, b = 10L, key = "a"))
# 510 used to be warning about invalid .internal.selfref detected and fixed. As from v1.8.3 data.table()
# returns a NAMED==0 object, and key<- appears not to copy that. But within functions, key<- would
# still copy. TO DO: add tests....
test(510, DT[, c := 11L], data.table(a = 1:3, b = 10L, c = 11L, key = "a"))
#test(511,)

# New functions chmatch and %chin%, compared against base match and %in%
y <- letters
x <- c(sample(letters, 12), "foo", "bar")
test(512, chmatch(x, y), match(x, y))
test(513, chmatch(x, y, nomatch = 0), match(x, y, nomatch = 0))
test(514, x %chin% y, x %in% y)

# New function set() in v1.8.0
DT <- data.table(a = 1:3, b = 4:6)
test(515, set(DT, 2, 1, 3), data.table(a = c(1L, 3L, 3L), b = 4:6), warning = "Coerced i")
test(516, set(DT, "2", 1, 3), error = "i is type 'character'")
test(517, set(DT, 2L, 1, 3), DT, warning = "Coerced j")
# FR #2551 implemented - removed warning from 518
# test(518, set(DT,2L,1L,3), DT, warning="Coerced 'double' RHS to 'integer'")
test(518, set(DT, 2L, 1L, 3), DT)
test(519, set(DT, 2L, 1L, 3L), data.table(a = INT(1, 3, 3), b = 4:6))
test(520, set(DT, 2L, "a", 2L), data.table(a = 1:3, b = 4:6))
test(521, set(DT, 2:3, "b", 7:8), data.table(a = 1:3, b = INT(4, 7, 8)))
test(522, set(DT, 2L, "foo", 7L), data.table(a = 1:3, b = INT(4, 7, 8), foo = INT(NA, 7, NA))) # error="foo.*is not a column name[.] Cannot add columns with set.*use := instead")
test(523, set(DT, 2L, c("a", "a"), list(9L, 10L)), error = "Can't assign to the same column twice in the same query (duplicates detected).")
test(523.1, set(DT, 2L, "a", 10L), data.table(a = INT(1, 10, 3), b = INT(4, 7, 8), foo = INT(NA, 7, NA)))
# From here DT is keyed by b: 524 expects the key to be kept after assigning to a,
# 525 expects no key after assigning to b.
setkey(DT, b)
test(524, set(DT, 2L, "a", 2L), data.table(a = 1:3, b = INT(4, 7, 8), foo = INT(NA, 7, NA), key = "b"))
test(525, set(DT, 1L, "b", 6L), data.table(a = 1:3, b = 6:8, foo = INT(NA, 7, NA)))
test(525.1, set(DT, j = "b", value = 9:11), data.table(a = 1:3, b = 9:11, foo = INT(NA, 7, NA))) # plonk syntax via missing i (fixed in 1.8.1)
test(525.2, set(DT, NULL, "b", 12:14), data.table(a = 1:3, b = 12:14, foo = INT(NA, 7, NA)))  # plonk syntax via NULL i

# NEW ADDITIONAL TESTS FOR set() - bug #2077 - for using set to add columns by reference.
# DT1 is modified with set(); DT2 is a copy modified with := and used as the expected result.
DT1 <- data.table(x = 1, y = 1:10, fac = sample(LETTERS[1:3], 10, replace = TRUE)) # from SO
DT2 <- copy(DT1)
mul <- c(5.3, 2.8)
for (j in seq_along(mul)) {
    set(DT1, i = NULL, j = paste("dot", j, sep = ""), mul[j] * DT1[[j]])
}
DT2[, `:=`(dot1 = 5.3 * x, dot2 = 2.8 * y)]
test(1096.1, DT1, DT2)
set(DT1, i = NULL, j = "dot2", value = NULL) # remove "dot2"
test(1096.2, DT1, DT2[, list(x, y, fac, dot1)])
DT2[, dot2 := NULL][5:9, `:=`(bla1 = 0L, x = 3L, bla2 = 2L)]
set(DT1, i = 5:9, j = c("bla1", "x", "bla2"), value = list(0L, 3L, 2L))
test(1096.3, DT1, DT2) # more testing with many columns including existing columns
test(1096.4, set(DT1, i = NULL, j = 7L, value = 5L), error = "Item 1 of column numbers in j is 7 which is outside range.*1.*6.*Use column names instead in j to add new columns.")

# data.frame incompatibility is fixed, came to light in Feb 2012
DT <- data.table(name = c("a", "b", "c"), value = 1:3)
# base:: because we'll implement a fast droplevels, too.
test(526, base::droplevels(DT[name != "a"]), data.table(name = c("b", "c"), value = 2:3))

# nlme::groupedData on a data.table must give the same result as on the
# equivalent data.frame, once the class, FUN and .internal.selfref attributes
# have been dropped. Only run when package:nlme is attached; otherwise say so,
# in the same way as the reshape tests (487 and 488) do, rather than skipping
# the test silently.
if ("package:nlme" %in% search()) {
    # commented out to be consistent with base R, as #1078 and #1128 are more common cases..
    # until we can find a workaround for this, I'm commenting this one..
    # Search for "Fix for #1078" for the tests..
    # test(527, {x=Orthodont;tt=lme(distance ~ age, data=x); tt[["data"]]=NULL; tt},
    #           {x=as.data.table(Orthodont);tt=lme(distance ~ age, data=x);tt[["data"]]=NULL;tt})
    test(528,
         {
             x <- iris
             tt <- groupedData(Sepal.Length ~ Sepal.Width | Species, data = x)
             attr(tt, "class") <- NULL
             attr(tt, "FUN") <- NULL
             tt
         },
         {
             x <- as.data.table(iris)
             tt <- groupedData(Sepal.Length ~ Sepal.Width | Species, data = x)
             attr(tt, "class") <- NULL
             attr(tt, "FUN") <- NULL
             attr(tt, ".internal.selfref") <- NULL
             tt
         })
} else {
    cat("Test 528 not run. If required call library(nlme) first.\n")
}

# Speed test of chmatch vs match; only run when .timingtests is TRUE.
# sortedmatch was 40 times slower and the wrong approach, removed in v1.8.0.
# Example from Tom in Jan 2011 who first found and raised the issue with sortedmatch.
if (.timingtests) {
    cat("Running 30sec (max) test ...")
    flush.console()
    n <- 1e6
    a <- as.character(as.hexmode(sample(n, replace = TRUE)))
    b <- as.character(as.hexmode(sample(n, replace = TRUE)))
    test(529, system.time(ans1 <- match(a, b))["user.self"] > system.time(ans2 <- chmatch(a, b))["user.self"])
    test(530, ans1, ans2)
    # sorting a and b no longer makes a difference since both match and chmatch work via hash in some way or another
    cat("done\n")
}

# .set_row_names() is maintained on .SD for each group
DT <- data.table(a = INT(1, 1, 2, 2, 2, 3, 3, 3, 3), b = 1:9)
test(531, DT[, length(rownames(.SD)), by = a], data.table(a = 1:3, V1 = 2:4))

# Column names with spaces, bug#1880, and check.names default is now FALSE, too.
# Thanks to Yang Zhang for the tests.
DT <- data.table("a b" = INT(1, 1, 2, 2, 2), c = 1:5)
test(532, DT[, sum(c), by = "a b"], data.table("a b" = 1:2, V1 = c(3L, 12L)))
test(533, names(data.table("a b" = 1)[, list("c d" = `a b`)]), "c d")
test(534, names(transform(data.table("a b" = 1), `c d` = `a b`)), c("a b", "c d"))

# keyby, new in v1.8.0
DT <- data.table(a = INT(1, 3, 1, 2, 3, 2), b = 1:2, c = 1:3, v = 1:6)
test(535, DT[, sum(v), by = a, keyby = a], error = "not both")
test(536, DT[, sum(v), by = a], data.table(a = c(1L, 3L, 2L), V1 = c(4L, 7L, 10L)))  # retains appearance order
ans <- data.table(a = 1:3, V1 = c(4L, 10L, 7L), key = "a")
test(537, DT[, sum(v), keyby = a], ans)
test(538, DT[, sum(v), keyby = "a"], ans)
# keyby given through eval() of a character variable, then of a quoted expression
var <- "a"
test(539, DT[, sum(v), keyby = eval(var)], ans)
a <- quote(a %% 2L)
test(540, DT[, sum(v), by = eval(a)], data.table(a = 1:0, V1 = c(11L, 10L)))
test(541, DT[, sum(v), keyby = eval(a)], data.table(a = 0:1, V1 = c(10L, 11L), key = "a"))

# several keyby columns: character vector, single comma separated string, and the invalid mix of both
test(542, DT[, sum(v), keyby = c("a", "b", "c")]$V1, INT(1, 3, 4, 6, 5, 2))
test(543, DT[, sum(v), keyby = "a,b,c"]$V1, INT(1, 3, 4, 6, 5, 2))
test(544, DT[, sum(v), keyby = c("a", "b,c")], error = "but one or more items include a comma")

# Single expressions passed to by, FR#1743 in v1.8.0
DT <- data.table(a = 1:4, date = as.IDate("2012-02-28") + 0:3, v = 5:8)
test(545, DT[, sum(v), by = a %% 2L], data.table(a = 1:0, V1 = c(12L, 14L)))
test(546, DT[, sum(v), by = month(date)], data.table(month = 2:3, V1 = c(11L, 15L)))

# Factor levels no longer need to be sorted, and the 'ordered' class is retained.
# Posted by Allan Engelhardt ...
x <- factor(LETTERS[1:3], levels = rev(LETTERS), ordered = TRUE)
DT <- data.table(A = x, B = x, v = 1:3, key = "A")
test(547, is.ordered(DT$A) && is.ordered(DT$B))
# msg back to i.V1 after a change to i.A for FR #2693. That still works, just differently which no longer overwrites names(i)
test(548.1, DT["A", v, verbose = TRUE], output = "Coercing character column i.'V1' to factor to match type of x.'A'")
test(548.2, DT["A", v], 1L)
# Posted by Damian Betebenner ...
set.seed(123)
my.course.sample <- sample(1:5, 10, replace = TRUE)
Y <- factor(my.course.sample, levels = 1:5, labels = c("Basic Math", "Calculus", "Geometry", "Algebra I", "Algebra II"))
DT <- data.table(ID = 1:10, COURSE = Y)
test(549, DT[, sum(ID), by = COURSE]$V1, INT(1, 2, 29, 17, 6))
setkey(DT, COURSE)
test(550, DT[, sum(ID), by = key(DT)]$V1, INT(6, 1, 29, 2, 17))

# Another test of DT[i] syntax from datatable-unaware packages, #1794 from ilprincipe.
# DF is a 16-row data.frame written out literally via structure(); it is converted
# to a data.table and reshaped to wide format, which is expected to give 8 rows.
DF = structure(list(sample = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("panel.1yr", "panel.2yr", "panel.3yr", "panel.inc", "pre.inc", "pre.prev", "post.inc", "post.prev"), class = "factor"), base = c(2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2002, 2002, 2002, 2002, 2002, 2002, 2002, 2002), ref = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("2004", "2002-2004", "2001", "2000", "2009", "2008"), class = "factor"), var = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("distance", "time"), class = "factor"), treated = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1), distance = c(10000, 30000, 50000, 1e+05, 10000, 30000, 50000, 1e+05, 10000, 30000, 50000, 1e+05, 10000, 30000, 50000, 1e+05), all = c(602L, 6357L, 8528L, 9272L, 435L, 2438L, 3456L, 6360L, 245L, 2693L, 3699L, 4084L, 187L, 983L, 1400L, 2660L), di.recip = c(5L, 39L, 57L, 62L, 4L, 16L, 22L, 45L, 2L, 25L, 36L, 37L, 1L, 11L, 16L, 35L), irr = c(0.00830564784053156, 0.00613496932515337, 0.00668386491557223, 0.00668679896462468, 0.00919540229885057, 0.00656275635767022, 0.00636574074074074, 0.00707547169811321, 0.00816326530612245, 0.00928332714444857, 0.0097323600973236, 0.00905974534769833, 0.0053475935828877, 0.0111902339776195, 0.0114285714285714, 0.0131578947368421)), .Names = c("sample", "base", "ref", "var", "treated", "distance", "all", "di.recip", "irr"), row.names = c(NA, 16L), class = "data.frame")
DT = as.data.table(DF)
# Wide reshape on `treated`, with one row per (sample, var, distance) combination
test(551, nrow(reshape(DT, v.names = c("all", "di.recip", "irr"),
              timevar = "treated", idvar = c("sample", "var", "distance"),
              direction = "wide" )), 8L)

# Bug report #1275 from S Bagley :
DT <- data.table(a = c("1", "1"), b = c(2, 2))
test(552, is.character(DT$a))
test(553, unique(DT), data.table(a = "1", b = 2))

# Bug #1726 from Ivan Zhang.
DT <- data.table(V1 = c("a", "b", "a"), V2 = c("hello", "ello", "llo"))
test(554, nrow(DT[V1 == "a" & V2 %like% "ll"]), 2L)
test(555, nrow(DT[V1 == "a" & V2 %like% "ello"]), 1L)

# Can't := to .SD, #1727; neither directly in j nor through a function that receives .SD
DT <- data.table(x = 1:5, y = rnorm(5))
test(556, DT[, .SD[, z := rnorm(1)], by = x], error = "[.]SD is locked.*reserved for possible future use")
f <- function(.SD) {
    .SD[, z := rnorm(1)]
}
test(557, DT[, f(.SD), by = x], error = "[.]SD is locked.*reserved for possible future use")

# Printing of a nested data.table (list column holding vectors and a data.table), bug #1803
DT <- data.table(x = letters[1:3], y = list(1:10, letters[1:4], data.table(a = 1:3, b = 4:6)))
test(558, capture.output(print(DT)),
          c("   x            y", "1: a 1,2,3,4,5,6,", "2: b      a,b,c,d", "3: c <data.table>"))
# y is symbol representing list column, specially detected in dogroups
test(559, setkey(DT, x)["a", y][[1]], 1:10)

# Renaming of .N to N in results, and the error when a column is itself called '.N'
DT <- data.table(a = INT(1, 1, 2, 2, 2), b = INT(1, 2, 2, 2, 1))
test(560.1, DT[, .N, a][, .N], 2L)
test(560.2, DT[, .N, a][, N], 2:3)
test(561, DT[, .N, a][, N], 2:3)
test(562, DT[, list(.N), a][, N], 2:3)
test(563, DT[, .N, a][, unique(.N), a]$V1, c(1L, 1L))
test(564, DT[, .N, a][, unique(N), a]$V1, 2:3)
test(565, DT[, .N, a][N > 2], data.table(a = 2L, N = 3L))
test(566, DT[, list(.N = .N), a][.N > 2], data.table(a = 2L, .N = 3L))
test(567, DT[, .N, list(a, b)][, N, by = a]$N, c(1L, 1L, 2L, 1L))
test(568, DT[, .N, list(a, b)][, unique(N), by = a]$V1, c(1L, 2L, 1L))
test(569, DT[, list(.N = .N), list(a, b)][, .N, a], error = "The column '.N' can't be grouped because")
test(570, DT[, list(.N = .N), list(a, b)][, unique(.N), a], error = "The column '.N' can't be grouped because")

# Spaces in by="..." format, datatable-help on 31 March
DT <- data.table("a " = 1:2, "b" = 3:4, " b" = 5:6, v = 1:6)
test(571, DT[, sum(v), by = "b, b"], data.table("b" = 3:4, " b" = 5:6, V1 = c(9L, 12L)))
test(572, DT[, sum(v), by = "a , b"], data.table("a " = 1:2, " b" = 5:6, V1 = c(9L, 12L)))
test(573, DT[, sum(v), by = "b, a"], error = "object ' a' not found")

# base::unname, used by melt, and only supported by data.table for DF compatibility for non-dtaware packages
DT <- data.table(a = 1:3, b = 4:6)
test(574, dim(unname(DT)), 3:2)

# CJ retains explicit names (useful if used independently)
test(575, CJ(x = c(1L, 2L), y = c("a", "b")), data.table(x = c(1L, 1L, 2L, 2L), y = c("a", "b", "a", "b"), key = "x,y"))
test(576, CJ(c(1L, 2L), y = c("a", "b")), data.table(V1 = c(1L, 1L, 2L, 2L), y = c("a", "b", "a", "b"), key = "V1,y"))
test(577, CJ(x = c(1L, 2L), c("a", "b")), data.table(x = c(1L, 1L, 2L, 2L), V2 = c("a", "b", "a", "b"), key = "x,V2"))

# Factor to character join when the factor contains unused and reverse order levels :
X <- data.table(a = LETTERS[1:4], v = 1:4, key = "a")
Y <- data.table(a = factor(c("D", "B"), levels = rev(LETTERS)), key = "a")
test(578, X[Y, verbose = TRUE], output = "Coercing factor column i.'a' to character to match type of x.'a'")
test(579, X[Y], data.table(a = c("D", "B"), v = c(4L, 2L)))

# Logical i in set() returns a helpful error
DT <- data.table(a = 1:3, b = 4:6)
test(580, set(DT, a < 3, "b", 0L), error = "simply wrap with which(), and take the which() outside the loop if possible for efficiency")

# by on empty tables (and when i returns no rows), #1945
DT <- data.table(a = 1:3, v = 1:6)
test(581, DT[a < 1, sum(v), by = a], data.table(a = integer(), V1 = integer()))
test(582, DT[a < 1, sum(v), by = list(a)], data.table(a = integer(), V1 = integer()))
test(583, DT[a < 1], DT[0])
# printed form of empty tables
test(584, DT[a < 1], output = "Empty data.table (0 rows) of 2 cols: a,v")
test(585, DT[a < 1, list(v)], output = "Empty data.table (0 rows) of 1 col: v")
test(586, data.table(a = integer(), V1 = integer()), output = "Empty data.table (0 rows) of 2 cols: a,V1")

# .N is available in by on an empty table, also in #1945
test(587, DT[a < 1, list(sum(v), .N), by = a], data.table(a = integer(), V1 = integer(), N = integer()))

# Realised that DT[NULL] returned an error.
test(588, DT[NULL], data.table(NULL))

# .N, .SD and .BY are available when by is missing and when by is 0 length
DT <- data.table(x = rep(1:3, each = 3), y = c(1, 3, 6), v = 1:9)
test(589, DT[, sapply(.SD, sum) * .N], c(x = 162, y = 270, v = 405))
test(590, DT[, sapply(.SD, sum) * .N, by = NULL], data.table(V1 = c(162, 270, 405)))
test(591, DT[, sapply(.SD, sum) * .N, by = character()], data.table(V1 = c(162, 270, 405)))
test(592, DT[, sapply(.SD, sum) * .N, by = ""], data.table(V1 = c(162, 270, 405)))
test(593, DT[, lapply(.SD, sum)], data.table(x = 18L, y = 30, v = 45L))    # bug fix #2263 in v1.8.3: now data.table result for consistency
test(594, DT[, lapply(.SD, sum), by = NULL], data.table(x = 18L, y = 30, v = 45L))
test(595, DT[, lapply(.SD, sum), by = character()], data.table(x = 18L, y = 30, v = 45L))
test(596, DT[, lapply(.SD, sum), by = ""], data.table(x = 18L, y = 30, v = 45L))

# Keys of two numeric columns, bug#2004
DT <- data.table(x = 0.0, y = c(0.0, 0.1, 0.0, 0.2, 0.0))
test(597, unique(DT), DT[c(1, 2, 4)])
test(598, DT[, list(count = .N), by = c("x", "y")], data.table(x = 0.0, y = c(0.0, 0.1, 0.2), count = c(3L, 1L, 1L)))

# And that numeric NAs sort stably to the beginning. Whether NAs are allowed in keys, another issue but
# ordernumtol needs to deal with NA anyway for ad hoc by and unique.
DT <- data.table(c(1.34, 1.34, 1.34,   NA, 2.22, 2.22, 1.34, NA,  NA, 1.34, 0.999),
                 c(75.1,   NA, 75.1, 75.1,  2.3,  2.4,  2.5, NA, 1.1,   NA, 7.9))
test(599, DT[c(8, 9, 4, 11, 2, 10, 7, 1, 3, 5, 6)], setkey(setkey(DT), NULL))

# After keying by (x, y): x is sorted, y on its own is not, and base::order agrees with the row order
set.seed(1)
DT <- data.table(x = rep(c(1, 2), each = 10), y = rnorm(20))
setkey(DT, x, y)
test(600, is.sorted(DT$x))
test(601, !is.sorted(DT$y))
test(602, base::order(DT$x, DT$y), 1:20)

# as.list.data.table no longer copies via unclass, so speeding up sapply(DT,class) and lapply(.SD,...) etc, #2000
N <- if (.devtesting) 1e6 else 1e4
DT <- data.table(a = 1:N, b = 1:N, c = 1:N, d = 1:N)   # 15MB in dev testing, but test with N=1e7
if (.devtesting) {
    test(603, system.time(sapply(DT, class))["user.self"] < 0.1)
}

# Loopability, i.e. that overhead of [.data.table isn't huge, as in speed example in example(":=")
# These are just to catch slow down regressions where instead of 1s it takes 40s
if (.devtesting) {  # TO DO: find more robust way to turn these on for CRAN checks
    test(604, system.time(for (i in 1:1000) nrow(DT))["user.self"] < 0.5)
    test(605, system.time(for (i in 1:1000) ncol(DT))["user.self"] < 0.5)
    test(606, system.time(for (i in 1:1000) length(DT[[1L]]))["user.self"] < 0.5) # much faster than nrow, TO DO: replace internally
}
# TO DO: move to stress test script off CRAN ...
# DT = as.data.table(matrix(1L,nrow=100000,ncol=100))
# test(607, system.time(for (i in 1:1000) DT[i,V1:=i])["user.self"] < 10)  # 10 to be very wide margin for CRAN
# test(608, DT[1:1000,V1], 1:1000)

# Crash bug of chorder(character()), #2026
test(609, chorder(character()), base::order(character()))
test(610, chorder(""), base::order(""))
# Extra tests of chorder and chgroup: first on a permutation of LETTERS, then with repeats
x <- sample(LETTERS)
test(610.1, chorder(x), base::order(x))
test(610.2, chgroup(x), seq_along(x))
x <- sample(LETTERS, 1000, replace = TRUE)
test(610.3, chorder(x), base::order(x))
test(610.4, unique(x[chgroup(x)]), unique(x))

# := by group, first unkeyed and then keyed
DT <- data.table(a = 1:3, b = (1:9) / 10)
test(611, DT[, v := sum(b), by = a], data.table(a = 1:3, b = (1:9) / 10, v = c(1.2, 1.5, 1.8)))
setkey(DT, a)
test(612, DT[, v := min(b), by = a], data.table(a = 1:3, b = (1:9) / 10, v = (1:3) / 10, key = "a"))
# Assign to subset ok (NA initialized in the other items) ok :
test(613, DT[J(2), w := 8.3]$w, rep(c(NA, 8.3, NA), each = 3))
test(614, DT[J(3), x := 9L]$x, rep(c(NA_integer_, NA_integer_, 9L), each = 3))
test(615, DT[J(2), z := list(list(c(10L, 11L)))]$z, rep(list(NULL, 10:11, NULL), each = 3))
# Combining := by group with i
test(616, DT[a > 1, p := sum(b)]$p, rep(c(NA, 3.3), c(3, 6)))
test(617, DT[a > 1, q := sum(b), by = a]$q, rep(c(NA, 1.5, 1.8), each = 3))

# Empty i clause, #2034. Thanks to Chris for testing, tests from him.
test(618, copy(DT)[a > 3, r := sum(b)], DT)
test(619, copy(DT)[J(-1), r := sum(b)], DT)
test(620, copy(DT)[J(-1), r := sum(b), nomatch = 0], DT)
DT <- data.table(x = letters, key = "x")
test(621, copy(DT)[J("bb"), x := "foo"], DT)  # when no update, key should be retained
test(622, copy(DT)[J("bb"), x := "foo", nomatch = 0], DT)

# := grouped by an expression
set.seed(2)
DT <- data.table(a = rnorm(5) * 10, b = 1:5)
test(623, DT[, s := sum(b), by = round(a) %% 2]$s, c(10L, 5L, 5L, 10L, 10L))

# POSIXct attributes (class and tzone) are kept through grouping

DT <- data.table(a = c(1, 1, 2, 2, 2))
test(624, attributes(DT[, as.POSIXct("2011-12-13 18:50", tz = "EST"), by = a][[2]]), list(class = c("POSIXct", "POSIXt"), tzone = "EST"))

# ... and through the four ways of adding a column
DT <- data.table(x = rnorm(5))
DT$time1 <- Sys.time()         # recycle via *tmp*
DT$time2 <- rep(Sys.time(), 5) # plonk via *tmp*
DT[, time3 := Sys.time()]         # recycle
DT[, time4 := rep(Sys.time(), 5)]  # plonk
test(625, all(sapply(DT, is, "POSIXct")[-1]))

# unique on ITime doesn't lose attributes, #1719
t <- as.ITime(strptime(c("09:10:00", "09:11:00", "09:11:00", "09:12:00"), "%H:%M:%S"))
test(626, unique(t), t[c(1, 2, 4)])
test(627, class(unique(t)), "ITime")

# Recycling list() in rbind - with recent C-level changes, this seems not possible (like rbindlist),
# so an error is expected now. The old test is kept commented out:
# test(628, rbind(data.table(a=1:3,b=5:7,c=list(1:2,1:3,1:4)), list(4L,8L,as.list(1:3))),
#           data.table(a=c(1:3,rep(4L,3L)),b=c(5:7,rep(8L,3L)),c=list(1:2,1:3,1:4,1L,2L,3L)))
test(628, rbind(data.table(a = 1:3, b = 5:7, c = list(1:2, 1:3, 1:4)), list(4L, 8L, as.list(1:3))), error = "inconsistent with first column of that item which is length")
# Switch in .rbind.data.table for factor columns
test(628.5, rbind(data.table(a = 1:3, b = factor(letters[1:3]), c = factor("foo")), list(4L, factor("d"), factor("bar"))),
          data.table(a = 1:4, b = factor(letters[1:4]), c = factor(c(rep("foo", 3), "bar"), levels = c("foo", "bar"))))

# merge with common (non-key) column names and all.y=TRUE, #2011.
# Each case is checked against a literal result and against merge on the equivalent data.frames.
DT1 <- data.table(a = c(1, 3, 4, 5), total = c(2, 1, 3, 1), key = "a")
DT2 <- data.table(a = c(2, 3, 5), total = c(5, 1, 2), key = "a")
# 629+630 worked before anyway.  631+632 test the bug fix.
adf <- as.data.frame
adt <- as.data.table

test(629, merge(DT1, DT2), data.table(a = c(3, 5), total.x = c(1, 1), total.y = c(1, 2), key = "a"))
test(629.1, merge(DT1, DT2), setkey(adt(merge(adf(DT1), adf(DT2), by = "a")), a))

test(630, merge(DT1, DT2, all.x = TRUE), data.table(a = c(1, 3, 4, 5), total.x = c(2, 1, 3, 1), total.y = c(NA, 1, NA, 2), key = "a"))
test(630.1, merge(DT1, DT2, all.x = TRUE), setkey(adt(merge(adf(DT1), adf(DT2), by = "a", all.x = TRUE)), a))

test(631, merge(DT1, DT2, all.y = TRUE), data.table(a = c(2, 3, 5), total.x = c(NA, 1, 1), total.y = c(5, 1, 2), key = "a"))
test(631.1, merge(DT1, DT2, all.y = TRUE), setkey(adt(merge(adf(DT1), adf(DT2), by = "a", all.y = TRUE)), a))

test(632, merge(DT1, DT2, all = TRUE), data.table(a = c(1, 2, 3, 4, 5), total.x = c(2, NA, 1, 3, 1), total.y = c(NA, 5, 1, NA, 2), key = "a"))
test(632.1, merge(DT1, DT2, all = TRUE), setkey(adt(merge(adf(DT1), adf(DT2), by = "a", all = TRUE)), a))

# Unsetting datatable.alloccol is caught, #2014. The option is restored straight after the test.
old <- getOption("datatable.alloccol")
options(datatable.alloccol = NULL)  # the return value here seems to be TRUE rather than the old expression TO DO: follow up with r-devel
test(633, data.table(a = 1:3), error = "n must be integer length 1")
options(datatable.alloccol = old)

# with=FALSE by number isn't messed up by dup column names, #2025
DT <- data.table(a = 1:3, a = 4:6)
test(634, DT[, 2 := 200L], data.table(a = 1:3, a = 200L))

# Names when not all items are named, #2029
DT <- data.table(x = 1:3, y = 1:3)
test(635, names(DT[, list(x, y, a = y)]), c("x", "y", "a"))
test(636, names(DT[, list(x, a = y)]), c("x", "a"))

# := by key, and that := to the key by key unsets the key. Made non-trivial in size too.
set.seed(1)
DT <- data.table(a = sample(1:100, 1e6, replace = TRUE), b = sample(1:1000, 1e6, replace = TRUE), key = "a")
test(637, DT[, m := sum(b), by = a][1:3], data.table(a = 1L, b = c(156L, 808L, 848L), m = DT[J(1), sum(b)], key = "a"))
test(638, key(DT[J(43L), a := 99L]), NULL)
setkey(DT, a)
test(639, key(DT[, a := 99L, by = a]), NULL)

# Printing is right aligned without quotes etc, and the column header is repeated at the bottom for more than 20 rows
DT <- data.table(a = 8:10, b = c("xy", "x", "xyz"), c = c(1.1, 22.1, 0))
test(640, capture.output(print(DT)), c("    a   b    c", "1:  8  xy  1.1", "2:  9   x 22.1", "3: 10 xyz  0.0"))
DT <- data.table(a = letters, b = 1:26)
test(641, tail(capture.output(print(DT[1:20])), 2), c("19: s 19", "20: t 20"))
test(642, tail(capture.output(print(DT[1:21])), 2), c("21: u 21", "    a  b"))
# A 500 row table prints its first and last five rows around a " ---" separator line
DT <- data.table(a = as.character(as.hexmode(1:500)), b = 1:500)
test(643, capture.output(print(DT)), c("       a   b", "  1: 001   1", "  2: 002   2", "  3: 003   3", "  4: 004   4", "  5: 005   5", " ---        ", "496: 1f0 496", "497: 1f1 497", "498: 1f2 498", "499: 1f3 499", "500: 1f4 500"))

# Inconsistent length of columns error: a plain list with columns of length 3 and 2
# is given the data.table class by hand, then setkey must refuse it.
DT <- list(a = 3:1, b = 4:3)
setattr(DT, "class", c("data.table", "data.frame"))
test(644, setkey(DT, a), error = "Column 2 is length 2 which differs from length of column 1 (3)")
test(645, setkey(DT, b), error = "Column 2 is length 2 which differs from length of column 1 (3)")

# Test faster mean.  Example from (now not needed as much) data.table wiki point 3.
# Example is a lot of very small groups.
# Three spellings of the same grouped mean are timed (tt1..tt3) and their results compared.
set.seed(100)
n=1e4  # small n so as not to overload daily CRAN checks.
DT=data.table(grp1=sample(1:750, n, replace=TRUE),
              grp2=sample(1:750, n, replace=TRUE),
              x=rnorm(n),
              y=rnorm(n))
DT[c(2,5),x:=NA]  # seed chosen to get a group of size 2 and 3 in the first 5 to easily inspect.
DT[c(3,4),y:=NA]
tt1 = system.time(ans1<-DT[,list(mean(x),mean(y)),by=list(grp1,grp2)])    # 1.1s
tt2 = system.time(ans2<-DT[,list(.Internal(mean(x)),.Internal(mean(y))),by=list(grp1,grp2)])  # 1.1s
basemean = base::mean  # to isolate time of `::` itself
tt3 = system.time(ans3<-DT[,list(basemean(x),basemean(y)),by=list(grp1,grp2)])   # 11s
test(646, ans1, ans2)
test(647, ans1, ans3)
# this'll error with `valgrind` because of the 'long double' usage in gsumm.c (although I wonder if we need long double precision).
# http://valgrind.org/docs/manual/manual-core.html#manual-core.limits
# http://comments.gmane.org/gmane.comp.debugging.valgrind/10340
# 648: without na.rm the groups containing NA must give NA, and never NaN
test(648, any(is.na(ans1$V1)) && !any(is.nan(ans1$V1)))
# The timing comparisons only run in dev (.devtesting).
if (.devtesting) test(649, tt1["user.self"] < 10*tt2["user.self"])   # should be same speed, but *10 as large margin
if (.devtesting) test(650, tt1["user.self"] < tt3["user.self"]/2)   # 10 times faster, but test 2 times faster as large margin

# Same comparison with na.rm=TRUE, against an explicit mean.default call.
tt1 = system.time(ans1<-DT[,list(mean(x,na.rm=TRUE),mean(y,na.rm=TRUE)),by=list(grp1,grp2)])   # 2.0s
tt2 = system.time(ans2<-DT[,list(mean.default(x,na.rm=TRUE),mean.default(y,na.rm=TRUE)),by=list(grp1,grp2)])  # 5.0s
test(651, ans1, ans2)
# 652: with na.rm=TRUE at least one group is expected to yield NaN
test(652, any(is.nan(ans1$V1)))
if (.devtesting) test(653, tt1["user.self"] < tt2["user.self"])

# See FR#2067. Here we're just testing the optimization of mean and lapply, should be comparable to above
# Relies on DT, ans1 and tt1 left by the na.rm=TRUE timing just before this group.
tt2 = system.time(ans2<-DT[,lapply(.SD,mean,na.rm=TRUE),by=list(grp1,grp2)])
# lapply(.SD, ...) keeps the column names x and y; rename them to V1/V2 so ans2 is comparable with ans1
setnames(ans2,"x","V1")
setnames(ans2,"y","V2")
test(654, ans1, ans2)
test(655, abs(tt1["user.self"] - tt2["user.self"])<2.0)  # unoptimized tt2 takes 30 seconds rather than 2. The difference between tt1 and tt2 is under 0.2 seconds usually, so 2.0 is very large margin for error to ensure it's not 30secs.

# The verbose output must report that j was optimized to gmean, for bare, list() and multi-item j.
test(656, DT[,mean(x),by=grp1,verbose=TRUE], output="GForce optimized j to.*gmean")
test(657, DT[,list(mean(x)),by=grp1,verbose=TRUE], output="GForce optimized j to.*gmean")
test(658, DT[,list(mean(x),mean(y)),by=grp1,verbose=TRUE], output="GForce optimized j to.*gmean")
tt = capture.output(DT[,list(mean(x),mean(y)),by=list(grp1,grp2),verbose=TRUE])
# 659: passes only when the "Wrote less rows" message is absent from the verbose output
test(659, !length(grep("Wrote less rows", tt)))  # first group is one row with this seed. Ensure we treat this as aggregate case rather than allocate too many rows.

# Test .N for logical i subset
DT = data.table(a=1:10, b=rnorm(10))
# exactly one row has a==8L, so .N on that subset is 1L
test(660, DT[a==8L, .N], 1L)

# Test that growing is sensible in worst case
# Group sizes are 1..10 (a=rep(1:10,1:10)) and j returns one row per input row, so every group
# returns more rows than the one before it.
DT = data.table(a=rep(1:10,1:10),b=rnorm(55))
tt = capture.output(DT[,sum(b)*b,by=a,verbose=TRUE])
# counts the "growing from" lines in the verbose output
test(661, length(grep("growing from",tt))<3)  # was 6 when we simply grew enough for latest result

# Test that adding a new logical column is supported, #2094
DT=data.table(a=1:3)
test(662, DT[,newcol:=NA], data.table(a=1:3,newcol=NA))
# 663 relies on newcol having been added by reference in test 662
test(663, sapply(DT,class), c(a="integer",newcol="logical"))

# Test that setting names in the presence of dups is ok, #2103
# The first and third columns are both named 'a' before all three names are replaced.
DT = data.table(a=1:3, b=2:4, a=3:5)
test(664, setnames(DT, c('d','e','f')), data.table(d=1:3,e=2:4,f=3:5))

# Test by=c(...) in combination with i subset, #2078
DT = data.table(a=1:3,b=1:6,key="a")
# 665/666: by given as c("a") or key(DT) must give the same result as the plain forms, and emit the verbose message
test(665, DT[a<3,sum(b),by=c("a"),verbose=TRUE], DT[a<3,sum(b),by="a"], output="i clause present and columns used in by detected")
test(666, DT[a<3,sum(b),by=key(DT),verbose=TRUE], DT[a<3,sum(b),by=a], output="i clause present and columns used in by detected")
# 667: an arbitrary call such as paste("a") is rejected, with a hint to wrap it in eval(); 668 checks that hint works
test(667, DT[a<3,sum(b),by=paste("a")], error='Otherwise, by=eval(paste("a")) should work')
test(668, DT[a<3,sum(b),by=eval(paste("a"))], DT[a<3,sum(b),by=a])
# 669: c() holding a number rather than column names is an error
test(669, DT[a<3,sum(b),by=c(2)], error="must evaluate to 'character'")

# Test := keyby does key, #2065
# DT is rebuilt before each test because := modifies it by reference.
DT = data.table(x=1:2, y=1:6)
# expected result: rows sorted by x, keyed on x, z holding the per-group sum of y
ans = data.table(x=rep(1:2,each=3),y=c(1L,3L,5L,2L,4L,6L),z=rep(c(9L,12L),each=3),key="x")
test(670, DT[,z:=sum(y),keyby=x], ans)
DT = data.table(x=1:2, y=1:6)
test(671, DT[,z:=sum(y),keyby="x"], ans)
DT = data.table(x=1:2, y=1:6)
# 672: keyby on an expression is treated as a plain by (with a warning): row order unchanged and no key
test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning=":= keyby not straightforward character column names or list() of column names, treating as a by")
DT = data.table(x=1:2, y=1:6)
# 673: same result as 672 via by=, without the warning
test(673, DT[,z:=sum(y),by=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)))
DT = data.table(x=1:2, y=1:6)
# 674: keyby := combined with an i subset is an error
test(674, DT[x>1,z:=sum(y),keyby=x], error="When i is present, keyby := on a subset of rows doesn't make sense. Either change keyby to by, or remove i")

# Test new .()
DT = data.table(x=1:2, y=1:6, key="x")
# .(1L) in i joins to the key; after keying, the x==1 rows are rows 1:3
test(675, DT[.(1L)], DT[1:3])

# Test new rbindlist
# l mixes data.tables and plain lists, with full, partial and missing column names.
l = list(data.table(a=1:2, b=7:8),
         data.table(a=3:4, 9:10),
         data.table(5:6, 11:12),
         data.table(b=13:14),
         list(15:16,17L),
         list(c(18,19),20:21))
# 676: the result carries the names a and b of the first item
test(676, rbindlist(l[1:3]), data.table(a=1:6,b=7:12))
# l has 6 items, so index 10 yields NULL entries
test(677, rbindlist(l[c(10,1,10,2,10)]), data.table(a=1:4,b=7:10))  # NULL items ignored
test(678, rbindlist(l[c(1,4)]), error="Item 2 has 1 columns, inconsistent with item 1 which has 2")
test(679, rbindlist(l[c(1:2,5)]), error="Column 2 of item 3 is length 1, inconsistent with first column of that item which is length 2.")
test(680, rbindlist(l[c(2,6)]), data.table(a=c(3,4,18,19), V2=c(9:10,20:21)))  # coerces 18 and 19 to numeric (with eddi's changes in commit 1012 - highest type is preserved now) --- Caught and changed by Arun on 26th Jan 2014 (in commit 1099).
### ----> Therefore this TO DO may not be necessary here anymore (added by Arun 26th Jan 2014) ---> # TO DO when options(datatable.pedantic=TRUE): test(680.5, rbindlist(l[c(2,6)]), warning="Column 1 of item 2 is type 'double', inconsistent with column 1 of item 1's type ('integer')")
# 681: data.table, plain lists and a NULL item mixed in one call
test(681, rbindlist(list(data.table(a=letters[1:2],b=c(1.2,1.3),c=1:2), list("c",1.4,3L), NULL, list(letters[4:6],c(1.5,1.6,1.7),4:6))), data.table(a=letters[1:6], b=seq(1.2,1.7,by=0.1), c=1:6))
# 682-685: every form of empty input gives the null data.table
test(682, rbindlist(NULL), data.table(NULL))
test(683, rbindlist(list()), data.table(NULL))
test(684, rbindlist(list(NULL)), data.table(NULL))
test(685, rbindlist(list(data.table(NULL))), data.table(NULL))

# Test merge when no overlap of data in by columns when all=TRUE, #2114
# merge on data.table must agree with merge on data.frame; foo is character here (stringsAsFactors=FALSE) ...
DF1=data.frame(foo=letters[1:5], bar=1:5, stringsAsFactors=FALSE)
DF2=data.frame(foo=letters[6:10], baz=6:10, stringsAsFactors=FALSE)
DT1=as.data.table(DF1)
DT2=as.data.table(DF2)
test(686, merge(DF1, DF2, by="foo", all=TRUE), as.data.frame(merge(DT1,DT2,by="foo",all=TRUE)))
# ... and factor here (stringsAsFactors=TRUE)
DF1=data.frame(foo=letters[1:5], bar=1:5, stringsAsFactors=TRUE)
DF2=data.frame(foo=letters[6:10], baz=6:10, stringsAsFactors=TRUE)
DT1=as.data.table(DF1)
DT2=as.data.table(DF2)
test(687, merge(DF1, DF2, by="foo", all=TRUE), as.data.frame(merge(DT1,DT2,by="foo",all=TRUE)))

# And a more basic test that #2114 revealed that factor to factor join was leaving NA in the i
# factor columns, caught in 1.8.1 beta before release to CRAN.
DT = data.table(a=factor(letters[1:4]), b=5:8, key="a")
# the join column of the result must hold the level "b", not NA
test(688, DT[J(factor("b"))], data.table(a=factor("b"), b=6L, key="a"))

# Test removing a column followed by adding a new column using := by group, #2117
DT = data.table(a=1:3,b=4:6)
DT[,b:=NULL]  # remove b first; the tests then add columns by group to the same table
# 689 re-adds a column named b (every group has one row so .N is 1); 690 adds a further column c
test(689, DT[,b:=.N,by=a], data.table(a=1:3, b=1L))
test(690, DT[,c:=2,by=a], data.table(a=1:3, b=1L, c=2))

# Test combining i with by, with particular out of order circumstances, #2118
set.seed(1)   # the literal sums expected below depend on this seed
DT=data.table(a=sample(1:5,20,replace=TRUE),b=1:4,c=1:10)
# i combined with by must equal subsetting first and then grouping (691), also when by is an expression (692, 693)
test(691, DT[a>2,sum(c),by=b], DT[a>2][,sum(c),by=b])
test(692, DT[a>2,sum(c),by=b%%2L], data.table(b=1:0,V1=c(34L,42L)))
test(693, DT[a>2,sum(c),by=(b+1)%%2], data.table(b=c(0,1),V1=c(34L,42L)))
# 694-696 repeat 691-693 after keying DT by b; the expected values are the same
setkey(DT,b)
test(694, DT[a>2,sum(c),by=b], DT[a>2][,sum(c),by=b])
test(695, DT[a>2,sum(c),by=b%%2L], data.table(b=1:0,V1=c(34L,42L)))
test(696, DT[a>2,sum(c),by=(b+1)%%2], data.table(b=c(0,1),V1=c(34L,42L)))

# Test subset and %chin% crash with non-character input, #2131
# A non-character operand on either side of %chin% must raise an error mentioning "type".
test(697, 4 %chin% letters, error="type")
test(698, 4L %chin% letters, error="type")
test(699, "a" %chin% 4, error="type")
DT = data.table(aa=1:6,bb=7:12)
# select= given as a string, a bare symbol or c(symbol) must all return the single column aa
test(700, subset(DT,select="aa"), DT[,list(aa)])
test(701, subset(DT,select=aa), DT[,list(aa)])
test(702, subset(DT,select=c(aa)), DT[,list(aa)])
# 703-705: the same three forms on a keyed table; the key on aa is expected to be retained
setkey(DT,aa)
test(703, subset(DT,select="aa"), data.table(aa=1:6,key="aa"))
test(704, subset(DT,select=aa), data.table(aa=1:6,key="aa"))
test(705, subset(DT,select=c(aa)), data.table(aa=1:6,key="aa"))

# Test rbinding of logical columns, #2133
DT1 = data.table(A=1:3,B=letters[1:3],C=c(TRUE,TRUE,FALSE))
DT2 = data.table(A=4:5,B=letters[4:5],C=c(TRUE,FALSE))
# 706: the logical column C is stacked as-is; 707: rbindlist must agree with rbind
test(706, rbind(DT1,DT2), data.table(A=1:5, B=letters[1:5], C=c(TRUE,TRUE,FALSE,TRUE,FALSE)))
test(707, rbindlist(list(DT1,DT2)), rbind(DT1,DT2))

# Test non ascii characters when passed as character by, #2134
# *****
# TO DO: reinstate. Temporarily removed to pass CRAN's Mac using C locale (R-Forge's Mac is ok)
# *****

# Test := adding column after a setnames of all column names (which [,list(x)] does), #2146
DT = data.table(x=1:5)[,list(x)]
test(713, DT[,y:=5], data.table(x=1:5,y=5))
# rename every column by hand, then add another column by reference
setnames(DT,c("A","B"))
test(714, DT[,z:=6:10], data.table(A=1:5,B=5,z=6:10))

# Test J alias is now removed outside DT[...] from v1.8.7 (to resolve rJava::J conflict)
# An error is expected here; an expected value is supplied to test() as well.
test(715, J(a=1:3,b=4), data.table(a=1:3,b=4), error="could not find function.*J")

# Test get in j
DT = data.table(a=1:3,b=4:6)
test(716, DT[,get("b")], 4:6)   # TO DO: add warning about inefficiency when datatable.pedantic=TRUE
# 717: with get() in j the verbose output reports that all columns are made available
test(717, DT[,get("b"),verbose=TRUE], output="ansvars being set to all columns")

# Test that j can be a logical index when `with=FALSE` (#1797)
DT = data.table(a=1:10, b=rnorm(10), c=letters[1:10])
test(718, DT[, c(FALSE, TRUE, FALSE), with=FALSE], DT[, 2, with=FALSE])
# 719: selecting no columns at all gives a result with 0 rows
test(719, nrow(DT[, c(FALSE, FALSE, FALSE), with=FALSE]), 0L)

# Test combining join with missing groups with group by, #2162
DT = data.table(a = 1, b = 2, c = 4, key="a")
# None of 5,6,7 match the key value 1, so with the default nomatch all three i rows fall into an NA group of b.
test(720, DT[list(c(5,6,7)), .N, by=b], data.table(b=NA_real_,N=3L))
test(721, DT[list(c(5,6,7))][, .N, by=b], DT[list(c(5,6,7)), .N, by=b])
test(722, DT[list(c(5,6,7)), .N, by=b, mult="first"], data.table(b=NA_real_,N=3L))
# With nomatch=0 the unmatched rows are dropped and the result is empty.
test(723, DT[list(c(5,6,7)), .N, by=b, nomatch=0], data.table(b=numeric(),N=integer(),key="b")) # Key here is correct. by is ordered (albeit empty)
test(724, DT[list(c(5,6,7)), .N, by=b, nomatch=0], DT[list(c(5,6,7)),nomatch=0][,.N,by=b])      # Splitting should always be consistent

# another test linked from #2162
# "d" is not a value of x, so it is an unmatched i row in every join below.
DT = data.table(x=rep(c("a","b","c"),each=3), y=c(1L,3L,6L), v=1:9, key="x")
test(725, DT[c("a","b","d"),list(v)], DT[J(c("a","b","d")),"v",with=FALSE])  # unfiled bug fix for NA matches; see NEWS 1.8.3
test(726, DT[c("a", "b", "d"), sum(v), by=y, nomatch=0], data.table(y=INT(1,3,6),V1=INT(5,7,9)))
# 727: without nomatch=0 the unmatched "d" contributes an extra NA group
test(727, DT[c("a", "b", "d"), sum(v), by=y], data.table(y=INT(1,3,6,NA),V1=INT(5,7,9,NA)))
test(728, DT[c("a", "b", "d"), sum(v), by=y], DT[J(c("a", "b", "d"))][, sum(v), by=y])

# explicit verbose=FALSE needed here because tests are run a second time with verbose=TRUE
# Printed output of print(.SD) per group is compared between equivalent queries.
test(729.1, capture.output(DT[c("a", "b", "d"), print(.SD), by=.EACHI, verbose=FALSE]),
          capture.output(suppressWarnings(DT[c("a", "b", "d"), print(.SD), by=x, verbose=FALSE])))
test(729.2, capture.output(DT[c("a", "b"), print(.SD), by=y, verbose=FALSE]),   # TO DO: why doesn't last group have x=d, maybe groups=i in dogroups
            capture.output(DT[c("a", "b"),verbose=FALSE][, print(.SD), by=y, verbose=FALSE]))

test(729.3, DT[c("b","d"),.SD,by=.EACHI], data.table(x=c("b","b","b","d"),y=INT(1,3,6,NA),v=INT(4,5,6,NA)))   # no debate here
test(729.4, DT[c("b","d"),.SD, by=y], DT[c("b","d")][,.SD, by=y][4L,x:=NA_character_])   # the i groups when no match don't get carried through (would be hard to implement this and very unlikely to be useful. Just break into compound query, if needed to be used in j, to get them to carry through. TO DO: add to FAQ.

# That unnamed i gets x's join column names when j is .SD (or any named list, which verbose warns is inefficient), #2281
test(729.5, DT[c("a","b"),.SD], data.table(x=rep(c("a","b"),each=3),y=INT(1,3,6),v=1:6,key="x"))

# check := when combining join with missing groups and then group by
# Each test reads column w after the assignment; w is added to DT by reference in test 730 and carried forward.
test(730, DT[c("b","a"),w:=sum(v),by=y]$w, INT(5,7,9,5,7,9,NA,NA,NA))  # by over a different column than was joined to
test(731, DT["d",w:=99,by=y]$w, INT(5,7,9,5,7,9,NA,NA,NA))    # do nothing for missing group, before getting as far as type error
test(732, DT["d",w:=99L,by=y]$w, INT(5,7,9,5,7,9,NA,NA,NA))   # do nothing for missing group
# 733: "e" has no match; only the rows of "c" and "b" are updated, grouped by y%%2L
test(733, DT[c("c","e","b"),w:=sum(v),by=y%%2L]$w, INT(5,7,9,24,24,15,24,24,15))

# Test column type change in the 0 row case (#2274)
DT = data.table(a=1:3,b=4:6)[0]  # [0] keeps both columns but no rows
# 734 changes the type of an existing column; 735 adds a new zero-length column
test(734, DT[,b:=as.character(b)], data.table(a=integer(),b=character()))
test(735, DT[,c:=double()], data.table(a=integer(),b=character(),c=double()))

# Deleting multiple columns out-of-order, #2223
# The names on the LHS are deliberately not in column order.
# NOTE(review): tests 736 and 737 are now the same call. The trailing comment on 736 mentions
# with=FALSE but the call does not pass it; presumably it was dropped when with=FALSE stopped
# being needed with := -- confirm whether 736 should be kept, removed or restored.
DT = data.table(a=1:3,b=4:6,c=7:9,d=10:12,e=13:15,f=16:18,g=19:21)
test(736, DT[,c("b","d","g","f","c"):=NULL], data.table(a=1:3,e=13:15))  # test redundant with=FALSE is ok
# DT is rebuilt because test 736 deleted the columns by reference
DT = data.table(a=1:3,b=4:6,c=7:9,d=10:12,e=13:15,f=16:18,g=19:21)
test(737, DT[,c("b","d","g","f","c"):=NULL], data.table(a=1:3,e=13:15))  # with no longer needed

# Mixing column adds and deletes in one := gave incorrect results, #2251.
DT = data.table(c1=1:2)
# c2 is computed from c1 in the same call that deletes c1
test(738, DT[,c("c2", "c1"):=list(c1+1L, NULL)], data.table(c2=2:3))

# `:=`(c1=v1,v2=v2,...) is now valid , #2254
DT = data.table( c1=1:3 )
test(739, DT[,`:=`(c2=4:6, c3=7:9)], data.table(c1=1:3,c2=4:6,c3=7:9))
# the functional form requires every argument to be named
test(740, DT[,`:=`(4:6,c3=7:9)], error="all arguments must be named")
test(741, DT[,`:=`(4:6,7:9,10:12)], error="all arguments must be named")  # test the same error message in the other branch

# that out of bounds LHS is caught, root cause of #2254
# DT has the 3 columns c1,c2,c3 left by test 739; column numbers beyond 3 are rejected.
test(742, DT[,3:6:=1L], error="outside.*range")
test(743, DT[,2:3:=99L], data.table(c1=1:3,c2=99L,c3=99L))
test(744, DT[,(ncol(DT)+1):=1L], error="outside.*range")
# 745: the last existing column number is still valid
test(745, DT[,ncol(DT):=1L], data.table(c1=1:3,c2=99L,c3=1L))

# multiple LHS with by without by, #2215
DT = data.table(a=letters[c(1:3,3L)],key="a")
# 746: two new columns are added; only the row matching "a" is filled, the rest are NA
test(746, DT["a",c("new1","new2"):=list(4L, 5L)],
          data.table(a=letters[c(1:3,3L)],new1=INT(4,NA,NA,NA),new2=INT(5,NA,NA,NA),key="a"))
# 747: 3 items assigned to 4 rows are recycled, with a warning
test(747, DT[,new1:=4:6], data.table(a=letters[c(1:3,3L)],new1=INT(4L,5L,6L,4L),new2=INT(5,NA,NA,NA),key="a"), warning="recycled leaving remainder of 1 item")
suppressWarnings(DT[,new1:=4:6])  # same assignment again, run directly with its recycling warning suppressed
# 748: one new and one existing column assigned per i row with by=.EACHI; row "a" is not in i and stays untouched
test(748, DT[c("c","b"),`:=`(new3=.N,new2=sum(new1)+1L),by=.EACHI], data.table(a=letters[c(1:3,3L)],new1=INT(4,5,6,4),new2=INT(5,6,11,11),new3=INT(NA,1,2,2),key="a"))

# and multiple LHS by group, #1710
DT = data.table(a=rep(6:8,1:3),b=1:6)
# 749: three columns added per group in one call: group size, group sum of b, and a*10L
test(749, DT[,c("c","d","e"):=list(.N,sum(b),a*10L),by=a], data.table(a=rep(6:8,1:3),b=1:6,c=rep(1:3,1:3),d=INT(rep(c(1,5,15),1:3)),e=rep(6:8,1:3)*10L))
# 750: i subset plus by on the column c added by test 749; rows outside a<8 stay NA. Only columns 6:7 (f,g) are compared.
test(750, DT[a<8,`:=`(f=b+sum(d),g=.N),by=c][,6:7,with=FALSE], data.table(f=INT(2,12,13,NA,NA,NA),g=INT(1,2,2,NA,NA,NA)))

# varname holding colnames, by group, linked from #2120.
DT = data.table(a=rep(1:3,1:3),b=1:6)
colname = "newcol"
# wrapping the variable in () on the LHS makes := use its value as the new column's name
test(751, DT[,(colname):=sum(b),by=a], data.table(a=rep(1:3,1:3),b=1:6,newcol=INT(1,5,5,15,15,15)))

# Add tests for nested := in j by group, #1987
DT = data.table(a=rep(1:3,2:4),b=1:9)
# := is applied inside j to the table returned by head(.SD,2) for each group
test(752, DT[,head(.SD,2)[,new:=1:.N],by=a], data.table(a=rep(1:3,each=2),b=c(1:4,6:7),new=1:2))

# Test duplicate() of recycled plonking RHS, #2298
# One RHS is assigned to several new columns; setkey then reorders the rows and every column
# must end up reordered consistently.
DT = data.table(a=letters[3:1],x=1:3)
test(753, setkey(DT[,c("x1","x2"):=x],a), data.table(a=letters[1:3],x=3:1,x1=3:1,x2=3:1,key="a"))
DT = data.table(a=letters[3:1],x=1:3,y=4:6)
# 754: a 2 item RHS list recycled over 4 new columns
test(754, setkey(DT[,c("x1","y1","x2","y2"):=list(x,y)],a), data.table(a=letters[1:3],x=3:1,y=6:4,x1=3:1,y1=6:4,x2=3:1,y2=6:4,key="a"))
# And non-recycling i.e. that a single column copy does copy the column
DT = data.table(a=1:3)
# after b:=a, later changes to a must not show up in b, and vice versa
test(754.1, DT[,b:=a][1,a:=4L][2,b:=5L], data.table(a=INT(4,2,3),b=INT(1,5,3)))
test(754.2, DT[,b:=a][3,b:=6L], data.table(a=INT(4,2,3),b=INT(4,2,6)))
# 754.3: a freshly computed RHS is reported by verbose as plonked in directly with no copy
test(754.3, DT[,a:=as.character(a),verbose=TRUE], output="Direct plonk.*no copy")
RHS = as.integer(DT$a)
# 754.4: an RHS bound to a variable is reported by verbose as duplicated
test(754.4, DT[,a:=RHS,verbose=TRUE], output="RHS for item 1 has been duplicated")

# Used to test warning on redundant by (#2282) but by=.EACHI has now superseded
DT = data.table(a=letters[1:3],b=rep(c("d","e"),each=3),x=1:6,key="a,b")
# 755/756: joining on the first key column only, by=.EACHI and by=a are expected to give the same result
test(755, DT[c("b","c"),sum(x),by=.EACHI], data.table(a=c("b","c"),V1=c(7L,9L),key="a"))
test(756, DT[c("b","c"),sum(x),by=a], data.table(a=c("b","c"),V1=c(7L,9L),key="a"))
test(757, DT[list(c("b","c"),"d"),sum(x),by=a], data.table(a=c("b","c"),V1=2:3,key="a"))  # 'by' less than number of join columns

# join then by when mult=="last"|"first", #2303 (crash in dev 1.8.3 only)
# Each one-step query must equal the two-step form: subset (or join) first, then group.
DT = data.table(a=1:3,b=1:6,c=7:12,key="a")
test(758, DT[J(c(1L,1L)),sum(c),by=b,mult="last"], DT[J(c(1L,1L)),mult="last"][,sum(c),by=b])
test(759, DT[J(1L),c,by=b,mult="last"], DT[J(1L),mult="last"][,c,by=b])
# 760/761: the same check with an integer i rather than a join
test(760, DT[2:5,sum(c),by=b], DT[2:5][,sum(c),by=b])
test(761, DT[2:5,sum(c),by=b%%2], DT[2:5][,sum(c),by=b%%2])

# joining from empty i table, #2194
DT = data.table(a=1:3,b=4:6,key="a")
# 762-764: a zero row i gives an empty but correctly typed (and keyed) result
test(762, DT[J(integer()),b,by=.EACHI], data.table(a=integer(),b=integer(),key="a"))
test(763, DT[J(integer()),1L,by=b], data.table(b=integer(),V1=integer(),key="b"))  # ordered by is detected now (empty is ordered), otherwise a join to the result would fail just because it's empty which wouldn't be consistent with non empty case
test(764, DT[J(integer()),b,mult="last"], integer())
test(765, DT[J(2L),b,mult="last"], 5L)
# 766/767: i values with no match in DT, dropped by nomatch=0, also give the empty keyed result
test(766, DT[J(5L),b,nomatch=0,by=.EACHI], data.table(a=integer(),b=integer(),key="a"))
test(767, DT[J(5:6),b,nomatch=0,by=.EACHI], data.table(a=integer(),b=integer(),key="a"))

# Crash on by-without-by with mixed type non join i columns, #2314. Despite not being used by j they were still being assigned to .BY.
DT = data.table(iris,key="Species")
Y = data.table(date=as.POSIXct("2011-01-01"),num=as.numeric(1:26))
# get("letters") returns base R's letters vector, so this presumably adds one column per lowercase
# letter (a..z), each holding LETTERS; column p is then used as Y's key below.
Y[,get("letters"):=LETTERS]
Y[,A:=1:26]
Y[,p:=factor(p)]  # coerce type to match DT$Species to save warning. Crash was related to .BY internally, not the coercion.
setkey(Y,p)
for (i in 1:10){DT[Y,Petal.Width];DT[Y];NULL}  # reliable crash in 1.8.2 (tested).
# No LETTERS value is a Species, so every i row is unmatched and Petal.Width is NA.
test(768, DT[Y,Petal.Width,by=.EACHI], data.table(Species=factor(LETTERS),Petal.Width=NA_real_,key="Species"))
DT = data.table(a=1:3,b=1:6,c=7:12, key="a")
# 769/770: .BY[[1]] holds the current group's value of a, for both an ordinary by and by=.EACHI
test(769, DT[,.BY[[1]]==a,by=a], data.table(a=1:3,V1=TRUE,key="a"))
test(770, DT[J(2:3),.BY[[1]]==b,by=.EACHI], data.table(a=INT(2,2,3,3),V1=c(TRUE,FALSE),key="a"))

# A data.table RHS of := caused a crash, #2311.
a = data.table(first=1:6, third=c(1,1,1,3,3,4), key="first")
b = data.table(first=c(3,4,4,5,6,7,8), second=1:7, key="first")
# 771/772: the RHS is a 2 column data.table (or that table as a list); it is recycled over the 7 rows
# with a warning. 771 compares b with itself, so its real checks are the warning and the absence of a crash.
test(771, b[,third:=a[b,third,by=.EACHI]], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item")
test(772, copy(b)[,third:=as.list(a[b,third,by=.EACHI])], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item")
# 773: after 771, item 4 of column 'third' is the whole vector of joined 'third' values
test(773, b[4,third[[1]]], c(1,3,3,3,4,NA,NA))
# 774.x: assigning the joined vector itself gives an ordinary numeric column
test(774.1, b[,third:=a[b,third,mult="first"]], ans<-data.table(first=c(3,4,4,5,6,7,8), second=1:7, third=c(1,3,3,3,4,NA,NA), key="first"))
test(774.2, b[,third:=a[b,third]], ans) # mult="first" no longer needed as from v1.9.3.  It now does what was naturally expected.


# That names are dropped. (Names on the column vectors don't display. They increase size and aren't much use.)
DT = data.table(a=1:3,b=LETTERS[1:3])
map = c("A"="Foo",B="Bar",C="Baz")
DT[,b:=map[b]]   # map[b] is a named character vector; the names must not be stored in the column
test(775, names(DT$b), NULL)

# Test that names of named vectors don't carry through, #2307.
DT = data.table(a=1:3,b=c("a"="a","b"="a","c"="b"))
test(776, names(DT$b), NULL)  # From v1.8.11, data.table() drops vector names
DT = data.table(a=1:3,b=c("a","a","b"))
setattr(DT$b, "names", c("a","b","c"))  # Force names in there to test #2307
# 777 confirms the names really are on the column before grouping by it
test(777, names(DT$b), c("a","b","c"))
test(778, DT[,sum(a),by=b], data.table(b=c("a","b"),V1=c(3L,3L)))  #2307 retained names length 3 on the length 2 vector result causing it not to print.
test(779, print(DT[,sum(a),by=b]), output="   b V11: a  32: b  3$")

# Test new .GRP binding
# .GRP is the group counter under by=a; without by it is 1L for every row.
test(780, data.table(a=1:3,b=1:6)[,i:=.GRP,by=a][,i2:=.GRP], data.table(a=1:3,b=1:6,i=rep(1:3,2),i2=1L))

# Test new .I binding
# .I holds the row numbers in DT of the rows in the current group.
DT = data.table(a=1:4,b=1:8)
test(781, DT[,.I,by=a]$I, INT(1,5,2,6,3,7,4,8))
# 782: row number in DT of the largest b within each group
test(782, DT[,.I[which.max(b)],by=a], data.table(a=1:4,V1=5:8))
# after keying by a the groups are contiguous, so the row numbers change accordingly
setkey(DT,a)
test(783, DT[,.I,by=a]$I, 1:8)
test(784, DT[,.I[which.max(b)],by=a], data.table(a=1:4,V1=INT(2,4,6,8),key="a"))
# 785-787: .I (and .GRP) combined with a join in i
test(785, DT[J(2:4),.I,by=a%%2L], data.table(a=rep(0:1,c(4,2)),I=INT(3,4,7,8,5,6)))
test(786, DT[J(c(3,2,4)),list(.I,.GRP),by=.EACHI], data.table(a=rep(c(3L,2L,4L),each=2),I=INT(5,6,3,4,7,8),GRP=rep(1:3,each=2L)))
# 787: groups are numbered in the order of the i rows (3 first, then 2); rows not joined to stay NA
test(787, DT[J(3:2),`:=`(i=.I,grp=.GRP),by=.EACHI][,list(i,grp)], data.table(i=INT(NA,NA,3:6,NA,NA),grp=INT(NA,NA,2,2,1,1,NA,NA)))

# New not-join (a.k.a. not-select, since not just for data.table i but integer, logical and character too)
# A leading ! on i returns the rows that i would NOT have selected.
DT = data.table(A=rep(1:3,each=2),B=1:6,key="A")
test(788, DT[!J(2)], data.table(A=c(1L,1L,3L,3L),B=c(1L,2L,5L,6L),key="A"))
test(789, DT[!(2:6)], DT[1])
test(790, DT[!(2:6)], DT[!2:6])   # nicer than DT[-2:6] applying - to 2 first
test(791, DT[!6], DT[1:5])
test(792, DT[!c(TRUE,FALSE)], DT[c(FALSE,TRUE)])
# 793 converts A to letters by reference and re-keys; the tests below use character keys
test(793, setkey(DT[,A:=letters[A]],A)[!c("b","c")], DT["a"])
test(794, DT[!"b"], DT[c("a","c")])
# 795-799: edge cases -- zero, NULL, empty and negative i
test(795, DT[!0], DT)
test(796, DT[!NULL], DT[NULL])
test(797, DT[!integer()], DT)
test(798, DT[!-1], DT[1])
test(799, DT[--1], DT[1])
myi = c("a","c")
# 800: i held in a variable
test(800, DT[!myi], DT["b"])
# 801-803: not-join combined with by; i values that match nothing exclude nothing
test(801, DT[!"c",sum(B),by=A], data.table(A=c("a","b"),V1=c(3L,7L),key="A"))
test(802, DT[!"missing",sum(B),by=A], DT[,sum(B),by=A])
test(803, DT[!c("a","missing","b","missing2"),sum(B),by=A], DT["c",sum(B),by=.EACHI])
# Combining not-join with which
test(804, DT[!"b",which=TRUE], INT(1:2,5:6))  # row numbers in DT that don't match
# New which=NA value
test(805, DT[c("b","foo","c"),which=NA], 2L)  # row numbers in i that don't match
test(806, DT[!c("b","foo","c"),which=NA], c(1L,3L)) # row numbers in i that do match
# 807: nomatch cannot be combined with the ! prefix
test(807, DT[!c("b","foo","c"),nomatch=0], error="not-join.*prefix is present on i.*Please remove nomatch")
# 808-811: which= combined with each nomatch= value
test(808, DT[c("b","foo","c"),which=TRUE,nomatch=NA], INT(3:4,NA,5:6))
test(809, DT[c("b","foo","c"),which=TRUE,nomatch=0], INT(3:4,5:6))
test(810, DT[c("b","foo","c"),which=NA,nomatch=NA], 2L)
test(811, DT[c("b","foo","c"),which=NA,nomatch=0], error="which=NA with nomatch=0 would always return an empty vector[.] Please change or remove either which or nomatch")

# New notj for column names and positions when with=FALSE, #1384
# A leading ! on j (with=FALSE) drops the named or numbered columns rather than selecting them.
DT = data.table(a=1:3,b=4:6,c=7:9)
test(812, DT[,!"b",with=FALSE], DT[,-match("b",names(DT)),with=FALSE])
# selecting an unknown column is an error (813); asking to drop one only warns (814, 815)
test(813, DT[,"foo",with=FALSE], error="column(s) not found: foo")
test(814, DT[,!"foo",with=FALSE], DT, warning="column(s) not removed because not found: foo")
test(815, DT[,!c("b","foo"),with=FALSE], DT[,list(a,c)], warning="column(s) not removed because not found: foo")
test(816, DT[,!2:3,with=FALSE], DT[,-(2:3),with=FALSE])  # for consistency, but ! is really for character column names
# 817/818: the columns to drop held in a variable, first as a name and then as a number
mycols = "b"
test(817, DT[,!mycols,with=FALSE], DT[,list(a,c)])
mycols = 2
test(818, DT[,!mycols,with=FALSE], DT[,list(a,c)])

# Test X[Y] slowdown, #2216
# X and Y start as the same 1e3 x 1e3 cross join; one row of X is then changed to create a duplicate key.
X = CJ(a=seq_len(1e3),b=seq_len(1e3))
Y = copy(X)
X[4,b:=3L]     # create a dup group, to force allLen1=FALSE
setkey(X)
test(819, system.time(X[Y,allow.cartesian=TRUE])["user.self"] < 5)
# Many minutes in 1.8.2!  Now well under 1s, but 5s for wide tolerance for CRAN. We like CRAN to tell us if any changes
# in R or elsewhere cause the 2 minute bug to return. Hence not excluded by an if(.devtesting)

# 820: the same join and time limit with mult="first"
test(820, system.time(X[Y,mult="first"])["user.self"] < 5)

# Optimization of lapply(,"+"), #2212
# The function is passed to lapply as a string, as a backquoted name and as a bare symbol;
# all spellings must give the same result.
DT = data.table(a=rep(1:3,each=2L),b=1:6,c=7:12)
ans = data.table(a=rep(1:3,each=2L),b=INT(2,3,5,6,8,9),c=INT(8,9,11,12,14,15))
test(821, DT[,lapply(.SD, "+", a), by=a], ans)
test(822, DT[,lapply(.SD, `+`, a), by=a], ans)
# with sum, the group's value of a is passed as an extra argument and so is added once per group
ans = data.table(a=1:3,b=INT(4,9,14),c=INT(16,21,26))
test(823, DT[,lapply(.SD, "sum", a), by=a], ans)
test(824, DT[,lapply(.SD, sum, a), by=a], ans)
test(825, DT[,lapply(.SD, `sum`, a), by=a], ans)
DT[2,b:=NA_integer_]  # introduce an NA to check na.rm handling
# 825.1-825.3: anonymous functions, and na.rm=TRUE passed through to sum
test(825.1, DT[,lapply(.SD, function(x)sum(x)), by=a], data.table(a=1:3,b=INT(NA,7,11),c=INT(15,19,23)))
test(825.2, DT[,lapply(.SD,function(x,...)sum(x,...),na.rm=TRUE),by=a], data.table(a=1:3,b=INT(1,7,11),c=INT(15,19,23)))
test(825.3, DT[,lapply(.SD,sum,na.rm=TRUE),by=a], data.table(a=1:3,b=INT(1,7,11),c=INT(15,19,23)))

# Test illegal names in merge are ok and setcolorder length error, #2193i and #2090
# The column name "Illegal(name%)" is not a syntactic R name; merge must carry it through unchanged.
DT1 = data.table(a=letters[1:5], "Illegal(name%)"=1:5, key="a")
DT2 = data.table(a=letters[1:5], b=6L, key="a")
test(826, merge(DT1,DT2), cbind(DT1,b=6L))
test(827, merge(DT2,DT1), cbind(DT2,"Illegal(name%)"=1:5))
a=data.table('User ID'=c(1,2,3), 'Blah Blah'=c(1,2,3), key='User ID')  #2090's test
b=data.table('User ID'=c(1,2,3), 'Yadda Yadda'=c(1,2,3), key='User ID')
# names containing spaces must survive a join
test(827.1, names(a[b]), c("User ID","Blah Blah","Yadda Yadda"))

# setcolorder and merge check for dup column names, #2193(ii)
setnames(DT2,"b","a")   # DT2 now has two columns named 'a'
test(828, setcolorder(DT2,c("a","b")), error="x has some duplicated column name(s): a. Please remove or rename")
# the merge error names whichever argument (x or y) holds the duplicated names
test(829, merge(DT1,DT2), error="y has some duplicated column name(s): a. Please remove or rename")
test(830, merge(DT2,DT1), error="x has some duplicated column name(s): a. Please remove or rename")

# attribs such as "comments" should be retained, #2270
DT1 <- data.table(id = seq.int(1, 10), A = LETTERS[1:10], key = "id")
comment(DT1$A) <- "first comment"     # copies, setattr would be better as on next line
DT2 <- data.table(id = seq.int(2, 10, 2), b = letters[1:5], key = "id")
setattr(DT2$b,"comment","second comment")
# the comment attribute must survive a join in either direction (831, 832), merge (833) and a row subset (834)
test(831, comment(DT1[DT2]$A), "first comment")
test(832, comment(DT2[DT1]$b), "second comment")
test(833, sapply(merge(DT1,DT2),comment), list(id=NULL, A="first comment", b="second comment"))
test(834, comment(DT1[2:3]$A), "first comment")

# Test that matrix RHS of := is caught, #2333
DT = data.table(a=1:3)
DT[,a:=scale(a)]    # 1 column matrix auto treated as vector
# 835: na.omit must return the table unchanged
test(835, na.omit(DT), DT)
test(836, DT[,a:=as.integer(a)], data.table(a=INT(-1,0,1)))
# 837: a 2 column matrix is flattened to one vector and recycled, with a warning
test(837, DT[,a:=cbind(1,2)], data.table(a=c(1L,2L,1L)), warning="2 column matrix RHS of := will be treated as one vector")
DT = data.table(a=1:3,b=1:6)
# 838: scale() by group (each group has 2 rows); the result is scaled by 1000 and truncated to integer for comparison
test(838, DT[,c:=scale(b), by=a][,c:=as.integer(1000*c)], data.table(a=1:3,b=1:6,c=rep(as.integer(1000*scale(1:2)), each=3)))

# Test data.table's last(). (last is used internally in data.table, too).
# Which tests run below depends on whether package:xts is on the search path.
test(839, last(1:10), 10L)   # If xts is loaded, this'll just test xts's last. Ok as they're consistent, for vectors.
DT = data.table(a=1:3,b=4:6)
test(840, last(DT), DT[3L])  # xts's last returns a one row data.table ok. So this test is ok too, whether or not xts is loaded.
                             # But not true when DT is a one column data.table/data.frame, see below.
if ("package:xts" %in% search()) {  # e.g. when run via R CMD check
    x = xts(1:100, Sys.Date()+1:100)
    test(841, last(x,10), x[91:100,])
    # The important thing this tests is that data.table's last() dispatches to xts's method when data.table is loaded above xts.
    # But that isn't tested by R CMD check because xts is loaded above data.table, there.
    # So to make this test relevant, in a fresh R session type: "require(xts);require(data.table);test.data.table()"
    #                                              rather than: "require(data.table);require(xts);test.data.table()"
    # Which was the main thrust of bug#2312 fixed in v1.8.3
} else {
    cat("Test 841 not run. If required call library(xts) first.\n")
    # So these won't run from R CMD check (deliberately, for now) ...
    # The expected value of 842 depends on whether gdata is attached (see the trailing comment on the test).
    ans = if ("package:gdata" %in% search()) list(89) else 89
    test(842, last(list("a",1:2,89)), ans)  # xts's last and gdata::last returns a one item list here. Would prefer it to return the item itself.
    DT = data.table(a=1:3)
    test(842.1, last(DT), DT[3L])
    # xts's last returns a 3L atomic here for 1 column data.frame, strangely. We wish for the last row, consistently. I tried
    # providing a last.data.table method and using Enhances and Imports in DESCRIPTION with import() and S3method() in
    # NAMESPACE but nothing I tried made last.data.table available to xts's last if xts was loaded above data.table (which was
    # frustrating to test as well, see comment to test 839 above).
}

# Test L[[1L]][,:=] updates by reference, #2204
# := applied to a data.table extracted from a list with [[ must modify the list element itself.
l = list(data.table(a=1:3), data.table(b=4:6))
test(843, l[[2L]][,c:=7:9], data.table(b=4:6,c=7:9))
# 844: the new column is visible through l without reassigning the element
test(844, l, list(data.table(a=1:3), data.table(b=4:6,c=7:9)))
names(l) = c("foo","bar")   # R >= 3.1 no longer copies all the contents, yay
# d:=4 assigns a double, so the expected column is written as double (4, not 4L), as in tests 846 and 847.
# The selfref warning is only expected when .R.assignNamesCopiesAll is TRUE.
test(845, l[["foo"]][2,d:=4], data.table(a=1:3,d=c(NA,4,NA)),
    warning= if (!.R.assignNamesCopiesAll) NULL else "Invalid .internal.selfref detected and fixed")
# Same scenario with the list rebuilt and named via setattr instead of names<-; no warning is expected here.
l = list(data.table(a=1:3), data.table(b=4:6))
setattr(l,"names",c("foo","bar"))
test(846, l[["foo"]][2,d:=4], data.table(a=1:3,d=c(NA,4,NA)))
test(847, l, list(foo=data.table(a=1:3,d=c(NA,4,NA)), bar=data.table(b=4:6)))
# Same by-reference check with a small datatable.alloccol, observed through truelength().
# The option is saved here and restored at the end of this group.
old = getOption("datatable.alloccol")
options(datatable.alloccol=2L)  # the return value here seems to be TRUE rather than the old expression TO DO: follow up with r-devel
l = list(foo=data.table(a=1:3,b=4:6),bar=data.table(c=7:9,d=10:12))   # list() doesn't copy the NAMED==0 objects here
test(848, truelength(l[[1L]]), 2L)
# 849: add a column to the first element by reference, then read it back through the list
test(849, {l[[1L]][,e:=13:15]; l[[1L]]}, data.table(a=1:3,b=4:6)[,e:=13:15])
# 850/851: only the element that gained a column has a larger truelength; the other is unchanged
test(850, truelength(l[[1L]]), 102L)
test(851, truelength(l[[2L]]), 2L)
l[["bar"]][,f:=16:18]   # same for the second element, this time accessed by name
test(852, truelength(l[[2L]]), 102L)
options(datatable.alloccol=old)
# Now create the list from named objects
# Whether list() copies DT1 depends on the R version (flag .R.listCopiesNamed); this decides both
# the address comparison in test 853 and whether test 854 expects a selfref warning (w).
DT1 = data.table(a=1:3, b=4:6)
DT2 = data.table(c=7:9)
l = list(DT1, DT2)
if (!.R.listCopiesNamed) {
    # From R>=3.1, list() no longer copies NAMED inputs (a very welcome change in Rdevel, r63767)
    test(853, address(DT1) == address(l[[1L]]))
    w = NULL
} else {
    test(853, address(DT1) != address(l[[1L]]))
    w = "Invalid .internal.selfref detected and fixed.*R's list() used to copy named objects"
}
test(854, l[[1]][,d:=10:12], data.table(a=1:3,b=4:6,d=10:12), warning = w)
# 855: either way, the list element must hold the new column afterwards
test(855, l[[1]], data.table(a=1:3,b=4:6,d=10:12))

# Test setnames on data.frame, #2273.
# setnames() and set() are applied to a plain data.frame here, not a data.table.
DF = data.frame(foo=1:2,bar=3:4)
setnames(DF,c("baz","qux"))
test(856, DF, data.frame(baz=1:2,qux=3:4))
# 857.x: set() must refuse to add a new column to a data.frame, whether given by name or by number
test(857.1, set(DF,NULL,"quux",5:6), error="set() on a data.frame is for changing existing columns, not adding new ones")
test(857.2, set(DF,NULL,3L,5:6), error="set() on a data.frame is for changing existing columns, not adding new ones")
# 858.x: changing an existing column works, by name or by number
test(858.1, set(DF,NULL,"qux",5:6), data.frame(baz=1:2, qux=5:6))
test(858.2, set(DF,NULL,2L,7:8), data.frame(baz=1:2, qux=7:8))

# Test DT[J(data.frame())], #2265
DT = data.table(foo=c(1,2,3), bar=c(1.1,2.2,3.3), key="foo")
i = data.frame(foo=1)
# a data.frame in i must join the same way whether passed bare, wrapped in J() or converted with data.table()
test(859, DT[i], DT[J(i)])
test(860, DT[i], DT[data.table(i)])

# test no memory leak, #2191 and #2284
# These take a few seconds each, and it's important to run these on CRAN to check no leak
# Pattern for each check: read the Vcells "(Mb)" figure from gc() before and after 2000 create/remove
# cycles, and require that it grew by less than 0.5 Mb.
gc(); before = gc()["Vcells","(Mb)"]
for (i in 1:2000) { DT = data.table(1:3); rm(DT) }  # in 1.8.2 would leak 3MB
gc(); after = gc()["Vcells","(Mb)"]
test(861, after < before+0.5)   # close to 0.0 difference, but 0.5 for safe margin

# same check for as.data.table() on a data.frame
gc(); before = gc()["Vcells","(Mb)"]
DF = data.frame(x=1:20, y=runif(20))
for (i in 1:2000) { DT = as.data.table(DF); rm(DT) }
gc(); after = gc()["Vcells","(Mb)"]
test(862, after < before+0.5)

# same check for row subsetting of a data.table
gc(); before = gc()["Vcells","(Mb)"]
DT = data.table(x=1:20, y=runif(20))
for (i in 1:2000) { x <- DT[1:5,]; rm(x) }
gc(); after = gc()["Vcells","(Mb)"]
test(863, after < before+0.5)

# rbindlist should look for the first non-empty data.table - New changes (from Arun). Explanation below:
# Even if data.table is empty, as long as there are column names, they should be considered.
# Ex: What if all data.tables are empty? What'll be the column name then?
# If there are no names, then the first non-empty set of names will be allocated. I think this is the way to do it.. TODO: Should write to Matt about it.
# In each test the first item has zero rows and the second item (assigned to DT inline) supplies the data.
# 864.1: names of the empty first data.table win
test(864.1, rbindlist(list(data.table(foo=logical(0),bar=logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("foo", "bar")))
# 864.2: the first item is an unnamed list, so the names of DT are used
test(864.2, rbindlist(list(list(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), DT)
# 864.3: the first item is a data.table built without names; its default names V1,V2 are expected
test(864.3, rbindlist(list(data.table(logical(0),logical(0)), DT<-data.table(baz=letters[1:3],qux=4:6))), setnames(DT, c("V1", "V2")))

# Steve's find that setnames failed for numeric 'old' when pointing to duplicated names
DT = data.table(a=1:3,b=1:3,v=1:6,w=1:6)
# 865: a named list wrapped in {} in j, grouped, with verbose=TRUE must print the inefficiency message
test(865, ans1<-DT[,{list(name1=sum(v),name2=sum(w))},by="a,b",verbose=TRUE],
          output="result of j is a named list. It's very inefficient.*removed and put back")
test(866, names(ans1), c("a","b","name1","name2"))
test(867, names(ans2<-DT[,list(name1=sum(v),name2=sum(w)),by="a,b"]), c("a","b","name1","name2"))  # list names extracted here
test(868, ans1, ans2)   # both forms of j must give the same result
# and related to setnames, too
DT = data.table(a=1:3,b=1:6,key="a")
# 869: only the printed output is checked (print(.SD) per row of i, then the empty 3 column result)
test(869, DT[J(2,42,84),print(.SD),by=.EACHI], output="   b1: 22: 5.*Empty data.table (0 rows) of 3 cols: a,V2,V3")

# Test setnames with duplicate colnames
# These tests run in sequence on the same DT; each successful setnames changes the names seen by the next test.
DT = data.table(a=1:3,b=4:6,b=7:9)
test(870, setnames(DT,"b","foo"), error="Some items of 'old' are duplicated (ambiguous) in column names: b")
test(871, setnames(DT,c("bar","bar"),c("x","y")), error="Some duplicates exist in 'old': bar")
test(872, setnames(DT,3,"c"), data.table(a=1:3,b=4:6,c=7:9))   # by position is unambiguous
test(873, setnames(DT,"foo","bar"), error="Items of 'old' not found in column names: foo")
test(874, setnames(DT,c(1,1),c("foo","bar")), error="Some duplicates exist in 'old': 1")
test(875, setnames(DT,"c","b"), data.table(a=1:3,b=4:6,b=7:9))   # renaming to an existing name is allowed
test(875.1, setnames(DT,"a","c"), data.table(c=1:3,b=4:6,b=7:9))  # 'a' isn't duplicated so not a problem as from v1.8.11
test(875.2, setnames(DT,c("c","b"), c("C","B")), error="Some items of 'old' are duplicated (ambiguous) in column names: b")  # check error msg when 2nd one in old is the problem

# Test local var problem introduced in v1.8.3
# localvar exists only inside f(); it is used in i on its own, with j, and with j and by.
DT = data.table(a=1:3,b=1:6)
f = function() {
  localvar = 2
  print(DT[a>localvar])
  print(DT[a>localvar,sum(b)])
  print(DT[a>localvar,sum(b),by=a])  # bug fix 2368
}
# The three printed results are matched in order by a single output pattern.
test(876, f(), output="   a b1: 3 32: 3 6.*[1] 9.*   a V11: 3  9")

# segfault when assigning NA names, #2393
# c(NA, NA) is a logical vector; an error is expected here.
DT = data.table(a=1:3, b=4:6)
test(877, setnames(DT, c(NA, NA)), error="Passed a vector of type 'logical'. Needs to be type 'character'")

# test no warning when use.names explicitly set, #2385 - changed 'warning' to 'message' as we just check if use.names is missing, due to C-level changes.
# The message check is commented out for now, until confirmed with Matt.
# In both tests the second table has its columns in the order b,a.
test(878, rbind(data.table(a=1:3,b=4:6), data.table(b=7:9,a=4:6)), data.table(a=1:6,b=4:9)) #, message="Columns will be bound by name for consistency with base")
test(879, rbind(data.table(a=1:3,b=4:6), data.table(b=7:9,a=4:6), use.names=TRUE), data.table(a=1:6,b=4:9))

# Test fread()
# Build a table of random integer, double and character columns, plant some special values,
# write it with write.table and read it back. n and DT are used again by later fread tests.
n=110  # 110 just to be over the 100 limit for printing head, as a convenience
DT = data.table( a=sample(1:1000,n,replace=TRUE),
                 b=sample(1:1000,n,replace=TRUE)-500L,
                 c=rnorm(n),
                 d=sample(c("foo","bar","baz","qux","quux"),n,replace=TRUE),
                 e=rnorm(n),
                 f=sample(1:1000,n,replace=TRUE) )
# special values in rows 2 to 5: NA of each type, empty string, +/-Inf and NaN
DT[2,b:=NA_integer_]
DT[4,c:=NA_real_]
DT[3,d:=NA_character_]
DT[5,d:=""]
DT[2,e:=+Inf]
DT[3,e:=-Inf]
DT[4,e:=NaN]  # write.table writes NaN as NA, though, and all.equal considers NaN==NA. fread would read NaN as NaN if "NaN" was in file
# unquoted file: fread must agree with read.csv (880) and with the original table (881)
write.table(DT,f<-tempfile(),sep=",",row.names=FALSE,quote=FALSE)
test(880, fread(f), as.data.table(read.csv(f,stringsAsFactors=FALSE)))
test(881, fread(f), DT)
# test that columns are not coerced if na.strings=NULL
# The file was written before this change; DT gets the string "NA" only so that it matches what
# fread is expected to return for that cell, and is restored straight afterwards.
DT[3,d:="NA"]
test(882, fread(f,na.strings=NULL)[['d']], DT[['d']])
DT[3,d:=NA_character_]
unlink(f)
# same round trip with quote=TRUE
write.table(DT,f<-tempfile(),sep=",",row.names=FALSE,quote=TRUE)
test(883, fread(f), as.data.table(read.csv(f,stringsAsFactors=FALSE)))
test(884, fread(f), DT)
unlink(f)

# Test short files.
# All the unlinks, and using a new file each time, are to work around apparent Windows issues when writing, appending and
# rereading (possibly via the MapViewOfFile) the same file that has just been appended to. These apparent issues have only
# shown up on winbuilder so far, so might be in combination with the D: tempdir() there; perhaps D: is on a network drive or something.
# Empty or whitespace-only input is an error, whether it comes from a file or from a string.
cat("",file=f<-tempfile()); test(885, fread(f), error="empty"); unlink(f)
test(885.1, fread(""), error="empty")
test(886, fread("\n"), error="empty")
test(887, fread("  \n\t  \t  \n    \n  "), error="empty")
# A single line of non-numeric fields is taken as column names, giving zero-row logical columns;
# tested with no final newline (file) and with one (string).
cat("A", file=f<-tempfile()); test(888, fread(f), data.table(A=logical())); unlink(f)
test(889, fread("A\n"), data.table(A=logical()))
cat("AB,CDE",file=f<-tempfile()); test(890, fread(f), data.table(AB=logical(),CDE=logical())); unlink(f)
test(891, fread("AB,CDE\n"), data.table(AB=logical(),CDE=logical()))
# A single line containing a number is data, and the columns get default names V1, V2.
cat("3.14",file=f<-tempfile()); test(892, fread(f), data.table(V1=3.14)); unlink(f)
cat("A,3",file=f<-tempfile()); test(893, fread(f), data.table(V1="A",V2=3L)); unlink(f)
# header followed by an empty line, with \r\n line endings; only run on unix
if (.Platform$OS.type=="unix") test(893.5, fread("A,B\r\n\r\n"), data.table(A=logical(),B=logical()))
# Round trip the first nr rows and nc columns of DT through three files: one with no eol after
# the last line (894.*), one ending with a single eol (895.*) and one with an extra eol (896.*).
for (nc in c(0,1,2)) {   # 0 means all cols here
for (nr in c(0,1,2,3,5,10,18,19,20,21,22,28,29,30,31,32,38,39,40,41,42)) {  # 30 and 40 are trigger points for auto skip
for (eol in if (.Platform$OS.type=="unix") c("\n","\r\n") else "\n") {
    headDT <- head(DT,nr)[,seq_len(if (nc==0) ncol(DT) else nc),with=FALSE]
    if (nr==0) {
        # when read back in, empty cols are the lowest type (logical)
        for (j in seq_len(ncol(headDT))) {
            set(headDT,j=j,value=logical())
        }
    }
    f <- tempfile()
    cat(names(headDT),sep=",",file=f)  # header only, no \n at the end here
    for (i in seq_len(nr)) {
        # eol is written before each row rather than after it, to get no \n after the last line.
        # On unix we simulate windows too. On windows \n will write \r\n (and \r\n will write \r\r\n)
        cat(eol,file=f,append=TRUE)
        write.table(headDT[i],file=f,quote=FALSE,sep=",",eol="",row.names=FALSE,col.names=FALSE,append=TRUE)
    }
    test(894+nr/100+nc/1000, fread(f), headDT)
    # copy to a new file before each append; again trying to work around apparent issue on Windows
    f2 <- tempfile()
    file.copy(f,f2)
    unlink(f)
    cat(eol,file=f2,append=TRUE)   # now a 'normal' file ending with \n
    test(895+nr/100+nc/1000, fread(f2), headDT)
    f3 <- tempfile()
    file.copy(f2,f3)
    unlink(f2)
    cat(eol,file=f3,append=TRUE)   # extra \n should be ignored
    test(896+nr/100+nc/1000, fread(f3), headDT)
    unlink(c(f,f2,f3))
}}}
# integer64 round trip through fread; only run when bit64 is attached (uses n from the fread tests above)
if ("package:bit64" %in% search()) {
    DT = data.table( a=sample(1:1000,n,replace=TRUE),
                     b=sample(as.integer64(2)^35 * 1:10, n, replace=TRUE),
                     c=sample(c("foo","bar","baz"),n,replace=TRUE) )
    write.table(DT,f<-tempfile(),sep=",",row.names=FALSE,quote=FALSE)
    test(897, class(DT$b), "integer64")
    test(898, fread(f), DT)
    unlink(f)

    # Test all mid read bump coercions
    # Derived columns hold the same values as a, b and a/100 in other types; then one value per
    # derived column is replaced (rows 12 to 17) by something that needs a higher type.
    DT[,a2:=as.integer64(a)][,a3:=as.double(a)][,a4:=gsub(" ","",format(a))]
    DT[,b2:=as.double(b)][,b3:=gsub(" ","",format(b))]
    DT[,r:=a/100][,r2:=gsub(" ","",format(r))]
    DT[12, a2:=as.integer64(12345678901234)]   # start on row 12 to avoid first 5, middle 5 and last 5 test rows
    DT[13, a3:=3.14]
    DT[14, a4:="123A"]
    DT[15, b2:=1234567890123.45]
    DT[16, b3:="12345678901234567890A"]  # A is needed otherwise read as double with loss of precision (TO DO: should detect and bump to STR)
    DT[17, r2:="3.14A"]
    write.table(DT,f<-tempfile(),sep=",",row.names=FALSE,quote=FALSE)
    test(899, fread(f), DT, warning="Bumped column.*to type character.*may not be lossless")
    unlink(f)
} else {
    cat("Tests 897-899 not run. If required call library(bit64) first.\n")
}

# getwd() has been set by test.data.table() to the location of this tests.Rraw file. Test files should be in the same directory.
f = "ch11b.dat"  # http://www.stats.ox.ac.uk/pub/datasets/csb/ch11b.dat
test(900, fread(f), as.data.table(read.table(f)))

f = "1206FUT.txt"    # a CRLF line ending file (DOS)
# DT is assigned inside the first argument and its column classes are then passed to read.table in
# the second; presumably this relies on test() evaluating x before y - confirm against test().
test(901.1, DT<-fread(f,strip.white=FALSE), setDT(read.table(f,sep="\t",header=TRUE,colClasses=as.vector(sapply(DT,class)))))
test(901.2, DT<-fread(f), setDT(read.table(f,sep="\t",header=TRUE,colClasses=as.vector(sapply(DT,class)),strip.white=TRUE)))

# Tests the coerce of column 23 to character on line 179 due to the 'A' for the first time :
f = "2008head.csv"
test(902, fread(f), as.data.table(read.csv(f,stringsAsFactors=FALSE)), warning="Bumped column 23 to type character.*may not be lossless")

# Inline input: header rows that are too short, quoted fields, and a trailing empty column
test(903, fread("A,B\n1,3,foo,5\n2,4,barbaz,6"), data.table(1:2,3:4,c("foo","barbaz"),5:6),
          warning="Starting data input on line 2 and discarding line 1 because.*: A,B")  # invalid colnames (too short)
test(904, fread("A,B,C,D\n1,3,foo,5\n2,4,barbaz,6"), DT<-data.table(A=1:2,B=3:4,C=c("foo","barbaz"),D=5:6))  # ok
test(905, fread('A,B,C,D\n1,3,foo,5\n2,4,"barbaz",6'), DT)   # quotes around a field are removed
test(906, fread('A,B,C,D\n1,3,foo,5\n2,4,"ba,r,baz",6'), DT[2,C:="ba,r,baz"])   # separators inside quotes are kept
test(907, fread('A,B,C,D\n1,3,foo,5\n2,4,"ba,\\"r,baz",6'), DT[2,C:='ba,\\"r,baz'])  # \" protected ok, but \ needs taking off too (TO DO)
test(908, fread("A,B,C\n1,3,\n2,4,\n"), data.table(A=1:2,B=3:4,C=NA)) # where NA is type logical

# Input starting with a blank line and with a header containing spaces
test(909, fread("
Date and Time,Open,High,Low,Close,Volume
2007/01/01 22:51:00,5683,5683,5673,5673,64
2007/01/01 22:52:00,5675,5676,5674,5674,17
2007/01/01 22:53:00,5674,5674,5673,5674,42
")$Open, c(5683L,5675L,5674L))   # , is higher than ' ' in the hierarchy of separators, so ',' is auto detected here.

# blanks when testing if header row is all character
# (column 12 is empty in every row; no header is expected to be detected, so the last column is V13)
test(910, fread("
02-FEB-2009,09:55:04:962,26022009,2500,PE,36,500,44,200,11850,1100,,2865.60
02-FEB-2009,09:55:04:987,26022009,2800,PE,108.75,200,111,50,11700,1450,,2865.60
02-FEB-2009,09:55:04:939,26022009,3100,CE,31.1,3000,36.55,200,3500,5250,,2865.60
")$V13, rep(2865.60,3))

# same data as 910 but with no leading blank line and no final newline
test(911, fread("02-FEB-2009,09:55:04:962,26022009,2500,PE,36,500,44,200,11850,1100,,2865.60
02-FEB-2009,09:55:04:987,26022009,2800,PE,108.75,200,111,50,11700,1450,,2865.60
02-FEB-2009,09:55:04:939,26022009,3100,CE,31.1,3000,36.55,200,3500,5250,,2865.60")$V13, rep(2865.60,3))

# Check manually setting separator
# txt contains ; | and , so the result depends on which separator is used.
txt = "A;B;C|D,E\n1;3;4|5,6\n2;4;6|8,10\n"
test(912, names(fread(txt)), c("A;B;C|D","E"))   # auto detection is expected to pick ','
test(913, fread(txt,sep=";"), data.table(A=1:2,B=3:4,"C|D,E"=c("4|5,6","6|8,10")))
test(914, fread(txt,sep="*"), data.table("A;B;C|D,E"=c("1;3;4|5,6","2;4;6|8,10")))   # sep not present: one column
test(915, fread(txt,sep="\n"), data.table("A;B;C|D,E"=c("1;3;4|5,6","2;4;6|8,10"))) # like a fast readLines

# Crash bug when RHS is 0 length and := by group, fixed in 1.8.7
# The new column is expected to be filled with NA.
DT = data.table(a=1:3,b=1:6)
test(916, DT[,newcol:=logical(0),by=a], data.table(a=1:3,b=1:6,newcol=NA))

# roll join error when non last join column is factor, #2450
# Key is state,uid,ts: the factor column is first and the rolled column (ts) is last.
X = data.table(id=2001:2004, uid=c(1001,1002,1001,1001), state=factor(c('CA','CA','CA','MA')), ts=c(51,52,53,54), key='state,uid,ts')
Y = data.table(id=3001:3004, uid=c(1001,1003,1002,1001), state=factor(c('CA','CA','CA','CA')), ts=c(51,57,59,59), key='state,uid,ts')
test(917, X[Y,roll=TRUE], data.table(id=INT(2001,2003,2002,NA), uid=c(1001,1001,1002,1003), state=factor('CA'), ts=c(51,59,59,57), i.id=INT(3001,3004,3003,3002), key='state,uid,ts'))

# NA in join column of type double, #2453.
# X has an NA depID which matches nothing in Y; Y has depID 35 which matches nothing in X.
X = data.table(name=c("Joh","Raf","Jon","Ste","Rob","Smi"),depID=c(NA,31,33,33,34,34),key="depID")
Y = data.table(depID=c(31,33,34,35),depName=c("Sal","Eng","Cle","Mar"),key="depID")
test(918, Y[X], data.table(depID=c(NA,31,33,33,34,34),depName=c(NA,"Sal","Eng","Eng","Cle","Cle"),name=c("Joh","Raf","Jon","Ste","Rob","Smi"),key='depID'))   # Y[X] same as merge.data.frame(X,Y,all.x=TRUE)
test(919, X[Y], data.table(name=c("Raf","Jon","Ste","Rob","Smi",NA), depID=c(31,33,33,34,34,35), depName=c("Sal","Eng","Eng","Cle","Cle","Mar"),key='depID'))
# with nomatch=0 the unmatched rows are dropped, in either direction
test(920, X[Y,nomatch=0], data.table(name=c("Raf","Jon","Ste","Rob","Smi"),depID=c(31,33,33,34,34),depName=c("Sal","Eng","Eng","Cle","Cle"),key='depID'))
test(921, Y[X,nomatch=0], data.table(depID=c(31,33,33,34,34),depName=c("Sal","Eng","Eng","Cle","Cle"),name=c("Raf","Jon","Ste","Rob","Smi"),key='depID'))

# setnames bug on keyed table, when full vector is given and target key isn't the positions in columns 1:length(key)
# Here the key is the second column; the key must follow the rename (b -> B).
DT = data.table(a=1:2,b=3:4,c=5:6,key="b")
test(922, setnames(DT,c("A","B","C")), data.table(A=1:2,B=3:4,C=5:6,key="B"))

# vecseq overflow, crash bug #2464
# 50000 rows sharing one key value, self joined, would be 50000^2 rows which is more than 2^31.
DT = data.table(x=rep(1L,50000),key="x")
test(923, DT[DT], error="Join results in more than 2^31 rows (internal vecseq reached physical limit). Very likely misspecified join.")
# Each 1 in i matches 3 rows of X: 9 result rows is accepted, 12 rows (more than 6+4) is an error.
X = data.table(x=1:2,y=1:6,key="x")
test(924.1, X[J(c(1,1,1))], X[rep(1:3,3)])
test(924.2, X[J(c(1,1,1,1))], error="Join results in 12 rows; more than 10 = nrow(x)[+]nrow(i). Check for duplicate key values in i each of")


# sorting of 'double' columns not correct for ties (tolerance nuance in C code), #2484
# After setkey(DT,X,Y), base::order on the same columns must find the rows already in order.
DT = data.table(X=as.POSIXct( c(rep("15DEC2008:00:00:00",10),"15DEC2008:00:00:00",rep("17DEC2008:00:00:00",2)),format="%d%b%Y:%H:%M:%S"),Y=c(1534,61,74,518,519,1519,1520,1524,3127,29250,30609,43,7853))
setkey(DT,X,Y)
test(925, DT[,base::order(X,Y)], 1:nrow(DT))

# Test new dogroup warning for zero length columns in result when other columns are >1, #2478
# Group 2 returns a zero length second item, as NULL (926) and as numeric() (927).
DT = data.table(a=1:3,b=1:6)
test(926, DT[, if(a==2L) list(42:43,NULL) else list(42L,3.14), by=a], data.table(a=INT(1,2,2,3),V1=INT(42,42,43,42),V2=c(3.14,NA,NA,3.14)), warning="Item 2 of j's result for group 2 is zero length. This will be filled with 2 NAs to match the")
test(927, DT[, if(a==2L) list(42:43,numeric()) else list(42L,3.14), by=a], data.table(a=INT(1,2,2,3),V1=INT(42,42,43,42),V2=c(3.14,NA,NA,3.14)), warning="Item 2 of j's result for group 2 is zero length. This will be filled with 2 NAs to match the")

# And the root cause of #2478: that cbind(DT,1:3) created invalid data.table with empty column
test(928, cbind(data.table(a=1L),b=1:3), data.table(a=1L,b=1:3))
# FR #4813 implementation resulted in changing 929 error to warning
# test(929, cbind(data.table(a=1L,b=2:3),c=1:3), error="argument 1 (nrow 2) cannot be recycled without remainder to match longest nrow (3)")
# 929: 2 rows recycled to 3 leaves a remainder, so a warning is expected; 930: 2 rows recycled to 4, no warning
test(929, cbind(data.table(a=1L,b=2:3),c=1:3), data.table(a=1L, b=c(2L,3L,2L), c=1:3), warning="Item 1 is of size 2 but maximum size is 3")
test(930, cbind(data.table(a=1L,b=2:3),c=1:4), data.table(a=1L,b=INT(2,3,2,3),c=1:4))
# cbind(.SD, ...) inside j by group, where the other argument is longer than .SD for some groups
DT = data.table(x=c(1,1,1,1,2,2,3),y=c(1,1,2,3,1,1,2))
DT[,rep:=1L][c(2,7),rep:=c(2L,3L)]   # duplicate row 2 and triple row 7
DT[,num:=1:.N]                       # to group each row by itself
test(931, DT[,cbind(.SD,dup=1:rep),by="num"], data.table(num=INT(1,2,2,3:7,7,7),x=c(1,1,1,1,1,2,2,3,3,3),y=c(1,1,1,2,3,1,1,2,2,2),rep=INT(1,2,2,1,1,1,1,3,3,3), dup=INT(1,1,2,1,1,1,1,1,2,3)))

# New roll=+/- and rollends
# DT is keyed by a,b but i supplies only the first key column; the values of a in DT are 1,3,4,7.
DT = data.table(a=INT(1,3,4,4,4,4,7), b=INT(5,5,6,6,9,9,2), v=1:7, key="a,b")
# unlimited roll forwards (+Inf) and backwards (-Inf), with rollends=TRUE and then the default
test(932, DT[J(c(0,2,6,8)), roll=+Inf, rollends=TRUE, v], INT(1,1,6,7))
test(933, DT[J(c(0,2,6,8)), roll=-Inf, rollends=TRUE, v], INT(1,2,7,7))
test(934, DT[J(c(0,2,6,8)), roll=+Inf, v], INT(NA,1,6,7))
test(935, DT[J(c(0,2,6,8)), roll=-Inf, v], INT(1,2,7,NA))
# limited roll of 5 in each direction: -10 and 13 are too far from the ends to match
test(936, DT[J(c(-10,-1,2,12,13)), roll=5, rollends=TRUE, v], INT(NA,1,1,7,NA))
test(937, DT[J(c(-10,-1,2,12,13)), roll=-5, rollends=TRUE, v], INT(NA,1,2,7,NA))
# roll="nearest", with rollends as default, c(first,last) and a single FALSE
test(938, DT[J(c(-10,2,6,7,8)), roll="nearest", v], INT(1,1,7,7,7))
test(939, DT[J(c(-10,2,6,7,8)), roll="nearest", rollends=c(TRUE,FALSE), v], INT(1,1,7,7,NA))
test(940, DT[J(c(-10,2,6,7,8)), roll="nearest", rollends=c(FALSE,TRUE), v], INT(NA,1,7,7,7))
test(941, DT[J(c(-10,2,6,7,8)), roll="nearest", rollends=FALSE, v], INT(NA,1,7,7,NA))

# merge all=TRUE with space in a y column name, #2555
X = data.table(a=1:3,b=4:6)
Y = data.table(a=2:4,"d 1"=5:7) # space in Y's column name
test(942, merge(X,Y,all=TRUE,by="a"), data.table(a=1:4,b=INT(4:6,NA),"d 1"=INT(NA,5:7),key="a"))
test(943, merge(X,Y,all.y=TRUE,by="a"), data.table(a=2:4,b=INT(5:6,NA),"d 1"=5:7,key="a"))

# Test error message says NULL rather than empty table
# 944: a null data.table (no columns) cannot have a column added by :=
DT = data.table(NULL)
test(944, DT[,a:=1L], error = "Cannot use := to add columns to a null data.table.*You can use")
# 945: a data.table with a column but zero rows can
DT = data.table(a=numeric())
test(945, DT[,b:=a+1], data.table(a=numeric(),b=numeric()))

# fread blank column names get default names
test(946, fread('A,B,,D\n1,3,foo,5\n2,4,bar,6\n'), data.table(A=1:2,B=3:4,c("foo","bar"),D=5:6))
test(947, fread('0,2,,4\n1,3,foo,5\n2,4,bar,6\n'), data.table(0:2,2:4,c("","foo","bar"),4:6))   # first row is numeric so it is data
# header=TRUE forces the first line to be the header even though the second line is all character too
test(948, fread('A,B,C\nD,E,F\n',header=TRUE), data.table(A="D",B="E",C=FALSE))   # the field F is expected as logical FALSE
test(949, fread('A,B,\nD,E,F\n',header=TRUE), data.table(A="D",B="E",V3=FALSE))

# +/- with no numbers afterwards should read as character
test(950, fread('A,B,C\n1,+,4\n2,-,5\n3,-,6\n'), data.table(A=1:3,B=c("+","-","-"),C=4:6))

# catching misuse of `:=`
# Here `:=` is called as the last statement inside {} in j, rather than being j itself.
x = data.table(a=1:5)
test(951, x[,{b=a+3; `:=`(c=b)}], error="defined for use in j, once only and in particular ways")

# fread colClasses
# Column A has leading zeros and column C trailing zeros, so reading either as character is visible in the result.
input = 'A,B,C\n01,foo,3.140\n002,bar,6.28000\n'
# named character vector: names are columns, values are types
test(952, fread(input, colClasses=c(C="character")), data.table(A=1:2,B=c("foo","bar"),C=c("3.140","6.28000")))
test(953, fread(input, colClasses=c(C="character",A="numeric")), data.table(A=c(1.0,2.0),B=c("foo","bar"),C=c("3.140","6.28000")))
test(954, fread(input, colClasses=c(C="character",A="double")), data.table(A=c(1.0,2.0),B=c("foo","bar"),C=c("3.140","6.28000")))
# named list: names are types, values are column names or numbers
test(955, fread(input, colClasses=list(character="C",double="A")), data.table(A=c(1.0,2.0),B=c("foo","bar"),C=c("3.140","6.28000")))
test(956, fread(input, colClasses=list(character=2:3,double="A")), data.table(A=c(1.0,2.0),B=c("foo","bar"),C=c("3.140","6.28000")))
test(957, fread(input, colClasses=list(character=1:3)), data.table(A=c("01","002"),B=c("foo","bar"),C=c("3.140","6.28000")))
# a single unnamed type applies to all columns
test(958, fread(input, colClasses="character"), data.table(A=c("01","002"),B=c("foo","bar"),C=c("3.140","6.28000")))
# unnamed vector with one type per column; the expected value is the last (unnamed) argument
test(959, fread(input, colClasses=c("character","double","numeric"), verbose=TRUE),
          warning = "Column 2 ('B') has been detected as type 'character'. Ignoring request from colClasses to read as 'numeric' (a lower type) since NAs (or loss of precision) may result",
          output = "Detected 3 columns",  # including output= just so that verbose output is captured, just the warning will be checked.
          data.table(A=c("01","002"),B=c("foo","bar"),C=c(3.14,6.28)))

# invalid colClasses
test(960, fread(input, colClasses=c("character","double")), error="colClasses is unnamed and length 2 but there are 3 columns. See")
test(961, fread(input, colClasses=1:3), error="colClasses is not type list or character vector")
test(962, fread(input, colClasses=list(1:3)), error="colClasses is type list but has no names")
test(963, fread(input, colClasses=list(character="D")), error="Column name 'D' in colClasses..1.. not found")
test(964, fread(input, colClasses=c(D="character")), error="Column name 'D' in colClasses..1.. not found")
test(965, fread(input, colClasses=list(character=0)), error="Column number 0 (colClasses..1...1.) is out of range .1,ncol=3.")
test(966, fread(input, colClasses=list(character=2:4)), error="Column number 4 (colClasses..1...3.) is out of range .1,ncol=3.")

# Character input more than 4096 bytes (used to be passed through path.expand which imposed the limit), #2649
test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L)

# Test fread warns about removal of any footer (and autostart skips up over it)
# The footer follows one empty line with a final newline (968), two empty lines with no final
# newline (969), and two empty lines with extra newlines at the end (970).
test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Stopped reading at empty line 4.*discarded.*Rowcount: 2")
test(969, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2"), data.table(A=1:2,B=3:4), warning="Stopped reading at empty line 4.*discarded.*Rowcount: 2")
test(970, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2\n\n"), data.table(A=1:2,B=3:4), warning="Stopped reading at empty line 4.*discarded.*Rowcount: 2")

# fread skip override
# The first line of input is all character and so is the real header on line 2.
input = "some,bad,data\nA,B,C\n1,3,5\n2,4,6\n"
test(971, fread(input), data.table(some=c("A",1:2),bad=c("B",3:4),data=c("C",5:6)))   # no skip: line 1 taken as the header
test(972, fread(input, skip=1), data.table(A=1:2,B=3:4,C=5:6))
test(973, fread(input, skip=2), data.table(V1=1:2,V2=3:4,V3=5:6))   # starts on a data line, so default names
test(974, fread(input, skip=2, header=TRUE), data.table("1"=2L,"3"=4L,"5"=6L))   # header=TRUE makes that data line the header
test(975, fread(input, skip="B"), data.table(A=1:2,B=3:4,C=5:6))   # character skip: start at the line containing "B"
input = "\n\nA,B\n1,3\n2,4\n\nC,D\n5,7\n6,8\n\nE,F\n9,11\n10,12\n"   # 3 tables in one file
# Reading stops at the empty line after whichever table it started in, with a warning about the rest.
test(976, fread(input), data.table(A=1:2,B=3:4), warning="Stopped reading at empty line 6.*discarded.*C,D")
test(977, fread(input, autostart=8), data.table(C=5:6,D=7:8), warning="Stopped reading at empty line 10.*discarded.*E,F")
test(978, fread(input, skip="D"), data.table(C=5:6,D=7:8), warning="Stopped reading at empty line 10.*discarded.*E,F")

# mixed add and update in same `:=` bug/crash, #2528 and #2778
DT = data.table(x=rep(1:2, c(3,2)), y=6:10)
DT[, z:=.GRP, by=x]                 # first assignment
test(979, DT[, `:=`(z=.GRP, w=2), by=x], data.table(x=INT(1,1,1,2,2),y=6:10,z=INT(1,1,1,2,2),w=2))  # mixed update and add
# and example from http://stackoverflow.com/a/14732348/403310 :
# A and B already exist in dt1 and are updated from dt2's columns (i. prefix); C is added.
dt1 = fread("Date,Time,A,B
01/01/2013,08:00,10,30
01/01/2013,08:30,15,25
01/01/2013,09:00,20,20
02/01/2013,08:00,25,15
02/01/2013,08:30,30,10
02/01/2013,09:00,35,5")
dt2 = fread("Date,A,B,C
01/01/2013,100,300,1
02/01/2013,200,400,2")
setkey(dt1, "Date")
setkey(dt2, "Date")
test(980, dt1[dt2, `:=`(A=A+i.A, B=B+i.B, C=i.C)][,list(A,B,C)],
          data.table(A=INT(110,115,120,225,230,235),B=INT(330,325,320,415,410,405),C=rep(1:2,each=3)))
# Updates (B, C) and additions (D, E, F, G) interleaved in one := by group.
# In 981 the RHS for B (0:1) is length 2 while each group has one row, so a warning is expected.
DT = data.table(A=1:2,B=3:4,C=5:6)
test(981, DT[,`:=`(D=B+4L,B=0:1,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A],
          data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L)), warning="RHS 2 is length 2")
# 982 is the same query with a length 1 RHS for B, so no warning.
DT = data.table(A=1:2,B=3:4,C=5:6)
test(982, DT[,`:=`(D=B+4L,B=0L,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A],
          data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L))) # Also note that G is not yet iterative. In future: c(12,14)

# rbindlist binding factors, #2650
# factor with factor (983), factor with character in either order (984, 985): the result is a factor
test(983, rbindlist(list(data.table(factor(c("A","A","B","C","A"))), data.table(factor(c("B","F","A","G"))))), data.table(V1=factor(c("A","A","B","C","A","B","F","A","G"))))
test(984, rbindlist(list(data.table(factor(c("A","B"))), data.table(c("C","A")))), data.table(factor(c("A","B","C","A"))))
test(985, rbindlist(list(data.table(c("A","B")), data.table(factor(c("C","A"))))), data.table(factor(c("A","B","C","A"))))
# with NA
test(985.1, rbindlist(list(data.table(factor(c("A","B"))), data.table(factor(c("C",NA))))), data.table(factor(c("A","B","C",NA))))
test(985.2, rbindlist(list(data.table(c("A","B")), data.table(factor(c("C",NA))))), data.table(factor(c("A","B","C",NA))))

## Allow unique/duplicated to accept a custom column combination to query for
## uniqueness
# Expected values come from base duplicated() on the data.frame copy, restricted to the same columns.
dt <- data.table(A = rep(1:3, each=4), B = rep(11:14, each=3), C = rep(21:22, 6), key = "A,B")
df <- as.data.frame(dt)
test(986, unique(dt), dt[!duplicated(df[, key(dt)]),])   # default here is compared against the key columns
test(987, unique(dt, by='A'), dt[!duplicated(df[, 'A'])])
test(988, unique(dt, by='B'), dt[!duplicated(df[, 'B'])])
test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])])
test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])])
test(991, unique(dt, by=NULL), dt[!duplicated(df)])   # by=NULL is compared against all columns
# by given as column numbers: out of range or non-integer is an error, valid numbers behave like names
test(991.1, unique(dt, by=4), error="Integer values between 1 and ncol are required")
test(991.2, unique(dt, by=c(1,3.1)), error="Integer values between 1 and ncol are required")
test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])])
test(991.4, unique(dt, by=c('C','D','E')), error="by specifies column names that do not exist. First 5: D,E")

# :=NULL on factor column in empty data.table, #4809
DT = data.table(A = integer(), B = factor())
test(992, DT[, B:=NULL], data.table(A=integer()))

# That including FUN= works in j=lapply, #4839
# The same function is passed positionally and as FUN=; both forms must give the same result.
DT = as.data.table(iris)
test(993, DT[, lapply(.SD, function(x) sum(!is.na(x), na.rm=TRUE)), by = Species],
          DT[, lapply(.SD, FUN=function(x) sum(!is.na(x), na.rm=TRUE)), by = Species])

# fread more than 50,000 columns, the R_PPSSIZE limit in Defn.h
# Takes too long for routine use. TO DO: move to a long running stress test script
#M = matrix(1,nrow=3,ncol=200000)
#f = tempfile()
#write.csv(M,f,row.names=FALSE)
#test(994, fread(f)[[200000]], rep(1L,3))
#unlink(f)

# CJ with `sorted = FALSE` option
# OUT spells out the four combinations in the order the inputs were given (5 before 3, 5 before 1).
DT <- data.table(x=rep(3:5, each=4), y=rep(1:6, each=2), z=1:12)
setkey(DT, x, y)
OUT <- DT[J(c(5,5,3,3), c(5,1,5,1))]
test(995, DT[CJ(c(5,3), c(5,1), sorted=FALSE)], OUT)

# CJ with ordered factor
# yy is a random permutation of 1:2, so the expected value sorts it.
xx <- factor(letters[1:2], ordered=TRUE)
yy <- sample(2)
test(996, CJ(xx, yy), setkey(data.table(rep(xx, each=2), rep(base::sort.int(yy), 2))))

# That CJ orders NA consistently with setkey and historically, now it doesn't use setkey.
# NA must always come first in data.table throughout, since binary search relies on that internally.
# In each test DT is assigned in the first argument and the expected value rebuilds the key of a
# copy of it; presumably this relies on test() evaluating x before y - confirm against test().
test(997, DT <- CJ(c(1,3,NA,2), 5:6), setkey(setkey(copy(DT),NULL)))  # double setkey to really rebuild key
test(998, DT <- CJ(as.integer(c(1,3,NA,2)), 5:6), setkey(setkey(copy(DT),NULL)))
test(999, DT <- CJ(c("A","B",NA,"C"), 5:6), setkey(setkey(copy(DT),NULL)))
test(1000, DT <- CJ(c(1,NA,3), c("B",NA,"A"), c(5L,NA_integer_)), setkey(setkey(copy(DT),NULL)))
test(1001, DT <- CJ(c(1,NA,3)), setkey(setkey(copy(DT),NULL)))  # The 1 column case is switched inside CJ() so test that too.

# merge all=TRUE when y is empty, #2633
# b[0] is b with zero rows; the expected value comes from merge.data.frame with the same arguments.
a = data.table(P=1:2,Q=3:4,key='P')
b = data.table(P=2:3,R=5:6,key='P')
test(1002, merge(a,b[0],all=TRUE), data.table(merge.data.frame(a,b[0],all=TRUE),key='P'))
# same again where the key is the only column
a = data.table(c=c(1,2),key='c')
b = data.table(c=3,key='c')
test(1003, merge(a,b[0],all=TRUE), data.table(merge.data.frame(a,b[0],all=TRUE),key='c'))

# setkey with backticks, #2452
# A backticked column name containing spaces must give the same result as the quoted string.
DT = data.table("Date and Time"=1:3,x=4:6)
test(1004, setkey(copy(DT),`Date and Time`), setkey(DT,"Date and Time"))

# rbinding with duplicate names, NA or "", #2384 and #2726
DT = data.table(a=1:3,b=4:6,b=7:9,c=10:12)   # two columns named b
test(1005, rbind(DT,DT), data.table(a=rep(1:3,2),b=rep(4:6,2),b=rep(7:9,2),c=rep(10:12,2)))
# an NA column name must print as NA
M <- mtcars
colnames(M)[11] <- NA
test(1006, print(as.data.table(M), nrows=10), output="gear NA.*1: 21.0")

# rbinding factor with non-factor/character
# The result is a factor in both orders, with levels in order of appearance.
DT1 <- data.table(x=1:5, y=factor("a"))
DT2 <- data.table(x=1:5, y=2)
test(1007, rbindlist(list(DT1, DT2)), data.table(x = c(1:5, 1:5), y = factor(c(rep('a', 5), rep('2', 5)), levels = c('a', '2'))))
test(1008, rbindlist(list(DT2, DT1)), data.table(x = c(1:5, 1:5), y = factor(c(rep('2', 5), rep('a', 5)))))

# rbindlist different types
# Column b is integer in DT1, character in DT2 and double in DT3.
DT1 <- data.table(a = 1L, b = 2L)
DT2 <- data.table(a = 2L, b = 'a')
DT3 <- data.table(a = 2L, b = 2.5)
test(1008.1, rbindlist(list(DT1, DT2)), data.table(a = c(1L,2L), b = c('2', 'a')))   # integer with character gives character
test(1008.2, rbindlist(list(DT1, DT3)), data.table(a = c(1L,2L), b = c(2, 2.5)))     # integer with double gives double

# optimized mean() respects na.rm=TRUE by default, as intended
# NOTE(review): the expected V1 and V2 are NA for the group where a is all NA, i.e. NAs are not
# removed here; the wording above looks like it should say the na.rm default - confirm.
DT = data.table(a=c(NA,NA,FALSE,FALSE), b=c(1,1,2,2))
test(1009, DT[,list(mean(a), sum(a)),by=b], data.table(b=c(1,2),V1=c(NA,0),V2=c(NA,0)))

# an fread error shouldn't hold a lock on the file on Windows.
# The same file f is read, appended to, read again, overwritten, read once more and finally removed.
# NOTE(review): tests 1010 and 1011 now expect the malformed input to be read (with the stray quote
# kept in the field) rather than to raise an error, so the "after error" wording below looks
# historical - confirm.
f = tempfile()
cat('A,B\n"aa",2\n"bb,2\n"cc",3\n', file=f)  # NB: deliberate missing quote after bb.
test(1010, fread(f), data.table(A=c("aa", "\"bb", "cc"), B=c(2L,2L,3L)))
cat('dd",4\n',file=f,append=TRUE)   # tests file lock on Windows after error
test(1011, fread(f), data.table(A=c("aa", "\"bb", "cc", "dd\""), B=c(2L,2L,3L,4L)))
cat('A,B\n"aa",1\n"bb",2\n"cc",3\n', file=f)   # testing overwrite
test(1012, fread(f), data.table(A=c("aa","bb","cc"),B=1:3))
unlink(f)  # testing file can be removed after error

# integer64 control to fread
# 1013: "integer" is not an accepted value. The literal %s in the pattern is what the test
# currently matches against; it is left as is here.
test(1013, fread("A,B\n123,123\n", integer64="integer"), error="integer64='%s' which isn't 'integer64'|'double'|'numeric'|'character'")
# a value too large for integer, read as character and as double
test(1014, fread("A,B\n123456789123456,21\n", integer64="character"), data.table(A="123456789123456",B=21L))
test(1015, fread("A,B\n123456789123456,21\n", integer64="double"), data.table(A=as.double("123456789123456"),B=21L))
# and that mid read bumps respect integer64 control too ..
x = sample(1:1000,100,replace=TRUE)
DT = data.table( A=as.character(x), B=1:100)
DT[15, A:="123456789123456"]  # row 15 is outside the top, middle and last 5 rows.
write.table(DT,f<-tempfile(),sep=",",row.names=FALSE,quote=FALSE)
test(1016, fread(f,integer64="numeric"), copy(DT)[,A:=as.numeric(A)])   # no warning expected for the bump to numeric
test(1017, fread(f,integer64="character"), DT, warning="Bumped column.*to type character.*may not be lossless")
unlink(f)

# ERANGE warning, #4879
# This test file runs under options(warn=2), so a warning from fread on this input shows up
# as a try-error in the probe below. The probe decides which result test 1018 expects.
tt <- try(fread("1.46761e-313\n"), silent=TRUE)
if (!inherits(tt,"try-error")) {
    # the probe succeeded: on PowerPC only via QEMU emulation
    test(1018, fread("1.46761e-313\n"), data.table("1.46761e-313"=logical()))
} else {
    # the probe failed: all CRAN machines including SPARC
    test(1018, fread("1.46761e-313\n"), data.table(V1=as.numeric("1.46761e-313")), warning="strtod() returned ERANGE")
}
# more digits than a double can hold is not a range error
test(1019, fread("1.23456789123456789123456789\n"), data.table(V1=as.numeric("1.23456789123456789123456789"))) # no warning, as standard

# crash assigning to row 0, #2754
# These tests run in sequence on the same DT, so each expected value includes earlier assignments.
DT = data.table(A=1:5,B=6:10)
test(1020, DT[0,A:=6L], DT)       # row 0 selects nothing: DT unchanged
test(1021, DT[NA,A:="foo"], DT)   # NA selects nothing: DT unchanged
test(1022, DT[5:0,A:=21L], data.table(A=21L, B=6:10))   # the 0 in 5:0 is ignored
test(1023, DT[c(1,2,NA,3), B:=42L], data.table(A=21L, B=c(42L,42L,42L,9:10)))   # the NA in i is ignored
test(1024, DT[6,A:=0L], error="i[[]1[]] is 6 which is out of range [[]1,nrow=5[]]")

# crash assigning to duplicated column names/numbers, #2751
test(1024.1, DT[,c("B","B"):=NULL], error="Can't assign to the same column twice in the same query (duplicates detected).")
test(1024.2, DT[,c(1,2,1):=NULL], error="Can't assign to the same column twice in the same query (duplicates detected).")

# as.data.table.table, #4848
# Expected values are built from base: as.data.frame(table(...)) converted to data.table, with the
# columns renamed to V1..Vn and N. One, two and three way tables are covered.
DF <- data.frame(
    x = c(1,1,2,NA,1,2),
    y = c("b", "b", "b", "a", "c", "a"),
    z = c(1,1,1,1,1,2),
    stringsAsFactors=FALSE
)
tab1 <- as.data.table(as.data.frame(table(DF$x), stringsAsFactors=FALSE))
setattr(tab1, 'names', c("V1", "N"))
tab2 <- as.data.table(as.data.frame(table(DF$x, DF$y), stringsAsFactors=FALSE))
setattr(tab2, 'names', c("V1", "V2", "N"))
tab3 <- as.data.table(as.data.frame(table(DF$x, DF$y, DF$z), stringsAsFactors=FALSE))
setattr(tab3, 'names', c("V1", "V2", "V3", "N"))
test(1025, as.data.table(table(DF$x)), tab1)
test(1026, as.data.table(table(DF$x, DF$y)), tab2)
test(1027, as.data.table(table(DF$x, DF$y, DF$z)), tab3)
# catch printing of data.table(table()), #4847 (as.data.table should be used instead)
# new, updated 14th Feb, 2015. data.table(table) now redirects to as.data.table
test(1027.1, data.table(table(1:99)), as.data.table(table(1:99)))
# data.table() and rbindlist() in v1.8.11 now catch and remove the dim attribute. Force it back on to test that print catches it :
test(1027.2, {DT<-data.table(table(1:99));setattr(DT[[1]],"dim",99L);print(DT)}, error="Invalid column: it has dimensions. Can't format it. If it's the result of data.table(table()), use as.data.table(table()) instead.")

# as.data.table.x where x is integer, numeric, etc...
# Each call must give a one column data.table named V1. x is reassigned inside each test and
# several of the tests derive their input from the x left by the previous one, so order matters.
set.seed(45)
test(1028, as.data.table(x<-sample(5)), data.table(V1=x))                           # integer
test(1029, as.data.table(x<-as.numeric(x)), data.table(V1=x))                       # double
test(1030, as.data.table(x<-as.Date(x, origin="2013-01-01")), data.table(V1=x))     # Date
test(1031, as.data.table(x<-factor(sample(5))), data.table(V1=x))                   # factor
test(1032, as.data.table(x<-factor(x, ordered=TRUE)), data.table(V1=x))             # ordered factor
test(1033, as.data.table(x<-as.logical(sample(0:1, 5, TRUE))), data.table(V1=x))    # logical
test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x))    # character

#########################################
# All melt.data.table tests go in here #
#########################################
if ("package:reshape2" %in% search()) {

    set.seed(45)
    DT <- data.table(
          i_1 = c(1:5, NA), 
          i_2 = c(NA,6,7,8,9,10), 
          f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)), 
          c_1 = sample(c(letters[1:3], NA), 6, TRUE), 
          d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), 
          d_2 = as.Date(6:1, origin="2012-01-01"))
    DT[, l_1 := DT[, list(c=list(rep(i_1, sample(5,1)))), by = i_1]$c] # generate list cols
    DT[, l_2 := DT[, list(c=list(rep(c_1, sample(5,1)))), by = i_1]$c]

    test(1035, melt(DT, id=1:2, measure=3:4), melt(DT, id=c("i_1", "i_2"), measure=c("f_1", "c_1")))

    ans1 = cbind(DT[, c(1,2,8), with=FALSE], variable=factor("l_1"))
    ans1[, value := DT$l_1]
    test(1036, melt(DT, id=c("i_1", "i_2", "l_2"), measure=c("l_1")), ans1)

    # melt retains attributes if all are of same type (new)
    ans2 = data.table(c_1=DT$c_1, variable=rep(c("d_1", "d_2"), each=6), value=as.Date(c(DT$d_1, DT$d_2)))[!is.na(value)]
    test(1037, melt(DT, id=4, measure=5:6, na.rm=TRUE, variable.factor=FALSE), ans2)

    DT2 <- data.table(x=1:5, y=1+5i) # unimplemented class
    test(1038, melt(DT2, id=1), error="Unknown column type 'complex'")
    
    # more tests
    DT[, f_2 := factor(c("z", "a", "x", "z", "a", "a"), ordered=TRUE)]
    DT[, id := 1:6]
    ans1 = cbind(melt(DT, id="id", measure=5:6, value.name="value1"), melt(DT, id=integer(0), measure=7:8, value.name="value2")[, variable:=NULL])
    levels(ans1$variable) = as.character(1:2)
    test(1038.2, ans1, melt(DT, id="id", measure=list(5:6, 7:8)))
    test(1038.3, ans1, melt(DT, id="id", measure=list(5:6, 7:8), na.rm=TRUE)) # should've no effect
    test(1038.7, ans1, melt(DT, id="id", measure=patterns("d_", "l_")))
    # melt retains ordered factors!
    test(1038.4, melt(DT, id="id", measure=c("f_1", "f_2"), value.factor=TRUE)$value, factor(c(as.character(DT$f_1), as.character(DT$f_2)), ordered=TRUE))
    # if measure is integer(0) just returns a duplicated data.table with all idcols
    test(1038.5, melt(DT, id=1:6, measure=integer(0)), shallow(DT, 1:6))
    # measure.var list with single entry recycles to maximum length
    ans = cbind(melt(DT, id="id", measure=c("c_1", "c_1"))[, variable := NULL], melt(DT, id=integer(0), measure=c("f_1", "f_2")))
    setnames(ans, c("id", "value1", "variable", "value2"))
    setcolorder(ans, c("id", "variable", "value1", "value2"))
    levels(ans$variable) = as.character(1:2)
    test(1038.6, melt(DT, id="id", measure=list(c("c_1", "c_1"), c("f_1", "f_2"))), ans)

    # test to ensure attributes on non-factor id-columns are preserved after melt
    DT <- data.table(x=1:3, y=letters[1:3], z1=8:10, z2=11:13)
    setattr(DT$x, 'foo', 'bla1')
    setattr(DT$y, 'bar', 1:4)
    test(1222.1, attr(melt(DT, id=1:2)$x, "foo"), "bla1")
    test(1222.2, attr(melt(DT, id=1:2)$y, "bar"), 1:4)

    # bug #699 - melt segfaults when vars are not in dt
    x = data.table(a=c(1,2),b=c(2,3),c=c(3,4))
    test(1316.1, melt(x, id="d"), error="One or more values")
    test(1316.2, melt(x, measure="d"), error="One or more values")
    test(1316.3, melt(x, id="a", measure="d"), error="One or more values")
    test(1316.4, melt(x, id="d", measure="a"), error="One or more values")

    # fix for #780.
    # foo() forwards its own `by` and `var` arguments straight to melt(). The tests call it
    # with `var` omitted (1371.1) and with both `by` and `var` omitted (1371.2), so melt()
    # is handed arguments that are missing in the wrapper. Both calls expect the same molten result.
    DT = data.table(x=rep(c("a","b","c"),each=3), y=c(1,3,6), v=1:9)
    foo = function(input, by, var) {
        melt(input, id.vars = by, measure.vars=var)
    }
    test(1371.1, foo(DT, by="x"), data.table(x=rep(DT$x, 2L), variable=factor(rep(c("y", "v"), each=9L), levels=c("y", "v")), value=c(DT$y, DT$v)), warning="are not all of the same type. By order of hierarchy, the molten data value column will be of type 'double'")
    test(1371.2, foo(DT), data.table(x=rep(DT$x, 2L), variable=factor(rep(c("y", "v"), each=9L), levels=c("y", "v")), value=c(DT$y, DT$v)), warning="To be consistent with reshape2's melt, id.vars and")
    # Fix for #1055
    DT <- data.table(A = 1:2, B = 3:4, D = 5:6, D = 7:8)
    test(1495, melt(DT, id=1:2), data.table(A=1:2, B=3:4, 
        variable=factor(rep(1L, 4L), labels="D"), value=5:8))

    # segfault of unprotected var caught with the help of address sanitizer
    set.seed(1)
    val = sample(c(1:5, NA), 1e6L, TRUE)
    dt <- setDT(replicate(100L, val, simplify=FALSE))
    ## to ensure there's no segfault...
    ans <- melt(dt, measure.vars=names(dt), na.rm=TRUE)
    test(1509, ans, ans)

    # improper levels fix, #1359
    dt = data.table(id=1:3, x=NA_character_, y=c('a', NA_character_, 'c'))
    test(1563, melt(dt, id.var="id", na.rm=TRUE), data.table(id=c(1L,3L), variable=factor(c("y", "y")), value=c("a", "c")))

    # fixing segfault due to negative id and measure vars that I detected by accident
    dt = data.table(x=1:5, y=6:10, z=11:15)
    test(1569.1, melt(dt, id=-1, measure=NULL), error="One or more values in 'id.vars'")
    test(1569.2, melt(dt, id=-1, measure=-1), error="One or more values in 'id.vars'")
    test(1569.3, melt(dt, id=NULL, measure=-1), error="One or more values in 'measure.vars'")
    test(1569.4, melt(dt, id=5, measure=-1), error="One or more values in 'id.vars'")
    test(1569.5, melt(dt, id=1, measure=-1), error="One or more values in 'measure.vars'")
}

# sorting and grouping of Inf, -Inf, NA and NaN,  #4684, #4815 & #4883
# Each of the five values (1, NA, NaN, Inf, -Inf) occurs exactly twice, so every group has N=2
# (the scalar 2L is recycled in OUT). The expected rows are listed in order of first appearance.
DT <- data.table(x = rep(c(1, NA, NaN, Inf, -Inf), each=2))
OUT <- data.table(x=c(1, NA, NaN, Inf, -Inf), N=2L)
test(1039, DT[, .N, by=x], OUT)
# Same values, but interleaved rather than adjacent
DT <- data.table(y =c(NA, Inf, NA, -Inf, -Inf, NaN, Inf, 1, NaN, 1))
OUT <- data.table(y = c(NA, Inf, -Inf, NaN, 1), N=2L)
test(1040, DT[, .N, by=y], OUT)

# rbindlist on *data.frame* input, #4648. There was previously no test for this (although #4648 was the same as #2650, fixed in v1.8.9).
# Both data.frames carry i1 as a factor with different levels (a,b,c and d,e); the combined
# column is expected to equal factor(letters[1:5]).
l <- list(u1=data.frame(i1=c('a', 'b', 'c'), val=1:3, stringsAsFactors=TRUE),
          u2=data.frame(i1=c('d', 'e'), val=4:5, stringsAsFactors=TRUE))
test(1041, rbindlist(l), data.table(i1=factor(letters[1:5]),val=1:5))

# negative indexing in *i* leads to crash/wrong aggregates when dogroups is called. bug #2697
DT = data.table(x = c(1,2,3,4,5), group = c(1,1,2,2,3))
# Dropping row 5 removes the only row of group 3, leaving groups 1 and 2
test(1042, DT[-5, mean(x), by = group], data.table(group=1:2, V1=c(1.5, 3.5)))
# Test when abs(negative index) > nrow(dt) - should warn
test(1042.1, DT[-10], DT, warning="Item 1 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 1.")
test(1042.2, DT[c(-5, -10), mean(x), by = group], data.table(group=c(1,2),V1=c(1.5,3.5)), warning="Item 2 of i is -10 but there are only 5 rows. Ignoring this and 0 more like it out of 2.") 
#  Test #1043 TO DO - mixed negatives
# For now, mixing a positive and a negative subscript in i is expected to raise an error
test(1043, DT[c(1, -5)], error="Item 2 of i is -5 and item 1 is 1. Cannot mix positives and negatives.")

# crash (floating point exception), when assigning null data.table() to multiple cols, #4731
DT = data.table(x=1:5,y=6:10)
test(1044, DT[3,c("x","y"):=data.table()],error="Supplied 2 columns to be assigned an empty list.*use NULL instead.*list(list())")
test(1045, DT[3,c("x","y"):=list()],error="Supplied 2 columns to be assigned an empty list.*use NULL instead.*list(list())")

# negative indexing with head() and tail(). bug #2375
# d2 is a data.frame copy of d1; head()/tail() applied to d2 supply the expected results.
# n = 10 and n = -10 exercise |n| larger than the 5 rows available.
d1 = data.table(date = c(1,2,3,4,5), value = c(1,2,3,4,5))
d2 = data.frame(d1)
test(1046, head(d1, -2), as.data.table(head(d2, -2)))
test(1047, head(d1, 2), as.data.table(head(d2, 2)))
test(1048, head(d1, -10), as.data.table(head(d2, -10)))
test(1049, head(d1, 10), as.data.table(head(d2, 10)))
test(1050, tail(d1, -2), as.data.table(tail(d2, -2)))
test(1051, tail(d1, 2), as.data.table(tail(d2, 2)))
test(1052, tail(d1, -10), as.data.table(tail(d2, -10)))
test(1053, tail(d1, 10), as.data.table(tail(d2, 10)))

# negative indexing with `:=` - new feature through fixing of #2697, performs as intended for negative subscripts.
x <- data.table(letters=letters[1:5], number=1:5)
test(1054, x[-(1:3), number := 1L], x[4:5, number := 1L])
test(1055, x[0, number := 1L], x)

# print.data.table heeds digits=2 etc, #2535
DT = data.table(x=rep(c("a","b","c"),each=3), y=(30/7)^(2:10))[, logy := log(y)]
test(1056, print(DT, digits=2), output="   x       y logy1: a      18  2.92: a      79  4.43: a     337  5.8")
test(1057, print(DT, digits=2, big.mark=","), output="   x         y logy1: a        18  2.9.*6: b    26,556 10.27: c   113,811 11.6")

# bug #2758 fix - segfault with zeros in i and factors in by
x <- data.table(letters=letters[1:5], factor=factor(letters[1:5]), number=1:5)
test(1058, x[c(0, 3), list(letters, number), by=factor], error="While grouping, i=0 is allowed")
test(1059, x[c(3, 0), list(letters, number), by=factor], error="While grouping, i=0 is allowed")
test(1060, x[c(0, 3), number:=5L, by=factor], error="While grouping, i=0 is allowed")
test(1061, x[c(0, 3), number:=5L], data.table(letters=letters[1:5], factor=factor(letters[1:5]), number=c(1:2,5L,4:5)))

# bug #2440 fix - segfault when j refers to grouping variable when results are empty
DT = data.table(x=rep(c("a","b"),each=3),v=c(42,42,42,4,5,6))
test(1062, DT[x %in% c('z'),list(x2=x),by=x], output="Empty data.table (0 rows) of 2 cols: x,x2")
test(1063, DT[x %in% c('z'),list(vpaste=paste(v,collapse=','),x2=paste(x,x)),by=x], output="Empty data.table (0 rows) of 3 cols: x,vpaste,x2")
test(1064, DT[integer(0), list(x2=x), by=x], output="Empty data.table (0 rows) of 2 cols: x,x2")

# bug #2445 fix - := fails when subsetting yields NAs and with=FALSE
X = data.table(A=1:3, B=1:6, key="A")
var <- "B"
test(1065, X[J(2:5), (var):=22L], data.table(A=rep(1:3, each=2), B=c(1L,4L,rep(22L,4)), key="A"))

# fread single unnamed colClasses
# f is inline CSV text: a header A,B,C,D followed by two data rows.
f = "A,B,C,D\n1,3,5,7\n2,4,6,8\n"
# An unnamed colClasses vector must match the number of columns (3 given, 4 present)
test(1066, fread(f,colClasses=c("integer","integer","character")), error="colClasses is unnamed and length 3 but there are 4 columns")
test(1067, fread(f,colClasses=c("integer","numeric","character","character")), data.table(A=1:2,B=c(3,4),C=c("5","6"),D=c("7","8")))
# A single unnamed class is applied to every column
test(1068, fread(f,colClasses="character"), data.table(A=c("1","2"),B=c("3","4"),C=c("5","6"),D=c("7","8")))

# fread select and drop
test(1069, fread(f,drop=c("D","B")), data.table(A=1:2,C=5:6))
# Dropping a column that does not exist warns and otherwise reads everything
test(1070, fread(f,drop="E"), fread(f), warning="Column name 'E' in 'drop' not found")
# colClasses names column C, which is not among the selected columns
test(1071, fread(f,select="B",colClasses=list(numeric="C")), data.table(B=3:4))
# Supplying select and drop together is rejected
test(1072, fread(f,select="B",drop="C"), error="not both")
test(1073, fread(f,drop=2:3), fread(f,select=c(1,4)))  # tests coercing numeric select as well

# that problem printing duplicate columns doesn't return, #4788
DT = data.table(V1 = c(1:1000), V2 = c(10001:11000))
test(1074, DT[, sum(V2), by = V1], output="1000: 1000 11000")  # x has two columns both called V1 here

# add test from #2446. Already fixed but add anyway. "names in neworder not found in x: 'colnames with spaces' from merge() when all.y=TRUE"
X = data.table(a=1:3,b=4:6,"c d"=7:9)
Y = data.table(e=10:12,a=2:4)
test(1075, merge(X,Y,by="a",all=TRUE), data.table(a=c(1:4),b=c(4:6,NA),"c d"=c(7:9,NA),e=c(NA,10:12),key="a"))

# Fixes #2670. `by` sometimes incorrect for expressions of keyed columns. When by is used like `by=month(date)`, with key column set to "date", grouping+aggregation would be wrong.
DT = data.table(date=as.Date("2013-01-01")+seq(1,1000,by=10),1:100)
setkey(DT,date)
test(1076, DT[,sum(V2),by=month(date)], DT[, sum(V2), by=list(month(date))])
# just to be sure, second test with another function using sample.
setkey(DT, V2)
ff <- function(x) {
  # Grouping helper for test 1077: shuffle x and bucket the shuffled values into
  # blocks of ten. The seed is reset on every call so that repeated calls with the
  # same x return the same grouping vector.
  set.seed(45)
  shuffled <- sample(x)
  (shuffled - 1) %/% 10
}
test(1077, DT[, sum(V2),by=ff(V2)], DT[, sum(V2),by=list(ff(V2))])

# rbindlist should discard names on columns, #4890
d = data.frame(x=1:5)
f <- function(x) {
  # Build a 10-row data.frame from x; any warning raised by data.frame() is silenced on purpose.
  DF <- suppressWarnings(data.frame(x = x, y = 1:10))
  # Put a names attribute on column x by reference, so that the column handed to
  # rbindlist() carries names (tests 1078.1 and 1078.2 verify that it does).
  setattr(DF$x, "names", "a")
  DF
}
l = apply(d, 1, f)
test(1078.1, length(names(l[[1]]$x)), 10)   # test this test is creating names on the column
test(1078.2, length(names(l[[2]]$x)), 10)
a = rbindlist(l)
test(1078.3, a$x, rep(1:5,each=10))   # a$x would segfault before the fix to rbindlist

# data.table() shouldn't retain column names, root cause of #4890
x = 1:5
names(x) = letters[1:5]
test(1079.1, DF<-data.frame(x=x, y=1:10), data.frame(x=rep(1:5,2),y=1:10), warning="row names.*discarded") 
test(1079.2, lapply(DF, names), list(x=NULL, y=NULL))
test(1079.3, DT<-data.table(x=x, y=1:10), data.table(x=rep(1:5,2),y=1:10))
test(1079.4, lapply(DT, names), list(x=NULL, y=NULL))
# test from similar #4912 for completeness
z = c(a=1,b=2,c=3)
a = data.table(z,x=1:3)
b = rbind(a, data.table(z=2,x=1))
test(1080, b$z, c(1,2,3,2))

# mid row logical detection
test(1081, fread("A,B,C\n1,T,2\n"), data.table(A=1L,B=TRUE,C=2L))

# cartesian join answer's key should contain only the columns considered in binary search. Fixes #2677
# DT1 is keyed on three columns (a,b,c) while DT2 is keyed on two (p,q); the result of the
# join is expected to be keyed on a,b only.
set.seed(45)
n <- 10
DT1 <- data.table(a=sample(1:3, n, replace=TRUE), b=sample(1:3, n, replace=TRUE), c=sample(1:10, n,replace=TRUE), key=c("a", "b", "c"))
DT2 <- data.table(p=sample(1:3, n, replace=TRUE), q=sample(1:3, n, replace=TRUE), r=sample(1:n), w=sample(1:n))
setkey(DT2, p,q)
ans <- DT1[DT2, nomatch=0, allow.cartesian=TRUE]  # NB: DT2 contains duplicate key values so columns c ends up not being sorted
test(1082.1, key(ans), c("a","b"))
test(1082.2, setkeyv(ans, key(ans)), ans) # i.e. key is valid, otherwise re-built warning will be caught
# Independent reference: count rows per (a,b,c) with aggregate(), then key it on a,b
check <- setkey(as.data.table(aggregate(r ~a+b+c, ans, length)), a, b)
test(1083, setkeyv(ans[, list(r = .N), by=key(DT1)], key(ans)), check) # if the key is set properly, then and only then will the aggregation results match with "check"

# Tests for #2531. `:=` loses POSIXct or ITime attribute:
# first test from this SO post: http://stackoverflow.com/questions/15996692/cannot-assign-columns-as-date-by-reference-in-data-table
dt <- data.table(date = as.IDate(sample(10000:11000, 10), origin = "1970-01-01"))
dt[, group := rep(1:2, 5)]
dt[, min.group.date := as.IDate(min(date)), by = group]
test(1084, class(dt$min.group.date), c("IDate", "Date"))

dt <- data.table(date = as.IDate(sample(10000:11000, 10), origin = "1970-01-01"))
dt[, group := rep(1:2, 5)]
dt[, min.group.date := min(date), by = group] # don't need to wrap it with as.IDate(.)
test(1085, class(dt$min.group.date), c("IDate", "Date"))

# second test from this SO post: http://stackoverflow.com/questions/14604820/why-does-this-posixct-or-itime-loses-its-format-attribute
DT = data.table(x=as.POSIXct(c("2009-02-17 17:29:23.042", "2009-02-17 17:29:25.160")), y=c(1L,2L))
DT[,x1:=as.ITime(x)]
DT[,`:=`(last.x=tail(x,1L),last.x1=tail(x1,1L)),by=y]
test(1086, class(DT$last.x), c("POSIXct", "POSIXt"))
test(1087, class(DT$last.x1), "ITime")

# Tests 1088-1093 were non-ASCII. Now in DtNonAsciiTests

# print of unnamed DT with >20 <= 100 rows, #4934
DT <- data.table(x=1:25, y=letters[1:25])
DT.unnamed <- unname(copy(DT))
test(1094, print(DT.unnamed), output="NA NA 1:  1  a 2:  2  b 3:  3  c")

# DT[!TRUE] or DT[!TRUE, which=TRUE], #4930. !TRUE still can be a recycling operation with !(all TRUE)
# Both forms are expected to give the same result as passing i=FALSE directly.
DT <- data.table(x=1:3, y=4:6)
test(1095.1, DT[!TRUE], DT[FALSE])
test(1095.2, DT[!TRUE, which=TRUE], DT[FALSE, which=TRUE])

######### incremented tests by 1 as I've used 1096 for FR #2077 (above along with already existing tests 522): ###########
# roll backwards when i is keyed and rollends=FALSE
# http://stackoverflow.com/questions/18984179/roll-data-table-with-rollends
# dt1 holds two dates (2013-01-03 and 2013-01-07) with ind set to the row number; dt2 holds the
# ten consecutive days 2013-01-01..2013-01-10, i.e. two days before dt1's first date and three
# after its last. All four tests use roll=-Inf and differ only in rollends.
dt1 = data.table(Date=as.Date(c("2013-01-03","2013-01-07")),key="Date")[,ind:=.I]
dt2 = data.table(Date=seq(from=as.Date("2013-01-01"),to=as.Date("2013-01-10"), by="1 day"),key="Date")
test(1097, dt1[dt2,roll=-Inf,rollends=FALSE]$ind, INT(NA,NA,1,2,2,2,2,NA,NA,NA))  # now ok
test(1098, dt1[dt2,roll=-Inf,rollends=TRUE]$ind, INT(1,1,1,2,2,2,2,2,2,2))  # ok before
test(1099, dt1[dt2,roll=-Inf,rollends=c(TRUE,FALSE)]$ind, INT(1,1,1,2,2,2,2,NA,NA,NA))  # ok before
test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2,2,2))  # now ok

#########################################
# All dcast.data.table tests go in here #
#########################################
if ("package:reshape2" %in% search()) {

    names(ChickWeight) <- tolower(names(ChickWeight))
    DT <- melt(as.data.table(ChickWeight), id=2:4) # calls melt.data.table

    # changed 'mean' to 'sum' to avoid valgrind floating point precision based error.
    test(1101, as.data.frame(dcast(DT, time ~ variable, fun=sum)), dcast(as.data.frame(DT), time~variable, fun=sum))
    test(1102, as.data.frame(dcast(DT, diet ~ variable, fun=sum)), dcast(as.data.frame(DT), diet~variable, fun=sum))
    x1 <- as.data.frame(dcast(DT, diet+chick ~ time, drop=FALSE)) 
    x1$chick <- factor(x1$chick, levels=levels(x1$chick), ordered=FALSE)
    x2 <- dcast(as.data.frame(DT), diet+chick~time, drop=FALSE)
    test(1103, x1,x2)
    x1 <- as.data.frame(dcast(DT, diet+chick ~ time, drop=FALSE, fill=0)) 
    x1$chick <- factor(x1$chick, levels=levels(x1$chick), ordered=FALSE)
    x2 <- dcast(as.data.frame(DT), diet+chick~time, drop=FALSE, fill=0)
    test(1104.1, x1,x2)

    # add test for 'subset' in dcast
    x1 <- dcast(as.data.frame(DT), time + chick ~ variable+diet, fun=sum, subset=.(time> 20))
    x2 <- as.data.frame(dcast(DT, time + chick ~ variable+diet, fun=sum, subset=.(time> 20)))
    test(1104.2, x1, x2)

    # testing without aggregation
    x <- data.table(a=5:1, b=runif(5))
    test(1104.3, as.data.frame(dcast(x, a ~ b, value.var="b")), dcast(as.data.frame(x), a ~ b, value.var="b"))

    # Fix for case 2 in bug report #5149 - dcast didn't aggregate properly when formula RHS has "."
    set.seed(45)
    DT = data.table(x=rep(1:5, each=3), y=runif(15, 0, 1))
    ans = setDT(dcast(as.data.frame(DT), x ~ ., mean, value.var="y"))
    setkey(ans, x)
    test(1148.1, dcast(DT, x ~ ., mean, value.var="y"), ans)
    # also quashed another bug with `.` in formula (when there's no aggregate function):
    DT <- data.table(a=sample(5), b=runif(5), c=5:1)
    ans1 = setDT(dcast(as.data.frame(DT), a ~ ., value.var="c"))
    ans2 = setDT(dcast(as.data.frame(DT), b+a ~ ., value.var="c"))
    setkey(ans1, "a")
    setkey(ans2, "b", "a")
    test(1148.2, dcast(DT, a ~ ., value.var="c"), ans1)
    test(1148.3, dcast(DT, b+a~., value.var="c"), ans2)

    # more tests for `dcast` with formula being character and errors when formula is a hybrid
    set.seed(1)
    x <- data.table(a=rep(1:5, each=5), b=runif(25))
    ### adding all extra arguments for no verbose during "test.data.table()" to all dcast tests
    test(1150.1, dcast(x, " a~ . ", value.var="b", fun=length), data.table(a=1:5, `.`=5L, key="a"))
    test(1150.2, dcast(x, "a ~  c ", value.var="b"), error="not found or of unknown type")
    test(1150.3, dcast(x, a ~  a, value.var="c"), error="are not found in 'data'")

    # fix for #5379 - issue when factor columns on formula LHS along with `drop=FALSE`
    set.seed(1L)
    df <- data.frame(a=factor(sample(letters[1:3], 10, replace=TRUE), letters[1:5]),
                 b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
    dt <- as.data.table(df)
    test(1198.1, setkey(setDT(dcast(as.data.frame(df), a~b, drop=FALSE, value.var="b", fun=length)), a), dcast(dt, a~b, drop=FALSE, fun=length, value.var="b"))
    
    # reverse the levels
    set.seed(1L)
    df <- data.frame(a=factor(sample(letters[1:3], 10, replace=TRUE), letters[5:1]),
                 b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
    dt <- as.data.table(df)
    test(1198.2, setkey(setDT(dcast(as.data.frame(df), a~b, drop=FALSE, value.var="b", fun=length)), a), dcast(dt, a~b, drop=FALSE, value.var="b", fun=length))
    
    # more factor cols
    set.seed(1L)
    df <- data.frame(a1=factor(sample(letters[1:3], 10, replace=TRUE), letters[1:5]), # factor col 1
                 a2=factor(sample(letters[6:10], 10, replace=TRUE), letters[6:10]), # factor col 2
                 a3=sample(letters[1:3], 10, TRUE), # no factor
                 b=factor(sample(tail(letters, 5), 10, replace=TRUE)))
    dt <- as.data.table(df)
    ans <- dcast(dt, a1+a2+a3~b, drop=FALSE, value.var="b")
    ans[, c(4:7) := lapply(.SD, as.character), .SDcols=4:7]
    test(1198.3, setkey(setDT(dcast(as.data.frame(df), a1+a2+a3~b, drop=FALSE, value.var="b")), a1,a2,a3), ans)

    # dcast bug fix for 'subset' argument (the key doesn't get set before running C-fcast):
    dt <- data.table(x=c(1,1,1,2,2,2,1,1), y=c(1,2,3,1,2,1,1,2), z=c(1,2,3,NA,4,5,NA,NA))
    test(1252, dcast(dt, x~y, value.var="z", subset=.(!is.na(z))), data.table(x=c(1,2), `1`=c(1,5), `2`=c(2,4), `3`=c(3,NA), key="x"))

    # FR #5675 and DOC #5676
    set.seed(1L)
    dt <- data.table(a=sample(10), b=2013:2014, variable=rep(c("c", "d"), each=10), value=runif(20))
    ans1 <- names(dcast(dt, a ~ ... + b, value.var="value"))
    test(1286, ans1, c("a", "c_2013", "c_2014", "d_2013", "d_2014"))

    # bug git #693 - dcast error message improvement:
    dt <- data.table(x=c(1,1), y=c(2,2), z = 3:4)
    test(1314, dcast(dt, x ~ y, value.var="z", fun.aggregate=identity), error="should take vector inputs and return a single value")

    # bug #688 - preserving attributes
    DT = data.table(id = c(1,1,2,2), ty = c("a","b","a","b"), da = as.Date("2014-06-20"))
    test(1315, dcast(DT, formula = id ~ ty, value.var="da"), data.table(id=c(1,2), a=as.Date("2014-06-20"), b=as.Date("2014-06-20"), key="id")) 

    # issues/713 - dcast and fun.aggregate
    DT <- data.table(id=rep(1:2, c(3,4)), k=c(rep(letters[1:3], 2), 'c'), v=1:7)
    foo <- function (tbl, fun.aggregate) {
        dcast(tbl, id ~ k, value.var='v', fun.aggregate=fun.aggregate, fill=NA_integer_)
    }
    test(1345, foo(DT, last), dcast(DT, id ~ k, value.var='v', fun.aggregate=last, fill=NA_integer_))

    # more minor changes to dcast (subset argument handling symbol - removing any surprises with data.table's typical scoping rules) - test for that.
    DT <- data.table(id=rep(1:2, c(3,4)), k=c(rep(letters[1:3], 2), 'c'), v=1:7)
    bla <- c(TRUE, rep(FALSE, 6L)) 
    # calling `subset=.(bla)` gives an eval error when testing... not sure what's happening! Using values directly instead for now.
    test(1346.1, dcast(DT, id ~ k, value.var="v", subset=.(c(TRUE, rep(FALSE, 6L)))), dcast(DT[1L], id ~ k, value.var="v"))
    DT[, bla := !bla]
    test(1346.2, dcast(DT, id ~ k, value.var="v", subset=.(bla), fun.aggregate=length), dcast(DT[(bla)], id ~ k, value.var="v", fun.aggregate=length))

    # issues/715
    DT <- data.table(id=rep(1:2, c(3,2)), k=c(letters[1:3], letters[1:2]), v=1:5)
    test(1347.1, dcast(DT, id ~ k, fun.aggregate=last, value.var="v"), error="should take vector inputs and return a single value")
    test(1347.2, dcast(DT, id ~ k, fun.aggregate=last, value.var="v", fill=NA_integer_), data.table(id=1:2, a=c(1L, 4L), b=c(2L,5L), c=c(3L,NA_integer_), key="id"))

    # Fix for #893
    dt <- data.table(
        x = factor("a", levels = c("a", "b")),
        y = factor("b", levels = c("a", "b")),
        z = 1
    )
    test(1457, dcast(dt, y ~ x, drop = FALSE, value.var="z"), 
                 data.table(dcast(as.data.frame(dt), y ~ x, drop = FALSE, value.var="z"), key="y"))

    # dcast.data.table new tests
    # Fix for #1070 (special case of ... on LHS)
    dt <- data.table(label= month.abb[1:5], val=0)
    test(1501.1, dcast(dt,... ~ label, value.var="val", sum), 
           data.table(`.`=".", Apr=0, Feb=0, Jan=0, Mar=0, May=0, key="."))
    # Fix for #862 (optional prefixes)
    dt <- data.table(name=c("Betty","Joe","Frank","Wendy","Sally"),
                       address=c(rep("bla1",2), rep("bla2",2), "bla3"))
    test(1501.2, dcast(dt, address ~ paste("cust", dt[, seq_len(.N), by=address]$V1, sep=""), value.var="name"), data.table(address=paste("bla",1:3,sep=""), cust1=c("Betty", "Frank", "Sally"), 
                 cust2=c("Joe", "Wendy", NA), key="address"))

    # Fix for #1037 (optional prefixes + undefined variables)
    dt <- structure(list(V1 = c(0L, 1L, 2L, 3L, 4L, 0L, 1L, 2L, 3L, 4L), 
              V2 = c(1.052, 0.542, 0.496, 0.402, 0.278, 5.115, 4.329, 4.121, 
              4.075, 4.0088)), .Names = c("V1", "V2"), class = "data.frame", row.names = c(NA, -10L))
    setDT(dt)
    ans1 = dcast(as.data.frame(dt), cumsum(V1 == 0) ~ V1, value.var = 'V2')
    ans2 = dcast(dt, cumsum(V1 == 0) ~ V1, value.var = 'V2')
    setkey(setnames(setDT(ans1), names(ans2)), V1)
    test(1501.3, ans1, ans2)

    # Implement #716 and #739 (multiple value.var and fun.aggregate)
    # multiple value.var
    dt = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), 
                    z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L)
    ans21 <- dcast(as.data.frame(dt), x + y ~ z, fun=sum, value.var="d1")
    ans22 <- dcast(as.data.frame(dt), x + y ~ z, fun=sum, value.var="d2")
    ans23 <- dcast(as.data.frame(dt), x + y ~ z, fun=mean, value.var="d1")
    ans24 <- dcast(as.data.frame(dt), x + y ~ z, fun=mean, value.var="d2")

    ans1 <- dcast(dt, x + y ~ z, fun=sum, value.var=c("d1","d2"))
    ans2 <- cbind(ans21, ans22[, 3:4])
    setkey(setnames(setDT(ans2), names(ans1)), x, y)
    test(1501.4, ans1, ans2)
    # multiple fun.agg
    ans1 <- dcast(dt, x + y ~ z, fun=list(sum, mean), value.var="d1")
    ans2 <- cbind(ans21, ans23[, 3:4])
    setkey(setnames(setDT(ans2), names(ans1)), x, y)
    test(1501.5, ans1, ans2)
    # multiple fun.agg and value.var (all combinations)
    ans1 <- dcast(dt, x + y ~ z, fun=list(sum, mean), value.var=c("d1", "d2"))
    ans2 <- cbind(ans21, ans22[, 3:4], ans23[, 3:4], ans24[, 3:4])
    setkey(setnames(setDT(ans2), names(ans1)), x, y)
    test(1501.6, ans1, ans2)
    # multiple fun.agg and value.var (one-to-one)
    ans1 <- dcast(dt, x + y ~ z, fun=list(sum, mean), value.var=list("d1", "d2"))
    ans2 <- cbind(ans21, ans24[, 3:4])
    setkey(setnames(setDT(ans2), names(ans1)), x, y)
    test(1501.7, ans1, ans2)

    # Additional test after fixing fun.agg creation - using the example here: https://github.com/Rdatatable/data.table/issues/716
    DT = data.table(x=1:5, y=paste("v", 1:5, sep=""), 
                            v1=6:10, v2=11:15, 
                            k1=letters[1:5], k2=letters[6:10])
    DT.m = melt(DT, id=1:2, measure=list(3:4, 5:6))
    ans1 <- dcast(DT.m, x ~ y, fun.aggregate = 
        list(sum, function(x) paste(x, collapse="")), value.var=list("value1", "value2"))
    ans21 <- dcast(as.data.frame(DT.m), x ~ y, fun.agg=sum, value.var="value1")
    ans22 <- dcast(as.data.frame(DT.m), x ~ y, fun.agg=function(x) paste(x, collapse=""), value.var="value2")
    ans2 <- cbind(ans21, ans22[, -1L])
    setkey(setnames(setDT(ans2), names(ans1)), x)
    test(1501.8, ans1, ans2)

    # more testing on fun.aggregate
    dt = as.data.table(airquality)
    ans = suppressWarnings(melt(dt, id=c("Month", "Day"), na.rm=TRUE))
    ans = ans[ , .(min=min(value), max=max(value)), by=.(Month, variable)]
    ans = melt(ans, id=1:2, variable.name="variable2")
    ans = dcast(ans, Month ~ variable + variable2)
    setnames(ans, c("Month", paste(names(ans)[-1L], ".", sep="_")))
    valvars = c("Ozone", "Solar.R", "Wind", "Temp")
    ans2 <- suppressWarnings(dcast(dt, Month ~ ., fun=list(min, max), na.rm=TRUE, value.var=valvars))
    setcolorder(ans, names(ans2))
    test(1501.9, setkey(ans, Month), ans2[, names(ans2)[-1L] := lapply(.SD, as.numeric), .SDcols=-1L])

    # test for #1210, sep argument for dcast
    # With sep=".", the cast column names are the y and z values joined by "." instead of "_".
    # NOTE(review): the numeric literal 1501.10 is the same double as 1501.1, so this test
    # shares its id with test 1501.1 earlier in this block -- confirm the harness tolerates that.
    # NOTE(review): no set.seed() immediately precedes these sample() calls; the expected names
    # assume every y/z combination occurs among the 20 rows.
    dt = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), 
                z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L)
    test(1501.10, names(dcast(dt, x ~ y + z, fun=length, value.var = "d2", sep=".")), 
                  c("x", "1.a", "1.b", "2.a", "2.b"))
}

# test for freading commands
# Writes a small table to a temp file and reads it back through a shell command given to
# fread(); `grep -v 3` filters out the row containing 3, so the result must equal x1[a != 3].
x1 <- data.table(a = c(1:5), b = c(1:5))
f <- tempfile()
write.csv(x1, f, row.names = FALSE)
if (.Platform$OS.type == "unix") {
    # shQuote() the path: it is pasted into a shell command line, so a temp directory whose
    # name contains spaces or shell metacharacters would otherwise break the command.
    test(1105, x1[a != 3], fread(paste('grep -v 3 ', shQuote(f), sep="")))
} else {
    # x2 <- fread(paste('more ', f, sep=""))
    # Doesn't work on winbuilder. Relies on 'more' available in DOS via Cygwin?
    # Error:
    # Syntax error: end of file unexpected (expecting ")")
    #  Error: (converted from warning) running command 'sh.exe -c (more D:\temp\RtmpgB8D2P\file1ed828a511cd) > D:\temp\RtmpgB8D2P\file1ed84f9f44f8' had status 2
    # test(1105, x1, x2)
}
unlink(f)

# test for "key" argument of [.data.table
#x1 <- data.table(a = c(1:5), b = c(5:1))
#x1[J(2), key = 'a']
#test(1106, key(x1) == 'a')
#x1[, a, key = NULL]
#test(1107, is.null(key(x1)))

# test that eval works inside expressions
# s holds the unevaluated symbol `a`; eval(s) inside j must resolve to column a of DT.
DT <- data.table(a = c(1:5))
s <- quote(a)
test(1108, DT[, sum(eval(s))], DT[, sum(a)])

# test that boolean expression does not trigger a not-join
# Column b includes an NA; both spellings of the filter are expected to give the same rows.
DT <- data.table(a = 1:3, b = c(TRUE,FALSE,NA))
test(1109, DT[b != TRUE], DT[!(b == TRUE)])

# commented for now (by Arun)
# # test the speed of simple comparison
# DT <- data.table(a = 1:1e7)
# t1 = system.time(DT[a == 100])[3]
# t2 = system.time(DT[which(a == 100)])[3]
# # make sure we're at most 30% slower than "which" (should pass most of the time)
# test(1110, (t1 - t2)/t2 < 0.3)

# test that a column named list is ok (this also affects other functions in by, might be worth adding a test for that)
DT <- data.table(list = 1:6, a = 1:2)
test(1111, DT[, lapply(.SD, sum), by = a], DT[, list(list = sum(list)), by = a])

# fix for #4995. "rbind" retains key when the first argument isn't a data.table (the issue is that .rbind.data.table is never run)
DT <- data.table(name=c('Guff','Aw'),id=101:102,id2=1:2,key='id')
y <- rbind(list('No','NON',0L),DT,list('Extra','XTR',3L))
test(1112, key(y), NULL)

# fix for http://stackoverflow.com/questions/14753411/why-does-data-table-lose-class-definition-in-sd-after-group-by
# where, .SD loses class information.
format.myclass <- function(x, ...) {
  # format() method for objects of class "myclass": wraps every element of x in "!!"
  # markers. Extra arguments are accepted and ignored.
  paste0("!!", x, "!!")
}
DT <- data.table(L = rep(letters[1:3],3), N = 1:9)
setattr(DT$N, "class", "myclass")
test(1113, class(DT[, .SD, by = L]$N), class(DT$N))
setkey(DT, L)
test(1114, class(DT[, .SD, by = L]$N), class(DT$N))
test(1115, class(DT[J(unique(L)), .SD, by=.EACHI]$N), class(DT$N))

# Fix for #4994 - not-join quoted expression didn't work...
dt = data.table(a = 1:2, key = 'a')
dt1 = data.table(a = 1)
expr = quote(!dt1)
test(1116, dt[eval(expr)], dt[2])
expr = quote(!1)
test(1117, dt[eval(expr)], dt[2])

# Fix for #2381 - optimisation of `DT[, lapply(.SD, function(x) FUN(x, bla)), by=key(DT)]` where "bla" is a column in DT didn't work.
set.seed(45)
dt <- data.table(x=rep(1:4, each=4), b1=sample(16), b2=runif(16))
setkey(dt, x)
test(1118, dt[, lapply(.SD, function(y) weighted.mean(y, b2, na.rm=TRUE)), by=x], dt[, lapply(.SD, weighted.mean, b2, na.rm=TRUE), by=x])

# a(nother) test of #295
DT <- data.table(x=5:1, y=1:5, key="y")
test(1119, is.null(key(DT[, list(z = y, y = 1/y)])))


## various ordered factor rbind tests
DT = data.table(ordered('a', levels = c('a','b','c')))
DT1 = data.table(factor('a', levels = c('b','a','f')))
DT2 = data.table(ordered('b', levels = c('b','d','c')))
DT3 = data.table(c('foo', 'bar'))
DT4 = data.table(ordered('a', levels = c('b', 'a')))

test(1120, rbind(DT, DT1, DT2, DT3), data.table(ordered(c('a','a','b', 'foo', 'bar'), levels = c('a','b','d','c','f', 'foo', 'bar'))))
test(1121, rbindlist(list(DT, DT1, DT2, DT3)), data.table(ordered(c('a','a','b', 'foo', 'bar'), levels = c('a','b','d','c','f', 'foo', 'bar'))))
test(1122, rbind(DT, DT4), data.table(factor(c('a','a'), levels = c('a','b','c'))), warning="ordered factor levels cannot be combined, going to convert to simple factor instead")
test(1123, rbindlist(list(DT, DT4)), data.table(factor(c('a','a'), levels = c('a','b','c'))), warning="ordered factor levels cannot be combined, going to convert to simple factor instead")
test(1124, rbind(DT1, DT1), data.table(factor(c('a','a'), levels = c('b','a','f'))))
test(1125, rbindlist(list(DT1, DT1)), data.table(factor(c('a','a'), levels = c('b','a','f'))))

## test rbind(..., fill = TRUE)
DT = data.table(a = 1:2, b = 1:2)
DT1 = data.table(a = 3:4, c = 1:2)

test(1126, rbind(DT, DT1, fill = TRUE), data.table(a = 1:4, b = c(1, 2, NA, NA), c = c(NA, NA, 1, 2)))

## check for #4959 - rbind'ing empty data.table's
DT = data.table(a=character())
# NOTE(review): test 1127 is commented out, so the zero-row table built above is not checked by anything in this group.
#test(1127, rbind(DT, DT), DT)

## check for #5005
# j immediately calls an anonymous function whose body refers to column `b`; the result must equal a plain `b` in j.
DT = data.table(a=0:2,b=3:5,key="a")
test(1128, DT[, (function(){b})()], DT[, b])

## Fix for FR #4867
# Column names passed to j as a factor (with=FALSE) are expected to select columns, including when a name is repeated.
DT <- data.table(x=1:5, y=6:10)
test(1129.1, DT[, as.factor(c("x", "y")), with=FALSE], DT)
test(1129.2, DT[, as.factor(c("x", "x")), with=FALSE], DT[, list(x, x)])

# Fix for a specific case that results in error in `construct` function in data.table.R (found and fixed during #5007 bug fix)
# The function given to lapply(.SD, ...) refers to a variable (MyValueIsTen) that is defined outside the data.table.
MyValueIsTen <- 10
set.seed(1)
DT <- data.table(ID=sample(LETTERS[1:3], 6, TRUE), Value1=rnorm(6), Value2=runif(6))
cols <- c("Value1", "Value2")
DT2 <- copy(DT)
test(1130, DT[, (cols) := lapply(.SD, function(x) MyValueIsTen), by=ID], DT2[, (cols) := 10])

# Fix for #5007 - The value MyValueIsTen = 10 was never recognised (value within the function environment)
# The global is set to 5 while My_Fun defines its own local value of 10; the expected result (all 10s) requires the local one to be used.
MyValueIsTen <- 5
set.seed(1)
DT <- data.table(ID=sample(LETTERS[1:3], 6, TRUE), Value1=rnorm(6), Value2=runif(6))
# The default argument copy(DT) makes My_Fun() work on a copy instead of the global DT.
# Being a default, it is evaluated lazily, i.e. only when My_Fun() actually runs inside test(1131, ...).
My_Fun <- function(x=copy(DT)) {
    MyValueIsTen <- 10
    cols <- c("Value1", "Value2")
    x[, (cols) := lapply(.SD, function(x) MyValueIsTen), by=ID]
}
# Build the expected answer on the global DT; `cols` here is the global vector defined for test 1130.
DT[, (cols) := 10]
test(1131, My_Fun(), DT)

# Test for #4957 - where `j` doesn't know `.N` when used with `lapply(.SD, function(x) ...)`
# Reuses DT from test 1131; the expected value has 2 rows for each of the three IDs.
test(1132, DT[, lapply(.SD, function(x) .N), by=ID], data.table(ID=c("A", "B", "C"), Value1=2L, Value2=2L))

# Test for #4990 - `:=` does not generate recycling warning during 'by':
# Group x==1 has 5 rows, so the 2-item RHS cannot be recycled exactly; a warning naming group 1 is expected in both cases.
DT <- data.table(x=c(1,1,1,1,1,2,2))
# on a new column
test(1133.1, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")
# on an already existing column
test(1133.2, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")

# Fix for FR #2496 - catch `{` in `:=` expression in `j`:
# An assignment wrapped in braces is still expected to add the column, with a warning matching "Caught and removed".
DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
DT2 <- copy(DT)[, a := 1L]
test(1134.1, DT[, {a := 1L}], DT2, warning="Caught and removed")
test(1134.2, DT[, {b := 2L}, by=x], DT2[, b:=2L, by=x], warning="Caught and removed")

# fix for bug #5069
# Only runs when package gdata is attached (write.fwf presumably comes from gdata - confirm).
# The output goes to a temporary file, which is deleted afterwards.
if ("package:gdata" %in% search()) {
    DT <- data.table(a = c('asdfasdf','asdf','asdgasdgasdgasdg','sdg'), b = runif(4,0,1))
    test(1135, write.fwf(DT, f<-tempfile()), NULL)
    unlink(f)
}

# FR #2693 and Gabor's suggestions from here: http://r.789695.n4.nabble.com/Problem-with-FAQ-2-8-tt4668878.html (correcting software according to FAQ 2.8)
# d1 and d2 are keyed on differently named columns (id1 / id2).
# d3 is a copy of d2 renamed to d1's column names, so the `i.` prefix is needed to refer to i's columns.
d1 <- data.table(id1 = c(1L, 2L, 2L, 3L), val = 1:4, key = "id1") 
d2 <- data.table(id2 = c(1L, 2L, 4L), val2 = c(11, 12, 14),key = "id2") 
d3 <- copy(d2)
setnames(d3, names(d1))

# Join columns referred to in j, by x's name, by i's name and through the i. prefix
test(1136.1, d1[d2, id1], INT(1,2,2,4))
test(1136.2, d1[d2, id1], d1[d2][,id1])
test(1136.3, d1[d2, id2], INT(1,2,2,4))
test(1136.4, d1[d2, id2], d1[d2, list(id1,id2,val,val2)][,id2])
test(1136.5, d1[d3, i.id1], INT(1,2,2,4))
test(1136.6, d1[d3, i.id1], d1[d3, list(id1,i.id1)][,i.id1])
# Non-join columns: x's `val` is NA for the unmatched row (id 4), i's values are repeated for the two matches of id 2
test(1136.7, d1[d2, val], c(1:3, NA))
test(1136.8, d1[d2, val2], c(11,12,12,14))
test(1136.9, d1[d3, list(id1, val, i.val)], data.table(id1=INT(1,2,2,4), val=c(1:3, NA), i.val=c(11,12,12,14), key="id1"))
test(1136.11, d1[d3, list(id1, i.id1, val, i.val)], data.table(id1=INT(1,2,2,4), 
                i.id1=INT(1,2,2,4), val=c(1:3, NA), i.val=c(11,12,12,14), key="id1"))
test(1136.12, d1[d2], data.table(id1=INT(1,2,2,4), val=c(1:3, NA), val2=c(11,12,12,14), key="id1"))

# i.<name> must fail with "not found" when i does not supply a column of that name
test(1136.13, d1[J(2), id1], INT(2,2))
test(1136.14, d1[J(2), i.id1], error="not found")

DT <- data.table(x=c("A", "A", "C", "C"), y=1:4, key="x")
test(1136.15, DT["C", i.x], error="not found")

# test for FR #4979
# Negative .SDcols (numeric positions or character names) exclude the given columns.
DT <- data.table(x=1:5, y=6:10, z=11:15)
test(1137.1, DT[, .SD, .SDcols=-1L], DT[, 2:3, with=FALSE])
test(1137.2, DT[, .SD, .SDcols=-(1:2)], DT[, 3, with=FALSE])
test(1137.3, DT[, .SD, .SDcols=-"y"], DT[, c(1,3), with=FALSE])
test(1137.4, DT[, .SD, .SDcols=-c("y", "x")], DT[, 3, with=FALSE])
test(1137.5, DT[, .SD, .SDcols=-which(names(DT) %in% c("x", "y", "z"))], null.data.table())
# Mixing signs, or mixing numbers with names, is expected to be an error
test(1137.6, DT[, .SD, .SDcols=c(1, -2)], error=".SDcols is numeric but has both")
test(1137.7, DT[, .SD, .SDcols=c("x", -"y")], error="invalid argument to unary")
test(1137.8, DT[, .SD, .SDcols=c(-1, "x")], error="Some items of .SDcols are")

# Negation applied to the result of grep(), both as positions and as names (value=TRUE)
DT <- data.table(x=1:5, y=6:10, z=11:15, zz=letters[1:5])
test(1137.9, DT[, .SD, .SDcols=-grep("^z", names(DT))], DT[, 1:2, with=FALSE])
test(1137.10, DT[, .SD, .SDcols=-grep("^z", names(DT), value=TRUE)], DT[, 1:2, with=FALSE])
test(1137.11, DT[, .SD, .SDcols=-grep("^z", names(DT), value=TRUE, invert=TRUE)], DT[, 3:4, with=FALSE])

# Negative .SDcols combined with by: excluding "y" must equal selecting "z"
set.seed(45)
DT = data.table(x=c("A", "A", "C", "C"), y=1:4, z=runif(4))
test(1137.12, DT[, lapply(.SD, sum), by=x, .SDcols=-"y"], DT[, lapply(.SD, sum), by=x, .SDcols="z"])

# test for FR #353 / R-Forge #5020 - print.data.table gets new argument "row.names", default=TRUE. if FALSE, the row-names don't get printed
# Thanks to Eddi for `capture.output` function!
# The printed lines are compared verbatim, so the expected strings include the exact column padding.
DT <- data.table(x=1:5, y=6:10)
test(1138.1, capture.output(print(DT, row.names=FALSE)), c(" x  y", " 1  6", " 2  7", " 3  8", " 4  9", " 5 10"))
# 101 rows: the expected output shows the first and last five rows around a "---" line
DT <- data.table(x=1:101, y=6:106) # bug described in #1307
test(1138.2, capture.output(print(DT, row.names=FALSE)), c("      x   y", "      1   6", "      2   7", "      3   8", "      4   9", "      5  10", "---        ", "     97 102", "     98 103", "     99 104", "    100 105", "    101 106"))

# test for FR #2591 (format.data.table issue with column of class "formula")
# x is a list column holding two formulas; each is expected to print as its deparsed text.
DT <- data.table(x=c(a~b, c~d+e), y=1:2)
test(1139, capture.output(print(DT)), c("           x y", "1:     a ~ b 1", "2: c ~ d + e 2"))

# FR #4813 - provide warnings if there are remainders for both as.data.table.list(.) and data.table(.)
# In each case the shorter item's length does not divide the longest one; it is recycled with a remainder and a warning naming that item is expected.
X = list(a = 1:2, b = 1:3)
test(1140, as.data.table(X), data.table(a=c(1,2,1), b=c(1,2,3)), warning="Item 1 is of size 2 but maximum")
test(1141.1, data.table(a=1:2, b=1:3), data.table(a=c(1L,2L,1L), b=1:3), warning="Item 1 is of size 2 but maximum")
test(1141.2, data.table(a=1:2, data.table(x=1:5, y=6:10)), data.table(a=c(1L,2L,1L,2L,1L), x=1:5, y=6:10), warning="Item 1 is of size 2 but maximum")
test(1141.3, data.table(a=1:5, data.table(x=c(1,2), y=c(3,4))), data.table(a=c(1:5), x=c(1,2,1,2,1), y=c(3,4,3,4,3)), warning="Item 2 is of size 2 but maximum")

# Fix for bug #5098 - DT[, foo()] returns function definition.
# foo takes no arguments and ignores DT; j must return the value of the call (15L), not the function itself.
DT <- data.table(a=1:2)
foo <- function() sum(1:5)
test(1142, DT[, foo()], 15L)

# Fix for bug #5106 - DT[, .N, by=y] was slow when "y" is not a column in DT
# y is a vector outside DT holding the same values as column x, so both groupings must give the same counts.
DT <- data.table(x=sample.int(10, 1e6, replace=TRUE))
y <- DT$x
te1 <- system.time(ans1 <- DT[, .N, by=x])[["elapsed"]]
te2 <- system.time(ans2 <- DT[, .N, by=y])[["elapsed"]]
test(1143.1, ans1, setnames(ans2, "y", "x"))
# NOTE(review): this compares elapsed wall-clock times (within 1 second), so it may be sensitive to machine load.
test(1143.2, abs(te1-te2) < 1, TRUE)

# Fix for bug #5104 - side-effect of fixing #2531 - `:=` with grouping (by) and assigning factor columns
# The new column z must come out as a factor equal to y.
DT <- data.table(x=c(1,1,1,2,2), y=factor(letters[1:5]))
test(1144.1, DT[, z := y, by=x], data.table(x=c(1,1,1,2,2), y=factor(letters[1:5]), z=factor(letters[1:5])))
# Added 3 more tests to close bug #5437 - partial regression due to recent changes (in 1.9.2)
# This should catch any attributes being lost hereafter.
DT<-data.table(X=factor(2006:2012),Y=rep(1:7,2))
test(1144.2, DT[, Z:=paste(X,.N,sep=" - "), by=list(X)], data.table(X=factor(2006:2012),Y=rep(1:7,2), Z=paste(as.character(2006:2012), 2L, sep=" - ")))
# POSIXct column: the values returned per group must equal the original column (comparison against DT$x).
DT = data.table(x=as.POSIXct(c("2009-02-17 17:29:23.042", "2009-02-17 17:29:25.160")), y=c(1L,2L))
# NOTE(review): 1144.4 is run before 1144.3 here.
test(1144.4, DT[, list(lx=x[.N]), by=x], data.table(x=DT$x, lx=DT$x))
ans = copy(DT)
test(1144.3, DT[,`:=`(lx=tail(x,1L)), by=y], ans[, lx := x])

# FR #2356 - retain names of named vector as column with keep.rownames=TRUE
# For each vector type (integer, double, character, factor, Date) the names are (re)set by
# reference with setattr(), and as.data.table() is expected to return them in an 'rn' column
# next to the unnamed values.
# Numbered 1144.5-1144.9 so that the ids do not repeat 1144.1-1144.4, which are already used by
# the #5104/#5437 tests; a repeated id makes a failure report ambiguous.
# keep.rownames is spelled out in full rather than relying on partial matching of `keep`.
x <- 1:5
setattr(x, 'names', letters[1:5])
test(1144.5, as.data.table(x, keep.rownames=TRUE), data.table(rn=names(x), x=unname(x)))
x <- as.numeric(x)
setattr(x, 'names', letters[1:5])
test(1144.6, as.data.table(x, keep.rownames=TRUE), data.table(rn=names(x), x=unname(x)))
x <- as.character(x)
setattr(x, 'names', letters[1:5])
test(1144.7, as.data.table(x, keep.rownames=TRUE), data.table(rn=names(x), x=unname(x)))
x <- as.factor(x)
setattr(x, 'names', letters[1:5])
test(1144.8, as.data.table(x, keep.rownames=TRUE), data.table(rn=names(x), x=unname(x)))
x <- as.Date(1:5, origin="2013-01-01")
setattr(x, 'names', letters[1:5])
test(1144.9, as.data.table(x, keep.rownames=TRUE), data.table(rn=names(x), x=unname(x)))

# Fix for bug #5114 - .data.table.locked ISSUE
# xx is the result of selecting .SD; assigning to it by reference with := must work.
DT <- data.table(x=1:5, y=6:10)
xx <- DT[, .SD, .SDcols="y"]
test(1145, xx[, y := as.numeric(y)], data.table(y = as.numeric(6:10)))

# Fix for bug #5115 - set not adding columns on class that builds on data.table
# DT gets an extra leading class ("myclass"); `ans` is given the same class vector so the comparison includes the class.
DT <- as.data.table(BOD)
ans = copy(DT)[, Time := as.numeric(Time)]
setattr(DT, "class", c("myclass", class(DT)))
setattr(ans, 'class', class(DT))
test(1146.1, DT[, Time:= as.numeric(Time)], ans)
# set() on a plain data.frame: replacing an existing column is expected to work, adding a new one to be an error.
# The "reference" attribute is removed before comparing against a freshly built data.frame.
DF <- as.data.frame(DT)
test(1146.2, {set(DF, i=NULL, j=1L, value=seq_len(nrow(DF)));setattr(DF,"reference",NULL);DF}, data.frame(Time=1:nrow(BOD), demand=BOD$demand))
test(1146.3, set(DF, i=NULL, j="bla", value=seq_len(nrow(DF))), error="set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that.")

# Feature - implemented fast radix order for numeric types (both +ve and -ve numerics).
# note that if "x" is already a list, then the values will be modified by reference!
# Note: 'ordernumtol' doesn't distinguish between NA and NaN whereas this one does!
# R-wrapper is dradixorder
set.seed(45)
x <- rnorm(1e6)*1e4
test(1147.1, base::order(x), dradixorder(x, tol=numeric(0))) # base::order doesn't test with tolerance
test(1147.2, ordernumtol(x), dradixorder(x))
# Hand-built vector mixing NA, NaN, +/-Inf and values that differ from 5.18909 by small multiples of tol
tol = .Machine$double.eps^0.5
x <- c(8, NaN, Inf, -7.18918, 5.18909+0.07*tol, NA, -7.18918111, -Inf, NA, 5.18909, NaN, 5.18909-1.2*tol, 5.18909-0.04*tol)
test(1147.3, dradixorder(x), c(6L, 9L, 2L, 11L, 8L, 7L, 4L, 12L, 5L, 10L, 13L, 1L, 3L))

# test for `iradixorder` when input is integer(0) and numeric(0)
# Empty integer input returns integer(0); empty numeric input is expected to be rejected with an error.
test(1149.1, iradixorder(integer(0)), integer(0)) 
test(1149.2, iradixorder(numeric(0)), error="iradixorder is only for integer") 

# test uniqlengths
# Reference value: differences between consecutive group starts, plus the length of the last group.
# Checked twice: with an explicit order vector (1151.1) and without one (1151.2).
set.seed(45)
x <- sample(c(NA_integer_, 1:1e5), 1e7, TRUE)
ox <- forderv(x)
o1 <- uniqlist(list(x), ox)
test(1151.1, c(diff(o1), length(x)-tail(o1, 1L)+1L), uniqlengths(o1, length(x)))
o1 <- uniqlist(list(x))
test(1151.2, c(diff(o1), length(x)-tail(o1, 1L)+1L), uniqlengths(o1, length(x)))

# #5190 fix - grouping with .SDcols gave "symbol not subsettable error" - consequence of FR #4979 implementation
# .SDcols is supplied through a variable (sd.cols) rather than a literal.
dt = data.table(grp = sample(letters[1:3],20, replace = TRUE), v1 = rnorm(20), v2 = rnorm(20))
sd.cols <- "v1"
test(1152, dt[, lapply(.SD, mean), by=grp, .SDcols=sd.cols], dt[, list(v1=mean(v1)), by=grp])

# #5171 fix - setattr attribute non-character led to segfault
# A logical attribute name must now produce an error.
x <- FALSE
test(1153, setattr(x, FALSE, FALSE), error="Attribute name must be")

# Fixed binary search capabilities for NA (for int and double) and NaN (for double):
# Each keyed lookup of NA/NaN is compared against the equivalent logical subset; NA and NaN must be told apart for doubles.
set.seed(1)
DT <- data.table(x=sample(c(NA, NaN, Inf, 1:10), 100, TRUE), y=sample(c(NA, 1:10), 100, TRUE), z=sample(c(NA_character_, letters[1:10]), 100, TRUE))
setkey(DT, x)
test(1154.1, DT[J(NaN)], DT[is.nan(x)])
test(1154.2, DT[J(NA_real_)], DT[is.na(x) & !is.nan(x)])
setkey(DT, y)
# setcolorder() restores the x, y, z column order on the join result before comparing
test(1154.3, setcolorder(DT[J(NA_integer_)], c("x", "y", "z")), DT[is.na(y)])
setkey(DT, z)
test(1154.4, setcolorder(DT[J(NA_character_)], c("x", "y", "z")), DT[is.na(z)])

# Fixing the binary search above for NA/NaN also fixes BUG #4918
# Both tables are keyed on y and both contain an NA key value; the NA rows are expected to match each other.
dt1 <- data.table(x = c('red','orange','green'), y=c(1,2,NA), key='y')
dt2 <- data.table(y = c(1,2,3,NA), z = c('a','b','c','missing data'), key='y')
test(1155.1, merge(dt1, dt2, by=c('y')), data.table(y=dt1$y, x=dt1$x, z=dt2$z[1:3], key="y"))
test(1155.2, dt2[dt1], data.table(y=dt1$y, z=dt2$z[1:3], x=dt1$x, key="y"))
test(1155.3, dt1[dt2, nomatch=0L], data.table(x=dt1$x, y=dt1$y, z=dt2$z[1:3], key="y"))

# NaN wasn't properly searched for in some cases. Fixed that. Here's the fix!
# Fixed 25-row table (given in dput form) whose x column holds five NaN followed by two NA and then numbers.
dt <- structure(list(x = c(NaN, NaN, NaN, NaN, NaN, NA, NA, -3, -3, 
-3, -2, -2, -1, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3), y = c(16L, 
25L, 23L, 17L, 21L, 11L, 13L, 15L, 1L, 6L, 4L, 18L, 7L, 3L, 12L, 
24L, 2L, 10L, 20L, 14L, 9L, 19L, 8L, 22L, 5L)), .Names = c("x", 
"y"), row.names = c(NA, -25L), class = c("data.table", "data.frame"
))
setkey(dt, x)
test(1155.4, dt[J(NaN)], dt[is.nan(x)])
test(1155.5, dt[J(NA_real_)], dt[is.na(x) & !is.nan(x)])


# Fix for (usually small) memory leak when grouping, #2648.
# Deliberate worst case: largest group (100000 rows) followed last by a small group (1 row).
# Method: read the Vcells figure in column 2 of gc() before and after 50 repetitions of the same grouping and require growth below 1.
DT = data.table(A=rep(1:2,c(100000,1)), B=runif(100001))
before = gc()["Vcells",2]
for (i in 1:50) DT[, sum(B), by=A]
after = gc()["Vcells",2]
test(1157, after < before+1)  # +1 = 1MB
# Before the patch, Vcells grew dramatically from 6MB to 60MB. Now stable at 6MB. Increase 50 to 1000 and it grew to over 1GB for this case.

# Similar for when dogroups writes less rows than allocated, #2648.
# Same before/after measurement as for test 1157.
DT = data.table(k = 1:50, g = 1:20, val = rnorm(1e4))
before = gc()["Vcells",2]
for (i in 1:50) DT[ , unlist(.SD), by = 'k']
after = gc()["Vcells",2]
test(1158, after < before+1)

# tests for 'setDT' - convert list, DF to DT without copy
# Inputs covered: data.frame, unnamed list, named list, partly named list and an existing data.table.
x <- data.frame(a=1:4, b=5:8)
test(1159.1, setDT(x), data.table(a=1:4, b=5:8))
x <- list(1:4, 5:8)
test(1159.2, setDT(x), data.table(1:4, 5:8))
x <- list(a=1:4, b=5:8)
test(1159.3, setDT(x), data.table(a=1:4, b=5:8))
# Partly named list: the unnamed second item is expected to be called "V1"
x <- list(a=1:4, 5:8)
test(1159.4, setDT(x), setnames(data.table(1:4, 5:8), c("a", "V1")))
x <- data.table(a=1:4, b=5:8)
test(1159.5, setDT(x), data.table(a=1:4, b=5:8))
# Invalid inputs: a plain vector, and a list whose items differ in length
x <- 1:5
test(1159.6, setDT(x), error="Argument 'x' to 'setDT' should be a")
x <- list(1, 2:3)
test(1159.7, setDT(x), error="All elements in argument 'x' to 'setDT'")

# tests for setrev
# setrev(x) is called for its effect on x (the result is not assigned), so y, taken with rev() beforehand, must equal x afterwards.
# Covers integer, integer with NA, double, double with NA/NaN, character and logical vectors.
x <- sample(10)
y <- rev(x)
setrev(x)
test(1160.1, y, x)
x <- sample(c(1:10, NA), 21, TRUE)
y <- rev(x)
setrev(x)
test(1160.2, y, x)
x <- sample(runif(10))
y <- rev(x)
setrev(x)
test(1160.3, y, x)
x <- sample(c(runif(10), NA, NaN), 21, TRUE)
y <- rev(x)
setrev(x)
test(1160.4, y, x)
x <- sample(letters)
y <- rev(x)
setrev(x)
test(1160.5, y, x)
x <- as.logical(sample(0:1, 20, TRUE))
y <- rev(x)
setrev(x)
test(1160.6, y, x)
# A list is not accepted
x <- list(1:10)
test(1160.7, setrev(x), error="Input 'x' must be a vector")

# tests for setreordervec
# setreordervec(y, o) is called for its effect on y (a copy of x); afterwards y must equal x[o],
# where o is base::order(x, na.last=FALSE).
# integer
x <- sample(c(-10:10, NA), 100, TRUE)
o <- base::order(x, na.last=FALSE)
y <- copy(x)
setreordervec(y, o)
test(1161.1, x[o], y)
# numeric
x <- sample(c(NA, rnorm(10)), 100, TRUE)
o <- base::order(x, na.last=FALSE)
y <- copy(x)
setreordervec(y, o)
test(1161.2, x[o], y)
# character
x <- sample(c(NA, letters), 100, TRUE)
o <- base::order(x, na.last=FALSE)
y <- copy(x)
setreordervec(y, o)
test(1161.3, x[o], y)

# tests for is.sorted, and for forderv returning a zero-length result on input that setkey has already sorted
DT <- data.table(x=sample(c(NA, -10:10), 2e2, TRUE), 
      y=sample(c(NA, NaN, -Inf, Inf, -10:10), 2e2, TRUE), 
      z=sample(c(NA, letters), 2e2, TRUE))
# when not sorted, should return FALSE
test(1162.1, is.sorted(DT[[1L]]), FALSE)
setkey(DT, x)
test(1162.2, is.sorted(DT[[1L]]), TRUE)

test(1162.3, is.sorted(DT[[2L]]), FALSE)
setkey(DT, y)
test(1162.4, is.sorted(DT[[2L]]), TRUE)

test(1162.5, is.sorted(DT[[3L]]), FALSE)
setkey(DT, z)
test(1162.6, is.sorted(DT[[3L]]), TRUE)

# after keying by two (then all three) columns, forderv on the same columns must return length 0
setkey(DT, x, y)
test(1162.7, length(forderv(DT, by=1:2)), 0)
setkey(DT, x, z)
test(1162.8, length(forderv(DT, by=c(1L, 3L))), 0)
setkey(DT, y, z)
test(1162.9, length(forderv(DT, by=2:3)), 0)
setkey(DT)
# test number 1162.10 skipped because if it fails it confusingly prints out as 1162.1 not 1162.10
test(1162.11, length(forderv(DT, by=1:3)), 0)
# is.sorted on a whole table with by= is expected to warn, recommending forderv
test(1162.12, is.sorted(DT, by=1:3), TRUE, warning="Use.*forderv.*for efficiency in one step, so you have o as well if not sorted")
test(1162.13, is.sorted(DT, by=2:1), FALSE, warning="Use.*forderv.*for efficiency in one step, so you have o as well if not sorted")

# FR #5152 - last on length=0 arguments
# last() of an empty character vector is expected to return an empty character vector.
x <- character(0)
test(1163, last(x), character(0))

# Test 1164 was a non-ASCII test, now in DtNonAsciiTests

# Bug fix for #5117 - segfault when rbindlist on empty data.tables
# Both filters are written to select no rows (assuming BOD's Time values are all below 100 - confirm), so rbindlist binds two empty tables.
x <- as.data.table(BOD)
y <- copy(x)
test(1165, x[Time>100], rbindlist(list(x[Time > 100], y[Time > 200])))

# Bug fix for the #5300 - rbind(DT, NULL) should not result in error, but BOD has an attribute as well, which won't be preserved (due to C-impl). Changing test.
# The 'reference' attribute is removed from x first, so that both sides of the comparison lack it.
setattr(x <- as.data.table(BOD), 'reference', NULL)
test(1166, x, rbind(x, NULL))

# fix for bug #5307 - ordering with multiple columns in which at least one of them is a logical column
# Grouping by (logical b, integer a): four groups of two rows each are expected.
foo = data.table(a=rep(c(0L,1L,0L,1L),2), b=rep(c(TRUE,TRUE,FALSE,FALSE),2), c=1L)
test(1167, foo[, .N, by=list(b,a)], data.table(b=c(TRUE, TRUE, FALSE, FALSE), a=c(0L,1L,0L,1L), N=2L))

# fix for bug #5355 - rbindlist with factor columns and empty data.tables resulted in error.
# B is an empty keyed table with a zero-length factor column; it is bound in front of the one-row table A.
A <- data.table(x=factor(1), key='x')
B <- data.table(x=factor(), key='x')
test(1168.1, rbindlist(list(B,A)), data.table(x=factor(1)))

# fix for bug #5120, it's related to rbind and factors as well - more or less similar to 1168.1 (#5355). Seems to have been fixed with that commit. Just adding test here.
# tmp1 has a factor column (Maturity) and an ordered factor column (Quality); tmp2 has plain character values in the same columns.
# The data.table rbind result is compared with what rbind gives on the data.frame versions.
tmp1 <- as.data.table(structure(list(Year = 2013L, Maturity = structure(1L, .Label = c("<1", 
"1.0 - 1.5", "1.5 - 2.0", "2.0 - 2.5", "2.5 - 3.0", "3.0 - 4.0", 
"4.0 - 5.0", ">5.0"), class = "factor"), Quality = structure(2L, .Label = c(">BBB", 
"BBB", "BB", "B", "CCC", "<CCC", "NR", "CASH"), class = c("ordered", 
"factor")), Ct = 2L, Wt = 1.56, CtTotRet = 1.08, TotRet = 69.2307692307692), .Names = c("Year", 
"Maturity", "Quality", "Ct", "Wt", "CtTotRet", "TotRet"), class = c("data.table", 
"data.frame"), row.names = c(NA, -1L)))

tmp2 <- as.data.table(structure(list(Year = 2013L, Maturity = "TOTAL", Quality = "TOTAL", 
Ct = 214L, Wt = 100.001, CtTotRet = 406.26, TotRet = 406.255937440626), .Names = c("Year", 
"Maturity", "Quality", "Ct", "Wt", "CtTotRet", "TotRet"), class = c("data.table", 
"data.frame"), row.names = c(NA, -1L)))

ans <- rbind(tmp1, tmp2)
test(1168.2, as.data.frame(ans), rbind(as.data.frame(tmp1), as.data.frame(tmp2)))

# checks of "" and NA_character_ ordering.
# Per the expected values, NA sorts first, then "", then other strings; input already in order gives a zero-length result.
test(1169, forderv(c(NA,"","a","NA")), INT(1,2,4,3))  # data.table does ascii ordering currently, so N comes before a
test(1170, length(forderv(c(NA,"","NA","a"))), 0)
test(1171, forderv(c("",NA,"a","NA")), INT(2,1,4,3))
test(1172, length(forderv(NA_character_)), 0)
test(1173, length(forderv(c(NA_character_,NA_character_))), 0)
test(1174, length(forderv(c(NA_character_,NA_character_,NA_character_))), 0)
test(1175, length(forderv("")), 0)
test(1176, length(forderv(c("",""))), 0)
test(1177, length(forderv(c("","",""))), 0)
test(1178, forderv(c("",NA,"")), INT(2,1,3))

# Test no invalid sort order warning when key is ok and 2nd column is character/double
# CJ() returns a table that is already keyed; a further setkey() must leave it unchanged and raise no warning.
DT = CJ(a=rep(1:3),b=c("a","b"))
test(1179.1, key(DT), c("a","b"))
test(1179.2, setkey(DT), DT)  # i.e. no warning
DT = CJ(a=rep(1:3),b=c(3.14,3.15))
test(1180.1, key(DT), c("a","b"))
test(1180.2, setkey(DT), DT)  # i.e. no warning

# test for iradix (NA and negatives). Tests need large range to trigger iradix.
test(1181, forderv(INT(1,3,5000000,NA)), INT(4,1,2,3))
test(1182, forderv(INT(1,-1,5000000,NA)), INT(4,2,1,3))
test(1183, forderv(INT(-3,-7,1,-6000000,NA,3,5000000,NA,8)), INT(5,8,4,2,1,3,6,9,7))

# tests of gsum and gmean with NA
# The same aggregations are run at several datatable.optimize levels; the verbose output is matched
# to confirm which code path was taken, and the results must agree between levels.
DT = data.table(x=rep(c("a","b","c","d"),each=3), y=c(1L,3L,6L), v=as.numeric(1:12))
# Put NAs in by reference: some groups are partly NA, group "d" (rows 10:12) is entirely NA in both y and v
set(DT,c(3L,8L),"y",NA)
set(DT,c(5L,9L),"v",NA)
set(DT,10:12,"y",NA)
set(DT,10:12,"v",NA)
options(datatable.optimize=1)  # turn off GForce
test(1184.1, DT[, sum(v), by=x, verbose=TRUE], output="dogroups")
test(1184.2, DT[, mean(v), by=x, verbose=TRUE], output="dogroups")
test(1185.1, DT[, list(sum(y), sum(v), sum(y,na.rm=TRUE), sum(v,na.rm=TRUE)), by=x],
           data.table(x=c("a","b","c","d"), V1=c(NA,10L,NA,NA), V2=c(6,NA,NA,NA), V3=c(4L,10L,7L,0L), V4=c(6,10,15,0)))
options(datatable.optimize=0)  # turn off fastmean optimization to get the answer to match to
test(1185.2, ans <- DT[, list(mean(y), mean(v), mean(y,na.rm=TRUE), mean(v,na.rm=TRUE)), by=x, verbose=TRUE], output="All optimizations.*off")
options(datatable.optimize=1)  # turn on old fastmean optimization only
test(1185.3, DT[, list(mean(y), mean(v), mean(y,na.rm=TRUE), mean(v,na.rm=TRUE)), by=x, verbose=TRUE], ans, output="Old mean.*changed j")
options(datatable.optimize=Inf)  # turn on GForce
test(1185.4, DT[, list(mean(y), mean(v), mean(y,na.rm=TRUE), mean(v,na.rm=TRUE)), by=x, verbose=TRUE], ans, output="GForce optimized j to")
test(1186, DT[, sum(v), by=x, verbose=TRUE], output="GForce optimized j to")
test(1187.1, DT[, list(sum(y), sum(v), sum(y,na.rm=TRUE), sum(v,na.rm=TRUE)), by=x],
           data.table(x=c("a","b","c","d"), V1=c(NA,10L,NA,NA), V2=c(6,NA,NA,NA), V3=c(4L,10L,7L,0L), V4=c(6,10,15,0)))
# na.rm supplied through a variable (MyVar) instead of a literal: GForce must still be reported and the result must match the literal form
MyVar = TRUE
test(1187.2, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=TRUE], output="GForce optimized j to",
             DT[, list(sum(y,na.rm=TRUE), mean(y,na.rm=TRUE)), by=x])
test(1187.3, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to",
             DT[, mean(y,na.rm=TRUE), by=x])
MyVar = FALSE
test(1187.4, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=TRUE], output="GForce optimized j to",          
             DT[, list(sum(y,na.rm=FALSE), mean(y,na.rm=FALSE)), by=x])
test(1187.5, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to",
             DT[, mean(y,na.rm=FALSE), by=x])


# test from Zach Mayer
# Three strings made of quotes, parentheses, spaces and a comma that differ only near the end.
a <- c("\"\"\"\")  \" \"   \"  \"    \"",  "\"\"\"\")  \" \"   \"  \"   \"",  "\"\"\"\")  \" \"   \"  \"    ,\"")
test(1188, forderv(a), INT(1,3,2))

# test as.ITime vectorization
# Input mixes "HH:MM" and "HH:MM:SS"; both must be converted in a single call.
x = c("18:00", "18:00:12")
test(1189, all(as.character(as.ITime(x)) == c("18:00:00", "18:00:12")))

# that CJ() orders in the same order as setkey, #5375
DT = CJ(c("Corp", "CORP"), 1:3)
test(1190, setkey(DT), DT)   # tests no warning here from setkey, was "key rebuilt" due to inconsistent locale sorting in v1.8.10

# non-exact recycling in j results.  Was caught with error in v1.8.10, now recycles with remainder and warning
# Each group has 3 rows, so 1:2 is recycled to 1,2,1 within every group.
DT = data.table(a=1:2,b=1:6)
test(1191, DT[, list(b,1:2), by=a], data.table(a=INT(1,1,1,2,2,2),b=INT(1,3,5,2,4,6),V2=INT(1,2,1,1,2,1)),
           warning="Recycled leaving remainder of 1 item.*This warning is once only")
           
# that twiddle is used consistently, and tolerance has gone.
# nice example from : http://stackoverflow.com/questions/21885290/data-table-roll-nearest-returns-multiple-results
x = 0.0275016249293408
DT = data.table(rnk = c(0, 0.0909090909090909, 0.181818181818182, 0.272727272727273),
                val = c(0.0233775088495975, 0.0270831481152598, 0.0275016216267234, 0.0275016249293408))
# 2 byte rounding is about 11 s.f., so val[3] and val[4] are considered different
# Note that setkey(DT,val) inside test 1192 keys DT by reference, so DT is keyed on val for tests 1193-1194.
test(1192, DT[,.N,keyby=val], setkey(DT,val)[,.N,by=val])  # tests uniqlist uses twiddle
test(1193, DT[,.N,by=val]$N, INT(1,1,1,1))
test(1194, DT[.(x),.N], 1)  # tests bmerge uses twiddle
DT[3, val:=0.0275016249291963]
setkey(DT, NULL)  # val[3] and val[4] are now equal, within 2 byte rounding
test(1195, DT[,.N,keyby=val], setkey(DT,val)[,.N,by=val])
test(1196, DT[,.N,by=val]$N, INT(1,1,2))
test(1197, DT[.(x),.N], 2)

# sum(.SD) in j with by is expected to stop with an error pointing the user towards lapply(.SD, ...)
DT = data.table(id=1:2, val1=6:1, val2=6:1)   # 5380
test(1199, DT[, sum(.SD), by=id], error="GForce sum can only be applied to columns, not .SD or similar.*looking for.*lapply(.SD")

# Selection of columns, copy column to maintain the same as R <= 3.0.2, in Rdevel, for now
# Otherwise e.g. setkey changes the original columns too.  TO DO: could allow shallow copy, perhaps.
# Each test requires the selected column to live at a different address() from the column in DT, i.e. to be a copy.
DT = data.table(a=1:3, b=6:4, c=7:9)
test(1200, address(DT[,"b",with=FALSE]$b) != address(DT$b))
test(1201, address(DT[,c("b","c"),with=FALSE]$c) != address(DT$c))
test(1202, address(DT[,list(b)]$b) != address(DT$b))
test(1203, address(DT[,list(b,c)]$c) != address(DT$c))
test(1204, address(DT[1:3,"b",with=FALSE]$b) != address(DT$b))
test(1205, address(DT[TRUE,"b",with=FALSE]$b) != address(DT$b))

# Somehow we didn't test DT[order(...)] anywhere yet (other than via plyr's arrange in test 304)
DT = data.table(a=6:1, b=1:2)
test(1206, DT[order(b,a)], data.table(a=INT(2,4,6,1,3,5),b=INT(1,1,1,2,2,2)))

# Test joining to Inf, -Inf and mixed non-finites, and grouping
# Every test runs twice: with setNumericRounding(0L) for i=1 (ids x.1) and setNumericRounding(2L) for i=2 (ids x.2).
# The rounding setting is left at 2L when the loop ends.
DT = data.table(A=c(1,2,-Inf,+Inf,3,-1.1,NaN,NA,3.14,NaN,2.8,NA), B=1:12, key="A")
for (i in 1:2) {
    setNumericRounding(if (i==1L) 0L else 2L)
    test(1207+i*0.1, DT[.(c(NA_real_,Inf)),B], INT(8,12,4))
    test(1208+i*0.1, DT[.(c(Inf,NA_real_)),B], INT(4,8,12))
    test(1209+i*0.1, DT[.(c(NaN,NA_real_)),B], INT(7,10,8,12))
    test(1210+i*0.1, DT[.(c(NA_real_,NaN)),B], INT(8,12,7,10))
    test(1211+i*0.1, DT[,sum(B),by=A]$V1, INT(20,17,3,6,1,2,11,5,9,4)) 
    test(1212+i*0.1, DT[,sum(B),by=list(g=abs(trunc(A)))], data.table(g=c(NA,NaN,Inf,1,2,3),V1=INT(20,17,7,7,13,14)))
    test(1213+i*0.1, DT[,sum(B),keyby=list(g=abs(trunc(A)))], data.table(g=c(NA,NaN,1,2,3,Inf),V1=INT(20,17,7,13,14,7),key="g"))
    # test(1214+i*0.1, DT[.(-200.0),roll=TRUE]$B, 3L)  # TO DO: roll to -Inf.  Also remove -Inf and test rolling to NaN and NA
}

# that fread reads unescaped (but balanced) quotes in the middle of fields ok, #2694
# 1215: space-separated input whose URL field contains many double quotes
test(1215,
   fread('N_ID VISIT_DATE REQ_URL REQType\n175931 2013-03-08T23:40:30 http://aaa.com/rest/api2.do?api=getSetMobileSession&data={"imei":"60893ZTE-CN13cd","appkey":"android_client","content":"Z0JiRA0qPFtWM3BYVltmcx5MWF9ZS0YLdW1ydXoqPycuJS8idXdlY3R0TGBtU 1'),
   data.table(N_ID=175931L, VISIT_DATE="2013-03-08T23:40:30", REQ_URL='http://aaa.com/rest/api2.do?api=getSetMobileSession&data={"imei":"60893ZTE-CN13cd","appkey":"android_client","content":"Z0JiRA0qPFtWM3BYVltmcx5MWF9ZS0YLdW1ydXoqPycuJS8idXdlY3R0TGBtU', REQType=1L)
)
# 1216.1/1216.2 pass both an expected value and error=; the inputs are expected to fail with a column-count error on line 3.
# 1216.3 is a variant that is expected to parse.
test(1216.1, fread('A,B,C\n1.2,Foo"Bar,"a"b\"c"d"\nfo"o,bar,"b,az""\n'), data.table(A = c("1.2", "fo\"o"), B = c("Foo\"Bar", "bar"), C = c("\"a\"b\"c\"d\"", "\"b")), error="Expecting 3 cols, but line 3 contains")
test(1216.2, fread('A,B,C\n1.2,Foo"Bar,"a"b\"c"d""\nfo"o,bar,"b,az""\n'), data.table(A = c("1.2", "fo\"o"), B = c("Foo\"Bar", "bar"), C = c("a\"b\"c\"d\"", "\"b")), error="Expecting 3 cols, but line 3 contains")
test(1216.3, fread('A,B,C\n1.2,Foo"Bar,"a"b\"c"d""\nfo"o,bar,"b,"az""\n'), 
           data.table(A=c('1.2','fo"o'), B=c('Foo"Bar','bar'),C=c('a"b"c"d"','b,"az"')))
test(1217, fread('"One,Two","Three",Four\n12,3,4\n56,7,8\n'),  # quoted column names including the separator
           data.table("One,Two"=c(12L,56L),Three=c(3L,7L),Four=c(4L,8L)))

# joining from empty character, #5387
# i is DT[FALSE], i.e. a zero-row table with the same columns; the join result must be empty with an extra i.a column.
DT = data.table(a=1:3, b=c("a","b","c"), key="b")
test(1218, DT[ DT[FALSE] ], data.table(a=integer(), b=character(), i.a=integer(), key="b"))

# set() multiple columns
# value may be a data.table (1219) or a plain list (1220), one item per target column.
DT = data.table(a=1:3,b=4:6,c=7:9)
newVals = data.table(10:12,13:15)
test(1219, set(DT,j=2:3,value=newVals), data.table(a=1:3,b=10:12,c=13:15))
newVals = list(16:18,19:21)
test(1220, set(DT,j=2:3,value=newVals), data.table(a=1:3,b=16:18,c=19:21))

# Test non-join key columns used in j work again (spotted straight away by Michele on datatable-help when v1.9.2 was released).
# Introduced at commit 1030. Very extensive new tests 1136* still all pass (great stuff Arun).
# DT is keyed on (a, b) but joined on a only, so b is a key column that takes no part in the join.
DT = data.table(a=1:2,b=letters[1:6],key="a,b")
test(1221, DT[.(1),b], c("a","c","e"))

###########################################################################################
# extensive testing of forderv with decreasing order of sorting (total of >700 tests so far) - without NaN/NA
###########################################################################################
# - Generate a random seed each time; the randomness allows catching errors quicker
# - But save the seed so that we can generate the same data back if any error occurs
seed = as.integer(Sys.time()) # sample(9999L, 1L) temporary fix, because all the set.seed(.) used above makes this sample() step deterministic (always seed=9107)
# seedInfo accumulates the text that is printed if any of these tests fail, so that the run can be reproduced
seedInfo = paste("forder decreasing argument test: seed = ", seed,"  ", sep="")
# no NaN (because it's hard to match with base::order)                              ## TODO: add tests with NaN
set.seed(seed)
# Generate `n` random strings, each made of 8 lowercase letters.
# Draws n*8 letters, arranges them as an n x 8 matrix and pastes each row into one string.
# `collapse=""` is required to join the letters of a row: `sep` only separates the *arguments*
# of paste(), so with a single vector argument `sep=""` leaves the 8 letters unjoined and
# apply() would return an 8 x n matrix of single letters instead of n strings.
# Returns a character vector of length n.
foo <- function(n) apply(matrix(sample(letters, n*8L, TRUE), ncol=8L), 1, paste, collapse="")
# Random columns of 1000 values each: two integer (i2 with a much wider range), two double (d1 includes +/-Inf) and two character
i1 = as.integer(sample(c(-100:100), 1e3, TRUE))
i2 = as.integer(sample(c(-100:100, -1e6, 1e6), 1e3, TRUE))
d1 = as.numeric(sample(c(-100:100,Inf,-Inf), 1e3, TRUE))
d2 = as.numeric(rnorm(1e3))
c1 = sample(c(letters), 1e3, TRUE)
c2 = sample(foo(200), 1e3, TRUE)

DT = data.table(i1, i2, d1, d2, c1, c2)
# randomise col order as well
colorder=sample(ncol(DT))
setcolorder(DT, names(DT)[colorder])
seedInfo = paste(seedInfo, "colorder = ", paste(colorder, collapse=","), sep="")
ans = vector("list", length(names(DT)))

# For every subset of i columns (combn) and every combination of ascending/descending per column (rows of cj),
# compare forderv(DT, by=, order=) with base order() evaluated on the same columns.
# test_no is advanced by 0.001 via <<- for each comparison, starting just after 1223.
test_no = 1223.0
oldnfail = nfail
for (i in seq_along(names(DT))) {
    # cj: all 2^i combinations of 1L (ascending) / -1L (descending), one row per combination
    cj = as.matrix(do.call(CJ, split(rep(c(1L,-1L), each=i), 1:i)))
    ans[[i]] = combn(names(DT), i, function(x) {
        tmp = apply(cj, 1, function(y) {
            test_no <<- signif(test_no+.001, 7)
            # ll: the call order(...) with one argument per column in x; a descending column is passed as -col,
            # or as -xtfrm(col) when it is character (unary minus cannot be applied to strings)
            ll = as.call(c(as.name("order"), 
                    lapply(seq_along(x), function(j) {
                        if (y[j] == 1L) 
                            as.name(x[j]) 
                        else {
                            if (class(DT[[x[j]]]) =="character") 
                                as.call(c(as.name("-"), as.call(list(as.name("xtfrm"), as.name(x[j])))))
                            else 
                                as.call(list(as.name("-"), as.name(x[j])))
                            }
                        })
                    )
                )
            test(test_no, forderv(DT, by=x, order=y), with(DT, eval(ll)))
        })
        dim(tmp)=NULL
        list(tmp)
    })
}
# the collected results are not used afterwards
ans = NULL
if (nfail > oldnfail) cat(seedInfo, "\n")  # to reproduce

# fix for bug #5405 - unique on null data.table should return null data.table
test(1224, unique(data.table(NULL)), data.table(NULL))

# forderv should return 'integer(0)' when 'x' is not atomic and of 0 length (to be consistent with base::order)
test(1225.1, forderv(list()), integer(0))
test(1225.2, forderv(data.table(NULL)), integer(0))

# fix for bug #5377 - data.table(null list, data.frame, data.table) should return null data.table
# Each empty input is compared against null.data.table().
test(1226.1, data.table(list()), null.data.table())
test(1226.2, data.table(data.frame(NULL)), null.data.table())
test(1226.3, data.table(data.table(NULL)), null.data.table())
test(1226.4, data.table(data.frame()), null.data.table())
test(1226.5, data.table(data.table()), null.data.table())

# fix for bug #5321 - POSIXlt issue.
# strptime() returns POSIXlt. DT1 is the reference, built through data.frame() and then setDT().
# data.table() given the same POSIXlt column is expected to warn about converting it and to equal DT1.
setDT(DT1 <- data.frame(id=1:3, d=strptime(c("06:02:36", "06:02:48", "07:03:12"), "%H:%M:%S")))
test(1227, data.table(id=1:3, d=strptime(c("06:02:36", "06:02:48", "07:03:12"), "%H:%M:%S")), DT1, warning="POSIXlt column type detected and converted to")

# fix for bug #5296 - retaining class of original data.table after passing through `[.data.table`
# DT gets an extra leading class ("newclass"); subsetting, column selection, grouping and := by group must all return the same class vector.
DT <- data.table(a=1:2,b=3:4)
setattr(DT, "class", c("newclass", class(DT)))
test(1228.1, class(DT), class(DT[a>1]))
test(1228.2, class(DT), class(DT[, list(b)]))
test(1228.3, class(DT), class(DT[, "b", with=FALSE]))
test(1228.4, class(DT), class(DT[, sum(b), by=a]))
test(1228.5, class(DT), class(DT[a>1, sum(b), by=a]))
test(1228.6, class(DT), class(DT[a>1, c:=sum(b), by=a]))

# test 1229 was non-ASCII, now in package DtNonAsciiTests

# Test that ad hoc by detects if ordered and dogroups switches to memcpy if contiguous, #1050
# The verbose output is matched to tell which path was taken: "memcpy contiguous groups" or "collecting discontiguous groups".
DT = data.table(a=1:3,b=1:6,key="a")
options(datatable.optimize=1) # turn off GForce, to test dogroups
test(1230, DT[, sum(b), by=a, verbose=TRUE], output="memcpy contiguous groups")
# Key removed: the rows are still ordered by a, so grouping by a (or a+1) is still expected to be detected as contiguous
setkey(DT,NULL)
test(1231, DT[, sum(b), by=a, verbose=TRUE], output="memcpy contiguous groups")
test(1232, DT[, sum(b), by=a+1, verbose=TRUE], output="memcpy contiguous groups")
test(1233, DT[, sum(b), by=a%%2, verbose=TRUE], output="collecting discontiguous groups")
test(1234, DT[, sum(a), by=b, verbose=TRUE], output="collecting discontiguous groups")
# by=.EACHI keeps the order of i (so no key is expected for 3:2), whereas keyby=.EACHI returns a sorted, keyed result
setkey(DT,a)
test(1235, DT[.(2:3),sum(b),by=.EACHI,verbose=TRUE], data.table(a=2:3,V1=c(7L,9L),key="a"), output="memcpy contiguous groups")
test(1236, DT[.(3:2),sum(b),by=.EACHI,verbose=TRUE], data.table(a=3:2,V1=c(9L,7L)), output="memcpy contiguous groups")
test(1237, DT[.(3:2),sum(b),keyby=.EACHI,verbose=TRUE], data.table(a=2:3,V1=c(7L,9L),key="a"), output="memcpy contiguous groups")
options(datatable.optimize=Inf)

# setorder() on the key column: ascending keeps the key, descending drops it
DT <- data.table(x = 1:5, y = 6:10, key = "x")
test(1238.1, key(setorder(DT, x)),  "x")
test(1238.2, key(setorder(DT, -x)), NULL)

# bug #5366: setkey() (and setorder()) with a non-key column of type list
DT <- data.table(x = 5:1, y = as.list(1:5))
test(1239.1,
     setkey(DT, x),
     setattr(data.table(x = 1:5, y = as.list(5:1)), "sorted", "x"))
DT <- data.table(x = 5:1, y = as.list(1:5))
test(1239.2,
     setorder(DT, x),
     data.table(x = 1:5, y = as.list(5:1)))

# bug #5408: row order from as.data.table.table must match as.data.frame on the same table
set.seed(123)
DT <- data.table(XX = sample(LETTERS[1:5], 1000, replace = TRUE),
                 yy = sample(1:5, 1000, replace = TRUE))
ans1 <- as.data.table(DT[, table(XX, yy)])
ans2 <- as.data.table(table(DT$XX, DT$yy))
# align column names before comparing: N -> Freq, and ans2 takes ans1's names
setnames(ans1, "N", "Freq")
setnames(ans2, names(ans1))
test(1240.1, ans1, setDT(as.data.frame(with(DT, table(XX, yy)), stringsAsFactors = FALSE)))
test(1240.2, ans2, ans1)

# Test for optimisation of 'order' to 'forder'
set.seed(45L)
DT <- data.table(x=sample(1e2, 1e6,TRUE), y=sample(1e2, 1e6,TRUE))
# remember the user's setting so it can be put back at the end
optim = getOption("datatable.optimize")
# with optimisation -> order will be optimised to forder
options(datatable.optimize=Inf)
t1 = unname(system.time(ans1 <- DT[order(x,-y)])['elapsed'])
# without optimisation
options(datatable.optimize=0L)
t2 = unname(system.time(ans2 <- DT[order(x,-y)])['elapsed'])
# both code paths must give the same answer; this part is deterministic
test(1241.1, ans1, ans2)
# The speed comparison depends on wall-clock time and so can fail spuriously on a
# loaded or slow machine. Only run it when timing tests have been switched on.
if (.timingtests) test(1241.2, t1 < t2, TRUE)  # with optimisation must be faster
# restore optimisation
options(datatable.optimize=optim)

# with=FALSE combined with := must not warn yet (plan: helpful warning first, helpful error later)
DT    <- data.table(a = 1:3, b = 4:6)
myCol <- "a"
test(1242,
     DT[2, myCol := 6L, with = FALSE],
     data.table(a = INT(1, 6, 3), b = 4:6))

# #5378: the type of the result must not depend on 'mult'
DT <- data.table(id = rep(1:2, each = 2), var = rnorm(4), key = "id")
test(1243, DT[.(1:2), list(var)][c(2, 4)], DT[.(1:2), list(var), mult = "last"])
test(1244, DT[.(1:2), var],                DT$var)
test(1245, DT[.(1:2), var][c(2, 4)],       DT[.(1:2), var, mult = "last"])

#############################################
# FR #5205 - fromLast argument to duplicated
#############################################
# The seed is taken from the clock, so each run uses different data. If any test in
# this section fails, seedInfo (seed and column order) is printed to reproduce the run.
seed = as.integer(Sys.time())
# label names the test actually being run here (it previously said "forder decreasing")
seedInfo = paste("duplicated fromLast argument test: seed = ", seed,"  ", sep="")
set.seed(seed)
DT <- data.table(w=sample(-5:5, 100, TRUE),
                 x=as.numeric(sample(-5:5, 100, TRUE)),
                 y=sample(paste("id", 1:10, sep=""), 100, TRUE),
                 z=sample(c(TRUE, FALSE), 100, TRUE))

# randomise the column order too
colorder=sample(ncol(DT))
setcolorder(DT, names(DT)[colorder])
seedInfo = paste(seedInfo, "colorder = ", paste(colorder, collapse=","), sep="")

test_no = 1246.0
oldnfail = nfail
# every combination of 1..ncol(DT) columns is used as 'by', first unkeyed then keyed;
# the result is compared with duplicated.data.frame on the same columns
for (i in seq_along(names(DT))) {
    cc = combn(names(DT), i)
    apply(cc, 2L, function(jj) {
        test_no <<- signif(test_no+.01, 7) # first without key
        test(test_no, duplicated(DT, by=jj, fromLast=TRUE), duplicated.data.frame(DT[, jj, with=FALSE], fromLast=TRUE))
        test_no <<- signif(test_no+.01, 7)
        setkeyv(DT, jj) # with key
        test(test_no, duplicated(DT, by=jj, fromLast=TRUE), duplicated.data.frame(DT[, jj, with=FALSE], fromLast=TRUE))
    })
}
if (nfail > oldnfail) cat(seedInfo, "\n")  # to reproduce

# Same comparison again, this time with NA present in every column.
# test_no and seedInfo carry on from the section above.
DT <- data.table(
    w = sample(c(-5:5, NA_integer_), 100, TRUE),
    x = as.numeric(sample(c(-5:5, NA), 100, TRUE)),
    y = sample(c(NA, paste("id", 1:10, sep = "")), 100, TRUE),
    z = sample(c(NA, TRUE, FALSE), 100, TRUE)
)

colorder <- sample(ncol(DT))
setcolorder(DT, names(DT)[colorder])
seedInfo <- paste(seedInfo, "colorder = ", paste(colorder, collapse = ","), sep = "")

oldnfail <- nfail
for (i in seq_along(names(DT))) {
    cc <- combn(names(DT), i)
    apply(cc, 2L, function(jj) {
        # unkeyed first
        test_no <<- signif(test_no + .01, 7)
        test(test_no,
             duplicated(DT, by = jj, fromLast = TRUE),
             duplicated.data.frame(DT[, jj, with = FALSE], fromLast = TRUE))
        # then keyed on the same columns
        test_no <<- signif(test_no + .01, 7)
        setkeyv(DT, jj)
        test(test_no,
             duplicated(DT, by = jj, fromLast = TRUE),
             duplicated.data.frame(DT[, jj, with = FALSE], fromLast = TRUE))
    })
}
if (nfail > oldnfail) cat(seedInfo, "\n")  # to reproduce

# FR #5172: anyDuplicated.data.table, checked against anyDuplicated.data.frame
set.seed(45L)
dt <- data.table(x = sample(3, 10, TRUE), y = sample(letters[1:3], 10, TRUE))
test(1247.1, anyDuplicated(dt),
             anyDuplicated.data.frame(dt))
test(1247.2, anyDuplicated(dt, fromLast = TRUE),
             anyDuplicated.data.frame(dt, fromLast = TRUE))
test(1247.3, anyDuplicated(dt, by = "y"),
             anyDuplicated.data.frame(dt[, "y", with = FALSE]))
test(1247.4, anyDuplicated(dt, by = "y", fromLast = TRUE),
             anyDuplicated.data.frame(dt[, "y", with = FALSE], fromLast = TRUE))

# #5423: y * eval(parse(..)) in j should work without wrapping it in "("
DT <- data.table(x = seq(1, 10, 1), y = seq(2, 20, 2))
test(1248.1,
     DT[, y := y * eval(parse(text = "1*2"))],
     data.table(x = seq(1, 10, 1), y = seq(4, 40, 4)))
# The first fix was incomplete and came back as bug #5527; these cover that case.
DT  <- data.table(id = 1:5, var = letters[1:5])
ans <- copy(DT)
id  <- "va"
test(1248.2, DT[, eval(parse(text = paste(id, "r", sep = "")))],         letters[1:5])
test(1248.3, DT[, id2 := eval(parse(text = paste(id, "r", sep = "")))],  ans[, id2 := var])

# DT[order(...)] on data that is already sorted (the case where forder returns integer(0))
DT <- data.table(x = rep(1:4, each = 5), y = 1:20)
test(1249.1, DT[order(x)],    DT)
test(1249.2, DT[order(y)],    DT)
test(1249.3, DT[order(x, y)], DT)

# #5424: by=NULL is accepted by duplicated(); logical by is rejected with an error
set.seed(1L)
DT <- data.table(x = sample(3, 10, TRUE), y = sample(2, 10, TRUE), key = "x")
test(1250.1, duplicated(DT, by = NULL), duplicated.data.frame(DT))
test(1250.2, duplicated(DT, by = FALSE),
     error = "Only NULL, column indices or column names are allowed in by")
test(1250.3, duplicated(DT, by = TRUE),
     error = "Only NULL, column indices or column names are allowed in by")

# DT[order(...)] with the 'decreasing' argument
set.seed(1L)
DT <- data.table(x = sample(3, 10, TRUE), y = sample(2, 10, TRUE))
test(1251.1, DT[order(x, y, decreasing = TRUE)],  DT[order(-x, -y)])
test(1251.2, DT[order(x, -y, decreasing = TRUE)], DT[order(-x, y)])
# Complex calls: the expected row order is computed outside `[.data.table` via with().
# See the note in setkey.R under 'forder' on how forder and order differ for 'list' inputs.
ix <- with(DT, order(x + y))
test(1251.3, DT[order(x + y)], DT[ix])
ix <- with(DT, order(-x - y))
test(1251.4, DT[order(-x - y)], DT[ix])
ix <- with(DT, order(x + y, decreasing = TRUE))
test(1251.5, DT[order(x + y, decreasing = TRUE)], DT[ix])
ix <- with(DT, order(4 * x - 5 * y, decreasing = TRUE))
test(1251.6, DT[order(4 * x - 5 * y, decreasing = TRUE)], DT[ix])
ix <- with(DT, order(1 - DT$x, decreasing = TRUE))
test(1251.7, DT[order(1 - DT$x, decreasing = TRUE)], DT[ix])
# base errors here as well; the message differs because it is forder's error that is raised
test(1251.8, DT[order(x, list(-y), decreasing = TRUE)],
     error = "Column .* for ordering currently")
# further edge cases, to stay consistent with base
test(1251.9,  DT[order("a")],                   DT[1L])
test(1251.10, DT[order("b", "a")],              DT[1L])
test(1251.11, DT[order(list("b", "a"))],        error = "Column .* for ordering currently")
test(1251.12, DT[order(list("b"), list("a"))],  DT[1L])

##############################################################
# extensive tests for order optimisation within `[.data.table`
##############################################################
# clock-based seed; seedInfo is printed on failure so the run can be reproduced
seed     <- as.integer(Sys.time())
seedInfo <- paste("forder decreasing argument test: seed = ", seed, "  ", sep = "")
set.seed(seed)
# these variables try to simulate groups of length 1, 2, < 200, > 200 so as to cover all different internal implementations
# Returns a character vector of n random strings, each made of 8 lower-case letters.
# Each row of an n x 8 matrix of sampled letters is pasted into one string.
# collapse="" is required for that: with sep="" alone, paste() on a single vector
# returns the 8 letters unjoined and apply() would give back an 8 x n matrix.
foo <- function(n) apply(matrix(sample(letters, n*8L, TRUE), ncol=8L), 1, paste, collapse="")
# Columns with group sizes chosen by the rep() counts below (1, 2, 7, 190, 300, ...)
i1 = as.integer(sample(rep(c(-3:3, NA_integer_), c(1, 2, 190, 300, 7, 190, 210, 100))))
i2 = as.integer(sample(rep(c(-2:2, -1e6, 1e6, NA_integer_), c(1, 2, 190, 300, 7, 190, 210, 100))))
d1 = as.numeric(sample(rep(c(-2:2,Inf,-Inf, NA_real_, 5, -1e3), c(1, 190, 2, 300, 7, 50, 50, 100, 150, 150))))
c1 = sample(rep(c(letters[1:5], NA_character_, "z"), c(1, 2, 190, 7, 300, 200, 300)))
c2 = sample(c(foo(200), NA_character_), 1e3, TRUE)

DT = data.table(i1, i2, d1, c1, c2)
# randomise col order as well
colorder=sample(ncol(DT))
setcolorder(DT, names(DT)[colorder])
seedInfo = paste(seedInfo, "colorder = ", paste(colorder, collapse=","), sep="")
ans = vector("list", length(names(DT)))

# These tests are numbered 1252.001, 1252.002, ... so that they sit between 1251.x
# above and test 1253 below, rather than overlapping the 1253 range.
test_no = 1252
oldnfail = nfail
# For every combination 'x' of i columns, and every ascending(1)/descending(-1) pattern 'y'
# over them (rows of cj), compare forderv() with base order() on the equivalent call.
for (i in seq_along(names(DT))) {
    cj = as.matrix(do.call(CJ, split(rep(c(1L,-1L), each=i), 1:i)))
    ans[[i]] = combn(names(DT), i, function(x) {
        tmp = apply(cj, 1, function(y) {
            test_no <<- signif(test_no+.001, 7)
            # build the call order(col1, -col2, ...) matching the pattern in y
            ll = as.call(c(as.name("order"),
                    lapply(seq_along(x), function(j) {
                        if (y[j] == 1L)
                            as.name(x[j])
                        else {
                            # descending: a character column is wrapped in xtfrm() before negating
                            if (is.character(DT[[x[j]]]))
                                as.call(c(as.name("-"), as.call(list(as.name("xtfrm"), as.name(x[j])))))
                            else
                                as.call(list(as.name("-"), as.name(x[j])))
                            }
                        })
                    )
                )
            ans1 = forderv(DT, by=x, order=y, na.last=TRUE)         # adding tests for both nalast=TRUE and nalast=NA
            test(test_no, ans1, with(DT, eval(ll)))
            test_no <<- signif(test_no+.001, 7)
            ll <- as.call(c(as.list(ll), na.last=NA))
            ans1 = forderv(DT, by=x, order=y, na.last=NA)           # nalast=NA here.
            # zero entries are dropped from forderv's result before comparing with base
            test(test_no, ans1[ans1 != 0], with(DT, eval(ll)))
        })
        dim(tmp)=NULL
        list(tmp)
    })
}
ans = NULL
if (nfail > oldnfail) cat(seedInfo, "\n")  # to reproduce

###############

# #5369: UPCs are stored in numeric with > 11 significant figures, so the rounding
# tolerance has to be switched off to tell neighbouring codes apart
DT <- data.table(
    upc = c(301426027592, 301426027593, 314775802939, 314775802940, 314775803490,
            314775803491, 314775815510, 314775815511, 314933000171, 314933000172),
    year = 2006:2007
)
test(1253, DT[, .N, by = upc]$N, rep.int(2L, 5L))
setNumericRounding(0)
test(1254, DT[, .N, by = upc], data.table(upc = DT$upc, N = 1L))
test(1255, unique(DT, by = "upc"), DT)
setNumericRounding(2)
test(1256, DT[, .N, by = upc]$N, rep.int(2L, 5L))
DT <- data.table(upc  = rep(c(360734147771, 360734147770), each = 3),
                 year = rep(2009:2011, times = 2))
setNumericRounding(0)
test(1257, DT[, .N, by = upc],             data.table(upc = c(360734147771, 360734147770), N = 3L))
test(1258, DT[, .N, by = upc][order(upc)], data.table(upc = c(360734147770, 360734147771), N = 3L))
setNumericRounding(1)
test(1259, DT[, .N, by = upc],             data.table(upc = c(360734147771, 360734147770), N = 3L))
test(1260, DT[, .N, by = upc][order(upc)], data.table(upc = c(360734147770, 360734147771), N = 3L))
test(1261, getNumericRounding(), 1L)
# the limit of double precision (16 s.f.) ...
if (.Machine$sizeof.longdouble == 16) {
    # 2 not 4 is double precision limit which base::unique() relies on in this test
    # valgrind will also return (3) instead of (2) here.. due to floating point precision limitation. changing the last two values to 1.2345678901234563 and 1.2345678901234564 returns 2.
    test(1262,
         length(unique(c(1.2345678901234560, 1.2345678901234561, 1.2345678901234562, 1.2345678901234563))),
         2L)
}
# one less digit is limit
DT <- data.table(id = c(1.234567890123450, 1.234567890123451, 1.234567890123452, 1.234567890123453))
test(1263, length(unique(DT$id)), 4L)
test(1264, DT[, .N, by = id]$N, 4L)  # 1 byte rounding isn't enough
setNumericRounding(0)
test(1265, DT[, .N, by = id]$N, INT(1, 1, 1, 1))
test(1266, getNumericRounding(), 0L)
setNumericRounding(2)

# fread reading NA in logical columns, #4766
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be reassigned
DF = data.frame(I=1:3, L=c(TRUE,FALSE,NA), R=3.14)
write.csv(DF,f<-tempfile(),row.names=FALSE)
test(1267.1, fread(f)$L, c(TRUE, FALSE, NA))
test(1267.2, fread(f), as.data.table(read.csv(f)))
unlink(f)  # remove the temporary csv

### FR #2722 test begins here ###
#################################
# FR #2722: optimise j = c(lapply(.SD, fun), ...), where any number of lapply(.SD, ...)
# terms may appear, in any order. Each case computes the reference answer with
# datatable.optimize=1L, then reruns with Inf and checks both the result and the verbose output.
set.seed(45L)
dt <- data.table(a = sample(2, 10, TRUE), b = sample(3, 10, TRUE),
                 c = sample(4, 10, TRUE), d = sample(5, 10, TRUE))

options(datatable.optimize = 1L)
ans2 <- dt[, c(lapply(.SD, mean), lapply(.SD, sum)), by = a]
options(datatable.optimize = Inf)
test(1268.1,
     dt[, c(lapply(.SD, mean), lapply(.SD, sum)), by = a, verbose = TRUE],
     ans2,
     output = "GForce optimized j to 'list(gmean(b), gmean(c), gmean(d), gsum(b), gsum(c), gsum(d))'")

options(datatable.optimize = 1L)
ans2 <- dt[, c(lapply(.SD, mean), .N), by = a]
options(datatable.optimize = Inf)
test(1268.2,
     dt[, c(lapply(.SD, mean), .N), by = a, verbose = TRUE],
     ans2,
     output = "lapply optimization changed j from 'c(lapply(.SD, mean), .N)' to 'list(mean(b), mean(c), mean(d), .N)'")

options(datatable.optimize = 1L)
ans2 <- dt[, c(list(c), lapply(.SD, mean)), by = a]
options(datatable.optimize = Inf)
test(1268.3,
     dt[, c(list(c), lapply(.SD, mean)), by = a, verbose = TRUE],
     ans2,
     output = "lapply optimization changed j from 'c(list(c), lapply(.SD, mean))' to 'list(c, mean(b), mean(c), mean(d))")

test(1268.4,
     dt[, c(as.list(c), lapply(.SD, mean)), by = a],
     error = "j doesn't evaluate to the same number of columns for each group")

options(datatable.optimize = 1L)
ans2 <- dt[, c(sum(d), lapply(.SD, mean)), by = a]
options(datatable.optimize = Inf)
test(1268.5,
     dt[, c(sum(d), lapply(.SD, mean)), by = a, verbose = TRUE],
     ans2,
     output = "GForce optimized j to 'list(gsum(d), gmean(b), gmean(c), gmean(d))'")

options(datatable.optimize = 1L)
ans2 <- dt[, c(list(sum(d)), lapply(.SD, mean)), by = a]
options(datatable.optimize = Inf)
test(1268.6,
     dt[, c(list(sum(d)), lapply(.SD, mean)), by = a, verbose = TRUE],
     ans2,
     output = "GForce optimized j to 'list(gsum(d), gmean(b), gmean(c), gmean(d))'")

# newly added tests for #861
# optimise, but no GForce
options(datatable.optimize = 1L)
ans2 <- dt[, c(list(sum(d), .I), lapply(.SD, mean)), by = a]
options(datatable.optimize = Inf)
test(1268.7,
     dt[, c(list(sum(d), .I), lapply(.SD, mean)), by = a, verbose = TRUE],
     ans2,
     output = "lapply optimization changed j from 'c(list(sum(d), .I), lapply(.SD, mean))' to 'list(sum(d), .I, mean(b), mean(c), mean(d))'")

# don't optimise .I in c(...)
options(datatable.optimize = 1L)
dt <- data.table(x = c(1, 1, 1, 2, 2, 2), y = 1:6)
ans2 <- dt[, c(.I, lapply(.SD, mean)), by = x]
options(datatable.optimize = Inf)
test(1268.8,
     dt[, c(.I, lapply(.SD, mean)), by = x, verbose = TRUE],
     ans2,
     output = "lapply optimization is on, j unchanged as 'c(.I, lapply(.SD, mean))'")

### FR #2722 tests end here ###

# Numeric and integer64 values spread over a very wide range, to test all bits
x <- sample(c(seq(-1e100, 1e100, length.out = 1e5),
              c(seq(-1e-100, 1e-100, length.out = 1e5))))
setNumericRounding(0)
test(1269, forderv(x), base::order(x))
setNumericRounding(2)   # not affected by rounding
test(1270, forderv(x), base::order(x))
if ("package:bit64" %in% search()) {
    x <- as.integer64(2)^(0:62)
    x <- sample(c(x, -x, 0))
    # bit64::order is called explicitly because GenomicRanges replaces order;
    # the try() guards against an old version of bit64
    if (!inherits(try(bit64::order(x), silent = TRUE), "try-error")) {
        test(1271, forderv(x), bit64::order(x))
    }
    DT <- data.table(a = as.integer64(2)^45 + 1:3, b = 1:6)
    test(1272, DT[, sum(b), by = a], data.table(a = DT$a[1:3], V1 = INT(5, 7, 9)))
    test(1273, unique(DT, by = "a"), DT[1:3])
    test(1274, duplicated(DT, by = "a"), rep(c(FALSE, TRUE), each = 3))
    setkey(DT, a)
    test(1275, DT[.(as.integer64(35184372088834))], DT[3:4])
    test(1276, unique(DT), DT[c(1, 3, 5)])
    test(1277, duplicated(DT), rep(c(FALSE, TRUE), 3))
}

# distinguishing small numbers from 0.0 as from v1.9.2,  test from Rick
# http://stackoverflow.com/questions/22290544/grouping-very-small-numbers-e-g-1e-28-and-0-0-in-data-table-v1-8-10-vs-v1-9-2
test_no <- 1278.001
for (dround in c(0, 2)) {
    # rounding should not affect the result here: although small, the value is very accurate (1 s.f.)
    setNumericRounding(dround)
    for (i in c(-30:-1, 1:30)) {
        DT <- data.table(c(1 * (10^i), 2, 9999, -1, 0, 1))
        # all six values must land in separate groups
        test(test_no, nrow(DT[, .N, by = V1]), 6)
        test_no <- test_no + 0.001
    }
}

# rounding of milliseconds, workaround, TO DO: #5445
# http://stackoverflow.com/questions/22356957/rounding-milliseconds-of-posixct-in-data-table-v1-9-2-ok-in-1-8-10
DT <- data.table(timestamp = as.POSIXct(c(
    "2013-01-01 17:51:00.707",
    "2013-01-01 17:51:59.996",
    "2013-01-01 17:52:00.059",
    "2013-01-01 17:54:23.901",
    "2013-01-01 17:54:23.913",
    "2013-01-01 17:54:23.914"
)))
setNumericRounding(2)
test(1279, duplicated(DT), rep(c(FALSE, TRUE), c(4, 2)))
setNumericRounding(1)
test(1280, duplicated(DT), rep(FALSE, 6))
setNumericRounding(2)

# FR #5465: setDT(keep.rownames=TRUE) on a data.frame puts the row names in column 'rn'
DF <- data.frame(x = 1:5, y = 10:6)
rownames(DF) <- letters[1:5]
test(1281,
     setDT(DF, keep.rownames = TRUE),
     data.table(rn = letters[1:5], x = 1:5, y = 10:6))

# Bug #5415: .BY must keep its names, so that .BY$fruit works
DT <- data.table(fruit = c("apple", "peach", "pear"))
test(1282,
     DT[, ans := .BY$fruit, by = fruit],
     data.table(fruit = DT$fruit, ans = DT$fruit))

# bug #5443: get() in j must see i's columns when i is a data.table
set.seed(1L)
dt1 <- data.table(a = rep(1:2, each = 2), c = sample(10, 4))
dt2 <- data.table(b = rep(2:3), c = sample(20, 2), d = sample(20, 2))
setkey(dt1, a)
setkey(dt2, b)

# without by
test(1283.1,
     dt1[dt2, list(a = a, c = get('c'), i.c = get('i.c'))],
     dt1[dt2, list(a = a, c = c, i.c = i.c)])
test(1283.2,
     dt1[dt2, list(a = a, d = get('d'))],
     dt1[dt2, list(a = a, d = d)])
# with by
test(1283.3,
     dt1[dt2, list(a = a, c = get('c'), i.c = get('i.c')), by = .EACHI],
     dt1[dt2, list(a = a, c = c, i.c = i.c), by = .EACHI])
test(1283.4,
     dt1[dt2, list(a = a, d = get('d')), by = .EACHI],
     dt1[dt2, list(a = a, d = d), by = .EACHI])

# bug #5583: cases such as dt[order(abs(x))] were missed
dt <- data.table(x = c(1L, -2L, 3L))
test(1284.1, dt[order(abs(x))],  dt)
test(1284.2, dt[order(-abs(x))], dt[3:1])

# bug #5582: unique/duplicated on an empty data.table returned NA
dt <- data.table(x = numeric(0), y = character(0), key = "x")
test(1285.1, duplicated(dt), duplicated.data.frame(dt))
test(1285.2, unique(dt),     dt)

# BUG #5672 fix: merge(all=TRUE) where the first table has zero rows
a   <- data.table(BOD, key = "Time")
b   <- data.table(BOD, key = "Time")[Time < 0]  # zero row data.table
ans <- merge(b, a, all = TRUE)
test(1287,
     ans,
     data.table(Time = a$Time, demand.x = NA_real_, demand.y = a$demand, key = "Time"))

# more rbindlist tests - duplicate columns with "fill=TRUE"
ll <- list(data.table(x = 1, y = -1, x = -2),
           data.table(y = 10, y = 20, y = 30, x = -10, a = "a", b = Inf, c = factor(1)))
test(1288.1, rbindlist(ll, use.names = TRUE, fill = FALSE),
     error = "Item 2 has 7 columns, inconsistent with item 1 which has 3 columns")
# modified after fixing #725
test(1288.2, rbindlist(ll, use.names = TRUE, fill = TRUE),
     data.table(x = c(1, -10), y = c(-1, 10), x = c(-2, NA), y = c(NA, 20), y = c(NA, 30),
                a = c(NA, "a"), b = c(NA, Inf), c = factor(c(NA, 1))))

# names of the result when binding two empty data.tables, or one empty and one non-empty
dt1 <- data.table(x = 1:5, y = 6:10)
dt2 <- dt1[x > 5]
setnames(dt3 <- copy(dt2), c("A", "B"))
test(1288.3, names(rbindlist(list(dt2, dt3))), c("x", "y"))
test(1288.4, names(rbindlist(list(dt3, dt2))), c("A", "B"))
test(1288.5, names(rbindlist(list(dt1, dt3))), c("x", "y"))
test(1288.6, names(rbindlist(list(dt3, dt1))), c("A", "B"))

# check fix for bug #5612
DT <- data.table(x = c(1, 2, 3))
test(1288.7, rbind(DT, DT, data.table()), rbind(DT, data.table(), DT))

# factor on fill=TRUE with NA column..
DT1 <- data.table(A = 1:3, B = letters[1:3])
DT2 <- data.table(B = letters[4:5], C = factor(1:2))
l   <- list(DT1, DT2)
test(1288.8, rbindlist(l, use.names = TRUE, fill = TRUE),
     data.table(A = c(1:3, NA_integer_, NA_integer_), B = letters[1:5],
                C = factor(c(NA, NA, NA, 1, 2))))

# adding more tests after modifying for better backwards compatibility:
# rbindlist and rbind both work fine even when certain elements of list are not named at all, as long as fill = FALSE, but use.names=TRUE errors when all names are NULL
# when fill=TRUE NO element of the list must have NULL names.
ll <- list(list(1:3, 4:6), list(5:7, 8:10))
test(1288.9,  rbindlist(ll), data.table(V1 = c(1:3, 5:7), V2 = c(4:6, 8:10)))
test(1288.10, rbindlist(ll, use.names = TRUE),
     error = "use.names=TRUE but no item of input list has any names.")
ll <- list(list(a = 1:3, b = 4:6), list(5:7, 8:10))
test(1288.11, rbindlist(ll, use.names = TRUE), data.table(a = c(1:3, 5:7), b = c(4:6, 8:10)))
ll <- list(list(1:3, 4:6), list(a = 5:7, b = 8:10))
test(1288.12, rbindlist(ll, use.names = TRUE), data.table(a = c(1:3, 5:7), b = c(4:6, 8:10)))
ll <- list(list(a = 1:3, 4:6), list(5:7, b = 8:10))
test(1288.13, rbindlist(ll, use.names = TRUE),
     error = "Answer requires 3 columns whereas one or more item(s) in the input list has only 2 columns. This could be because the items in the list may not")
ll <- list(list(a = 1:3, 4:6), list(5:7, b = 8:10))
test(1288.14, rbindlist(ll, fill = TRUE),
     data.table(a = c(1:3, rep(NA_integer_, 3L)), V1 = c(4:6, 5:7),
                b = c(rep(NA_integer_, 3L), 8:10)))
ll <- list(list(1:3, 4:6), list(5:7, 8:10))
test(1288.15, rbindlist(ll, fill = TRUE),
     error = "fill=TRUE, but names of input list at position 1")
ll <- list(list(1:3, 4:6), list(a = 5:7, b = 8:10))
test(1288.16, rbindlist(ll, fill = TRUE),
     error = "fill=TRUE, but names of input list at position 1")

# TO DO: TODO: think of and add more tests for rbindlist

# #5647: a grouped := whose RHS is longer than the group warns and the extra items are not used
dt <- data.table(x = 1L, y = 1:10)
cp <- copy(dt)
test(1289.1,
     dt[, z := c(rep(NA, 5), y), by = x],
     cp[, z := c(rep(NA, 5), y[1:5])],
     warning = "RHS 1 is length 15")
dt <- data.table(x = c(1:2), y = 1:10)
cp <- copy(dt)
test(1289.2,
     dt[, z := c(rep(NA, 5), y), by = x],
     cp[, z := rep(NA_integer_, 10)],
     warning = "RHS 1 is length 10")

########################################
# Extensive testing for "duplicate" names
########################################
# Rules: Basically, if index is directly given in 'j', just those columns are touched/operated on. But if 'column' names are given and there are more than one 
# occurrence of that column, then it's hard to decide which to keep and which to remove. So, to remove, all are removed, to keep, always the first is kept.
# 1) when i,j,by are all absent (or) just 'i' is present then ALL duplicate columns are returned.
# 2) When 'with=FALSE' and 'j' is a character and 'notj' is TRUE, all instances of the column to be removed will be removed.
# 3) When 'with=FALSE' and 'j' is a character and 'notj' is FALSE, only the first column will be recognised in presence of duplicate columns.
# 4) When 'with=FALSE' and 'j' is numeric and 'notj' is TRUE, just those indices will be removed.
# 5) When 'with=FALSE' and 'j' is numeric and 'notj' is FALSE, all columns for indices given, if valid, are returned. (FIXES #5688)
# 6) When .SD is in 'j', but '.SDcols'  is not present, ALL columns are subset'd - FIXES BUG #5008.
# 7) When .SD and .SDcols are present and .SDcols is numeric, columns corresponding to the given indices are returned.
# 8) When .SD and .SDcols are present and .SDcols is character, duplicate column names will only return the first column, each time.
# 9) When .SD and .SDcols are present and .SDcols is numeric, and it's -SDcols, then just those columns are removed.
# 10) When .SD and .SDcols are present and .SDcols is character and -SDcols, then all occurrences of that object is removed.
# 11) When no .SD and no .SDcols and no with=FALSE, only duplicate column names will return only the first column each time.
# 12) With 'get("col")', it's the same as with all character types.
# 13) A logical expression in 'j'.
# 14) Finally, no tests but.. using 'by' with duplicate columns and aggregating may not return the intended result, as it may operate on column names in some cases.

# One example with duplicated column names covers every numbered case:
# DT holds both rows, DT1/DT2 hold row 1 / row 2, and ll is the same data as a plain list
DT  <- data.table(x = 1:2, y = 3:4, x = 5:6, x = 7:8, y = 9:10, z = 11:12)
DT1 <- data.table(x = 1L, y = 3L, x = 5L, x = 7L, y = 9L, z = 11L)
DT2 <- data.table(x = 2L, y = 4L, x = 6L, x = 8L, y = 10L, z = 12L)
ll  <- list(x = 1:2, y = 3:4, x = 5:6, x = 7:8, y = 9:10, z = 11:12)

# case (1)
test(1290.1, DT[1],      DT1)
test(1290.2, DT[],       DT)
test(1290.3, DT[(TRUE)], DT)
# case (2)
test(1290.4, DT[, !"x", with = FALSE],          as.data.table(ll[c(2, 5, 6)]))
test(1290.5, DT[, !"y", with = FALSE],          as.data.table(ll[c(1, 3, 4, 6)]))
test(1290.6, DT[, !c("x", "x"), with = FALSE],  as.data.table(ll[c(2, 5, 6)]))
test(1290.7, DT[, !c("y", "y"), with = FALSE],  as.data.table(ll[c(1, 3, 4, 6)]))
# case (3)
test(1290.9,  DT[, "x", with = FALSE],          as.data.table(ll[1]))
test(1290.10, DT[, "y", with = FALSE],          as.data.table(ll[2]))
test(1290.11, DT[, c("x", "x"), with = FALSE],  as.data.table(ll[c(1, 1)]))
test(1290.12, DT[, c("y", "y"), with = FALSE],  as.data.table(ll[c(2, 2)]))
# case (4)
test(1290.13, DT[, !3, with = FALSE],              as.data.table(ll[c(1, 2, 4, 5, 6)]))
test(1290.14, DT[, !c(1, 1, 3, 4), with = FALSE],  as.data.table(ll[c(2, 5, 6)]))
test(1290.15, DT[, !2, with = FALSE],              as.data.table(ll[c(1, 3, 4, 5, 6)]))
test(1290.16, DT[, !c(2, 5, 2), with = FALSE],     as.data.table(ll[c(1, 3, 4, 6)]))
# case (5)
test(1290.17, DT[, 3, with = FALSE],              as.data.table(ll[3]))
test(1290.18, DT[, c(1, 1, 3, 4), with = FALSE],  as.data.table(ll[c(1, 1, 3, 4)]))
test(1290.19, DT[, 2, with = FALSE],              as.data.table(ll[2]))
test(1290.20, DT[, c(2, 5, 2), with = FALSE],     as.data.table(ll[c(2, 5, 2)]))
# case (6)
test(1290.21, DT[, .SD],                         as.data.table(ll))
test(1290.22, DT[, .SD[1]],                      DT[1])
test(1290.23, DT[, .SD[1, !3, with = FALSE]],    as.data.table(DT[1, !3, with = FALSE]))
# case (7)
test(1290.24, DT[, .SD, .SDcols = c(1, 1, 3, 4)],      as.data.table(ll[c(1, 1, 3, 4)]))
# case (8)
test(1290.25, DT[, .SD, .SDcols = c("x", "x", "y")],   as.data.table(ll[c(1, 1, 2)]))
# case (9)
test(1290.26, DT[, .SD, .SDcols = -c(1, 2)],           as.data.table(ll[c(-(1:2))]))
# case (10)
test(1290.27, DT[, .SD, .SDcols = -c("x")],            as.data.table(ll[c(2, 6)]))
# case (11)
test(1290.28, DT[, x],                    ll[[1]])
test(1290.29, DT[, list(x, x, y, y, y)],  as.data.table(ll[c(1, 1, 2, 2, 2)]))
test(1290.30, DT[, list(x, x, y)],        as.data.table(ll[c(1, 1, 2)]))
# case (12)
test(1290.31, DT[, get("x")],                    ll[[1]])
test(1290.32, DT[, list(get("x"))],              setnames(as.data.table(ll[1]), "V1"))
test(1290.33, DT[, list(get("x"), get("y"))],    setnames(as.data.table(ll[1:2]), c("V1", "V2")))
# case (13)
test(1290.34, DT[, names(DT) == "x", with = FALSE],  as.data.table(ll[c(1, 3, 4)]))

# Bug #5376: DT[, bla := character(0), by=.] didn't add the new column when DT is an empty data.table
dt1  <- data.table(a = character(0), b = numeric(0))
ans1 <- data.table(a = character(0), b = numeric(0), c = numeric(0))
ans2 <- data.table(a = character(0), b = numeric(0), c = numeric(0), d = integer(0))
test(1291.1, dt1[, c := max(b), by = "a"], ans1, warning = "no non-missing arguments to max")
test(1291.2, dt1[, d := integer(0), by = a], ans2)

# Bug #5714: deselecting every column with with=FALSE gives the null data.table
test(1292.1, data.table(x = 1:2, y = 3:4)[, -(1:2), with = FALSE],        null.data.table())
test(1292.2, data.table(x = 1:2)[, -1, with = FALSE],                     null.data.table())
test(1292.3, data.table(x = 1:2, y = 3:4)[, !c("x", "y"), with = FALSE],  null.data.table())
test(1292.4, data.table(x = 1:2)[, !c("x"), with = FALSE],                null.data.table())

# Bug #5435: print.data.table must honour the 'digits' argument, list columns included
DT <- structure(
    list(
        fisyr = 1995:1996,
        er    = list(c(1, 3), c(1, 3)),
        eg    = c(0.0197315833926059, 0.0197315833926059),
        esal  = list(c(2329.89763779528, 2423.6811023622), c(2263.07456978967, 2354.16826003824)),
        fr    = list(c(4, 4), c(4, 4)),
        fg    = c(0.039310363070415, 0.039310363070415),
        fsal  = list(c(2520.85433070866, 2520.85433070866), c(2448.55449330784, 2448.55449330784)),
        mr    = list(c(5, 30), c(5, 30)),
        mg    = c(0.0197779376457164, 0.0197779376457164),
        msal  = list(c(2571.70078740157, 4215.73622047244), c(2497.94263862333, 4094.82600382409))
    ),
    .Names    = c("fisyr", "er", "eg", "esal", "fr", "fg", "fsal", "mr", "mg", "msal"),
    class     = c("data.table", "data.frame"),
    row.names = c(NA, -2L)
)

# make sure the console is at least 80 wide before the output is captured
if (getOption("width") < 80) options(width = 80)
ans1 <- capture.output(print(DT, digits = 4, row.names = FALSE))
ans2 <- c(
    " fisyr  er      eg      esal  fr      fg      fsal    mr      mg      msal",
    "  1995 1,3 0.01973 2330,2424 4,4 0.03931 2521,2521  5,30 0.01978 2572,4216",
    "  1996 1,3 0.01973 2263,2354 4,4 0.03931 2449,2449  5,30 0.01978 2498,4095"
)
test(1293, ans1, ans2)

## Fixes bug #5442
## Also improves upon bug fix #2551 to provide better warnings and at better places:
## each column type (integer a, double b, logical c, list d, character e) is assigned
## RHS values of various types; the resulting column and any coercion warning are checked.
dt <- data.table(a = 1:3, b = c(7, 8, 9), c = c(TRUE, NA, FALSE), d = as.list(4:6), e = c("a", "b", "c"))

# integer column
test(1294.1,  dt[, a := 1]$a,             rep(1L, 3L))
test(1294.2,  dt[, a := 1.5]$a,           rep(1L, 3L),          warning = "Coerced 'double' RHS to 'integer' to match the column's type")
test(1294.3,  dt[, a := NA]$a,            rep(NA_integer_, 3L))
test(1294.4,  dt[, a := "a"]$a,           rep(NA_integer_, 3L), warning = "NAs introduced by coercion")
test(1294.5,  dt[, a := list(list(1))]$a, rep(1L, 3L),          warning = "Coerced 'list' RHS to 'integer' to match the column's type")
test(1294.6,  dt[, a := list(1L)]$a,      rep(1L, 3L))
test(1294.7,  dt[, a := list(1)]$a,       rep(1L, 3L))
test(1294.8,  dt[, a := TRUE]$a,          rep(1L, 3L),          warning = "Coerced 'logical' RHS to 'integer' to match the column's type")
# double column
test(1294.9,  dt[, b := 1L]$b,            rep(1, 3))
test(1294.10, dt[, b := NA]$b,            rep(NA_real_, 3))
test(1294.11, dt[, b := "bla"]$b,         rep(NA_real_, 3),     warning = "NAs introduced by coercion")
test(1294.12, dt[, b := list(list(1))]$b, rep(1, 3),            warning = "Coerced 'list' RHS to 'double' to match the column's type")
test(1294.13, dt[, b := TRUE]$b,          rep(1, 3),            warning = "Coerced 'logical' RHS to 'double' to match the column's type")
test(1294.14, dt[, b := list(1)]$b,       rep(1, 3))
# logical column
test(1294.15, dt[, c := 1]$c,             rep(TRUE, 3),         warning = "Coerced 'double' RHS to 'logical' to match the column's type")
test(1294.16, dt[, c := 1L]$c,            rep(TRUE, 3),         warning = "Coerced 'integer' RHS to 'logical' to match the column's type")
test(1294.17, dt[, c := NA]$c,            rep(NA, 3))
test(1294.18, dt[, c := list(1)]$c,       rep(TRUE, 3),         warning = "Coerced 'double' RHS to 'logical' to match the column's type")
test(1294.19, dt[, c := list(list(1))]$c, rep(TRUE, 3),         warning = "Coerced 'list' RHS to 'logical' to match the column's type")
test(1294.20, dt[, c := "bla"]$c,         rep(NA, 3),           warning = "Coerced 'character' RHS to 'logical' to match the column's type")
# list column
test(1294.21, dt[, d := 1]$d,             rep(list(1), 3),      warning = "Coerced 'double' RHS to 'list' to match the column's type")
test(1294.22, dt[, d := 1L]$d,            rep(list(1L), 3),     warning = "Coerced 'integer' RHS to 'list' to match the column's type")
test(1294.23, dt[, d := TRUE]$d,          rep(list(TRUE), 3),   warning = "Coerced 'logical' RHS to 'list' to match the column's type")
test(1294.24, dt[, d := "bla"]$d,         rep(list("bla"), 3),  warning = "Coerced 'character' RHS to 'list' to match the column's type")
test(1294.25, dt[, d := list(list(1))]$d, rep(list(1), 3))
# character column
test(1294.26, dt[, e := 1]$e,             rep("1", 3),          warning = "Coerced 'double' RHS to 'character' to match the column's type")
test(1294.27, dt[, e := 1L]$e,            rep("1", 3),          warning = "Coerced 'integer' RHS to 'character' to match the column's type")
test(1294.28, dt[, e := TRUE]$e,          rep("TRUE", 3),       warning = "Coerced 'logical' RHS to 'character' to match the column's type")
test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3),          warning = "Coerced 'list' RHS to 'character' to match the column's type")
test(1294.30, dt[, e := "bla"]$e,         rep("bla", 3))
test(1294.31, dt[, e := list("bla2")]$e,  rep("bla2", 3))

# FR #5357, when LHS evaluates to integer(0), provide warning and return dt, not an error.
dt = data.table(a = 1:5, b1 = 1:5, b2 = 1:5)
# The grep runs over dt's own names (a, b1, b2), none of which contains "c", so the
# LHS is integer(0). It previously used names(d), an object this test does not create.
test(1295, dt[, grep("c", names(dt)) := NULL], dt, warning="length(LHS) = 0, meaning no columns to delete or assign RHS to")

# Updating logical column in one-row DT (corruption of new R 3.1 internal globals for TRUE, FALSE and NA)
DT = data.table(a=1:6, b=c(TRUE,FALSE))
test(1296, DT[,list(b,sum(b)),by=a], data.table(a=1:6, b=c(TRUE,FALSE), V2=c(1L,0L)))  # was error "the ... list does not contain 2 elements"
DT = DT[1L]
set(DT,1L,"b",FALSE)  # passing 1L as i here is needed to avoid column plonk, so changes the logical singleton in place
test(1297, as.integer(TRUE[1]), 1L)   # In R 3.1, TRUE[1] returns the global TRUE but TRUE doesn't yet (parses as new vector)
test(1298, as.integer(TRUE), 1L)
# original example from James Sams, with T/F spelled out as TRUE/FALSE (T and F are reassignable variables) :
upc_table = data.table(upc=1:100000, upc_ver_uc=rep(c(1,2), times=50000), is_PL=rep(c(TRUE, FALSE, FALSE, TRUE), each=25000), product_module_code=rep(1:4, times=25000), ignore.column=2:100001)
test(1299, upc_table[, .N, by=list(upc, upc_ver_uc)][,max(N)], 1L)  # all size 1 groups
test(1300, upc_table[, list(is_PL, product_module_code), keyby=list(upc, upc_ver_uc)][,upc[1:3]], 1:3L)   # was warning "internal TRUE value has been modified"

# Same test but for singleton small integers which r-devel also plan to globalise internally.
DT = data.table(a=1:6, b=0:1)
test(1301, DT[,list(b,sum(b)),by=a], data.table(a=1:6, b=c(0L,1L), V2=c(0L,1L)))
# Keep one row (b is a single 0L) and overwrite that value by reference with 3L.
DT = DT[1L]
set(DT,1L,"b",3L)
# The literal 0L must still equal zero afterwards, both subsetted and as a plain constant.
test(1302, 0L[1L], 3L-3L)
test(1303, 0L, 3L-3L)

# FR #5760. Test to just make sure that GForce and dogroups with .N are giving the same results.
# The same grouped query is run under datatable.optimize = 1L and = 2L and the two results compared.
set.seed(2L)
dt <- data.table(x=sample(rep(1:5e3, each=3)), y=sample(10))
options(datatable.optimize = 1L)
ans1 <- dt[, list(.N, sum(y)), by=x]
options(datatable.optimize = 2L)
ans2 <- dt[, list(.N, sum(y)), by=x]
test(1304.1, ans1, ans2)

# Same comparison again, this time on a table keyed by the grouping column.
dt <- data.table(x=sample(rep(1:5e3, each=3)), y=sample(10), key="x")
options(datatable.optimize = 1L)
ans1 <- dt[, list(.N, sum(y)), by=x]
options(datatable.optimize = 2L)
ans2 <- dt[, list(.N, sum(y)), by=x]
test(1304.2, ans1, ans2)
# NOTE: datatable.optimize is left at 2L after this section.

# FR #5528
# setDF on a data.table is expected to give the equivalent data.frame.
DT <- data.table(x=1:5, y=6:10)
test(1305.1, setDF(DT), data.frame(x=1:5, y=6:10))
# setDF should return if input is data.frame, not error.
df <- data.frame(x=1:5, y=6:10)
test(1305.2, setDF(df), df) # setDF works on data.frame
# setDF also works on lists with equal lengths, #1132
df <- list(a=1:5, b=6:10)
test(1305.3, data.frame(df), setDF(df))
# unnamed list: expected to match going via as.data.table() first
df <- list(1:5, 6:10)
test(1305.4, setDF(as.data.table(df)), setDF(df))
# invalid inputs: a plain vector, and a list whose elements differ in length
test(1305.5, setDF(1:5), error="setDF only accepts")
test(1305.6, setDF(list(1, 2:3)), error="All elements in argument")
# Tests .7 - .13 for FR #1320: setDF accepts rownames argument
dt  <- data.table(a=1:5, b=6:10)
df  <- data.frame(a=1:5, b=6:10)
lst <- list(a=1:5, b=6:10)
# df2 is the expected result: same columns with row names A..E
df2 <- data.frame(a=1:5, b=6:10)
rownames(df2) <- LETTERS[1:5]
test(1305.7, setDF(dt, rownames=LETTERS[1:5]), df2)
test(1305.8, setDF(df, rownames=LETTERS[1:5]), df2)
test(1305.9, setDF(lst,rownames=LETTERS[1:5]), df2)
# setDF returns an error for each type if rownames incorrect length
# (inputs are rebuilt first because the three calls above were applied to dt, df and lst)
dt  <- data.table(a=1:5, b=6:10)
df  <- data.frame(a=1:5, b=6:10)
lst <- list(a=1:5, b=6:10)
test(1305.10, setDF(dt, rownames="a"), error='rownames incorrect length')
test(1305.11, setDF(df, rownames="a"), error='rownames incorrect length')
test(1305.12, setDF(lst,rownames="a"), error='rownames incorrect length')
# setDF returns an error when rownames contains duplicates
test(1305.13, setDF(dt, rownames=rep("a",5)), error='rownames contains duplicates')

# .SD retains as much of head(key) as appropriate.
#  by= always keeps data appearance order, so it's which columns are grouped and selected that drive how much of key is retained
DT = data.table(a=1:3,b=1:6,c=1:6,key="a,b")
# rows selected in key order: full key expected on .SD
test(1306, DT[1:2,key(.SD)], c("a","b"))
# rows selected in reverse order: no key expected
test(1307, DT[2:1,key(.SD)], NULL)
# grouping by a (the first key column): key(.SD) is expected to be NULL, hence an empty result
test(1308, DT[,key(.SD),by=a], data.table(a=integer()))
# grouping by b: only "a" is expected to remain as key of .SD
test(1309, DT[,key(.SD),by=b], data.table(b=DT$b, V1="a"))
# grouping by an expression on the non-key column c: both key columns expected
test(1310, DT[,key(.SD),by=c%%2L], data.table(c=c(1L,1L,0L,0L), V1=c("a","b","a","b")))
test(1311, DT[,list(list(key(.SD))),by=a,.SDcols=1:2], data.table(a=1:3, V1=list(c("a","b")),key="a"))  # .SDcols as Arun found

# That setkey can't operate on locked tables such as .SD. Added in v1.9.3.
DT = data.table(a=1:3,b=6:1)
# Calling setkey(.SD) inside a grouped j is expected to raise the error below.
test(1312, DT[,setkey(.SD),by=a], error="Setting a physical key on .SD is reserved for possible future use")
# was warning "Already keyed by this key but had invalid row order" due to the key not being cleared after the previous group.  A solution could have been to put back the original key on populating .SD for each group.  But instead we reserve it for future use and push the user towards doing it a different more efficient way (see Arun's speedups in the datatable-help thread).

# gmin and gmax extensive testing (because there are tricky cases)
# Pattern used throughout: the optimised min()/max() in j is compared against the same query
# written with base:::min / base:::max.
DT <- data.table(x=rep(1:6, each=3), y=INT(4,-1,0, NA,4,10, 4,NA,10, 4,10,NA, -2147483647, -2147483647, -2147483647, 2147483647, 2147483647, 2147483647))
# make sure GForce is running
options(datatable.optimize=2L)

# for integers
test(1313.1, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.2, DT[, max(y), by=x], DT[, base:::max(y), by=x])
test(1313.3, DT[, min(y, na.rm=TRUE), by=x], DT[, base:::min(y, na.rm=TRUE), by=x])
test(1313.4, DT[, max(y, na.rm=TRUE), by=x], DT[, base:::max(y, na.rm=TRUE), by=x])
# testing all NA - GForce automatically converts to numeric.. optimize=1L errors due to change from integer/numeric (like median)
DT[x==6, y := INT(NA)]
test(1313.5, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.6, DT[, max(y), by=x], DT[, base:::max(y), by=x])
# group 6 is now all NA: with na.rm=TRUE the expected result is Inf / -Inf plus a warning
test(1313.7, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(-1,4,4,4,-2147483647,Inf)), warning="No non-missing")
test(1313.8, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(4,10,10,10,-2147483647,-Inf)), warning="No non-missing")

# for numeric
DT <- data.table(x=rep(1:6, each=3), y=c(4,-1,0, NA,4,10, 4,NA,10, 4,10,NA, -Inf, NA, NA, Inf, NA, NA))
test(1313.9, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.10, DT[, max(y), by=x], DT[, base:::max(y), by=x])
test(1313.11, DT[, min(y, na.rm=TRUE), by=x], DT[, base:::min(y, na.rm=TRUE), by=x])
test(1313.12, DT[, max(y, na.rm=TRUE), by=x], DT[, base:::max(y, na.rm=TRUE), by=x])
# testing all NA - GForce automatically converts to numeric.. optimize=1L errors due to change from integer/numeric (like median)
DT[x==6, y := NA_real_]
test(1313.13, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.14, DT[, max(y), by=x], DT[, base:::max(y), by=x])
test(1313.15, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(-1,4,4,4,-Inf,Inf)), warning="No non-missing")
test(1313.16, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(4,10,10,10,-Inf,-Inf)), warning="No non-missing")

# for date (attribute check.. especially after issues/689 !!!)
# Expected values are rows of DT itself, so the POSIXct attributes of y have to match as well.
DT <- data.table(x = rep(letters[1:2], each=5), y = as.POSIXct('2010-01-01', tz="UTC") + seq(0, 86400*9, 86400))
test(1313.17, DT[, list(y=min(y)), by=x], DT[c(1,6)])
test(1313.18, DT[, list(y=max(y)), by=x], DT[c(5,10)])
# make the first row of each group NA
DT[c(1,6), y := NA]
test(1313.19, DT[, list(y=min(y)), by=x], DT[c(1,6)])
test(1313.20, DT[, list(y=max(y)), by=x], DT[c(1,6)])
test(1313.21, DT[, list(y=min(y, na.rm=TRUE)), by=x], DT[c(2,7)])
test(1313.22, DT[, list(y=max(y, na.rm=TRUE)), by=x], DT[c(5,10)])

# for character
set.seed(1L)
DT <- data.table(x=rep(1:6, each=3), y=sample(c("", letters[1:3], NA), 18, TRUE))
test(1313.23, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.24, DT[, max(y), by=x], DT[, base:::max(y), by=x])
test(1313.25, DT[, min(y, na.rm=TRUE), by=x], DT[, base:::min(y, na.rm=TRUE), by=x])
test(1313.26, DT[, max(y, na.rm=TRUE), by=x], DT[, base:::max(y, na.rm=TRUE), by=x])
DT[x==6, y := NA_character_]
test(1313.27, DT[, min(y), by=x], DT[, base:::min(y), by=x])
test(1313.28, DT[, max(y), by=x], DT[, base:::max(y), by=x])
# expected values below depend on the sample drawn under set.seed(1L) above
test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("a","a","c","","a",NA)), warning="No non-missing")
test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("b","a","c","a","c",NA)), warning="No non-missing")

# bug 700 - bmerge, roll=TRUE and nomatch=0L when i's key group occurs more than once
# dt1 (x) and dt2 (i) are both keyed by x,y; y holds Date values stored as day numbers.
dt1 <- data.table(structure(list(x = c(7L, 33L), y = structure(c(15912, 15912), class = "Date"), z = c(626550.35284, 7766.385)), .Names =
c("x", "y", "z"), class = "data.frame", row.names = c(NA, -2L)), key = "x,y")
dt2 <- data.table(structure(list(x = c(7L, 7L, 33L, 33L, 33L, 33L), y = structure(c(15884, 15917, 15884, 15884, 15917, 15917), class = "Date"), w = c(-0.118303, 0.141225, -0.03137, -0.02533, 0.045967, 0.043694)), .Names = c("x", "y", "w"), class = "data.frame", row.names = c(NA, -6L)), key = "x,y")
# Expected: only rows 2, 5 and 6 of dt2 find a rolled match; the result is keyed by x,y.
test(1317.1, dt1[dt2, roll=TRUE, nomatch=0L], data.table(x=c(7L,33L,33L), y=as.Date(c("2013-07-31", "2013-07-31", "2013-07-31")), z=c(dt1$z[1:2], dt1$z[2]), w=c(dt2$w[2], dt2$w[5:6]), key="x,y"))

# also test where 'i' is not sorted.
set.seed(1L)
dt2 <- dt2[sample(nrow(dt2))] # key should be gone
# The row positions used for w below depend on the permutation drawn under set.seed(1L).
test(1317.2, dt1[dt2, roll=TRUE, nomatch=0L], data.table(x=c(7L,33L,33L), y=as.Date(c("2013-07-31", "2013-07-31", "2013-07-31")), z=c(dt1$z[1:2], dt1$z[2]), w=c(dt2$w[1], dt2$w[c(2,6)])))

# bug fix for #472 : "parse" in j
# eval() of an expression built with parse(text=) inside j must give the same grouped result
# as writing the call directly.
set.seed(100)
nrow <- 100L
DT <- data.table(aa = sample(letters[1:5], nrow, replace = TRUE), bb = rnorm(nrow))
sumExpr <- parse(text = "sum(bb, na.rm = TRUE)")
meanExpr <- parse(text = "mean(bb, na.rm = TRUE)")
test(1318.1, DT[, eval(sumExpr), by = aa], DT[, sum(bb, na.rm=TRUE), by=aa])
test(1318.2, DT[, eval(meanExpr), by = aa], DT[, mean(bb, na.rm=TRUE), by=aa])
# both parsed expressions used together inside a named list()
test(1318.3, DT[, list(mySum = eval(sumExpr), myMean = eval(meanExpr)), by = aa], DT[, list(mySum=sum(bb, na.rm=TRUE), myMean=mean(bb, na.rm=TRUE)), by=aa])

# get DT[order(.)] to be 100% consistent with base, even though the way base does some things is *utterly ridiculous*, inconsistent.
# closes #696.
DT <- data.table(a = 1:4, b = 8:5, c=letters[4:1])
# order() called inside DT[...] on data.table arguments must agree with base:::order on the same arguments
test(1319.1, DT[order(DT[, "b", with=FALSE])], DT[base:::order(DT[, "b", with=FALSE])])
test(1319.2, DT[order(DT[, "c", with=FALSE])], DT[base:::order(DT[, "c", with=FALSE])])
test(1319.3, DT[order(DT[, c("b","c"), with=FALSE])], DT[base:::order(DT[, c("b","c"), with=FALSE])])
test(1319.4, DT[order(DT[, c("c","b"), with=FALSE])], DT[base:::order(DT[, c("c","b"), with=FALSE])])
test(1319.5, DT[order(DT[, "b", with=FALSE], DT[, "a", with=FALSE])], DT[base:::order(DT[, "b", with=FALSE], DT[, "a", with=FALSE])])
# test to make sure old things are not modified (ridiculous, but "consistency" demands it!)
# length-1 list arguments: expected result is just the first row
test(1319.6, DT[order(list(DT$a))], DT[1])
test(1319.7, DT[order(list(DT$a), list(DT$b))], DT[1])
# a single list holding two vectors is expected to be rejected with an error
test(1319.8, DT[order(list(DT$a, DT$b))], error="Column '1' is type 'list' which is not")

# FR #703. Not so extensive testing because test 1223 already tests for everything else extensively. Only integer64 here.
# this'll be the test for both DT[order(.)] and setorder(.) as both internally uses forder/forderv
# Only run when bit64 is attached (as.integer64 is needed).
if ("package:bit64" %in% search()) {
    # no NAs: na.last has no effect, so ans1/ans2 and ans3/ans4 are expected to be pairwise equal
    set.seed(45L)
    DT <- data.table(x=as.integer64(c(-50, 0, 50, 1e18, 1e-18)), y=sample(5))
    ans1 <- forder(DT, x, na.last=TRUE, decreasing=FALSE)
    ans2 <- forder(DT, x, na.last=FALSE, decreasing=FALSE)
    ans3 <- forder(DT, x, na.last=TRUE, decreasing=TRUE)
    ans4 <- forder(DT, x, na.last=FALSE, decreasing=TRUE)
    test(1320.1, ans1, as.integer(c(1,2,5,3,4)))
    test(1320.2, ans2, as.integer(c(1,2,5,3,4)))
    test(1320.3, ans3, as.integer(c(4,3,2,5,1)))
    test(1320.4, ans4, as.integer(c(4,3,2,5,1)))

    # with NAs at positions 3 and 6: expected first or last according to na.last
    set.seed(45L)
    DT <- data.table(x=as.integer64(c(-50, 0, NA, 50, 1e18, NA, 1e-18)), y=sample(7))
    ans1 <- forder(DT, x, na.last=TRUE, decreasing=FALSE)
    ans2 <- forder(DT, x, na.last=FALSE, decreasing=FALSE)
    ans3 <- forder(DT, x, na.last=TRUE, decreasing=TRUE)
    ans4 <- forder(DT, x, na.last=FALSE, decreasing=TRUE)

    test(1320.5, ans1, as.integer(c(1,2,7,4,5,3,6)))
    test(1320.6, ans2, as.integer(c(3,6,1,2,7,4,5)))
    test(1320.7, ans3, as.integer(c(5,4,2,7,1,3,6)))
    test(1320.8, ans4, as.integer(c(3,6,5,4,2,7,1)))
    
    # missed test - checking na.last=NA!
    # expected: the two NA entries appear as leading 0s in the returned order
    set.seed(45L)
    DT <- data.table(x=as.integer64(c(-50, 0, NA, 50, 1e18, NA, 1e-18)), y=sample(7))
    ans1 <- forder(DT, x, na.last=NA, decreasing=FALSE)
    ans2 <- forder(DT, x, na.last=NA, decreasing=TRUE)

    test(1320.9, ans1, as.integer(c(0,0,1,2,7,4,5)))
    test(1320.10, ans2, as.integer(c(0,0,5,4,2,7,1)))
}

# fread newlines inside quoted fields
test(1321, fread('A,B,C\n1,"foo\nbar",3\n4,baz,6'), data.table(A=c(1L,4L), B=c("foo\nbar","baz"), C=c(3L,6L)))
test(1322, fread('A,B,C\n1,"foo
bar",3\n4,baz,6'), data.table(A=c(1L,4L), B=c("foo\nbar","baz"), C=c(3L,6L)))
# NB: don't remove the newline after foo in test 1322 above, that's what's being tested.
test(1323, fread('col1,col2\n5,"4\n3"'), data.table(col1=5L, col2="4\n3"))
# unbalanced, balanced and doubled quotes in the last field of the last line
test(1324, fread('A,B,C\n1,4,"foo"\n2,5,"bar'), data.table(A=1:2,B=4:5,C=c("foo", "\"bar")))
test(1325, fread('A,B,C\n1,4,"foo"\n2,5,"bar"'), data.table(A=1:2,B=4:5,C=c("foo",'bar')))
test(1326, fread('A,B,C\n1,4,"foo"\n2,5,bar"'), data.table(A=1:2,B=4:5,C=c("foo",'bar"')))
test(1327, fread('A,B,C\n1,4,"foo"\n2,5,""bar""'), data.table(A=1:2,B=4:5,C=c("foo",'"bar"')))
# backslash-escaped quote followed by a separator inside a quoted field, read from a temporary file
cat('A,B\n1,"Joe \\",Bloggs"', file = f<-tempfile())
test(1328, fread(f), data.table(V1=1L, V2='Joe \\', V3='Bloggs"'), warning="Starting data input on line 2 and discarding line 1 because.*: A,B")
unlink(f)
test(1329, fread(), error="Input is either empty or fully whitespace after the skip or autostart")
# add tests for escaped escapes (backslashes) at the end of a field, quoted or not
test(1330, fread('A,B\nfoo,1\nAnalyst\\,2\nbar,3'), data.table(A=c('foo','Analyst\\','bar'), B=1:3))
test(1331.1, fread('A,B\nfoo,1\nAnalyst\\ ,2\nbar,3'), data.table(A=c('foo','Analyst\\','bar'), B=1:3)) # strip.white=TRUE
test(1331.2, fread('A,B\nfoo,1\nAnalyst\\ ,2\nbar,3', strip.white=FALSE), data.table(A=c('foo','Analyst\\ ','bar'), B=1:3))
test(1332, fread('A,B\nfoo,1\n"Analyst\\",2\nbar,3'), data.table(A=c('foo','Analyst\\','bar'), B=1:3))
# double \\ in this file means one in the input, so the above " is escaped by a single '\' but still read ok
test(1333.1, fread('A,B\nfoo,1\n"Analyst\\" ,2\nbar,3'), data.table(A = c("foo", "\"Analyst\\\"", "bar"), B = 1:3))
test(1333.2, fread('A,B\nfoo,1\n"Analyst\\" ,2\nbar,3', strip.white=FALSE), data.table(A = c("foo", "\"Analyst\\\" ", "bar"), B = 1:3))
test(1334, fread('A,B\nfoo,1\n"Analyst\\" ,",2\nbar,3'), data.table(A=c('foo', 'Analyst\\" ,', 'bar'), B=1:3))
test(1335, fread('A,B\nfoo,1\n"Analyst\\\\",2\nbar,3'), data.table(A=c('foo','Analyst\\\\','bar'), B=1:3))

# data from 12GB file in comments on http://stackoverflow.com/a/23858323/403310 ...
# note that read.csv gets this wrong and puts jacoleman high school into the previous field, then fills the rest of the line silently.
# First variant: field D of the first data line opens with a doubled quote (""jacoleman ...").
cat('A,B,C,D,E,F
"12",0,"teacher private nfp\\\\\\\\"",""jacoleman high school","",""
"TX",77406,"business analyst\\\\\\\\\\\\\\","the boeing co","",""
"CA",94116,"na\\none","retired","",""
', file = f<-tempfile())   # aside: notice the \\ before n of none as well
test(1336.1, fread(f), data.table(A = c("12", "TX", "CA"), B = c(0L, 77406L, 94116L), C = c("teacher private nfp\\\\\\\\\"", "business analyst\\\\\\\\\\\\\\", "na\\none"), D = c("\"\"jacoleman high school\"", "the boeing co", "retired"), E = c("", "", ""), F = c("", "", "")))
# Second variant, written to the same file: field D is quoted normally.
cat('A,B,C,D,E,F
"12",0,"teacher private nfp\\\\\\\\"","jacoleman high school","",""
"TX",77406,"business analyst\\\\\\\\\\\\\\","the boeing co","",""
"CA",94116,"na\\none","retired","",""
', file = f)
test(1336.2, fread(f), data.table(A=c("12","TX","CA"), B=c(0L,77406L,94116L),C=c('teacher private nfp\\\\\\\\"','business analyst\\\\\\\\\\\\\\','na\\none'), D=c('jacoleman high school','the boeing co','retired'),E="",F=""))
unlink(f)

# file names ending with \ (quite common)
# http://stackoverflow.com/questions/24375832/fread-and-column-with-a-trailing-backslash
cat('file,size\n"windows\\user\\",123\n', file = f<-tempfile())
# the trailing backslash must be kept as data rather than escaping the closing quote
test(1337, fread(f), data.table(file='windows\\user\\',size=123L))
# and fread must agree with read.csv on this file
test(1338, fread(f), as.data.table(read.csv(f,stringsAsFactors=FALSE)))
unlink(f)

# TO DO, by checking for balanced embedded quotes
# cat('http,size\n"www.blah?x="one",y="two","three"",123\n', file = f<-tempfile())
# read.csv(f) -- unusually, seems to be a case it doesn't handle
# test(1339, fread(f), data.table(http='www.blah?x="one",y="two","three"',size=123L))
# unlink(f)

# FR #706 - setorder and setorderv now has 'na.last=TRUE/FALSE' argument. It can't have value NA though, like `DT[order(.)]` as it reorders by reference, doesn't subset. Simple tests.
set.seed(45L)
DT <- data.table(x=sample(c(-2:2, NA_integer_), 20, TRUE), y=sample(c(-1:1, NA, Inf, -Inf, NaN), 20, TRUE))
# setorder() on a copy must match DT[order(...)] for each column, direction and na.last value
test(1340.1, setorder(copy(DT),  x, na.last=TRUE ), DT[order( x, na.last=TRUE)])
test(1340.2, setorder(copy(DT),  x, na.last=FALSE), DT[order( x, na.last=FALSE)])
test(1340.3, setorder(copy(DT), -x, na.last=TRUE ), DT[order(-x, na.last=TRUE)])
test(1340.4, setorder(copy(DT), -x, na.last=FALSE), DT[order(-x, na.last=FALSE)])
test(1340.5, setorder(copy(DT),  y, na.last=TRUE ), DT[order( y, na.last=TRUE)])
test(1340.6, setorder(copy(DT),  y, na.last=FALSE), DT[order( y, na.last=FALSE)])
test(1340.7, setorder(copy(DT), -y, na.last=TRUE ), DT[order(-y, na.last=TRUE)])
test(1340.8, setorder(copy(DT), -y, na.last=FALSE), DT[order(-y, na.last=FALSE)])

# same eight combinations through setorderv(), with the column as a string and order as 1L / -1L
test(1340.9,  setorderv(copy(DT), "x",  1L, na.last=TRUE ), DT[order( x, na.last=TRUE)])
test(1340.10, setorderv(copy(DT), "x",  1L, na.last=FALSE), DT[order( x, na.last=FALSE)])
test(1340.11, setorderv(copy(DT), "x", -1L, na.last=TRUE ), DT[order(-x, na.last=TRUE)])
test(1340.12, setorderv(copy(DT), "x", -1L, na.last=FALSE), DT[order(-x, na.last=FALSE)])
test(1340.13, setorderv(copy(DT), "y",  1L, na.last=TRUE ), DT[order( y, na.last=TRUE)])
test(1340.14, setorderv(copy(DT), "y",  1L, na.last=FALSE), DT[order( y, na.last=FALSE)])
test(1340.15, setorderv(copy(DT), "y", -1L, na.last=TRUE ), DT[order(-y, na.last=TRUE)])
test(1340.16, setorderv(copy(DT), "y", -1L, na.last=FALSE), DT[order(-y, na.last=FALSE)])

# na.last=NA is rejected by both functions
test(1340.17, setorder(copy(DT), x, na.last=NA), error="na.last must be logical TRUE/FALSE")
test(1340.18, setorderv(copy(DT), "x", na.last=NA), error="na.last must be logical TRUE/FALSE")

# bug #481 - DT[, list(list(.)), by=.] on R v3.1.0
set.seed(1L)
# f wraps its argument in a list; used to produce a list column through a function call
f <- function(x) list(x)
DT <- data.table(x=sample(3,10,TRUE), y=as.numeric(sample(10)))
# each group's list element must hold that group's own values (expected values depend on set.seed(1L))
test(1341.1, DT[, list(list(y)), by=x], data.table(x=unique(DT$x), V1=list(c(3,5,9), c(2,6,4,1), c(10,7,8))))
test(1341.2, DT[, list(list(.I)), by=x], data.table(x=unique(DT$x), V1=list(c(1,5,10), c(2,3,8,9), c(4,6,7))))
test(1341.3, DT[, list(f(y)), by=x], data.table(x=unique(DT$x), V1=list(c(3,5,9), c(2,6,4,1), c(10,7,8))))
# test for list(list(.)) with :=
# the reference result on the right uses an explicit copy() of the per-group vector
test(1341.4, copy(DT)[, z := list(list(y)), by=x], copy(DT)[, z := list(list(copy(y))), by=x])
test(1341.5, copy(DT)[, z := list(list(.I)), by=x], copy(DT)[, z := list(list(copy(.I))), by=x])
test(1341.6, copy(DT)[, z := list(f(y)), by=x], copy(DT)[, z := list(f(copy(y))), by=x])

# Regression test for over-allocation (selfref) on the result of unique(), which uses
# the new subsetDT(): adding a column with := to that result must work.
bla = data.table(x = rep(c(1, 2), each = 2L), y = rep(1, 4L))
test(1342,
     unique(bla)[, bla := 2L],
     data.table(x = c(1, 2), y = 1, bla = 2L))

# Logical columns: an empty field and a literal NA are both expected to be read as NA.
test(1343,
     fread("A,B\n1,TRUE\n2,\n3,F"),
     data.table(A = 1:3, B = c(TRUE, NA, FALSE)))
test(1344,
     fread("A,B\n1,T\n2,NA\n3,"),
     data.table(A = 1:3, B = c(TRUE, NA, NA)))

# .N can be used inside i, where it stands for the number of rows (6 for this table).
DT <- data.table(a = rep(1:3, 2L), b = 1:6)
test(1348, DT[.N], DT[6L])                   # last row
test(1349, DT[.N-1:3], DT[c(5L, 4L, 3L)])    # rows 5, 4, 3 in that order
test(1350, DT[.N+1], DT[NA])                 # one past the end gives the same as DT[NA]

# Regression guard for #734: j evaluated on the rows matched by a keyed join.
dt <- data.table(id = rep(c("a", "b"), each = 2),
                 val = rep(c(1, 2, 3), times = c(1, 2, 1)),
                 key = "id,val")
test(1351.1, dt[J("a"), val], c(1, 2))
test(1351.2, dt[J("a"), range(val)], c(1, 2))

# New feature: .() in j and .() in by
# Each test checks that .() gives the same result as the equivalent list() / with=FALSE form.
DT = data.table(a=1:3, b=1:6, c=LETTERS[1:6])
# .() in j
test(1352.1, DT[,.(b)], DT[,list(b)])
test(1352.2, DT[,.(b,c)], DT[,c("b","c"),with=FALSE])
test(1352.3, DT[,.(sum(b)),by=a], DT[,sum(b),by=a])
test(1352.4, DT[,.(MySum=sum(b)), by=a], data.table(a=1:3, MySum=c(5L,7L,9L)))
# .() in by, including expressions, named expressions and several grouping terms
test(1352.5, DT[,sum(b),by=.(a)], DT[,sum(b),by=a])
test(1352.6, DT[,sum(b),by=.(a%%2)], DT[,sum(b),by=a%%2])
test(1352.7, DT[,sum(b),by=.(Grp=a%%2)], DT[,sum(b),by=list(Grp=a%%2)])
test(1352.8, DT[,sum(b),by=.(a%%2,c)], DT[,sum(b),by=list(a%%2,c)])

# that :=NULL together with i is now an error
DT = data.table(a=1:3, b=1:6)
# single column deletion with i supplied
test(1353.1, DT[2, b:=NULL], error="When deleting columns, i should not be provided")
# same error when only one of several RHS items is NULL
test(1353.2, DT[2, c("a","b"):=list(42, NULL)], error="When deleting columns, i should not be provided")

# order optimisation caused trouble due to chaining because of 'substitute(x)' usage in [.data.table.
set.seed(1L)
X = data.table(id=1:10, val1=sample(3,10,TRUE))
Y = data.table(val1=1:4, val2=8:5, key="val1")
setkey(X, val1)
# A join-update, a column deletion and an order() call chained in one expression;
# the expected val2 values depend on the sample drawn under set.seed(1L).
test(1354, X[Y, val2 := i.val2, allow.cartesian=TRUE][, val1 := NULL][order(id)], data.table(id=1:10, val2=as.integer(c(8,7,7,6,8,6,6,7,7,8))))

# Fix for #475, setDT(CO2) should error, as it's trying to modify the object whose binding is locked.
# CO2 is not locked in R 2.14.1 but is in R >= 3.1.0.  R NEWS isn't clear when that change happened, so just test there is an error when it is locked.
if (bindingIsLocked("CO2",as.environment("package:datasets"))) {
    # locked binding: an informative error is expected
    test(1355, setDT(CO2), error="Can not convert 'CO2' to data.table by reference because binding is locked.")
} else {
    # binding not locked: setDT is expected to succeed
    test(1355, setDT(CO2), CO2)
}

# Fix for #698: a not-join must not trip the allow.cartesian check, even though i
# (DT2) holds many duplicates of x's key values.
DT1 = data.table(x = rep(1:3, each = 3L), y = 1:9, key = "x")
DT2 = data.table(x = rep(c(3L, 1L), each = 10), z = 1L)
test(1356,
     DT1[!DT2],
     data.table(x = 2L, y = 4:6, key = "x"))

# Fix for #745: as.data.table() on a character matrix must keep character columns
# (no conversion to factor).
m = matrix(c("a", "b", "c", "d"), ncol = 2)
test(1357,
     as.data.table(m),
     data.table(V1 = c("a", "b"), V2 = c("c", "d")))

# Fix for #471: nested self-joins such as A[A[A]] produced duplicate column names in 1.9.3.
A = data.table(foo = 1:2, bar = 3:4, key = "foo")
test(1358.1,
     names(A[A[A]]),
     c("foo", "bar", "i.bar", "i.bar.1"))
test(1358.2,
     names(A[A[A[A]]]),
     c("foo", "bar", "i.bar", "i.bar.2", "i.bar.1"))

# Fix for #743. 0 and -0 and the sign bit issue
# x contains both 0 and -0; grouping must treat them as one group (N = 5 for the zeros)
A <- data.table(x=c(0,0,-1,1,-1,0,-0,1,-1,1,0,1), y=1:12)
test(1359.1, A[, .N, by=x], data.table(x=c(0,-1,1), N=c(5L,3L,4L)))
# integer 0L merged against the double produced by -(11-11)/10: both merge directions must agree
dt1 <- data.table(x2 = 0L)
dt2 <- data.table(x2 =-(11-11)/10)
test(1359.2, as.integer(merge(dt2, dt1, by="x2")$x2), as.integer(merge(dt1, dt2, by="x2")$x2))

# Fix for #744: X[Y, c(...), by=.EACHI] segfaults because of using 'i' as variable in for-loop that masked the original 'i' from input.
dt <- data.table(id = c("A", "A", "B", "B", "C"), val1=1:5, val2=6:10, key = "id")
# NOTE: this character vector is named `sample`, the same name as base::sample
sample <- c("A", "B")
# c(.N) in j must behave like plain .N, both when returning a result and when assigning with :=
test(1360.1, dt[sample, c(.N), by = .EACHI], dt[sample, list(V1=.N), by=.EACHI])
test(1360.2, copy(dt)[sample, N := c(.N), by = .EACHI], copy(dt)[sample, N := .N, by = .EACHI])

# Fix for #500 - `lapply` call shouldn't redirect to `[.data.frame`.
L <- list(data.table(BOD), data.table(BOD))
# "[" applied through lapply with an unquoted column expression in i must give the same
# result as subsetting each data.table directly.
test(1361, lapply(L, "[", Time==3L), list(L[[1L]][Time == 3L], L[[2L]][Time == 3L]))

# Feature #735, first two cases: 1) .SD, and 2) DT[, c(.SD, lapply(.SD, ...)), by=...] optimisation:
# Don't set options(datatable.verbose=TRUE) here because the "running test 1362.1 ..." messages cause output to scroll away errors on CRAN checks last 13 lines
# Each test passes verbose=TRUE to the query itself and matches the reported rewrite of j.
# The expected strings contain j as reported in that message, so do not reformat the j expressions.
DT <- data.table(x=c(1,1,1,2,2), y=1:5, z=6:10)
test(1362.1, DT[, .SD, by=x, verbose=TRUE],
  output="lapply optimization changed j from '.SD' to 'list(y, z)'")
test(1362.2, DT[, c(.SD), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD)' to 'list(y, z)'")
test(1362.3, DT[, c(.SD, lapply(.SD, sum)), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD, lapply(.SD, sum))' to 'list(y, z, sum(y), sum(z))'")
test(1362.4, DT[, c(lapply(.SD, sum), .SD), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(lapply(.SD, sum), .SD)' to 'list(sum(y), sum(z), y, z)'")
test(1362.5, DT[, c(list(y), .SD, lapply(.SD, sum)), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(list(y), .SD, lapply(.SD, sum))' to 'list(y, y, z, sum(y), sum(z))'")
# 3) .SD[1] and 4) .SD[1L]
test(1362.6, DT[, c(.SD[1L]), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD[1L])' to 'list(y[1L], z[1L])'")
test(1362.7, DT[, c(.SD[1L], lapply(.SD, sum)), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD[1L], lapply(.SD, sum))' to 'list(y[1L], z[1L], sum(y), sum(z))'")
test(1362.8, DT[, c(.SD[.N]), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD[.N])' to 'list(y[.N], z[.N])'")
test(1362.9, DT[, .SD[1], by=x, verbose=TRUE],
  output="lapply optimization changed j from '.SD[1]' to 'list(y[1], z[1])'")
# (there is no test numbered 1362.10)
test(1362.11, DT[, c(.SD[1]), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD[1])' to 'list(y[1], z[1])'")
test(1362.12, DT[, c(.SD[1], lapply(.SD, sum)), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'c(.SD[1], lapply(.SD, sum))' to 'list(y[1], z[1], sum(y), sum(z))'")
test(1362.13, DT[, head(.SD, 1), by=x, verbose=TRUE],
  output="lapply optimization changed j from 'head(.SD, 1)' to 'list(head(y, 1), head(z, 1))'")
# make sure .I is named as I when no name is given
test(1362.14, names(DT[, c(list(.I, mean(y)), lapply(.SD, sum)), by=x]), c("x", "I", "V2", "y", "z"))
# and if a name is given, it's retained
test(1362.15, names(DT[, c(list(bla=.I, mean(y)), lapply(.SD, sum)), by=x]), c("x", "bla", "V2", "y", "z"))
# Add test to ensure that mean() gets replaced with fastmean when GForce won't be used.
test(1362.16, DT[, c(list(.I, mean(y)), lapply(.SD, mean)), by=x, verbose=TRUE], 
  output="Old mean optimization changed j from 'list(.I, mean(y), mean(y), mean(z))' to 'list(.I, .External(Cfastmean, y, FALSE), .External(Cfastmean, y, FALSE), .External(Cfastmean, z, FALSE))'")

# setDT(DT), when input is already a data.table checks if selfrefok and if not, does alloc.col again.
# Case 1: setDT applied through lapply to a data.frame held in a list; the element extracted
# afterwards is expected to report selfrefok == 1L.
DT = list(data.frame(x=1:5, y=6:10))
invisible(lapply(DT, setDT))
DT = DT[[1L]]
test(1363.1, selfrefok(DT), 1L)
# Case 2: setDT called on a function argument; the caller's df is expected to report
# selfrefok == 0L afterwards, and 1L once setDT(df) is called on it directly.
foo <- function(x) setDT(x)
df = data.frame(x=1, y=2)
foo(df)
test(1363.2, selfrefok(df), 0L)
setDT(df)
test(1363.3, selfrefok(df), 1L)

# setdiff, partly #547. internal as of now, and named setdiff_ because the name "set" can be confused with the set* functions.
# maybe provide a %diff% operator that internally calls setdiff_?? Usage x %diff% y?
# X holds the same values as numeric (a), factor (b), character (c) and integer (d), plus a row id e.
X = data.table(a=c(1,1,1,1,3,3,2,2,2))[, `:=`(b=factor(a), c=as.character(a), d = as.integer(a), e=1:9)]
Y = data.table(a=c(3,4), b=factor(3:4), c=c("3","4"), d=3:4, e=c(TRUE, FALSE), f=c(5L,7L)) 
# numeric column in x
test(1364.1, setdiff_(X, Y, "a", "a"), data.table(a=c(1,2)))
test(1364.2, setdiff_(X, Y, c("a", "e"), c("a", "f")), X[!5, list(a,e)])
test(1364.3, setdiff_(X, Y, "a", "e"), error="When x's column ('a') is integer or numeric, the corresponding column in y ('e')")
# factor column in x (matched against a factor and against a character column of y)
test(1364.4, setdiff_(X, Y, "b", "b"), data.table(b=factor(c(1,2), levels=c(1,2,3))))
test(1364.5, setdiff_(X, Y, c("b", "e"), c("b", "f")), X[!5, list(b,e)])
test(1364.6, setdiff_(X, Y, "b", "c"), data.table(b=factor(c(1,2), levels=c(1,2,3))))
# character column in x (matched against a character and against a factor column of y)
test(1364.7, setdiff_(X, Y, "c", "c"), data.table(c=as.character(c(1,2))))
test(1364.8, setdiff_(X, Y, c("c", "e"), c("c", "f")), X[!5, list(c,e)])
test(1364.9, setdiff_(X, Y, "c", "b"), data.table(c=c("1", "2")))
# integer column in x (there is no test numbered 1364.10)
test(1364.11, setdiff_(X, Y, "d", "d"), data.table(d=1:2))
test(1364.12, setdiff_(X, Y, c("d", "e"), c("d", "f")), X[!5, list(d,e)])
# incompatible column type pairings and mismatched by lengths are errors
test(1364.13, setdiff_(X, Y, "d", "e"), error="When x's column ('d') is integer or numeric, the corresponding column in y ('e')")
test(1364.14, setdiff_(X, Y, "b", "a"), error="When x's column ('b') is factor, the corresponding column in y ('a')")
test(1364.15, setdiff_(X, Y, "c", "a"), error="When x's column ('c') is character, the corresponding column in y ('a') ")
test(1364.16, setdiff_(X, Y), error="length(by.x) != length(by.y)")
# by.x / by.y omitted on single-column inputs
test(1364.17, setdiff_(X[, list(a)], Y[, list(a)]), data.table(a=c(1,2)))

# not join along with by=.EACHI, #604
# Each not-join is compared with the ordinary join on the complementary set of key values.
DT <- data.table(A=c(1,1,1,2,2,2,2,3,3,4,5,5))[, `:=`(B=as.integer(A), C=c("c", "e", "a", "d"), D=factor(c("c", "e", "a", "d")), E=1:12)]
# numeric key
setkey(DT, A)
test(1365.1, DT[!J(c(2,5)), sum(E), by=.EACHI], DT[J(c(1,3,4)), sum(E), by=.EACHI])
# integer key
setkey(DT, B)
test(1365.2, DT[!J(c(4:5)), list(.N, sum(E)), by=.EACHI], DT[J(1:3), list(.N, sum(E)), by=.EACHI])
# character key, with := on a copy
setkey(DT, C)
test(1365.3, copy(DT)[!"c", f := .N, by=.EACHI], copy(DT)[c("a", "d", "e"), f := .N, by=.EACHI])
# factor key
setkey(DT, D)
test(1365.4, DT[!J(factor("c")), .N, by=.EACHI], DT[J(factor(c("a", "d", "e"))), .N, by=.EACHI])
test(1365.5, DT[!"c", lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")], DT[c("a", "d", "e"), lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")])

# uniqlengths doesn't error on 0-length input
test(1366, uniqlengths(integer(0), 0L), integer(0))

# na.last=NA gets 0's for NAs not at the beginning when there are values so close to NA_integer_ for integers and -Inf for example for numerics. Moved logic to the end in forder.c so that we replace NAs with 0's after the ordering have been taken care of completely.
# integer case: a value close to the lower integer limit next to an NA
x = c(-2147483000L, NA_integer_, 1L)
test(1367.1, forderv(x, na.last=NA), c(0L,1L,3L))
# numeric case: NA and NaN together with -Inf / Inf; both missing values are expected as 0
x = c(NA, Inf, 0, 1, -1, -Inf, NaN)
test(1367.2, forderv(x, na.last=NA), c(0L, 0L, 6L, 5L, 3L, 4L, 2L))

# Fix for integer overflow segfault in setRange
# extreme integer values next to NA: NA is expected first in the default ordering
x = c(-2147483647L, NA_integer_, 2L)
test(1368.1, forderv(x), c(2L, 1L, 3L))
x = c(2147483647L, NA_integer_, -2L)
test(1368.2, forderv(x), c(2L, 3L, 1L))

# tests for frankv. testing on vectors alone so that we can compare with base::rank
# One difference is that NAs belong to the same group, unlike base::rank. So are NaNs.
# So, they can't be compared to base::rank, won't be identical except for ties="first", and (ties="random", na.last=NA) - should document this.
# ties.method="random" is not exercised by these loops, so no seeding is needed.
#
# Test ids advance in steps of .001: the two loops below run 120 + 40 = 160 tests, which at a
# step of .01 would run on into the 1370.x ids used by the is_na / any_na tests further down.

# no seed set on purpose
dt = data.table(AA=sample(c(-2:2), 50, TRUE),
                BB=sample(c(-2,-1,0,1,2,Inf,-Inf), 50, TRUE),
                CC=sample(c(letters[1:5]), 50, TRUE),
                DD=sample(c(-2:2), 50, TRUE),
                EE=sample(as.logical(c(-2:2)), 50, TRUE))
if ("package:bit64" %in% search()) dt[, DD := as.integer64(DD)]
test_no = 1369.0
for (i in seq_along(dt)) {
    col = dt[[i]]
    # the base::rank reference is computed on plain integers when the column is integer64
    basecol = if (inherits(col, "integer64")) as.integer(col) else col
    for (j in list(TRUE, FALSE, "keep")) {
        for (k in c("average", "min", "max", "first")) {
            # reference: ascending rank, and descending rank via the negated xtfrm() keys
            r1 = rank(basecol, ties.method=k, na.last=j)
            r2 = rank(-xtfrm(basecol), ties.method=k, na.last=j)
            r3 = frankv(col, ties.method=k, na.last=j)
            r4 = frankv(col, order=-1L, ties.method=k, na.last=j)

            test_no = signif(test_no+.001, 7)
            test(test_no, r1, r3)
            test_no = signif(test_no+.001, 7)
            test(test_no, r2, r4)
        }
    }
}
# test na.last=NA here separately.
dt = data.table(AA=sample(c(-2:2, NA), 50, TRUE),
                BB=sample(c(-2,-1,0,1,2,Inf,-Inf, NA, NaN), 50, TRUE),
                CC=sample(c(letters[1:5], NA), 50, TRUE),
                DD=sample(c(-2:2, NA), 50, TRUE),
                EE=sample(as.logical(c(-2:2, NA)), 50, TRUE))
if ("package:bit64" %in% search()) dt[, DD := as.integer64(DD)]

# test_no carries on from the loop above
for (i in seq_along(dt)) {
    col = dt[[i]]
    basecol = if (inherits(col, "integer64")) as.integer(col) else col
    for (k in c("average", "min", "max", "first")) {
        r1 = rank(basecol, ties.method=k, na.last=NA)
        r2 = rank(-xtfrm(basecol), ties.method=k, na.last=NA)
        r3 = frankv(col, ties.method=k, na.last=NA)
        r4 = frankv(col, order=-1L, ties.method=k, na.last=NA)

        test_no = signif(test_no+.001, 7)
        test(test_no, r1, r3)
        test_no = signif(test_no+.001, 7)
        test(test_no, r2, r4)
    }
}

 
# tests for is_na, which is equivalent of rowSums(is.na(dt)) > 0L
# not exported yet, but we could!
## UPDATE: also added tests for "any_na", internal version of anyNA
## which also includes implementation for bit64::integer64, but the 
## real need is for merging factors correctly in joins, and we need 
## a fast check for NAs; can't rely on 3.1+ for anyNA.
# Random list of columns of several types, each containing NA (and NaN for the
# double column). 'ans' is the same data with every row holding an NA removed.
dt = list(AA=sample(c(NA,-2:2), 50, TRUE), 
                BB=sample(c(NA,-2,-1,0,NaN,1,2,Inf,-Inf), 50, TRUE), 
                CC=sample(c(NA,letters[1:5]), 50, TRUE),
                DD=sample(c(NA,-2:2), 50, TRUE),
                EE=sample(as.logical(c(NA,-2:2)), 50, TRUE))
if ("package:bit64" %in% search()) dt[["DD"]] = as.integer64(dt[["DD"]])
test_no = 1370.0
ans = as.list(na.omit(as.data.table(dt)))
# For every non-empty subset of the columns: is_na() must agree with
# rowSums(is.na(.)) > 0L, any_na() must be TRUE on the raw data and FALSE on
# the NA-free data.
for (i in seq_along(dt)) {
    combn(names(dt), i, function(cols) {
        # '<<-' (not '=') so that the counter defined above is advanced; a
        # plain assignment only creates a local copy inside this callback and
        # every combination would then reuse ids 1370.01-1370.03.
        test_no <<- signif(test_no+.01, 7)
        ans1 = is_na(dt[cols])
        ans2 = rowSums(is.na(as.data.table(dt[cols]))) > 0L
        test(test_no, ans1, ans2)

        # update: tests for any_na
        test_no <<- signif(test_no+.01, 7)
        test(test_no, any_na(dt[cols]), TRUE)
        test_no <<- signif(test_no+.01, 7)
        test(test_no, any_na(ans[cols]), FALSE)
        TRUE
    }) 
}
## The function is_na now gains a "by" argument where we can specify the columns. Tests have not been added for that yet.
## However, I've added tests for 'na.omit.data.table' that uses this internally. So we don't have to add tests here again.
## See tests 1394.*

# Overlap joins (foverlaps), part 1: argument validation.
# 'x' is the query table and 'y' the subject table; both are reused by the
# GenomicRanges comparison that follows.
x <- data.table(
    chr   = rep(c("Chr1", "Chr2"), c(2, 3)),
    start = c(5, 10, 1, 25, 50),
    end   = c(11, 20, 4, 52, 60)
)
y <- data.table(
    chr   = rep(c("Chr1", "Chr2"), c(2, 1)),
    start = c(1, 15, 1),
    end   = c(4, 18, 55),
    val   = 1:3
)

# 'y' has no key yet
test(1372.1,
     foverlaps(x, y, type = "any"),
     error = "'y' must be keyed (i.e., sorted, and, marked as sorted).")

# key exists but its column order differs from by.y
setkey(y, chr, end, start)
test(1372.2,
     foverlaps(x, y, by.y = 1:3, type = "any"),
     error = "The first 3 columns of y's key is not identical to the columns specified in by.y.")

# leave 'y' keyed by (chr, start, end), with the interval columns renamed
setkey(y, chr, start, end)
setnames(y, c("chr", "pos1", "pos2", "val"))
# Overlap joins, part 2: results are cross-checked against
# GenomicRanges::findOverlaps(); only run when GenomicRanges is attached.
if ("package:GenomicRanges" %in% search()) {
    setcolorder(y, c("chr", "val", "pos1", "pos2"))
    ans1 = foverlaps(x, y, type="any", by.x=c("chr", "start", "end"), by.y=c("chr", "pos1", "pos2"), which=TRUE, nomatch=0L)
    test(1372.3, foverlaps(x,y,by.x=1:3, nomatch=0L), data.table(chr=x$chr[2:5], y[c(2,3,3,3), -1, with=FALSE], x[2:5, 2:3, with=FALSE]))
    # gr(): build a GRanges object from the first three columns of x
    # (sequence name, interval start, interval end).
    gr <- function(x) {
        GRanges(Rle(x[[1]]), IRanges(start=x[[2]], end=x[[3]]))
    }
    # fo(): run findOverlaps() and return the result in the shape that
    # foverlaps(which=TRUE) uses. A plain vector result is returned as is;
    # otherwise the hits become a sorted two-column (xid, yid) table.
    fo <- function(gr1, gr2, ...) {
        olaps = findOverlaps(gr1, gr2, ...)
        if (is.vector(olaps)) return(olaps)
        ans = setDT(list(xid=queryHits(olaps), yid=subjectHits(olaps)))
        setorder(ans)
        ans
    }
    test(1372.4, setorder(ans1), fo(gr(x), gr(y[, c(1,3,4), with=FALSE]), type="any", select="all"))

    runs = 3L # repeat 3 times..
    types=c("any", "within", "start", "end")
    mults=c("all", "first", "last")
    maxgap=0L; minoverlap=1L
    verbose=FALSE; which=TRUE
    test_no <- 1372.4
    for (run in seq_len(runs)) {
        # random table sizes (at least 50 rows each) and coordinate range
        n1 = max(50L, sample(1e2L, 1, FALSE))
        n2 = max(50L, sample(1e2L, 1, FALSE))
        N = max(100L, sample(1e3L, 1, FALSE))

        # query intervals: pmin/pmax guarantee start <= end
        i1 = sample(N, n1, TRUE)
        i2 = sample(N, n1, TRUE)
        start = pmin(i1,i2)
        end = pmax(i1,i2)
        chr  = sort(sample(paste("Chr", 1:2, sep=""), length(start), TRUE))
        i = setDT(list(chr=chr, start=start, end=end))

        # subject intervals, built the same way
        i1 = sample(N, n2, TRUE)
        i2 = sample(N, n2, TRUE)
        start = pmin(i1,i2)
        end = pmax(i1,i2)
        chr  = sort(sample(paste("Chr", 1:2, sep=""), length(start), TRUE))
        x = setDT(list(chr=chr, start=start, end=end))
        setkey(x); setkey(i)
        for (type in types) {
            for (mult in mults) {
                # data.table overlap join
                # 'mult' is a single string here, so a plain if/else is used
                # instead of the vectorised ifelse().
                nomatch = if (mult == "all") 0L else NA_integer_
                ans1 = foverlaps(i, x, mult=mult, type=type, nomatch=nomatch, which=which, verbose=verbose)
                ans2 = fo(gr(i), gr(x), type=type, select=mult)
                test_no = signif(test_no+.01, 7)
                # cat("test =", test_no, ", i = ", run, ", type = ", type, ", mult = ", mult, "\n", sep="")
                test(test_no, ans1, ans2)
            }
        }
    }
} else {
    cat("Tests 1372.3+ not run. If required call library(GenomicRanges) first.\n")
}

# fix for bug in address - #824
# address(x) is called on a freshly created vector, then the output of R's
# internal inspect() for x is captured; it must still report NAM(1).
x = 1:10
address(x) ## shouldn't increment NAM field
out = capture.output(.Internal(inspect(x)))
test(1373, grepl("NAM\\(1\\)", out), TRUE)

# Bug #762: grouping a keyed data.table by 'y', which is not a column of DT
# but a vector in the calling scope.
DT <- data.table(x = 1:5, z = 5:1, key = "z")

# unsorted groups: the result is not keyed
y <- c(1, 3, 2, 3, 2)
test(1374.1,
     DT[, list(x = sum(x)), by = y],
     data.table(y = c(1, 3, 2), x = c(5L, 6L, 4L)))

# sorted groups: the result is expected to be keyed by 'y'
y <- c(1, 2, 2, 3, 3)
test(1374.2,
     DT[, list(x = sum(x)), by = y],
     data.table(y = c(1, 2, 3), x = c(5L, 7L, 3L), key = "y"))

# i combined with := in j:
#  - order() in i together with := updates the rows in place, so the row
#    order of the result is unchanged (1375.1);
#  - order() in i on its own returns a reordered object, which a chained :=
#    then updates (1375.2);
#  - a logical subset in i together with := updates only that subset (1375.3).
DT <- as.data.table(iris)
DT[, Species := as.character(Species)]
test(1375.1,
     DT[, mean(Petal.Width), by = Species][order(-V1), Species := toupper(Species)]$Species,
     c("SETOSA", "VERSICOLOR", "VIRGINICA"))
test(1375.2,
     DT[, mean(Petal.Width), by = Species][order(-V1)][, Species := toupper(Species)]$Species,
     c("VIRGINICA", "VERSICOLOR", "SETOSA"))
test(1375.3,
     DT[, mean(Petal.Width), by = Species][V1 > 1, Species := toupper(Species)]$Species,
     c("setosa", "VERSICOLOR", "VIRGINICA"))

# Secondary keys (indexes): created automatically by == in i, reported by
# key2(), reused on later queries, cleared by setkey()/set2key(NULL), and
# switchable through options(datatable.auto.index).
# NOTE(review): the id 1376.10 is numerically equal to 1376.1, so two tests
# share that number - confirm whether renumbering is wanted.
DT <- data.table(a = 1:10, b = 10:1)
test(1376.1, key2(DT), NULL)
test(1376.2, DT[b == 7L, verbose = TRUE], DT[4L], output = "Creating new index 'b'")
test(1376.3, key2(DT), "b")
test(1376.4, DT[b == 8L, verbose = TRUE], DT[3L], output = "Using existing index 'b'")
# a second single-column index is added next to the first
test(1376.5, DT[a == 7L, verbose = TRUE], DT[7L], output = "Creating new index")
test(1376.6, key2(DT), c("b", "a"))
test(1376.7, DT[a == 7L, verbose = TRUE], DT[7L], output = "Using existing index 'a'")

# setting the primary key leaves no secondary keys behind
setkey(DT, b)
test(1376.8, key2(DT), NULL)
# this query creates index 'a' again, which set2key(DT, NULL) then removes
test(1376.9, list(DT[a == 2L], key2(DT)), list(DT[9L], "a"))
set2key(DT, NULL)
test(1376.10, list(key(DT), key2(DT)), list("b", NULL))

# with auto indexing switched off no index is created; switched on, it is
options(datatable.auto.index = FALSE)
test(1376.11, list(DT[a == 2L], key2(DT)), list(DT[9L], NULL))
options(datatable.auto.index = TRUE)
test(1376.12, list(DT[a == 2L], key2(DT)), list(DT[9L], "a"))

# An invalid .SDcols must not matter when i selects no rows (FALSE or 0).
# Package vardpoor relies on this in example(vardchanges).
DT <- data.table(a = 1:3, b = 4:6)
test(1377.1, DT[FALSE, foo := 7], DT)
test(1377.2, DT[0, foo := 7], DT)
# valid .SDcols, all rows: the columns are pasted together row by row
test(1377.3,
     DT[, foo := Reduce(function(x, y) paste(x, y, sep = "__"), .SD), .SDcols = c("a", "b")],
     data.table(a = 1:3, b = 4:6, foo = c("1__4", "2__5", "3__6")))
# invalid .SDcols ("zz" is not a column): an error with all rows ...
test(1377.4,
     DT[, bar := Reduce(function(x, y) paste(x, y, sep = "__"), .SD), .SDcols = c("a", "zz")],
     error = "Some items of .SDcols are not column names")
# ... but DT is returned unchanged when i is FALSE
test(1377.5,
     DT[FALSE, bar := Reduce(function(x, y) paste(x, y, sep = "__"), .SD), .SDcols = c("a", "zz")],
     DT)

#====================================
# fread and http downloads on Windows, thanks to Steve Miller for highlighting.
# Any file would do; this one is
# http://www.russell.com/common/indexes/csvs/russellmicrocapvalueindex_hist.csv
# (it also has a \r embedded in the first, quoted, column but that is not the issue).
# An http: address cannot be used directly because this runs on CRAN and any
# http: site might be unavailable, so mode="wb" is not actually tested; this is
# as close as we can get.

# plain local read
test(1378.1, dim(fread("russellCRLF.csv")), c(19, 4))

# A file:// URL simulates an http:// request as far as download.file() and
# unlink() go, without needing internet access. On Windows, download.file() in
# fread() changes the input data from \r\n to \n.
f <- paste("file://", getwd(), "/russellCRLF.csv", sep = "")
test(1378.2, dim(fread(f, showProgress = FALSE)), c(19, 4))

# This file actually has 3 \r; download.file() from file:// turns that into
# \r\r\n, which simulates download.file() from http: in text mode.
f <- paste("file://", getwd(), "/russellCRCRLF.csv", sep = "")
test(1378.3,
     fread(f, showProgress = FALSE),
     error = "Line ending is .*r.*r.*n. R's download.file() appears to add the extra .*r in text mode on Windows. Please download again in binary mode (mode='wb') which might be faster too. Alternatively, pass the URL directly to fread and it will download the file in binary mode for you.")
#====================================


#====================================
# Return to old bywithoutby behaviour. TO DO: delete these tests after Sep 2015

# The option stays TRUE for all the legacy tests that follow and is switched
# back to FALSE at the end of this section.
options(datatable.old.bywithoutby=TRUE)
# Warning text that the joins below are expected to raise under this option.
deprecated_warn = "The data.table option 'datatable.old.bywithoutby' for grouping on join without providing `by` will be deprecated in the next release, use `by=.EACHI`."
# Old tests from before commit: 0be720956fdd9c274e46133e154d4bbd5b2c7840
# TO DO: address `allow.cartesian`.  Some differences below...
# 'off' shifts the legacy test ids into negative numbers, e.g. off-8 is -1008.
off = -1000
TESTDT = data.table(a=as.integer(c(1,3,4,4,4,4,7)), b=as.integer(c(5,5,6,6,9,9,2)), v=1:7)
setkey(TESTDT,a,b)
# Two-column joins via SJ(), with and without roll / rollends.
test(off-8, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v]$v, INT(NA,NA,NA,NA,NA), warning=deprecated_warn)
test(off-9, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v,roll=TRUE]$v, INT(NA,NA,NA,6,NA), warning=deprecated_warn)
test(off-10, TESTDT[SJ(c(-9,1,4,4,8),c(1,4,4,10,1)),v,roll=TRUE,rollends=FALSE]$v, INT(NA,NA,NA,NA,NA), warning=deprecated_warn)
# Joins on the first key column only; each nomatch=0 test is paired with a nomatch=NA test.
test(off-16, TESTDT[SJ(c(4)),v][[2]], INT(3,4,5,6), warning=deprecated_warn)
test(off-18, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",nomatch=0][[2]], INT(3:6), warning=deprecated_warn)
test(off-185, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",nomatch=NA][[2]], INT(NA,NA,3:6,NA), warning=deprecated_warn)
test(off-19, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,nomatch=0][[2]], INT(1,3:6,7), warning=deprecated_warn)
test(off-186, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,nomatch=NA][[2]], INT(NA,1,3:6,7), warning=deprecated_warn)
test(off-20, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=0][[2]], INT(1,3:6), warning=deprecated_warn)
test(off-187, TESTDT[SJ(c(-3,2,4,8)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=NA][[2]], INT(NA,1,3:6,NA), warning=deprecated_warn)
# Joins on both key columns; the value column is the third column of the result.
test(off-21, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=0][[3]], INT(1,3:4), warning=deprecated_warn)
test(off-188, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",nomatch=NA][[3]], INT(NA,1,NA,3:4,NA,NA,NA), warning=deprecated_warn)
test(off-22, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=0][[3]], INT(1,3:4,4,6), warning=deprecated_warn)
test(off-189, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,nomatch=NA][[3]], INT(NA,1,NA,3:4,4,6,NA), warning=deprecated_warn)
test(off-23, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=0][[3]], INT(1,3:4,4), warning=deprecated_warn)
test(off-190, TESTDT[SJ(c(-9,1,4,4,4,4,8),c(1,5,5,6,7,10,3)),v,mult="all",roll=TRUE,rollends=FALSE,nomatch=NA][[3]], INT(NA,1,NA,3:4,4,NA,NA), warning=deprecated_warn)
# NA values in the first join column of i.
test(off-24, TESTDT[SJ(c(1,NA,4,NA,NA,4,4),c(5,5,6,6,7,9,10)),v,mult="all",roll=TRUE,nomatch=0][[3]], INT(1,3:4,5:6,6), warning=deprecated_warn)
test(off-191, TESTDT[SJ(c(1,NA,4,NA,NA,4,4),c(5,5,6,6,7,9,10)),v,mult="all",roll=TRUE,nomatch=NA][[3]], INT(NA,NA,NA,1,3:4,5:6,6), warning=deprecated_warn)

# Convert both key columns of TESTDT to character and re-key.
TESTDT[, a:=letters[a]]
TESTDT[, b:=letters[b]]
setkey(TESTDT,a,b)

a = "d"
# Variable Twister.  a in this scope has same name as a inside DT scope.
# Aug 2010 : As a result of bug 1005, and consistency with 'j' and 'by' we now allow self joins (test 183) in 'i'.
test(off-70, TESTDT[eval(J(a)),v], data.table(a="d",v=3:6,key="a"), warning=deprecated_warn)   # the eval() enabled you to use the 'a' in the calling scope, not 'a' in the TESTDT.  TO DO: document this.
test(off-71, TESTDT[eval(SJ(a)),v], data.table(a="d",v=3:6,key="a"), warning=deprecated_warn)
test(off-72, TESTDT[eval(CJ(a)),v], data.table(a="d",v=3:6,key="a"), warning=deprecated_warn)

# Join on an integer plus a logical key column.
DT = data.table(a=rep(1:3,each=2),b=c(TRUE,FALSE),v=1:6)
setkey(DT,a,b)
test(off-180, DT[J(2,FALSE),v]$v, 4L, warning=deprecated_warn)

# Self join: i is built from DT's own column A.
DT = data.table(A = c("o", "x"), B = 1:10, key = "A")
test(off-183, DT[J(unique(A)), B]$B, DT$B, warning=deprecated_warn)

# Tests of bug 1015 highlight by Harish
# See thread "'by without by' now heeds nomatch=NA"
# Tests 185-201 were added in above next to originals
x <- data.table(a=c("a","b","d","e"),b=c("A","A","B","B"),d=c(1,2,3,4), key="a,b")
y <- data.table(g=c("a","b","c","d"),h=c("A","A","A","A"))
# The expected values in 202 and 204 are themselves joins, so their own
# warnings are silenced with suppressWarnings().
test(off-202, x[y], suppressWarnings(x[y,mult="all"]), warning=deprecated_warn)
test(off-203, x[y,d]$d, c(1,2,NA,NA), warning=deprecated_warn)
test(off-204, x[y,list(d)], suppressWarnings(x[y,d]), warning=deprecated_warn)
test(off-205, x[y,list(d),mult="all"][,d], c(1,2,NA,NA), warning=deprecated_warn)

# Character and double key columns, keyed via setkey() and via key=.
DF = data.frame(a=LETTERS[1:10], b=1:10, stringsAsFactors=FALSE)
DT = data.table(DF)
setkey(DT,a)    # used to complain about character
test(off-215, DT["C",b]$b, 3L, warning=deprecated_warn)
DT = data.table(DF,key="a")
test(off-216, DT["C",b]$b, 3L, warning=deprecated_warn)
DT = data.table(a=c(1,2,3),v=1:3,key="a")
test(off-217, DT[J(2),v]$v, 2L, warning=deprecated_warn)
DT = data.table(a=c(1,2.1,3),v=1:3,key="a")
test(off-218, DT[J(2.1),v]$v, 2L, warning=deprecated_warn)

# eval() in i. Tests 220 and 221 evaluate to a logical condition on column a
# and pass no warning=; test 222 evaluates to J(2), a join, and expects the
# deprecation warning.
DT = data.table(a=1:5,b=6:10,key="a")
q = quote(a>3)
test(off-220, DT[eval(q),b], 9:10)
test(off-221, DT[eval(parse(text="a>4")),b], 10L)
test(off-222, DT[eval(parse(text="J(2)")),b]$b, 7L, warning=deprecated_warn)

# Join Inherited Scope, and X[Y] including Y's non-join columns
# j may refer to X's column 'foo' and to Y's non-join column 'bar'.
X=data.table(a=rep(1:3,c(3,3,2)),foo=1:8,key="a")
Y=data.table(a=2:3,bar=6:7)
test(off-239, X[Y,sum(foo)], data.table(a=2:3,V1=c(15L,15L),key="a"), warning=deprecated_warn)
test(off-240, X[Y,sum(foo*bar)], data.table(a=2:3,V1=c(90L,105L),key="a"), warning=deprecated_warn)
test(off-241, X[Y], data.table(a=rep(2:3,3:2),foo=4:8,bar=rep(6:7,3:2),key="a"), warning=deprecated_warn)
test(off-242, X[Y,list(foo,bar)][,sum(foo*bar)], 195L, warning=deprecated_warn)


X=data.table(a=rep(LETTERS[1:2],2:3),b=1:5,v=10:14,key="a,b")
test(off-246, X["A"], {tt=X[1:2];setkey(tt,a);tt}, warning=deprecated_warn)  # key="a,b" is retained in 1.9.2 and 1.9.4; just old.bywithoutby=TRUE in 1.9.4 keeps "a" only, unfortunately.

# Test .N==0 with nomatch=NA|0
DT = data.table(a=1:2,b=1:6,key="a")
test(off-349, DT[J(2:3),.N,nomatch=NA]$N, c(3L,0L), warning=deprecated_warn)
test(off-350, DT[J(2:3),.N,nomatch=0]$N, c(3L), warning=deprecated_warn)
# Test first .N==0 with nomatch=NA|0
test(off-350.1, DT[J(4),.N]$N, 0L, warning=deprecated_warn)
test(off-350.2, DT[J(0:4),.N]$N, c(0L,3L,3L,0L,0L), warning=deprecated_warn)

# Test printing on nested data.table, bug #1803
# The list column y holds an integer vector, a character vector and a data.table.
DT = data.table(x=letters[1:3],y=list(1:10,letters[1:4],data.table(a=1:3,b=4:6)))
test(off-558, capture.output(print(DT)), c("   x            y","1: a 1,2,3,4,5,6,","2: b      a,b,c,d","3: c <data.table>"))
test(off-559, setkey(DT,x)["a",y][[2]][[1]], 1:10, warning=deprecated_warn)   # y is symbol representing list column, specially detected in dogroups

# another test linked from #2162
DT = data.table(x=rep(c("a","b","c"),each=3), y=c(1L,3L,6L), v=1:9, key="x")
test(off-725, DT[c("a","b","d"),v][,list(v)], DT[J(c("a","b","d")),"v",with=FALSE], warning=deprecated_warn)  # unfiled bug fix for NA matches; see NEWS 1.8.3

# Warnings are suppressed here and only the printed output is matched.
DT = data.table(a=1:3,b=1:6,key="a")
test(off-869, suppressWarnings(DT[J(2,42,84),print(.SD)]), output="   b1: 22: 5.*Empty data.table (0 rows) of 3 cols: a,V2,V3")

# Clean up: drop the warning text and switch the option back to FALSE.
rm(deprecated_warn)
options(datatable.old.bywithoutby=FALSE)
# End (test reverting to old bywithoutby behaviour)  TO DO: delete these tests after Sep 2015
#====================================


# options(datatable.fread.datatable) sets the class fread() returns, and an
# explicit data.table= argument overrides it in either direction.
oldv <- options(datatable.fread.datatable = FALSE)
test(1379.1,
     fread("A,B\n1,3\n2,4\n"),
     data.frame(A = 1:2, B = 3:4))
test(1379.2,
     fread("A,B\n1,3\n2,4\n", data.table = TRUE),
     data.table(A = 1:2, B = 3:4))
options(datatable.fread.datatable = TRUE)
test(1379.3,
     fread("A,B\n1,3\n2,4\n", data.table = FALSE),
     data.frame(A = 1:2, B = 3:4))
options(oldv)  # put back whatever was set before this block

# Test that the RHS of == is coerced to x's type before bmerge in auto index.
# Package vardpoor does this in example(linqsr).
# Column 'a' is type double here, as it is in vardpoor.
DT <- data.table(a = c(0, 0, 1, 1, 0, 0), b = 1:6)
test(1380, DT[a == TRUE], DT[3:4])

# Fix #847: as.data.table.list and a character(0) item.
# Joining an empty keyed table to J("foo", character(0)) returns the empty
# table and warns about the zero-length item.
x <- data.table(a = character(0), b = character(0), c = numeric(0))
setkey(x, a, b)
test(1381,
     x[J("foo", character(0)), nomatch = 0L],
     x,
     warning = "Item 2 is of size 0 but maximum size is 1,")

# Fix for #813 and #758: j evaluated when i selects no rows.
DT <- data.table(x = 1:2)
test(1382.1,
     DT[c(FALSE, FALSE), list(x, 3:4)],
     data.table(x = integer(0), V2 = integer(0)))

DT <- data.table(id = c("a", "a", "b", "b"), var = c(1.1, 2.5, 6.3, 4.5), key = "id")
test(1382.2,
     DT["c", list(id, check = any(var > 3)), nomatch = 0L],
     data.table(id = character(0), check = logical(0), key = "id"))
test(1382.3, DT[c(FALSE), id], character(0))

# keep the first three rows only, then join on one missing and one present id
DT <- DT[1:3]
setkey(DT, id)
test(1382.4,
     DT[c("c", "b"), list(id, check = any(var > 3)), nomatch = 0L],
     data.table(id = "b", check = TRUE, key = "id"))

# Fix for #742: allow.cartesian should be ignored if `i` has no duplicates.
DT <- data.table(id = rep(letters[1:2], 2), var = rnorm(4), key = "id")
test(1383.1, DT[letters[1:3], list(var)], DT[1:5, list(var)])

# Fix for #800: allow.cartesian should be ignored if jsub[1L] has `:=`.
# TODO: maybe still warn if `i` has duplicates?
DT <- data.table(id = c(1, 1), date = c(1992, 1991), value = c(4.1, 4.5), key = "id")
test(1383.2, copy(DT)[DT, a := 1], DT[, a := 1])

# Fix for #476 and #825: after reshape::cast() has been called on a
# data.table, its column names must be unchanged. Needs package reshape.
if ("package:reshape" %in% search()) {
    DT <- data.table(
        ID = rep(c(611557L, 894125L, 898856L, 898899L), c(3L, 5L, 6L, 3L)),
        DATUM = structure(c(16101, 16071, 16261,
                            16104, 16133, 16167, 16201, 16236,
                            16089, 16118, 16147, 16176, 16236, 16208,
                            16163, 16125, 16209), class = "Date"),
        N = c(25L, 9L, 23L,
              29L, 26L, 26L, 27L, 28L,
              39L, 39L, 38L, 36L, 40L, 39L,
              19L, 20L, 19L),
        rank = c(2, 1, 3,
                 1, 2, 3, 4, 5,
                 1, 2, 3, 4, 6, 5,
                 2, 1, 3)
    )
    ans <- cast(DT, ID ~ rank, value = "DATUM")
    test(1383.3, names(DT), c("ID", "DATUM", "N", "rank"))
} else {
    cat("Tests 1383.3 not run. If required call library(reshape) first.\n")
}

# After caret::train() has been fitted on a data.table, its column names must
# be unchanged. Needs package caret.
if ("package:caret" %in% search()) {
    DT <- data.table(x = rnorm(10), y = rnorm(10))
    cv.ctrl <- trainControl(method = 'repeatedcv', number = 5, repeats = 1)
    fit <- train(y ~ x, data = DT, 'lm', trControl = cv.ctrl)
    test(1383.4, names(DT), c("x", "y"))
} else {
    cat("Tests 1383.4 not run. If required call library(caret) first.\n")
}

# DT[col == max(col)] was somehow never tested; it was broken by the
# auto-indexing new in v1.9.4, #858.
DT <- data.table(a = rep(c(1, 2, 3), each = 3), b = rnorm(9))
test(1384, DT[a == max(a)], DT[7:9])

# Duplicates on the RHS of == or %in%
DT <- data.table(id = paste("id", 1:5, sep = ""))
id.sub <- c("id1", "id2", "id3", "id3", "id4")   # deliberate dup
test(1385.1, DT[id %in% id.sub], DT[1:4])
test(1385.2, DT[id == id.sub], DT[1:3])

# A column called 'class': reserved attribute names conflict with auto index names, #
DT <- data.table(class = c('a', 'b'), x = c(1, 2))
test(1386, DT[class == 'a'], DT[1])

# Fix for #774: parsing a$b() in 'j', i.e. calling a function stored in a
# list, reached through $ or through [[.
DT <- data.table(x = 1:5, y = 6:10)
ll <- list(foo = function() 1L)
test(1387.1, copy(DT)[, z := ll$foo()], copy(DT)[, z := 1L])
test(1387.2, copy(DT)[, z := ll[[1L]]()], copy(DT)[, z := 1L])

# Fix for #811: ITime with negative integers gave a wrongly formatted result.
x <- c(1L, -1L, -3700L)
class(x) <- "ITime"
test(1388, as.character(x), c("00:00:01", "-00:00:01", "-01:01:40"))

# Fix for #880: another eval(parse(.)) issue. The column name is stored as
# text in column 'foo' and evaluated against .SD.
DT <- as.data.table(iris)
DT[, foo := "Species"]
test(1389,
     copy(DT)[, bar := eval(parse(text = foo[1]), envir = .SD)],
     copy(DT)[, bar := Species])

# Fix for foverlaps() on floating point (double) interval types: they should
# be incremented by machine tolerance, not by 1L.
DT1 <- data.table(start = 0.88, end = 0.88)
DT2 <- data.table(start = c(0.26, 0.5, 0.55, 0.7),
                  end   = c(0.61, 0.88, 0.88 - .Machine$double.eps^0.5, 0.89))
setkey(DT2)
test(1390.1, foverlaps(DT1, DT2, which = TRUE), data.table(xid = 1L, yid = c(2L, 4L)))

# point intervals on either side of the subject point: no match for either
DT1 <- data.table(start = c(0.3, 0.5), end = c(0.3, 0.5))
DT2 <- data.table(start = 0.4, end = 0.4)
setkey(DT2)
test(1390.2,
     foverlaps(DT1, DT2, which = TRUE),
     data.table(xid = 1:2, yid = c(NA_integer_, NA_integer_)))

# POSIXct, whole seconds
tt <- c(as.POSIXct('2011-10-11 07:49:36'), as.POSIXct('2011-10-11 07:49:37'))
DT1 <- data.table(start = tt, end = tt)
DT2 <- data.table(start = tt[1], end = tt[1])
setkey(DT2)
test(1390.3,
     foverlaps(DT1, DT2, which = TRUE),
     data.table(xid = 1:2, yid = c(1L, NA)))

# POSIXct, tenths of a second
tt <- c(as.POSIXct('2011-10-11 07:49:36.3'),
        as.POSIXct('2011-10-11 07:49:37.4'),
        as.POSIXct('2011-10-11 07:49:37.5'))
DT1 <- data.table(start = tt, end = tt)
DT2 <- data.table(start = tt[2], end = tt[2])
setkey(DT2)
test(1390.4,
     foverlaps(DT1, DT2, which = TRUE),
     data.table(xid = 1:3, yid = c(NA, 1L, NA)))

# POSIXct, fractions of a second given to four decimal places
tt <- c(as.POSIXct('2011-10-11 07:49:36.0003'),
        as.POSIXct('2011-10-11 07:49:36.0199'),
        as.POSIXct('2011-10-11 07:49:36.0399'))
DT1 <- data.table(start = tt, end = tt)
DT2 <- data.table(start = tt[2], end = tt[2])
setkey(DT2)
test(1390.5,
     foverlaps(DT1, DT2, which = TRUE),
     data.table(xid = 1:3, yid = c(NA, 1L, NA)))

# Fix for #891: subset() with duplicate column names.
# Rule for duplicate names: column numbers extract exactly that column;
# names always extract the first column with that name.
DT <- data.table(V1 = 1:5, V2 = 6:10, V3 = 11:15)
setnames(DT, c("V1", "V2", "V1"))
test(1391.1,
     subset(DT, select = c(3L, 2L)),
     DT[, c(3L, 2L), with = FALSE])
test(1391.2,
     subset(DT, select = c("V2", "V1")),
     DT[, c("V2", "V1"), with = FALSE])

# Faster na.omit() built on is_na.
DT <- data.table(x = sample(c(1:2, NA), 30, TRUE),
                 y = sample(c(1:5, NA, NaN), 30, TRUE))
test(1392.1, na.omit(DT), DT[!is.na(x) & !is.na(y)])
# invert=TRUE returns the rows that have any NA instead.
test(1392.2, na.omit(DT, invert = TRUE), DT[is.na(x) | is.na(y)])

# Fix for #899: a mix of ordered and normal factors, where the normal factors
# in more than one data.table have identical levels.
DT1 <- data.table(A = factor(INT(7, 8, 7, 8, 7)), B = factor(6:10), C = 0)
DT2 <- data.table(D = ordered(1:5), A = factor(INT(1:2, 1:2, 1L)), C = 0)
DT3 <- data.table(A = factor(INT(7:8)), C = 0)
ans <- data.table(
    A = factor(INT(7, 8, 7, 8, 7, 1, 2, 1, 2, 1, 7, 8), levels = c("7", "8", "1", "2")),
    B = factor(INT(6:10, rep(NA, 7))),
    C = 0,
    D = ordered(INT(rep(NA, 5), 1:5, rep(NA, 2)))
)
test(1393.1, rbindlist(list(DT1, DT2, DT3), fill = TRUE), ans)

# test for #591 (R-Forge #2491): idcol= adds a leading column with the index
# of the source table for each row.
ans[, ID := rep(1:3, c(5, 5, 2))]
setcolorder(ans, c("ID", LETTERS[1:4]))
test(1393.2, rbindlist(list(DT1, DT2, DT3), fill = TRUE, idcol = "ID"), ans)

# na.omit.data.table (faster version, with the new 'cols=' argument): for every
# non-empty subset of the columns the result must match complete.cases() on
# those columns.
col <- c(1:2, NA_integer_)
DT <- data.table(a = sample(col, 20, TRUE),
                 b = as.numeric(sample(col, 20, TRUE)),
                 c = as.logical(sample(col, 20, TRUE)),
                 d = as.character(sample(col, 20, TRUE)))
# can't use complete.cases on bit64... will have to test integer64 separately.
# if ("package:bit64" %in% search()) {
#   DT[, e := as.integer64(sample(col,20,TRUE))]
# }
test_no <- 1394
for (i in seq_along(DT)) {
    combn(names(DT), i, function(cols) {
        # '<<-' advances the counter defined outside this callback
        test_no <<- signif(test_no + .001, 7)
        got <- na.omit(DT, cols = cols)
        want <- DT[complete.cases(DT[, cols, with = FALSE])]
        test(test_no, got, want)
        0L
    })
}

# data.table-unaware code in packages like knitr must still work.
# Needs package knitr.
if ("package:knitr" %in% search()) {
    DT <- data.table(x = 1, y = 2)
    # kable in knitr v1.6 calls DF[...] syntax
    test(1395, kable(DT), output = "x.*y.*1.*2")
} else {
    cat("Test 1395 not run. If required call library(knitr) first.\n")
}

# Secondary keys must be dropped when an indexed column is updated or deleted.
DT <- data.table(a = 1:3, b = 4:6)
test(1396, DT[a == 2, verbose = TRUE], DT[2], output = "Creating new index 'a'")
test(1397, DT[b == 6, verbose = TRUE], DT[3], output = "Creating new index 'b'")
test(1398, DT[b == 6, verbose = TRUE], DT[3], output = "Using existing index 'b'")
test(1399, key2(DT), c("a", "b"))
# update of 'a' drops index 'a' only
test(1400,
     DT[2, a := 4L, verbose = TRUE],
     data.table(a = c(1L, 4L, 3L), b = 4:6),
     output = "Dropping index 'a' due to update on 'a' (column 1)")
test(1401, key2(DT), "b")
# deleting 'b' drops index 'b'
test(1402,
     DT[, b := NULL, verbose = TRUE],
     data.table(a = c(1L, 4L, 3L)),
     output = "Dropping index 'b' due to delete of 'b' (column 2)")
test(1403, key2(DT), NULL)

# index on a logical column, dropped when the whole column is reassigned
DT <- data.table(x = 1:5)
test(1404, DT[, y := x <= 2L], data.table(x = 1:5, y = c(TRUE, TRUE, FALSE, FALSE, FALSE)))
test(1405, DT[y == TRUE, .N, verbose = TRUE], 2L, output = "Creating new index")
test(1406,
     DT[, y := x <= 3L, verbose = TRUE],
     data.table(x = 1:5, y = c(TRUE, TRUE, TRUE, FALSE, FALSE)),
     output = "Dropping index")
test(1407, DT[y == TRUE, .N], 3L)

# set() and setorder() leave no index behind either
DT <- data.table(x = 1:5, y = 10:6)
test(1408, DT[x == 3, verbose = TRUE], DT[3], output = "Creating")
test(1409, key2(DT), "x")
set(DT, 1:3, 1L, -10L)
test(1410, key2(DT), NULL)
test(1411, DT[x == 5], DT[5])
setorder(DT, y)
test(1412, key2(DT), NULL)
test(1413, DT[x == 5], DT[1])

# multi-column index: an update to any of its columns drops it
DT <- data.table(foo = 1:3, bar = 4:6, baz = 9:7)
set2key(DT, foo, bar, baz)
test(1414, key2(DT), c("foo__bar__baz"))
test(1415, DT[2, bar := 10L, verbose = TRUE],
     output = "Dropping index 'foo__bar__baz' due to update on 'bar'")  # test middle
test(1416, key2(DT), NULL)
set2key(DT, foo, bar, baz)
test(1417, DT[2, baz := 10L, verbose = TRUE],
     output = "Dropping index 'foo__bar__baz' due to update on 'baz'")  # test last
set2key(DT, bar, baz)
test(1418, DT[2, c("foo", "bar") := 10L, verbose = TRUE],
     output = "Dropping index.* due to update on 'bar'")     # test 2nd to 1st
set2key(DT, bar, baz)
test(1419, DT[2, c("foo", "baz") := 10L, verbose = TRUE],
     output = "Dropping index.* due to update on 'baz'")     # test 2nd to 2nd

# setnames() must rename secondary keys along with the columns.
DT <- data.table(a = 1:5, b = 10:6)
set2key(DT, b)
test(1420, key2(DT), "b")
setnames(DT, "b", "foo")
test(1421, key2(DT), "foo")
test(1422, DT[foo == 9, verbose = TRUE], DT[2], output = "Using existing index 'foo'")

# add a two-column index and rename each of its columns in turn
set2key(DT, a, foo)
# also checks that the order of the attributes is retained, although we don't
# use that property currently
test(1423, key2(DT), c("foo", "a__foo"))
test(1424, key2(setnames(DT, "foo", "bar")), c("bar", "a__bar"))
test(1425, key2(setnames(DT, "a", "baz")), c("bar", "baz__bar"))
test(1426, DT[baz == 4L, verbose = TRUE], output = "Creating new index 'baz'")
test(1427, key2(DT), c("bar", "baz__bar", "baz"))
test(1428, DT[bar == 9L, verbose = TRUE], output = "Using existing index 'bar'")
test(1429, key2(setnames(DT, "bar", "a")), c("baz", "a", "baz__a"))

# Finalised == and %in% optimization in i.
# Column b (length 6) fixes the number of rows; a = 1:3 is recycled to match.
DT <- data.table(a = 1:3, b = c(0, 2, 3, 0, 0, 2))
# RHS of == must be length 1 or nrow; anything else is an error
test(1430,
     DT[a == 1:2],
     error = "RHS of == is length 2 which is not 1 or nrow (6). For robustness, no recycling is allowed (other than of length 1 RHS). Consider %in% instead.")
test(1431, DT[a %in% 1:2], DT[c(1, 2, 4, 5)])
# column against column, and against expressions of columns
test(1432, DT[a == b], DT[2:3])
test(1433, DT[a %in% b], DT[c(2, 3, 5, 6)])
test(1434, DT[a == b + 1], DT[c(1, 4, 6)])
test(1435, DT[b == max(a)], DT[3])
# double RHS against an integer column
test(1436,
     DT[a == 2, verbose = TRUE],
     DT[c(2, 5)],
     output = "Coercing double column i.'V1' to integer")
# factor column against a factor RHS
DT[, a := factor(letters[a])]
test(1437,
     DT[a == factor("b"), verbose = TRUE],
     DT[c(2, 5)],
     output = "Creating new index 'a'")

# fread dec=',' e.g. France
# Argument validation first: dec must be one character, must be available in
# some locale, and must differ from sep.
test(1438, fread("A;B\n1;2,34\n", dec="12"), error="dec must be a single character")

# On R versions before 3.3.0 the call is wrapped in suppressWarnings(); from 3.3.0 on it is passed through identity().
test(1439, (if (base::getRversion()<"3.3.0") suppressWarnings else identity)(fread("A;B\n1;2,34\n", dec="1")),
     error="Unable to change to a locale which provides the desired dec")  # this test runs on many machines so chose a dec for this test which is sure not to be valid in any locale
test(1440, fread("A;B\n1;2,34\n", sep=".", dec="."), error="The two arguments to fread 'dec' and 'sep' are equal ('.')")
# The remaining tests run on Windows, or elsewhere when `locale -a` lists fr_FR.utf8.
if (.Platform$OS.type=="windows" ||
    (!inherits(tt <- try(system("locale -a", intern=TRUE)), "try-error") && "fr_FR.utf8" %in% tt )) {
    # e.g. on Matt's machine where I've installed fr_FR.utf8 which has dec=","
    old = options(datatable.fread.dec.locale=if (.Platform$OS.type=="unix") "fr_FR.utf8" else "French_France.1252")
    # remember LC_NUMERIC so the tests can check that fread restores it
    oldlocale = Sys.getlocale("LC_NUMERIC")
    test(1441, fread("A;B\n1;2,34\n", dec=",", verbose=TRUE), data.table(A=1L, B=2.34), output="success!")
    test(1442, Sys.getlocale("LC_NUMERIC"), oldlocale)  # locale restored after success
    test(1443.1, fread("A;B\n1;2,34\n", dec=",", sep=",", verbose=TRUE), error="'dec' and 'sep' are equal", output="success!")
    test(1443.2, Sys.getlocale("LC_NUMERIC"), oldlocale)  # locale restored after error. [output check in 1443.1 ensures it was changed]
    
    # sep=".", issue #502
    input = paste( paste("192.168.1.", 1:10, sep=""), collapse="\n")
    test(1444.1, fread(input, sep=".", dec="*"), error="Unable to change to a locale which provides the desired dec")
    test(1444.2, fread(input, sep="."), ans<-data.table(V1=192L,V2=168L,V3=1L,V4=1:10))   # by default, dec="," when sep="."
    test(1444.3, fread(paste(paste("192. 168. 1. ", 1:10, sep = ""), collapse="\n"), sep="."), ans)
    test(1444.4, fread(paste(paste("Hz.BB.GHG.", 1:10, sep = ""), collapse="\n"), sep="."), data.table(V1="Hz",V2="BB",V3="GHG",V4=1:10))

    options(old)  # return to the default set in .onLoad
} else {
    cat("Tests 1441-1444 not run. If required install the 'fr_FR.utf8' locale.\n")
}

# doubled quote inside a quoted field followed by an embedded newline
# This file is 36 rows to move that line outside the top, middle and bottom 5 test rows
# The expected value keeps the doubled quotes ("") exactly as they appear in the field.
test(1445, fread("doublequote_newline.csv")[7:10], data.table(A=c(1L,1L,2L,1L), B=c("a","embedded \"\"field\"\"\nwith some embedded new\nlines as well","not this one","a")))
# the example from #489 directly :
test(1446, fread('A,B,C\n233,"AN ""EMBEDDED"" QUOTE FIELD",morechars\n'), data.table(A=233L, B='AN ""EMBEDDED"" QUOTE FIELD', C='morechars'))

# # unescaped quoted subregion followed by newline
# # commented this test for now as the logic now is to redirect to normal checks
# test(1447, fread('A,B,C\n233,"an unescaped "embedded"
# region followed by newline",morechars\n'))

# when detecting types ...
# 1448.1 writes the embedded newline as \n; 1448.2 is the same input with a
# literal line break inside the R string. The continuation lines of 1448.2
# start at column 0 because they are part of the string literals.
test(1448.1, fread('A,B\n1,"embedded""\nquote"\n2,should be ok\n'),
           data.table(A=1:2,B=c('embedded""\nquote','should be ok')))
test(1448.2, fread('A,B\n1,"embedded""
quote"\n2,should be ok\n'),
           data.table(A=1:2,B=c('embedded""
quote','should be ok')))

# Quoted multi-line fields (scrambled data thanks to #810). Only run when
# bit64 is attached, since the first column is expected as integer64.
if ("package:bit64" %in% search()) {
    test(
        1449,
        fread("quoted_multiline.csv")[c(1, 43:44), c(1, 22:24), with = FALSE],
        data.table(
            GPMLHTLN = as.integer64(c("3308386085360", "3440245203140", "1305220146734")),
            BLYBZ = c(0L, 4L, 6L),
            ZBJBLOAJAQI = c("LHCYS AYE ZLEMYA IFU HEI JG FEYE", "", ""),
            JKCRUUBAVQ = c("", ".\\YAPCNXJ\\004570_850034_757\\VWBZSS_848482_600874_487_PEKT-6-KQTVIL-7_30\\IRVQT\\HUZWLBSJYHZ\\XFWPXQ-WSPJHC-00-0770000855383.KKZ", "")
        )
    )
}

# Fix for #927: set() on .SD inside j must be refused.
DT <- data.table(x = 1L, y = 2L)
test(1450,
     DT[, set(.SD, j = "x", value = 10L)],
     error = ".SD is locked. Updating .SD by reference using := or set")

# shallow() with a cols argument (not exported yet): the columns of a shallow
# copy must have the same addresses as the matching columns of the original.
DT <- setDT(lapply(1:5, sample, 10, TRUE))
ans1 <- sapply(DT, address)
# column addresses of shallow(DT, cols)
fans2 <- function(DT, cols=NULL) {
    sapply(shallow(DT, cols), address)
}
test(1451.1, ans1, fans2(DT))                               # default/old functionality is intact
test(1451.2, ans1[3:4], fans2(DT, 3:4))                     # integer column numbers
test(1451.3, ans1[c(5, 2)], fans2(DT, c(5, 2)))             # numeric column numbers
test(1451.4, ans1[c(4, 2, 4)], fans2(DT, c(4, 2, 4)))       # duplicate column numbers
test(1451.5, ans1[3:2], fans2(DT, c("V3", "V2")))           # column names
test(1451.6, ans1[c(3, 3)], fans2(DT, c("V3", "V3")))       # duplicate column names
test(1451.7, shallow(DT, integer(0)), null.data.table())    # length-0 integer input
test(1451.8, shallow(DT, character(0)), null.data.table())  # length-0 character input

# A file name that does not exist must produce the "does not exist" error.
test(1452, fread("notexist.csv"), error="File 'notexist.csv' does not exist. Include one or more spaces to consider the input a system command.")

# Test for #802
# A line with the wrong number of columns must be reported together with its line number.
test(1453, fread("fread_line_error.csv"), error="Expecting 24 cols, but line 12 contains")

# no-sep-found => sep="\n", use case for this in #738
# 1454.1: the requested sep (backtick) is present; 1454.2: it is absent and a single column is expected.
test(1454.1, fread('"Foo"`"Bar"\n1`2\n',sep="`"), data.table(Foo=1L,Bar=2L))
test(1454.2, fread('"Foo"\n1\n',sep="`"), data.table(Foo=1L))

# Fix for #958 - Don't create secondary keys on .SD
# Filters .SD with == inside a grouped j; each group of `a` contains exactly one row with b == 'B'.
DT <- data.table(a=c(1, 1, 1, 0, 0), b=c("A", "B", "A1", "A", "B"))
test(1455, DT[, nrow(.SD[b == 'B']), by=.(a)], data.table(a=c(1,0), V1=1L))

# Test for chmatch2 bug fix
# `table` holds "a","a","b","b","c","c". The expected results show each position
# of `table` being returned at most once: repeated values in x take successive
# positions, and NA once none are left or the value is absent.
x1 = c("b", "a", "d", "a", "c", "a")
x2 = c("a", "a", "a")
x3 = c("d", "a", "a", "d", "a")
table = rep(letters[1:3], each=2)
test(1456.1, chmatch2(x1, table), as.integer(c(3,1,NA,2,5,NA)))
test(1456.2, chmatch2(x2, table), as.integer(c(1,2,NA)))
test(1456.3, chmatch2(x3, table), as.integer(c(NA,1,2,NA,NA)))

# Add tests for which_
# x is a random sample that may contain NA. which_() is compared with base
# which(); with FALSE as second argument it is expected to return the positions
# where the condition is FALSE, i.e. which(!cond).
x = sample(c(-5:5, NA), 25, TRUE)
test(1458.1, which(x > 0), which_(x > 0)) # default is TRUE
test(1458.2, which(x > 0), which_(x > 0, TRUE)) # test explicitly
test(1458.3, which(!x > 0), which_(x > 0, FALSE))

# Fix for #982. Testing subsetDT on complex/raw vectors, and added tests for other types.
# DT has 4 rows and one column each of type integer, double, complex, raw, list,
# logical and character. Every test calls the C routine directly with row indices
# from which() and all columns, and compares with the same rows taken from the
# data.frame version of DT.
DT = data.table(a=c(1:3,NA_integer_), b=c(1,2,3,NA), c=as.complex(c(1:3,NA)), d=as.raw(1:4), 
          e=as.list(1:4), f=c(FALSE,FALSE,TRUE,NA), g=c("a", "b", "c", NA_character_))
# 1459.1-1459.6: which() drops NA, so only fully matching rows are selected
test(1459.1, .Call("CsubsetDT", DT, which(DT$a > 2), seq_along(DT)), setDT(as.data.frame(DT)[3, , drop=FALSE]))
test(1459.2, .Call("CsubsetDT", DT, which(DT$b > 2), seq_along(DT)), setDT(as.data.frame(DT)[3, , drop=FALSE]))
test(1459.3, .Call("CsubsetDT", DT, which(Re(DT$c) > 2), seq_along(DT)), setDT(as.data.frame(DT)[3, , drop=FALSE]))
test(1459.4, .Call("CsubsetDT", DT, which(DT$d > 2), seq_along(DT)), setDT(as.data.frame(DT)[3:4, , drop=FALSE]))
test(1459.5, .Call("CsubsetDT", DT, which(DT$f), seq_along(DT)), setDT(as.data.frame(DT)[3, , drop=FALSE]))
test(1459.6, .Call("CsubsetDT", DT, which(DT$g == "c"), seq_along(DT)), setDT(as.data.frame(DT)[3, , drop=FALSE]))
# 1459.7-1459.11: additionally select the row whose tested column is NA (row 4)
test(1459.7, .Call("CsubsetDT", DT, which(DT$a > 2 | is.na(DT$a)), seq_along(DT)), setDT(as.data.frame(DT)[3:4,]))
test(1459.8, .Call("CsubsetDT", DT, which(DT$b > 2 | is.na(DT$b)), seq_along(DT)), setDT(as.data.frame(DT)[3:4,]))
test(1459.9, .Call("CsubsetDT", DT, which(Re(DT$c) > 2 | is.na(DT$c)), seq_along(DT)), setDT(as.data.frame(DT)[3:4,]))
test(1459.10, .Call("CsubsetDT", DT, which(DT$f | is.na(DT$f)), seq_along(DT)), setDT(as.data.frame(DT)[3:4,]))
test(1459.11, .Call("CsubsetDT", DT, which(DT$g == "c" | is.na(DT$g)), seq_along(DT)), setDT(as.data.frame(DT)[3:4,]))
# 1459.12: row index 5 is past the last row of DT; expected result is what data.frame indexing returns for it
test(1459.12, .Call("CsubsetDT", DT, 5L, seq_along(DT)), setDT(as.data.frame(DT)[5,]))

# Test for na.omit with list, raw and complex types
# Expected results: the list column `w` never causes a row to be dropped (1460.1),
# while NA in the complex column `v` or the double column `y` does.
DT = data.table(x=c(1L,1L,NA), y=c(NA, NA, 1), z=as.raw(1:3), w=list(1,NA,2), v=c(1+5i, NA, NA))
test(1460.1, na.omit(DT, cols="w"), DT)
test(1460.2, na.omit(DT, cols="v"), DT[1])
test(1460.3, na.omit(DT, cols=c("v", "y")), DT[0])
test(1460.4, na.omit(DT, cols=c("z", "v")), DT[1])
test(1460.5, na.omit(DT, cols=c("w", "v")), DT[1])

# Fix for #985
# c(lapply(.SD, mean), lapply(.SD, sd)) in a grouped j must give the same result
# as the equivalent call written with anonymous wrapper functions.
DT = data.table(x=c("a", "a", "b", "b"), v1=sample(4), v2=sample(4))
test(1461.1, DT[, c(lapply(.SD, mean), lapply(.SD, sd)), by=x], 
             DT[, c(lapply(.SD, function(x) mean(x)), lapply(.SD, function(x) sd(x))), by = x])


# Tests for #994
# mget() on column names inside j must return the same columns as with=FALSE / .SD.
DT = data.table(x=c("a", "a", "b", "b"), v1=sample(4), v2=sample(4))
cols = c("v1", "v2")
test(1462.1, DT[, mget(cols, as.environment(-1))], DT[, cols, with=FALSE])   # as.environment needed for testing on pre-R3.0.0 which we don't want to depend on yet
test(1462.2, DT[, mget(cols[1], as.environment(-1))], DT[, cols[1], with=FALSE])
test(1462.3, DT[, sum(unlist(mget(cols, as.environment(-1)))), by=x], DT[, sum(unlist(.SD)), by=x, .SDcol=cols])

# test for 'shift'
# The same six cases are repeated for each input type: lag by 1, lag by 1:2
# (list result), lag with fill 0, and the three "lead" counterparts.
# integer and factor input
x=1:5
y=factor(x)
test(1463.1,  shift(x,1L),                  as.integer(c(NA, 1:4)))
test(1463.2,  shift(x,1:2),                 list(as.integer(c(NA, 1:4)), as.integer(c(NA, NA, 1:3))))
test(1463.3,  shift(x,1L, 0L),              as.integer(c(0L, 1:4)))
test(1463.4,  shift(x,1L, type="lead"),     as.integer(c(2:5, NA)))
test(1463.5,  shift(x,1:2, type="lead"),    list(as.integer(c(2:5, NA)), as.integer(c(3:5, NA, NA))))
test(1463.6,  shift(x,1L, 0L, type="lead"), as.integer(c(2:5, 0L)))
test(1463.7,  shift(y,1L),                  factor(c(NA,1:4), levels=1:5))
test(1463.8,  shift(y,1L, type="lead"),     factor(c(2:5, NA), levels=1:5))

# double input
x=as.numeric(x)
test(1463.9,  shift(x,1L),                  as.numeric(c(NA, 1:4)))
test(1463.10,  shift(x,1:2),                list(as.numeric(c(NA, 1:4)), as.numeric(c(NA, NA, 1:3))))
test(1463.11, shift(x,1L, 0L),              as.numeric(c(0L, 1:4)))
test(1463.12, shift(x,1L, type="lead"),     as.numeric(c(2:5, NA)))
test(1463.13, shift(x,1:2, type="lead"),    list(as.numeric(c(2:5, NA)), as.numeric(c(3:5, NA, NA))))
test(1463.14, shift(x,1L, 0L, type="lead"), as.numeric(c(2:5, 0L)))

# integer64 input, only when bit64 is attached
if ("package:bit64" %in% search()) {
    x=as.integer64(x)
    test(1463.15, shift(x,1L),                  as.integer64(c(NA, 1:4)))
    test(1463.16, shift(x,1:2),                 list(as.integer64(c(NA, 1:4)), as.integer64(c(NA, NA, 1:3))))
    test(1463.17, shift(x,1L, 0L),              as.integer64(c(0L, 1:4)))
    test(1463.18, shift(x,1L, type="lead"),     as.integer64(c(2:5, NA)))
    test(1463.19, shift(x,1:2, type="lead"),    list(as.integer64(c(2:5, NA)), as.integer64(c(3:5, NA, NA))))
    test(1463.20, shift(x,1L, 0L, type="lead"), as.integer64(c(2:5, 0L)))
}

# character input
x=as.character(x)
test(1463.21, shift(x,1L),                  as.character(c(NA, 1:4)))
test(1463.22, shift(x,1:2),                 list(as.character(c(NA, 1:4)), as.character(c(NA, NA, 1:3))))
test(1463.23, shift(x,1L, 0L),              as.character(c(0L, 1:4)))
test(1463.24, shift(x,1L, type="lead"),     as.character(c(2:5, NA)))
test(1463.25, shift(x,1:2, type="lead"),    list(as.character(c(2:5, NA)), as.character(c(3:5, NA, NA))))
test(1463.26, shift(x,1L, 0L, type="lead"), as.character(c(2:5, 0L)))

# logical input; fill 0L is expected to come back as FALSE
x=c(TRUE,FALSE,TRUE,FALSE,TRUE)
test(1463.27, shift(x,1L),                  c(NA, x[-5L]))
test(1463.28, shift(x,1:2),                 list(c(NA, x[-5L]), c(NA, NA, x[-(4:5)])))
test(1463.29, shift(x,1L, 0L),              c(FALSE, x[-5L]))
test(1463.30, shift(x,1L, type="lead"),     c(x[-1L], NA))
test(1463.31, shift(x,1:2, type="lead"),    list(c(x[-1L],NA), c(x[-(1:2)],NA,NA)))
test(1463.32, shift(x,1L, 0L, type="lead"), c(x[-(1)], FALSE))

# Fix for #1009 segfault in shift
# shift amount (2) larger than the input length (1): result is a single NA
val = runif(1)
test(1463.33, shift(val, 2L), NA_real_)
test(1463.34, shift(val, 2L, type="lead"), NA_real_)

# invalid arguments: negative n, and fill longer than one element
test(1463.35, shift(1:5, -1L), error="n must be non-negative integer")
test(1463.36, shift(1:5, 1L, fill=c(1:2)), error="fill must be a vector of length")

# add tests for date and factor?

# test for 'give.names=TRUE' on vectors
# With give.names=TRUE the list returned for a plain vector is expected to be
# named "<variable>_lag_<n>", here x_lag_1 and x_lag_2.
x = 1:10
nm = c("x_lag_1", "x_lag_2")
ans = list(as.integer(c(NA, 1:9)), as.integer(c(NA, NA, 1:8)))
setattr(ans, 'names', nm)
# numbered 1463.37: 1463.27 is already taken by the logical-input shift test,
# and a duplicate number makes a failure report ambiguous
test(1463.37, shift(x, 1:2, give.names=TRUE), ans)

# FR #686
# Run-length ids: column a has runs A,A,B,B,C,C,C,A,B,B, so the expected ids are
# 1,1,2,2,3,3,3,4,5,5. The same runs are checked for the character column and
# its integer (b) and double (c) encodings, via both rleidv() and rleid().
DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)))
DT[, b := as.integer(factor(a))][, c := as.numeric(factor(a))]
test(1464.1, rleidv(DT, "a"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
test(1464.2, rleid(DT$a), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
test(1464.3, rleidv(DT, "b"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
test(1464.4, rleid(DT$b), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
test(1464.5, rleidv(DT, "c"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
test(1464.6, rleid(DT$c), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
# complex input is rejected with an error
test(1464.7, rleid(as.complex(c(1,0+5i,0+5i,1))), error="Type 'complex' not supported")

# data.table-xts conversion #882
# Round trip in both directions, once with a Date index and once with a POSIXct
# index: dt -> xts must equal the xts built by hand, and xts -> data.table must
# equal dt when attributes are ignored.

if ("package:xts" %in% search()) {
    # Date index
    dt = data.table(index = as.Date((as.Date("2014-12-12")-49):as.Date("2014-12-12"),origin="1970-01-01"),quantity = rep(c(1:5),10),value = rep(c(1:10)*100,5))
    xt = as.xts(matrix(data = c(dt$quantity, dt$value),ncol = 2,dimnames = list(NULL,c("quantity","value"))),order.by = dt$index)
    dt_xt = as.data.table(xt)
    xt_dt = as.xts.data.table(dt)
    test(1465.1, all.equal(dt, dt_xt, check.attributes = FALSE))
    test(1465.2, xt, xt_dt)
    # POSIXct index
    dt <- data.table(index = as.POSIXct(as.Date((as.Date("2014-12-12")-49):as.Date("2014-12-12"),origin="1970-01-01"),origin="1970-01-01"),quantity = rep(c(1:5),10),value = rep(c(1:10)*100,5))
    xt = as.xts(matrix(data = c(dt$quantity, dt$value),ncol = 2,dimnames = list(NULL,c("quantity","value"))),order.by = dt$index)
    dt_xt = as.data.table(xt)
    xt_dt = as.xts.data.table(dt)
    test(1465.3, all.equal(dt, dt_xt, check.attributes = FALSE))
    test(1465.4, xt, xt_dt)
}

# as.data.table.default #969
# Converting directly must give the same result as converting via as.data.frame().
ar <- array(NA, dim=c(10,4),dimnames = list(NULL,paste("col",1:4,sep="")))
test(1466.1, as.data.table(as.data.frame(ar)), as.data.table(ar)) # array type
x <- rep(Sys.time(),3)
test(1466.2, as.data.table(as.data.frame(x)), as.data.table(x)) # posix type

# fix for #1001
# With auto indexing on, an i that matches no row combined with := must leave DT as it was.
options(datatable.auto.index=TRUE)
DT <- data.table(a=1:2)
test(1467.1, DT[a==3, b:=d+1], DT)
# restore
# NOTE(review): this sets the option to FALSE rather than to its value before this section
options(datatable.auto.index=FALSE)

# fix for first bug reported in #1006 on 'foverlaps()'
# Reference intervals are [-0.1, 0] and [0, 0.1]; the queries are zero-width
# intervals. Only the queries at -0.05 and 0.05 are expected to match.
x <- c(-0.1, 0, 0.1)
n <- length(x)
dt.ref <- data.table(start=x[-n], end=x[-1], key=c("start", "end"))
dt.query <- data.table(q1=c(-0.2, -0.05, 0.05, 0.15), q2=c(-0.2, -0.05, 0.05, 0.15), key=c("q1", "q2"))
ans=cbind(dt.ref[, .(start,end)], dt.query[2:3, .(q1,q2)])
setkey(ans, q1,q2)
test(1468.1, foverlaps(dt.query, dt.ref, nomatch=0L), ans)
# fix and additional tests for #1006 following OP's follow-up.
# A single zero-width query that lies inside the second reference interval only.
dt1 = data.table(x=c(-6.36917800737546, -2.19964384651646),
                 y=c(-2.19964384651646, 4.07116428752538))
dt2 = data.table(x= 2.91816502571793, y=2.91816502571793)
setkey(dt1)
setkey(dt2)
test(1468.2, foverlaps(dt2, dt1, which=TRUE), data.table(xid=1L, yid=2L))
# same layout with whole numbers
dt1 = data.table(x=c(-6,-3), y=c(-3,4))
dt2 = data.table(x=3,y=3)
setkey(dt1)
setkey(dt2)
test(1468.3, foverlaps(dt2, dt1, which=TRUE), data.table(xid=1L, yid=2L))


# Fix for #1010 (discovered while fixing #1007). Don't retain key if i had no key, but irows is sorted, and roll != FALSE... See example in #1010.
# i = J(c(2,0)) is unsorted and unkeyed, so the result must have no key; with SJ() (1469.5) the key "x" is expected.
DT = data.table(x=c(-5,5), y=1:2, key="x")
test(1469.1, key(DT[J(c(2,0)), roll=TRUE]), NULL)
test(1469.2, key(DT[J(c(2,0)), .(x,y), roll=TRUE]), NULL)
test(1469.3, key(DT[J(c(2,0)), y, roll=TRUE, by=.EACHI]), NULL)
test(1469.4, key(DT[J(c(2,0))]), NULL)
test(1469.5, key(DT[SJ(c(2,0)), roll=TRUE]), "x")
test(1469.6, key(DT[J(c(2,0)), roll="nearest"]), NULL)

# 1007 fix, dealing with Inf and -Inf correctly in rolling joins.
# 1470.2: with a finite roll window of 10, the value 2 is not expected to roll from -Inf (y is NA)
DT = data.table(x=c(-Inf, 3, Inf), y=1:3, key="x")
test(1470.1, DT[J(c(2,-Inf,5,Inf)), roll=Inf], data.table(x=c(2,-Inf,5,Inf), y=c(1, 1, 2, 3)))
test(1470.2, DT[J(c(2,-Inf,5,Inf)), roll=10], data.table(x=c(2,-Inf,5,Inf), y=INT(c(NA, 1, 2, 3))))
test(1470.3, DT[SJ(c(2,-Inf,5,Inf)), roll=Inf], data.table(x=c(-Inf,2,5,Inf), y=c(1, 1, 2, 3), key="x"))

# 1006, second bug with -Inf, now that #1007 is fixed.
# Four consecutive reference intervals from -Inf to Inf; each of the four queries is expected to match one of them in order.
x <- c(-Inf, -0.1, 0, 0.1, Inf)
n <- length(x)
dt.ref <- data.table(start=x[-n], end=x[-1], key=c("start", "end"))
dt.query <- data.table(q1=c(-0.2, -0.05, 0.05, 0.15), q2=c(-0.2, -0.05, 0.05, 0.15), key=c("q1", "q2"))
test(1471, foverlaps(dt.query, dt.ref), data.table(dt.ref, dt.query, key=c("q1", "q2")))

# #1014 (segfault) fix
# length-1 input shifted by 1 and by 2: both results are a single NA
test(1472, shift(1, 1:2, NA, 'lag'), list(NA_real_, NA_real_))

# #528, type=equal simple test
# dt1 = data.table(x=1:5, y=6:10)
# dt2 = data.table(x=3:7, y=8:12)
# setkey(dt1)
# setkey(dt2)
# test(1473, foverlaps(dt1,dt2, which=TRUE, nomatch=0L, type="equal"),
#            data.table(xid=3:5, yid=1:3))

# More tests for `frankv`, #760
# Ranking a data.table by column y must agree with ranking the vector DT$y,
# in ascending order and, via order=-1L / -y, in descending order.
DT = data.table(x=c(4, 1, 4, NA, 1, NA, 4), y=c(1, 1, 1, 0, NA, 0, 2))
test(1474.1, frankv(DT, "y", ties.method="dense"), frankv(DT$y, ties.method="dense"))
test(1474.2, frank(DT, y, ties.method="dense"), frank(DT$y, ties.method="dense"))
test(1474.3, frankv(DT, "y", order=-1L, ties.method="dense"), frankv(-DT$y, ties.method="dense"))
test(1474.4, frank(DT, -y, ties.method="dense"), frank(-DT$y, ties.method="dense"))

# uniqueN, #884, part of #756 and part of #1019
# 1475.1 counts unique rows of the whole table, 1475.2 unique rows of .SD per group of A
DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6))
test(1475.1, uniqueN(DT), 10L)
test(1475.2, DT[, .(uN=uniqueN(.SD)), by=A], data.table(A=1:3, uN=c(3L,4L,3L)))

# preserve class attribute in GForce mean (and sum)
# grouped mean of a Date column is compared with the result of base aggregate()
DT <- data.table(x = rep(1:3, each = 3), y = as.Date(seq(Sys.Date(), (Sys.Date() + 8), by = "day")))
test(1476.1, DT[, .(y=mean(y)), x], setDT(aggregate(y ~ x, DT, mean)))

# test for 'transpose' of a list, TODO: integer64 support.
# ll is a list of 12 vectors of random length: elements 1-3 integer, 4-6 double,
# 8-9 logical, and the rest character. Because the logical branch tests x > 7,
# element 7 falls through to the character branch.
ll = lapply(1:12, function(x) {
    if (x <= 3) sample(10, sample(5:10, 1L))
    else if (x > 3 & x <= 6) as.numeric(sample(101:115, sample(7:12, 1L)))
    else if (x > 7 & x <= 9) sample(c(TRUE, FALSE), sample(7:9, 1L), TRUE)
    else sample(letters, sample(5:10, 1L))
})
# reference result: element x of every input vector, collected with sapply()
ans1 = setDT(transpose(ll))
ans2 = setDT(lapply(seq_along(ans1), function(x) sapply(ll, `[`, x)))
test(1477.1, ans1, ans2)
# double-only input
ans1 = setDT(transpose(ll[4:6]))
ans2 = setDT(lapply(seq_along(ans1), function(x) sapply(ll[4:6], `[`, x)))
test(1477.9, ans1, ans2)
# logical-only input
ans1 = setDT(transpose(ll[8:9]))
ans2 = setDT(lapply(seq_along(ans1), function(x) sapply(ll[8:9], `[`, x)))
test(1477.10, ans1, ans2)
# class is preserved?
dt = data.table(x=1:5, y=6:10)
test(1477.2, transpose(dt), as.data.table(t(as.matrix(dt))))
# factor column coerce to character
ll = list(factor(letters[1:5]), factor(letters[6:8]))
test(1477.3, transpose(ll), list(c("a", "f"), c("b", "g"), c("c", "h"), c("d", NA), c("e", NA)))
# for data.frames
test(1477.4, transpose(data.frame(x=1:2, y=3:4)), data.frame(V1=c(1L,3L), V2=c(2L,4L)))
# test for `tstrsplit`
# tstrsplit() must equal transpose(strsplit())
ll = sapply(ll, paste, collapse=",")
test(1477.5, transpose(strsplit(ll, ",", fixed=TRUE)), tstrsplit(ll, ",", fixed=TRUE))
# invalid input: non-list, complex element, nested list
test(1477.6, transpose(1:5), error="l must be a list")
test(1477.7, transpose(list(as.complex(c(1, 1+5i)))), error="Unsupported column type")
test(1477.8, transpose(list(list(1:5))), error="Item 1 of list input is")

# #480 `setDT` and 'lapply'
# setDT() applied through lapply(), including to NULL and an empty list; every
# result is expected to report truelength 100.
ll = list(data.frame(a=1), data.frame(x=1, y=2), NULL, list())
ll <- lapply(ll, setDT)
test(1478.1, sapply(ll, truelength), rep(100L, 4L))
test(1478.2, sapply(ll, length), INT(1,2,0,0))

# rbindlist stack imbalance issue, #980.
# nested rbindlist() over lists of NULL must return an empty data.table
test(1479, rbindlist(replicate(4,rbindlist(replicate(47, NULL), 
      use.names=TRUE, fill=TRUE)), use.names=TRUE, fill=TRUE), null.data.table())

# #936, assigning list column to a factor column by reference
DT <- data.table(x = factor(c("a", "b c", "d e f")))
test(1480, DT[, x := strsplit(as.character(x), " ")], data.table(x=list("a", letters[2:3], letters[4:6])))

# #970, over-allocation issue
# a 101-column data.frame converted under two different datatable.alloccol
# settings (100 and 101) must give equal data.tables
a=data.frame(matrix(1,ncol=101L))
options(datatable.alloccol=100L)
ans1 = data.table(a)
options(datatable.alloccol=101L)
ans2 = data.table(a)
test(1481, ans1, ans2)
# Global option. so reset back, else test 1478 fails.
options(datatable.alloccol=100L)

# #479, check := assignment in environment (actual case is when loaded from disk, but we'll just simulate a scenario here).
# The class attribute is set by hand on a data.frame, so the object claims to be
# a data.table without having been created as one (truelength is 0).
ee = new.env()
ee$DT = data.frame(x=1L, y=1:3)
setattr(ee$DT, 'class', c("data.table", "data.frame"))
test(1482.1, truelength(ee$DT), 0L) # make sure that the simulated environment is right.
test(1482.2, ee$DT[, z := 3:1], data.table(x=1L, y=1:3, z=3:1), warning="Invalid .internal.selfref detected and")
test(1482.3, truelength(ee$DT) >= 100L, TRUE) # truelength restored?

# Fix for #499 and #945
# joins and merges on factor key columns that contain NA and differing levels
x <- data.table(k=as.factor(c(NA,1,2)),v=c(0,1,2), key="k")
y <- data.table(k=as.factor(c(NA,1,3)),v=c(0,1,3), key="k")
test(1483.1, x[y], data.table(k=factor(c(NA,1,3)), v=c(0,1,NA), i.v=c(0,1,3), key="k"))
test(1483.2, merge(x,y,all=TRUE), data.table(k=factor(c(NA,1,2,3)), v.x=c(0,1,2,NA), v.y=c(0,1,NA,3), key="k"))

# character column on one side, factor column on the other
x <- data.table(country="US")
y <- data.table(country=factor("USA"))
test(1483.3, merge(x,y,by="country",all=T), data.table(country=factor(c("US", "USA")), key="country"))
setkey(y)
test(1483.4, y[x], data.table(country=factor("US"), key="country"))

# Fix for #842
# Helper for test 1484: builds one list("bla1", "bla2") per element of `x`, then
# overwrites the positions given in `setnull` with NULL (length is unchanged).
SomeFunction <- function(x, setnull=1L) {
  res <- lapply(seq_along(x), function(i) list("bla1", "bla2"))
  res[setnull] <- list(NULL)
  res
}
# j returns a list whose NULL element is in position 1 in one call and position 2
# in the other; both calls are expected to give the same result.
DT <- data.table(ID=1:3, key="ID")
test(1484, DT[, SomeFunction(ID, setnull=1L)], DT[, SomeFunction(ID, setnull=2L)])

# Fix for #868
# as.data.table() on the matrix returned by combn() over a factor
vals = c("setosa", "versicolor", "virginica")
if (base::getRversion()>="3.1.0") {
  # depends on bug fix to combn() in R 3.1.0
  test(1485, as.data.table(combn(unique(iris$Species),2)), data.table(vals[1:2], vals[c(1,3)], vals[2:3]))
}

# Fix for #955
# Subsetting a POSIXct column with == must return the same rows with automatic
# indexing switched off (ans*.1) and switched on (ans*.2). ans1.* compares the
# column with one of its own elements, ans2.* with a freshly built POSIXct value.
DT <- data.table(Time=.POSIXct(0, tz="UTC")+0:1, Value=1:2)
options(datatable.auto.index=FALSE)  # Have to turn off to avoid error.
ans1.1 = DT[Time==Time[1]]
ans2.1 = DT[Time==.POSIXct(0, tz="UTC")]
options(datatable.auto.index=TRUE)
ans1.2 = DT[Time==Time[1]]
ans2.2 = DT[Time==.POSIXct(0, tz="UTC")]
test(1486.1, as.data.frame(ans1.1), as.data.frame(ans1.2))
# compare against ans2.2 (auto.index=TRUE): comparing ans2.1 with itself could never fail
test(1486.2, as.data.frame(ans2.1), as.data.frame(ans2.2))

# Fix for #832
# A matrix given the data.table class is not a list underneath: setattr() must
# refuse it (1487.1), and printing such an object must raise an error (1487.2).
x <- matrix(1:9, ncol=3)
setattr(x, "names", paste("V", seq_len(length(x)), sep = ""))
test(1487.1, setattr(x, "class", c("data.table", "data.frame")), error="Internal structure doesn't seem to be a list")
x <- matrix(1:9, ncol=3)
class(x) = c("data.table", "data.frame")
# not sure how to test this one, so using `tryCatch`
test(1487.2, tryCatch(print(x), error=function(k) "bla"), "bla")

# Fix for #1043
# data.table() on a 3-way table object must match data.frame() on it, once the
# count column is renamed to Freq and the factor columns are made character.
DT = data.table(grp=LETTERS[1:2], categ=rep(c("X","Y"), each=2L), condition=rep(c("P","Q"), each=4L), value=sample(8))
tbl = with(DT, table(grp, categ, condition))
ans1 = setnames(setDF(data.table(tbl)), "N", "Freq")
ans2 = data.frame(tbl)
ans2[1:3] = lapply(ans2[1:3], as.character)
test(1488, ans1, ans2)

# joins where x is integer type and i is logical type
# TRUE in i is expected to match the key value 1
DT = data.table(x=1:5, y=6:10, key="x")
test(1489, DT[.(TRUE)], DT[1L])

# Fix for #932
# For each column type (integer, double, logical, character) a secondary key is
# set with set2key() and ==/negated == subsets, including comparisons with NA
# and NaN, are compared with base subset().
DT <- data.table(v1 = c(1:3, NA), v2 = c(1,NA,2.5,NaN), v3=c(NA, FALSE, NA, TRUE), v4=c("a", NA, "b", "c"))
options(datatable.auto.index = TRUE) # just to be sure
set2key(DT, v1)
test(1490.1,  DT[v1==3],      subset(DT, v1==3))
test(1490.2,  DT[!v1==3],     subset(DT, !v1==3))
test(1490.3,  DT[v1==NA],     subset(DT, v1==NA))
test(1490.4,  DT[!v1==NA],    subset(DT, !v1==NA))

set2key(DT, v2)
test(1490.5,  DT[v2==2.5],    subset(DT, v2==2.5))
test(1490.6,  DT[!v2==2.5],   subset(DT, !v2==2.5))
test(1490.7,  DT[v2==NA],     subset(DT, v2==NA))
test(1490.8,  DT[!v2==NA],    subset(DT, !v2==NA))
test(1490.9,  DT[v2==NaN],    subset(DT, v2==NaN))
test(1490.10, DT[!v2==NaN],   subset(DT, !v2==NaN))

set2key(DT, v3)
test(1490.11, DT[v3==FALSE],  subset(DT, v3==FALSE))
test(1490.12, DT[!v3==FALSE], subset(DT, !v3==FALSE))
test(1490.13, DT[v3==TRUE],   subset(DT, v3==TRUE))
test(1490.14, DT[!v3==TRUE],  subset(DT, !v3==TRUE))
test(1490.15, DT[v3==NA],     subset(DT, v3==NA))
test(1490.16, DT[!v3==NA],    subset(DT, !v3==NA))
test(1490.17, DT[(v3)],       subset(DT, v3==TRUE))
test(1490.18, DT[!(v3)],      subset(DT, !v3==TRUE))

set2key(DT, v4)
test(1490.19, DT[v4=="b"],    subset(DT, v4=="b"))
test(1490.20, DT[!v4=="b"],   subset(DT, !v4=="b"))
test(1490.21, DT[v4==NA],     subset(DT, v4==NA))
test(1490.22, DT[!v4==NA],    subset(DT, !v4==NA))

# test for #957
# %in% with character values against a numeric column: same result with auto indexing off and on
DT <- as.data.table(BOD)
options(datatable.auto.index=FALSE)
ans1 = DT[Time %in% c("1", "2")]
options(datatable.auto.index=TRUE)
ans2 = DT[Time %in% c("1", "2")]
test(1490.23, ans1, ans2)

# test for #961
# %in% with a list on the right-hand side: same result with auto indexing off and on
DT <- as.data.table(cars)
options(datatable.auto.index=FALSE)
ans1 = DT[speed %in% list(1, 4)]
options(datatable.auto.index=TRUE)
ans2 = DT[speed %in% list(1, 4)]
test(1490.24, ans1, ans2)

# replace "." with "list" in 'j'
# replace_dot() must turn every call to .() into list(), including nested calls
# and the right-hand side of :=, while leaving a "." inside a formula
# (lm(x ~ .), lm(. ~ xx)) and existing list() calls untouched.
ee1 = quote(.(val = lm(x ~ .)))
ee2 = quote(.(v1=.(.SD), v2=.(min(y)), v3=.(.(x)), v4=.(x)))
ee3 = quote(.(v1=.(.SD), v2=.(lm(. ~ xx)), v3=.(.(x)), v4=.(x^2)))
ee4 = quote(c("a", "b") := .(.SD))
ee5 = quote(c("a", "b") := .(v1=x^2, v2 = .(.SD[[1L]])))
ee6 = quote(.(v1=.(.SD), v2=.(lm(. ~ xx)), v3=list(.(x)), v4=.(x^2)))
test(1491.1, replace_dot(ee1), quote(list(val = lm(x ~ .))))
test(1491.2, replace_dot(ee2), quote(list(v1=list(.SD), v2=list(min(y)), v3=list(list(x)), v4=list(x))))
test(1491.3, replace_dot(ee3), quote(list(v1=list(.SD), v2=list(lm(. ~ xx)), v3=list(list(x)), v4=list(x^2))))
test(1491.4, replace_dot(ee4), quote(c("a", "b") := list(.SD)))
test(1491.5, replace_dot(ee5), quote(c("a", "b") := list(v1=x^2, v2 = list(.SD[[1L]]))))
test(1491.6, replace_dot(ee6), quote(list(v1=list(.SD), v2=list(lm(. ~ xx)), v3=list(list(x)), v4=list(x^2))))

# Fix for #1050
# integer column compared with a non-integer double: same result with auto indexing off and on
dt = data.table(x=1:5, y=6:10)
options(datatable.auto.index=FALSE)
ans1 <- dt[x == 2.5]
options(datatable.auto.index=TRUE)
ans2 <- dt[x == 2.5]
test(1492, ans1, ans2)

# Fix for #497
# unnamed by-expression combined with a j column called x; the verbose output
# must mention that the by-expression is not named
dt = data.table(x=1:10, y=11:20)
test(1493, dt[, .(x=sum(x)),by= x %% 2, verbose=TRUE], data.table(`x%%2`=c(1,0), x=c(25L,30L)), output="by-expression 'x%%2' is not named")

# Fix for #705
# rbind() of a POSIXct column with a Date column must error, in either order
DT1 = data.table(date=as.POSIXct("2014-06-22", format="%Y-%m-%d", tz="GMT"))
DT2 = data.table(date=as.Date("2014-06-23"))
test(1494.1, rbind(DT1, DT2), error="Class attributes at column")
test(1494.2, rbind(DT2, DT1), error="Class attributes at column")

# test 1495 has been added to melt's test section (fix for #1055)

# Fix for #1056
# shift() with give.names on a column inside j must match shift() on .SD restricted to that column
DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5])
test(1496, DT[, shift(v1, 1:2, NA, "lead", TRUE)], DT[, shift(.SD, 1:2, NA, "lead", TRUE), .SDcols=2L])

# Fix for #1066
# negated character vector in .SDcols
DT = data.table(x=1, y=2, z=3, a=4, b=5, c=6)
test(1497, DT[, .SD, .SDcols = !c("a", "c")], DT[, !c("a", "c"), with=FALSE])

# Fix for #1060
# logical .SDcols shorter than the number of columns: c(T,F) is expected to
# select columns 1, 3 and 5, and its negation the other three
DT = data.table(x=1, y=2, z=3, a=4, b=5, c=6)
test(1498.1, DT[, .SD, .SDcols=c(T,F)], DT[, c("x", "z", "b"), with=FALSE])
test(1498.2, DT[, .SD, .SDcols=!c(T,F)], DT[, !c("x", "z", "b"), with=FALSE])

# Fix for #1072
# %in% in i together with a grouped j: same result with auto indexing off and on
dt <- data.table(group1 = "a", group2 = "z", value  = 1)
options(datatable.auto.index=FALSE)
ans1 = dt[group1 %in% c("a", "b"), sum(value), group2]
options(datatable.auto.index=TRUE)
ans2 = dt[group1 %in% c("a", "b"), sum(value), group2]
test(1499, ans1, ans2)

# Fix for #488
# fread with integer64 columns that contain empty fields; needs bit64 attached
if ("package:bit64" %in% search()) {
    # empty field in a column explicitly requested as integer64 reads as NA
    test(1500.1, fread("x,y\n0,\n", colClasses = list(integer64 = "y")), 
            data.table(x=0L, y=as.integer64(NA)))
    # more tests after new fix
    # large values appear in the first data row of y and late in x, surrounded by empty fields
    test(1500.2, fread("x,y\n0,12345678901234\n0,\n0,\n0,\n0,\n,\n,\n,\n,\n,\n,\n,\n,\n,\n,\n,\n12345678901234,\n0,\n0,\n0,\n0,\n0,\n"), 
        data.table(x=as.integer64(c(rep(0L, 5L), rep(NA, 11), 12345678901234, rep(0L,5L))), 
                   y=as.integer64(c(12345678901234, rep(NA,21)))))

    # column 1 ends with a non-numeric value: expected as character with a warning
    # about the type bump, and the "NA" fields expected as empty strings
    x = c("12345678901234", rep("NA", 178), "a")
    y = sample(letters, length(x), TRUE)
    ll = paste(x,y, sep=",", collapse="\n")
    test(1500.3, fread(ll), 
        data.table(V1=c("12345678901234", rep("", 178), "a"), V2=y), warning="Bumped column 1 to type character on data")

    # column 1 ends with a decimal value: expected as numeric, with the "NA" fields as NA
    x = c("12345678901234", rep("NA", 178), "0.5")
    y = sample(letters, length(x), TRUE)
    ll = paste(x,y, sep=",", collapse="\n")
    test(1500.4, fread(ll), data.table(V1=suppressWarnings(as.numeric(x)), V2=y))

}

# fix for #1082
# deleting a column while i is supplied must error, and must leave dt1 usable afterwards
dt1 = data.table(x=rep(c("a","b","c"),each=3), y=c(1,3,6), v=1:9, key=c("x", "y"))
dt2 = copy(dt1)
test(1502.1, dt1["a", z := NULL], error="When deleting columns, i should not be provided")
# this shouldn't segfault on 'dt1[...]'
test(1502.2, dt1["a", z := 42L], dt2["a", z := 42L])

# fix for #1080
dt = data.table(col1 = c(1,2,3,2,5,3,2), col2 = c(0,9,8,9,6,5,4), key=c("col1"))
test(1503.1, uniqueN(dt), 4L) # default on key columns
test(1503.2, uniqueN(dt, by=NULL), 6L) # on all columns
test(1503.3, uniqueN(dt$col1), 4L) # on just that column

# .SDcols and with=FALSE understands colstart:colend syntax
# Each test compares the column-name range (forward, reversed, negated with - and
# with !) with the equivalent range of column numbers.
dt = setDT(lapply(1:10, function(x) sample(3, 10, TRUE)))
# .SDcols
test(1504.1, dt[, lapply(.SD, sum), by=V1, .SDcols=V8:V10], 
             dt[, lapply(.SD, sum), by=V1, .SDcols=8:10])
test(1504.2, dt[, lapply(.SD, sum), by=V1, .SDcols=V10:V8], 
             dt[, lapply(.SD, sum), by=V1, .SDcols=10:8])
test(1504.3, dt[, lapply(.SD, sum), by=V1, .SDcols=-(V8:V10)], 
             dt[, lapply(.SD, sum), by=V1, .SDcols=-(8:10)])
test(1504.4, dt[, lapply(.SD, sum), by=V1, .SDcols=!(V8:V10)], 
             dt[, lapply(.SD, sum), by=V1, .SDcols=!(8:10)])
# with=FALSE
test(1504.5, dt[, V8:V10, with=FALSE],    dt[, 8:10, with=FALSE])
test(1504.6, dt[, V10:V8, with=FALSE],    dt[, 10:8, with=FALSE])
test(1504.7, dt[, -(V8:V10), with=FALSE], dt[, -(8:10), with=FALSE])
test(1504.8, dt[, !(V8:V10), with=FALSE], dt[, !(8:10), with=FALSE])

# Fix for #1083
# as.matrix() on a data.table with integer and logical columns must match the data.frame method
dt = data.table(x=1:4, y=c(TRUE,FALSE))
test(1505.1, as.matrix(dt), as.matrix(as.data.frame(dt)))

# setcolorder works with data.frames, #1018
# NOTE(review): the input below is a data.table, not a data.frame - confirm whether a data.frame case was intended
dt = data.table(x=1, y=2)
test(1506, setcolorder(dt, c("y", "x")), data.table(y=2, x=1))

# tstrsplit, #1094
# factor to character
x = factor(paste(letters[1:5], letters[6:10], sep="-"))
test(1507.1, tstrsplit(x, "-"), list(letters[1:5], letters[6:10]))
# type.convert
# without type.convert the numeric parts stay character; with it they become integer
x = paste(letters[1:5], 1:5, sep="-")
test(1507.2, tstrsplit(x, "-"), list(letters[1:5], as.character(1:5)))
test(1507.3, tstrsplit(x, "-", type.convert=TRUE), list(letters[1:5], 1:5))

# implementing #575, keep.rownames can take a name
# The string passed as keep= is expected to become the name of the first column,
# which holds the row names (or the vector names in 1508.3).
# matrix input
x = matrix(1:6, ncol=2)
rownames(x) = letters[3:1]
test(1508.1, as.data.table(x, keep="bla"), data.table(bla=letters[3:1], x))
# data.frame input
x = as.data.frame(x)
test(1508.2, as.data.table(x, keep="bla"), data.table(bla=letters[3:1], x))
# named vector input
x = sample(10); setattr(x, 'names', letters[1:10])
test(1508.3, as.data.table(x, keep="bla"), data.table(bla=letters[1:10], x=unname(x)))
# also for setDT
df = data.frame(x=1:5, y=6:10, row.names=letters[5:1])
ans = data.table(foo=letters[5:1], df)
test(1508.4, setDT(df, keep="foo"), ans)

# #1509 test added for melt above.

# #1510 transpose converts NULL to NAs
# with ignore=TRUE the NULL element is dropped instead
ll = list(1:2, NULL, 3:4)
test(1510.1, transpose(ll), list(c(1L, NA, 3L), c(2L, NA, 4L)))
test(1510.2, transpose(ll, ignore=TRUE), list(c(1L, 3L), c(2L, 4L)))

# setorder can reorder data.frames too, #1018
# ans is computed with base order() before setorder() is called on DF; row names are set so they take part in the comparison
DF = data.frame(x=sample(3,10,TRUE), y=sample(letters[1:2], 10, TRUE))
rownames(DF) = sample(letters, 10)
ans = DF[order(-xtfrm(DF$y), DF$x), ]
test(1511, ans, setorder(DF, -y, x))

# fix for #1108
# Keyed joins where the key column and the value in i have different numeric
# types (integer64, double, integer) must find the same row as a join with the
# matching type. Needs bit64 attached.
if ("package:bit64" %in% search()) {
    # integer64 key
    dt <- data.table(id = as.integer64(1:3), a = c("a", "b", "c"), key = "id")
    test(1512.1, dt[.(2)], dt[.(as.integer64(2))])
    test(1512.2, dt[.(2L)], dt[.(as.integer64(2))])

    # double key
    dt <- data.table(id = as.numeric(1:3), a = c("a", "b", "c"), key = "id")
    test(1512.3, dt[.(2L)], dt[.(2)])
    test(1512.4, dt[.(as.integer64(2))], dt[.(2)])

    # integer key
    dt <- data.table(id = 1:3, a = c("a", "b", "c"), key = "id")
    test(1512.5, dt[.(2)], dt[.(2L)])
    test(1512.6, dt[.(as.integer64(2))], dt[.(2L)])
}

# setDT gains key argument, #1121
# setDT(X, key="a") must equal as.data.table(X) followed by setkey()
X = list(a = 4:1, b=runif(4))
test(1513, setkey(as.data.table(X), a), setDT(X, key="a"))

# Adding tests for `isReallyReal`
# Expected results: FALSE for doubles that hold only whole numbers (with or
# without NA) and for a zero-length vector; TRUE when NaN, Inf, -Inf or a
# fractional value (runif) is present.
x = as.numeric(sample(10))
test(1514.1, isReallyReal(x), FALSE)
x = as.numeric(sample(c(1:5, NA)))
test(1514.2, isReallyReal(x), FALSE) # NAs are handled properly
x = as.numeric(sample(c(1:2, NaN, NA)))
test(1514.3, isReallyReal(x), TRUE)
x = as.numeric(sample(c(1:2, Inf, NA)))
test(1514.4, isReallyReal(x), TRUE)
x = as.numeric(sample(c(1:2, -Inf, NA)))
test(1514.5, isReallyReal(x), TRUE)
x = as.numeric(runif(2))
test(1514.6, isReallyReal(x), TRUE)
x = numeric()
test(1514.7, isReallyReal(x), FALSE)

# #1091
# With datatable.prettyprint.char set to 5, the second printed line (the first
# data row) must contain "abcde" followed by three more characters. The previous
# option value is saved first and put back afterwards.
old.option = getOption("datatable.prettyprint.char")
options(datatable.prettyprint.char = 5L)
DT = data.table(x=1:2, y=c("abcdefghijk", "lmnopqrstuvwxyz"))
test(1515.1, grep("abcde...", capture.output(print(DT))), 2L)
options(datatable.prettyprint.char = old.option)

# test 1516: chain setnames() - used while mapping source to target columns
# setnames() is applied to .SD of a column subset of SRC, so SRC itself must keep its names
SRC = data.table(x=1:2, y=c("abcdefghij", "klmnopqrstuv"), z=rnorm(2))
src_cols <- c("y","z")
tgt_cols <- c("name","value")
DT <- SRC[, src_cols, with=FALSE][, setnames(.SD, tgt_cols)]
test(1516.1, names(SRC), c("x","y","z")) # src not altered by ref
test(1516.2, names(DT), tgt_cols) # target expected
test(1516.3, unname(unclass(DT[, tgt_cols, with=FALSE])), unname(unclass(SRC[,src_cols, with=FALSE]))) # content match

# Fix for #1078 and #1128
# as.data.table() and setDT() on an object that carries an extra leading class
# ("foo") must return an object whose class is exactly data.table, data.frame.
x = data.frame(x=1L, y=2L)
setattr(x, 'class', c("foo", "data.frame"))
test(1517.1, class(as.data.table(x)), c("data.table", "data.frame"))
test(1517.2, class(setDT(x)), c("data.table", "data.frame"))
x = data.table(x="a", y=2L)
setattr(x, 'class', c("foo", "data.table", "data.frame"))
test(1517.3, class(as.data.table(x)), c("data.table", "data.frame"))
test(1517.4, class(setDT(x)), c("data.table", "data.frame"))
# for plm package
# same check on a pdata.frame built from a data.table; only when plm is attached
if ("package:plm" %in% search()) {
    set.seed(45L)
    x  = data.table(V1=c(1L,2L), V2=LETTERS[1:3], V3=round(rnorm(4),4), V4=1:12)
    px = pdata.frame(x, index=c("V2", "V4"), drop.index=FALSE, row.names=TRUE)
    test(1517.5, class(as.data.table(px)), class(x))
    test(1517.6, class(setDT(px)), class(x))
}

# Fix for setattr, #1142
# Setting duplicated levels on a factor: the levels are expected to collapse to
# the unique values, and address(x) must be the same before and after.
x = factor(rep(1:4, each=2L))
ax = address(x)
setattr(x, 'levels', c("a", "a", "b", "b"))
test(1518.1, levels(x), c("a", "b"))
test(1518.2, address(x), ax)

# Fix for #1074 and #1092
# 1519.1: .N with nomatch=0L counts matching rows only (key value 3 has no match)
x = data.table(x=c(1,1,1,2), y=1:4, key="x")
test(1519.1, x[.(2:3), .N, nomatch=0L], 1L)
# 1519.2: := in a join with nomatch=0 updates only the rows of x that have a match in y
x = data.table(k = INT(0,2,3,7), o = "b", key = "k")
y = data.table(k = 1:5, n = paste("n", 1:5, sep=""), key = "k")
test(1519.2, x[y, o := n, nomatch = 0], data.table(k = INT(0,2,3,7), o = c("b","n2","n3","b"), key = "k"))

# Fix for #1141 (thanks to @yvanrichard)
# selecting columns that do not exist with with=FALSE must error
x <- data.table(zxc = 1:3, vbn = 4:6)
test(1520, x[, c('zxc', 'qwe', 'rty', 'vbn'), with = FALSE], error = "column(s) not found")

# Fix for #1154 (unnecessary lock on .SD)
# a table returned as unique(.SD) must accept := afterwards
x = data.table(a=c(1,1,2))[, unique(.SD)]
test(1521, x[, b := 5], data.table(a=c(1,2), b=5))

# Fix for #1160, fastmean retaining attributes
# the class set on column b is expected on the grouped mean as well
x = data.table(a = c(2,2,1,1,2), b=setattr(1:5, 'class', c('bla', 'integer')))
test(1522, class(x[, .(mean(b), all(b)), by=a]$V1), c('bla', 'integer'))

# Fix for #1145, .N lock handled properly
# .N used inside a nested [ on the result of head(.SD)
x = data.table(a=1:5)
test(1523, x[, head(.SD, n=2)[1:.N]], data.table(a=1:2))

# #637 add by.x and by.y to merge.data.table
# merge() with differently named join columns is compared with merge.data.frame(), keyed on x1
d1 <- data.table(x1=c(1,3,8), y1=rnorm(3), key="x1")
d2 <- data.table(x2=c(3,8,10), y2=rnorm(3), key="x2")
ans1 = merge(d1, d2, by.x = "x1", by.y = "x2") 
ans2 = setkey(setDT(merge.data.frame(d1, d2, by.x = key(d1), by.y = key(d2))), x1)
test(1524, ans1, ans2)

# 'unique =' argument for CJ, #1148
# with unique=TRUE the inputs are expected to be de-duplicated before the cross join
x = c(1, 2, 1)
y = c(5, 8, 8, 4)
test(1525, CJ(x, y, unique=TRUE), CJ(c(1,2), c(4,5,8)))

# `key` argument fix for `setDT` when input is already a `data.table`,  #1169
# key= sets the key on an existing data.table; key=NULL is expected to remove it
DT <- data.table(A = 1:4, B = 5:8)
setDT(DT, key = "A")
test(1526.1, key(DT), "A")
test(1526.2, key(setDT(DT, key = NULL)), NULL)

# #501, fread stringsAsFactors=FALSE
# fread(..., stringsAsFactors=TRUE) must equal the table whose character column
# was converted with factor(): sorted values, shuffled values, repeated values.
# The literal text in 1527.2 and 1527.3 spells out the output of sample() under
# set.seed(1L), so it depends on R's random number generator.
dt = data.table(x=1:5, y = letters[1:5])
text = "x,y\n1,a\n2,b\n3,c\n4,d\n5,e\n"
test(1527.1, dt[, y := factor(y)], fread(text, stringsAsFactors=TRUE))
set.seed(1L)
dt = data.table(x=1:5, y = sample(letters[1:5]))
text = "x,y\n1,b\n2,e\n3,d\n4,c\n5,a\n"
test(1527.2, dt[, y := factor(y)], fread(text, stringsAsFactors=TRUE))
set.seed(1L)
dt = data.table(x=1:5, y = sample(letters[1:2], 5, TRUE))
text = "x,y\n1,a\n2,a\n3,b\n4,b\n5,a\n"
test(1527.3, dt[, y := factor(y)], fread(text, stringsAsFactors=TRUE))

# #1027, check.names argument to fread
# duplicated header names: made unique when check.names=TRUE, kept as they are when FALSE
nm1 = names(data.table(a=1:2, a=3:4))
nm2 = names(fread("a,a\n1,2\n3,4", check.names=TRUE))
nm3 = names(fread("a,a\n1,2\n3,4", check.names=FALSE))
test(1528.1, make.unique(nm1), nm2)
test(1528.2, nm1, nm3)

# add tests for between
# between() with incbounds TRUE/FALSE and %between% are compared with the
# equivalent >=/<= and >/< expressions, for integer and double input, each
# without and with NA.
# integer, no NA
x = sample(10, 20, TRUE)
test(1529.1, between(x, 1L, 5L, TRUE), x >= 1L & x <= 5L)
test(1529.2, x %between% c(1L, 5L), x >= 1L & x <= 5L)
test(1529.3, between(x, 1L, 5L, FALSE), x > 1L & x < 5L)
# integer, NA possible
x = sample(c(1:10, NA), 20, TRUE)
test(1529.4, between(x, 1L, 5L, TRUE), x >= 1L & x <= 5L)
test(1529.5, x %between% c(1L, 5L), x >= 1L & x <= 5L)
test(1529.6, between(x, 1L, 5L, FALSE), x > 1L & x < 5L)
# double, no NA
x = runif(15)
test(1529.7, between(x, 0.25, 0.75, TRUE), x >= 0.25 & x <= 0.75)
test(1529.8, x %between% c(0.25, 0.75), x >= 0.25 & x <= 0.75)
test(1529.9, between(x, 0.25, 0.75, FALSE), x > 0.25 & x < 0.75)
# double, NA at both ends
x = c(NA, runif(15), NA)
test(1529.10, between(x, 0.25, 0.75, TRUE), x >= 0.25 & x <= 0.75)
test(1529.11, x %between% c(0.25, 0.75), x >= 0.25 & x <= 0.75)
test(1529.12, between(x, 0.25, 0.75, FALSE), x > 0.25 & x < 0.75)

# which.first and which.last: non-logical input is an error; otherwise the
# result is compared with the first / last element of which(x)
test(1530.1, which.first(sample(5, 20, TRUE)), error = "x not boolean")
x = sample(c(TRUE, FALSE), 20, TRUE)
test(1530.2, which.first(x), which(x)[1L])
test(1530.3, which.last(1:5), error = "x not boolean")
test(1530.4, which.last(x), tail(which(x), 1L))

# xts's last(), only run when xts is attached
if ("package:xts" %in% search()) {
  test(1531, xts::last(1:5), 5L)
}

# like() and %like% on a factor, compared with grepl() for two one-letter patterns
set.seed(2L)
x <- apply(matrix(sample(letters, 12), nrow = 2), 1, paste, collapse = "")
y <- factor(sample(c(letters[1:5], x), 20, TRUE))
xsub <- substring(x, 1L, 1L)
test(1532.1, y %like% xsub[1L], grepl(xsub[1L], y))
test(1532.2, y %like% xsub[2L], grepl(xsub[2L], y))
test(1532.3, like(y, xsub[1L]), grepl(xsub[1L], y))
test(1532.4, like(y, xsub[2L]), grepl(xsub[2L], y))

# coverage for setkey() to 100%: empty 'cols' warns, verbose=TRUE reports the forder timing
dt1 <- data.table(x = sample(5), y = 1:5, key = "y")
dt2 <- as.data.table(dt1)
setattr(dt2, 'sorted', NULL)
test(1533.1, setkeyv(dt1, character(0)), dt2, warning = "cols is a character vector")
test(1533.2, setkeyv(dt1, "x", verbose = TRUE), setkey(dt2, x), output = "forder took")

# coverage for %+% and trim
test(1534, `%+%.default`(1:5, 6:10), "1,2,3,4,56,7,8,9,10")
test(1535.1, trim("  abcde "), "abcde")
test(1535.2, trim("  abcde"), "abcde")
test(1535.3, trim("abcde "), "abcde")

# remaining coverage for duplicated.data.table: incomparables=TRUE is an error
dt <- data.table(x = 1:5, y = 6:10)
test(1536, duplicated(dt, incomparables = TRUE), error = "argument 'incomparables != FALSE'")

# melt coverage to 100%: variable.name and value.name both reuse the id column name "x"
test(1537,
     names(melt(dt, id = 1L, variable.name = "x", value.name = "x")),
     c("x", "x.1", "x.2"),
     output = "Duplicate column names")

# tables(): only the presence of the "Total:" line is checked
test(1538, tables(), output = "Total:")

# #1224: uniqueN supports list input
d1 <- data.table(
  a = 1:4,
  l = list(list(letters[1:2]), list(Sys.time()), list(1:10), list(letters[1:2]))
)
test(1539, d1[, uniqueN(l)], 3L)

# feature #1130 - joins without setting keys.
# Fixtures shared by the 1540.* tests below.
# can't test which=TRUE with DT1.copy's results..
set.seed(45L)
DT1 <- data.table(
  x = sample(letters[1:3], 15, TRUE),
  y = sample(6:10, 15, TRUE),
  a = sample(100, 15),
  b = runif(15)
)
DT2 <- CJ(x = letters[1:3], y = 6:10)[, mul := sample(20, 15)][sample(15L, 5L)]
DT3 <- rbindlist(list(DT2, list(x = "d", y = 7L, mul = 100L)))
DT3 <- DT3[sample(nrow(DT3))]

# key on char column: each ad hoc join using on= must equal the same join on a copy keyed by x.
# IDs below 1540.10 are zero-padded (1540.01 ... 1540.08): as numbers, 1540.1, 1540.2 and
# 1540.3 are identical to 1540.10, 1540.20 and 1540.30, which are used further down, so a
# failure report could not tell those tests apart.
DT1.copy = copy(DT1)
setkey(DT1.copy, x)
test(1540.01, DT1[DT2, on=c(x="x")], DT1.copy[DT2])
test(1540.33, DT1[DT2, on=c("x")], DT1.copy[DT2])
test(1540.02, DT1[DT2, lapply(.SD, function(x) x * mul),
                 by=.EACHI, on=c(x="x"), .SDcols=c("a", "b")],
             DT1.copy[DT2, lapply(.SD, function(x) x * mul),
                 by=.EACHI, .SDcols=c("a", "b")])
# DT3 has an extra row (x="d") on top of DT2's rows
test(1540.03, DT1[DT3, on=c(x="x")], DT1.copy[DT3])
test(1540.04, DT1[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, on=c(x="x"), .SDcols=c("a", "b")],
             DT1.copy[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, .SDcols=c("a", "b")])
# same joins with nomatch=0L
test(1540.05, DT1[DT3, on=c(x="x"), nomatch=0L], DT1.copy[DT3, nomatch=0L])
test(1540.06, DT1[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, on=c(x="x"), .SDcols=c("a", "b"), nomatch=0L],
             DT1.copy[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, .SDcols=c("a", "b"), nomatch=0L])
# same joins with roll=TRUE
test(1540.07, DT1[DT3, on=c(x="x"), roll=TRUE], DT1.copy[DT3, roll=TRUE])
test(1540.08, DT1[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, on=c(x="x"), .SDcols=c("a", "b"), roll=TRUE],
             DT1.copy[DT3, lapply(.SD, function(x) x * mul),
                 by=.EACHI, .SDcols=c("a", "b"), roll=TRUE])

# key on integer col: on= joins against y are compared with joins on a copy keyed by y.
# For the keyed reference, i's columns are reordered (c(2,1,3)) so that y comes first.
DT1.copy <- copy(DT1)
setkey(DT1.copy, y)
test(1540.9, DT1[DT2, on = c(y = "y")], DT1.copy[DT2[, c(2, 1, 3), with = FALSE]])
test(1540.34, DT1[DT2, on = c("y")], DT1.copy[DT2[, c(2, 1, 3), with = FALSE]])
test(1540.10,
     DT1[DT2, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(y = "y"), .SDcols = c("a", "b")],
     DT1.copy[DT2[, c(2, 1, 3), with = FALSE], lapply(.SD, function(x) x * mul),
              by = .EACHI, .SDcols = c("a", "b")])
test(1540.11, DT1[DT3, on = c(y = "y")], DT1.copy[DT3[, c(2, 1, 3), with = FALSE]])
test(1540.12,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(y = "y"), .SDcols = c("a", "b")],
     DT1.copy[DT3[, c(2, 1, 3), with = FALSE], lapply(.SD, function(x) x * mul),
              by = .EACHI, .SDcols = c("a", "b")])
test(1540.13,
     DT1[DT3, on = c(y = "y"), nomatch = 0L],
     DT1.copy[DT3[, c(2, 1, 3), with = FALSE], nomatch = 0L])
test(1540.14,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(y = "y"), .SDcols = c("a", "b"),
         nomatch = 0L],
     DT1.copy[DT3[, c(2, 1, 3), with = FALSE], lapply(.SD, function(x) x * mul),
              by = .EACHI, .SDcols = c("a", "b"), nomatch = 0L])
test(1540.15,
     DT1[DT3, on = c(y = "y"), roll = TRUE],
     DT1.copy[DT3[, c(2, 1, 3), with = FALSE], roll = TRUE])
test(1540.16,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(y = "y"), .SDcols = c("a", "b"),
         roll = TRUE],
     DT1.copy[DT3[, c(2, 1, 3), with = FALSE], lapply(.SD, function(x) x * mul),
              by = .EACHI, .SDcols = c("a", "b"), roll = TRUE])

# multiple keys: on= joins against (x, y) are compared with joins on a copy keyed by x, y
DT1.copy <- copy(DT1)
setkey(DT1.copy, x, y)
test(1540.17, DT1[DT2, on = c(x = "x", y = "y")], DT1.copy[DT2])
test(1540.35, DT1[DT2, on = c("x", "y")], DT1.copy[DT2])
test(1540.18,
     DT1[DT2, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "x", y = "y")],
     DT1.copy[DT2, lapply(.SD, function(x) x * mul), by = .EACHI])
test(1540.19, DT1[DT3, on = c(x = "x", y = "y")], DT1.copy[DT3])
test(1540.20,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "x", y = "y")],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI])
test(1540.21, DT1[DT3, on = c(x = "x", y = "y"), nomatch = 0L], DT1.copy[DT3, nomatch = 0L])
test(1540.22,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "x", y = "y"), nomatch = 0L],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, nomatch = 0L])
test(1540.23, DT1[DT3, on = c(x = "x", y = "y"), roll = TRUE], DT1.copy[DT3, roll = TRUE])
test(1540.24,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "x", y = "y"), roll = TRUE],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, roll = TRUE])

# multiple keys, non-identical names: i's join columns are renamed to q and r,
# so on= has to map x to q and y to r
DT1.copy <- copy(DT1)
setkey(DT1.copy, x, y)
setnames(DT2, c("q", "r", "mul"))
setnames(DT3, names(DT2))
test(1540.25, DT1[DT2, on = c(x = "q", y = "r")], DT1.copy[DT2])
test(1540.26,
     DT1[DT2, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "q", y = "r")],
     DT1.copy[DT2, lapply(.SD, function(x) x * mul), by = .EACHI])
test(1540.27, DT1[DT3, on = c(x = "q", y = "r")], DT1.copy[DT3])
test(1540.28,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "q", y = "r")],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI])
test(1540.29, DT1[DT3, on = c(x = "q", y = "r"), nomatch = 0L], DT1.copy[DT3, nomatch = 0L])
test(1540.30,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "q", y = "r"), nomatch = 0L],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, nomatch = 0L])
test(1540.31, DT1[DT3, on = c(x = "q", y = "r"), roll = TRUE], DT1.copy[DT3, roll = TRUE])
test(1540.32,
     DT1[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, on = c(x = "q", y = "r"), roll = TRUE],
     DT1.copy[DT3, lapply(.SD, function(x) x * mul), by = .EACHI, roll = TRUE])

# to do: add tests for :=

# fix for #477, key not being retained on joins on factor columns
set.seed(1)
dtp = data.table(
  pid = gl(3, 3, labels = c("du", "i", "nouana")),
  year = gl(3, 1, 9, labels = c("2007", "2010", "2012")),
  val = rnorm(9),
  key = c("pid", "year")
)
dtab = data.table(
  pid = factor(c("i", "nouana")),
  year = factor(c("2010", "2000")),
  abn = sample(1:5, 2, replace = TRUE),
  key = c("pid", "year")
)
test(1541, key(dtp[dtab]), c("pid", "year"))

#  fix DT[TRUE, :=] using too much working memory for i, #1249
# The memory profile of one in-place update is written to a temp file and the
# number of profile lines containing "000" is counted.
# NOTE(review): "000" presumably matches the byte counts of N-row allocations - confirm.
if (!inherits(try(Rprofmem(NULL), silent=TRUE), "try-error")) {  # in case R not compiled with memory profiling enabled
  f = tempfile()
  N = 1000000           # or any large number of rows
  DT = data.table(A=1:N, B=rnorm(N))
  DT[TRUE, B := B * 2] # stabilize with initial dummy update
  Rprofmem(f)          # start profiling into f
  DT[TRUE, B := B * 2] # or some in-place update
  Rprofmem(NULL)       # stop profiling
  test(1542, length(grep("000",readLines(f, warn=FALSE))), 1L)  # one allocation for the RHS only
  unlink(f)            # remove the temporary profile file
}

# rest of #1130 - merge doesn't copy, instead uses joins without keys.
# Each test compares merge() on data.frames (setDF) with merge() on data.tables (setDT);
# setDF/setDT convert d1 and d2 by reference, back and forth.
set.seed(1L)
d1 <- data.table(A = sample(letters[1:10]), X = 1:10, total = TRUE)
d2 <- data.table(A = sample(letters[5:14]), Y = 1:10, total = FALSE)

ans1 <- suppressWarnings(merge(setDF(d1), setDF(d2), by="A"))
ans2 <- setDF(merge(setDT(d1), setDT(d2), by="A"))
test(1543.1, ans1, ans2)
ans1 <- suppressWarnings(merge(setDF(d1), setDF(d2), all=TRUE, by="A"))
ans2 <- setDF(merge(setDT(d1), setDT(d2), all=TRUE, by="A"))
# was numbered 1542.2, which filed this merge test under the unrelated test 1542
test(1543.2, ans1, ans2)
# test duplicate name cases: after renaming, d2 has a non-key column "A" clashing with d1's key
setnames(d2, c("A", "Y"), c("B", "A"))
ans1 <- suppressWarnings(merge(setDF(d1), setDF(d2), by.x="A", by.y="B"))
ans2 <- setDF(merge(setDT(d1), setDT(d2), by.x="A", by.y="B"))
test(1543.3, ans1, ans2)
ans1 <- suppressWarnings(merge(setDF(d2), setDF(d1), by.x="B", by.y="A"))
ans2 <- setDF(merge(setDT(d2), setDT(d1), by.x="B", by.y="A"))
test(1543.4, ans1, ans2)
ans1 <- suppressWarnings(merge(setDF(d2), setDF(d1), all=TRUE, by.x="B", by.y="A"))
ans2 <- setDF(merge(setDT(d2), setDT(d1), all=TRUE, by.x="B", by.y="A"))
test(1543.5, ans1, ans2)

# The tests in this group are numbered 1543.6 to 1543.9 in file order. Previously the ID
# 1543.7 was used twice and 1543.6 came after it, so a failure report was ambiguous.

# test for sort=FALSE argument, #1282
set.seed(1L)
d1 <- data.table(A = sample(letters[1:10]), X = 1:10, total = TRUE)
d2 <- data.table(A = sample(letters[5:14]), Y = 1:10, total = FALSE)
test(1543.6, merge(setDT(d1), setDT(d2), by="A", sort=FALSE),
       setDT(merge(setDF(d1), setDF(d2), by="A", sort=FALSE)))

# thinko in merge dupnames handling: y1 and y2 are non-key columns in both tables
dt1 = data.table(x=1:5, y1=2L, y2=3L)
dt2 = data.table(a=4:6, y2=TRUE, y1 = FALSE)
test(1543.7, setDF(merge(dt1, dt2, by.x="x", by.y="a")),
             merge(as.data.frame(dt1), as.data.frame(dt2), by.x="x", by.y="a"))

# fix #1290, restore colorder before setting names
set.seed(1)
dt1 <- data.table(sex = rep(1:2, 5), group = rep(letters[1:5], 2),V1 = sample(1:10))
set.seed(2)
dt2 <- data.table(group = rep(letters[1:5], 2),sex = rep(1:2, 5),V2 = sample(1:10))
test(1543.8, setDF(merge(dt1, dt2, by = c("sex", "group"))),
             merge(as.data.frame(dt1), as.data.frame(dt2), by=c("sex", "group")))
by.x = c("sex.1", "group.1")
by.y = c("sex.2", "group.2")
setnames(dt1, 1:2, by.x)
# dt2's first two columns are (group, sex), hence rev()
setnames(dt2, 1:2, rev(by.y))
test(1543.9, setDF(merge(dt1, dt2, by.x=by.x, by.y=by.y)),
             merge(as.data.frame(dt1), as.data.frame(dt2), by.x=by.x, by.y=by.y))

# fix for #1258 (bug on .shallow - retains keys when it shouldn't)
# nice catch and excellent report from @and3k
x1 = data.table(a1 = c('a', 'b', 'c'), a2 = c(1L, 3L, 2L))
y = data.table(a2 = 1:3)
setkey(y, a2)
setkey(x1, a1, a2)
test(1544.1, setDF(merge(x1, y)), merge(as.data.frame(x1), as.data.frame(y)))
test(1544.2,
     setDF(merge(x1, y, by = "a2")),
     merge(as.data.frame(x1), as.data.frame(y), by = "a2"))

# also test shallow here so as to catch future regressions
x1 = data.table(a1 = c('a', 'b', 'c'), a2 = c(1L, 3L, 2L), key = "a1,a2")
test(1545.1, key(.shallow(x1, cols = "a2")), NULL)
test(1545.2, key(.shallow(x1, retain.key = FALSE)), NULL)
test(1545.3, key(.shallow(x1, retain.key = TRUE)), key(x1))
test(1545.4, key(.shallow(x1, cols = "a1", retain.key = TRUE)), "a1")

# test for #1234: set() on a data.frame is compared with the equivalent base `[<-` assignment
df1 <- df2 <- data.frame(cats = rep(c('', ' ',  'meow'), 5))
df2[grep("^[ ]*$", df2$cats), "cats"] <- NA_integer_
test(1546, set(df1, grep("^[ ]*$", df1$cats), 1L, NA_integer_), df2)

# Add test for getdots() function (although it doesn't seem to be used anywhere)
foo = function(x, y, ...) {
  getdots()
}
test(1547, foo(1L, 5L, a = 2L, "c"), c("2", "c"))

# Fix for encoding issues in windows, #563: the 'encoding' argument of fread
# decides the Encoding() mark of the character columns that are read
f <- "issue_563_fread.txt"
ans1 <- fread(f, sep = ",", header = TRUE)
ans2 <- fread(f, sep = ",", header = TRUE, encoding = "UTF-8")
test(1548.1, unique(unlist(lapply(ans1, Encoding))), "unknown")
test(1548.2, unique(unlist(lapply(ans2, Encoding))), "UTF-8")

# #1167 print.data.table row id in non-scientific notation (15 million rows)
DT = data.table(a = rep(1:5, 3*1e6), b = rep(letters[1:3], 5*1e6))
test(1549, capture.output(print(DT)),
     c("          a b",
       "       1: 1 a",
       "       2: 2 b",
       "       3: 3 c",
       "       4: 4 a",
       "       5: 5 b",
       "      ---    ",
       "14999996: 1 b",
       "14999997: 2 c",
       "14999998: 3 a",
       "14999999: 4 b",
       "15000000: 5 c"))
rm(DT)

# PR by @dselivanov
# fixes #504 - handle nastring while reading (without coercion to character)
# Note: this doesn't address cases like na.strings="-999" yet. See https://github.com/Rdatatable/data.table/pull/1236 for those examples.
K = 10L
nastrings = c('null', 'NULL', 'na', '_NA', 'NA', 'nan', 'Nan', 'NAN', 'NaN')
DT = data.table(int = 1:K,
                 char = sample(letters, size = K, replace = TRUE),
                 float = 1:K + 0.1,
                 bool = sample( c(TRUE, FALSE), K, replace = TRUE))

# Expected result: DT with NA at row j of column j.
# copy() is needed here: set() updates by reference, and a plain `DT_NA = DT` would make
# both names point at the same table, so the source DT would get the NAs as well.
DT_NA = copy(DT)
for (j in seq_len( ncol(DT) )) {
  set(x = DT_NA, i = j, j = j, value = NA)
}

# One test per NA string (1550.1 ... 1550.9): build the text of DT with that string
# at row j of column j, read it back with fread and compare with DT_NA.
for(k in seq_along(nastrings)) {
  dt0 = copy(DT)
  for (j in seq_len( ncol(DT) )) {
    set(x = dt0, i = NULL, j = j, value = as.character(dt0[[j]]))
    set(x = dt0, i = j, j = j, value = nastrings[[k]])
  }
  # one comma-separated line per row, then the header line in front
  str = do.call(paste, c(dt0, collapse="\n", sep=","))
  str = paste(paste(names(dt0), collapse=","), str, sep="\n")
  DT_fread = fread(str, na.strings = nastrings, verbose = FALSE)
  test(1550 + k * 0.1, DT_fread, DT_NA)
}

# FR #568: fread on fields that contain embedded or unbalanced double quotes
str <- "a,b\n1.5,\"at the 5\" end of the gene.\""
test(1551.1, fread(str), data.table(a = 1.5, b = "\"at the 5\" end of the gene.\""))

# #1256
str <- "x,y\nx1,\"oops\" y1\n"
test(1551.2, fread(str), data.table(x = "x1", y = "\"oops\" y1"))

# #1077
str <- '2,3\n""foo,bar'
test(1551.3, fread(str), data.table(V1 = c("2", "\"\"foo"), V2 = c("3", "bar")))

# #1079
str <- 'L1\tsome\tunquoted\tstuff\nL2\tsome\t"half" quoted\tstuff\nL3\tthis\t"should work"\tok thought'
test(1551.4, fread(str),
     data.table(L1 = c("L2", "L3"),
                some = c("some", "this"),
                unquoted = c("\"half\" quoted", "should work"),
                stuff = c("stuff", "ok thought")))

# #1095: result is compared with read.table(quote="")
rhs <- read.table("issue_1095_fread.txt", sep = ",", comment.char = "",
                  stringsAsFactors = FALSE, quote = "", strip.white = TRUE)
test(1551.5, fread("issue_1095_fread.txt"), setDT(rhs),
     warning = "Bumped column 47 to type character on data row")

# FR #1314 rest of na.strings issue: fread is compared with read.table for
# several sets of na.strings
str <- "a,b,c,d\n#N/A,+1,5.5,FALSE\n#N/A,5,6.6,TRUE\n#N/A,+1,#N/A,-999\n#N/A,#N/A,-999,FALSE\n#N/A,1,NA,TRUE"
# Reference reader: read.table() on the same text, returned as a data.table.
# Arguments in ... are forwarded to read.table().
read_table <- function(str, ...) {
  df <- read.table(text = str, stringsAsFactors = FALSE, comment.char = "",
                   sep = ",", header = TRUE, ...)
  setDT(df)[]
}
test(1552.1, fread(str, na.strings = "#N/A"), read_table(str, na.strings = "#N/A"))
test(1552.2,
     fread(str, na.strings = c("#N/A", "-999")),
     read_table(str, na.strings = c("#N/A", "-999")))
test(1552.3,
     fread(str, na.strings = c("#N/A", "-999", "+1")),
     read_table(str, na.strings = c("#N/A", "-999", "+1")))
test(1552.4,
     fread(str, na.strings = c("#N/A", "-999", "+1", "1")),
     read_table(str, na.strings = c("#N/A", "-999", "+1", "1")))
test(1552.5,
     fread(str, na.strings = c("#N/A", "-999", "FALSE")),
     read_table(str, na.strings = c("#N/A", "-999", "FALSE")))

# FR #1177: 'quote' option of 'print.data.table'
DT1 = data.table(s1 = paste0(" ", LETTERS[1:5]), s2 = LETTERS[1:5])
# expected output with quote = TRUE
ans1 = c("     s1  s2",
         "1: \" A\" \"A\"",
         "2: \" B\" \"B\"",
         "3: \" C\" \"C\"",
         "4: \" D\" \"D\"",
         "5: \" E\" \"E\"")
# expected output of the default print
ans2 = c("   s1 s2",
         "1:  A  A",
         "2:  B  B",
         "3:  C  C",
         "4:  D  D",
         "5:  E  E")
test(1553.1, capture.output(print(DT1, quote = TRUE)), ans1)
test(1553.2, capture.output(print(DT1)), ans2)

# #826 - subset DT on single integer vector stored as matrix the same way as data.frame
dt = data.table(a = letters[1:10])
idx = c(2:4, 7L, 9:10)
dim(idx) = c(6L, 1L)
dimnames(idx) = list(NULL, "Resample1") # as in caret::createDataPartition
test(1554.1, dt[idx], data.table(a = letters[idx]))
test(1554.2, dt[-idx], data.table(a = letters[(1:10)[-idx]]))
test(1554.3, dt[!idx], data.table(a = letters[(1:10)[-idx]]))
# idx itself must be unchanged after being used as i
test(1554.4, idx,
     structure(c(2L, 3L, 4L, 7L, 9L, 10L),
               .Dim = c(6L, 1L),
               .Dimnames = list(NULL, "Resample1")))

# strip.white and other enhancements to 'fread()'
# IDs below 1555.10 are zero-padded (1555.01 ... 1555.09): as numbers, 1555.1 and 1555.10
# are identical, so a failure report could not tell those two tests apart.
# bug #1113
ans1 <- fread("issue_1113_fread.txt")
ans2 <- setDT(read.table("issue_1113_fread.txt", header=TRUE))
setnames(ans2, names(ans1))
test(1555.01, ans1, ans2)

# bug #1035, take care of spaces automatically. Note that the columns are also read in proper types. Also with quotes when sep is not space.
# NB: the trailing spaces on the header lines below are part of the test input.
str1=" ITERATION    THETA1       THETA2                
            0  3.95527E+01  2.10651E+01"
str2=" ITERATION,    THETA1,       THETA2                
            0,  3.95527E+01,  2.10651E+01"
str3=" ITERATION  ,  THETA1   ,    THETA2                
            0  ,  3.95527E+01  ,  2.10651E+01"
str4=" ITERATION  ,  THETA1   ,    \"THETA2\"                
            0  ,  3.95527E+01  ,  2.10651E+01"
str5=" ITERATION  ,  THETA1   ,    THETA2                
            bla  ,  3.95527E+01  ,  2.10651E+01"
test(1555.02, fread(str1), data.table(ITERATION=0L, THETA1=39.5527, THETA2=21.0651))
test(1555.03, fread(str2), data.table(ITERATION=0L, THETA1=39.5527, THETA2=21.0651))
test(1555.04, fread(str3), data.table(ITERATION=0L, THETA1=39.5527, THETA2=21.0651))
test(1555.05, fread(str4), data.table(ITERATION=0L, THETA1=39.5527, `"THETA2"`=21.0651))
test(1555.06, fread(str5), data.table(ITERATION="bla", THETA1=39.5527, THETA2=21.0651))
# without strip.white
# header col spaces are dealt properly irrespective of strip.white
test(1555.07, fread(str1, strip.white=FALSE), error="Expecting 4 cols, but line 2 contains")
test(1555.08, names(fread(str2, strip.white=FALSE)), c("ITERATION","THETA1","THETA2                "))
test(1555.09, names(fread(str3, strip.white=FALSE)), c("ITERATION  ","THETA1   ","THETA2                "))
test(1555.10, names(fread(str4, strip.white=FALSE)), c("ITERATION  ","THETA1   ","\"THETA2\"                "))

# bug #1035, reply to the post from another user: leading spaces on every line
str1 <- "  22 4 6 4\n  34 22 34 5\n  6 2 1 4\n"
str2 <- "22 4 6 4\n34 22 34 5\n6 2 1 4\n"
test(1555.11, fread(str1), fread(str2))

# bug #785: result is compared with read.table(strip.white=TRUE)
rhs <- read.table("issue_785_fread.txt", header = TRUE, stringsAsFactors = FALSE,
                  sep = "\t", strip.white = TRUE)
rhs <- setDT(rhs)
test(1555.12, fread("issue_785_fread.txt"), rhs)

# bug #529, http://stackoverflow.com/questions/22229109/r-data-table-fread-command-how-to-read-large-files-with-irregular-separators
# str1 has a leading space on each line, str2 does not
str1 <- " YYYY MM DD HH mm             19490             40790
 1991 10  1  1  0      1.046465E+00      1.568405E+00"
str2 <- "YYYY MM DD HH mm             19490             40790
1991 10  1  1  0      1.046465E+00      1.568405E+00"
test(1555.13, fread(str1), fread(str2))

# fix for #1330
test(1556.1, fread("issue_1330_fread.txt", nrow = 2), data.table(a = 1:2, b = 1:2))
test(1556.2, fread("issue_1330_fread.txt", nrow = 4), data.table(a = 1:2, b = 1:2),
     warning = "Stopped reading at empty line 4")

# FR #768: col.names argument of fread
str <- "1,2\n3,4\n"
test(1557.1, names(fread(str)), c("V1", "V2")) # autonamed
test(1557.2, names(fread(str, col.names = letters[1:2])), letters[1:2])
test(1557.3, names(fread(str, col.names = letters[1])), error = "Can't assign 1 names to")
test(1557.4, names(fread(str, col.names = letters[1:3])), error = "Can't assign 3 names to")
test(1557.5, names(fread(str, col.names = 1:2)), error = "Passed a vector of type")

# Fix for #773
f <- "issue_773_fread.txt"
ans <- data.table(
  AAA = as.integer(c(4, 7, rep(1, 17), 31, 21)),
  BBB = as.integer(c(5, 8, rep(2, 17), 32, 22)),
  CCC = as.integer(c(6, 9, rep(3, 17), 33, 23))
)
test(1558, fread(f, nrow = 21L), ans) # no warning

# FR # 1338 -- check.names argument of setDT, for data.table, data.frame and list input
ans <- data.table(X = 1:3, "X.1" = 1:3)
dt1 <- data.table(X = 1:3, X = 1:3)
df1 <- data.frame(X = 1:3, X = 1:3, check.names = FALSE)
ls1 <- list("X" = 1:3, "X" = 1:3)
test(1559.1, setDT(dt1, check.names = TRUE), ans)
test(1559.2, setDT(df1, check.names = TRUE), ans)
test(1559.3, setDT(ls1, check.names = TRUE), ans)

# Fix #1140: selecting column 0 with with=FALSE gives a null data.table
test(1560.1, data.table(x = letters[1:5])[, 0, with = FALSE], null.data.table())
test(1560.2, data.table(x = letters[1:5])[, c(0, FALSE), with = FALSE], null.data.table())

# Fix for #1298: by=eval(q) where q is a quoted .() call
d <- data.table(a = 1)
q <- quote(.(a))
test(1561, d[, 1, by = eval(q)], d[, 1, by = .(a)])

# Fix for #1315: as.list() and sapply() on an IDate vector
d <- as.IDate(seq(as.Date("2015-01-01"), as.Date("2015-01-15"), by = '1 day'))
test(1562.1, as.list(d), lapply(as.list(as.Date(d)), as.IDate))
test(1562.2, sapply(d, identity), as.integer(sapply(as.Date(d), identity)))

# The tests in this group used IDs (1557.1-1557.4, 1558, 1559.1, 1559.2) that are already
# taken by the FR #768, #773 and FR #1338 tests above, so a failure report naming one of
# those IDs was ambiguous. They now use decimals that are unused under the same integer.

# Fix for #1216, .SDcols and with=FALSE should evaluate within frame of 'x' only when it's of the form a:b
# dt has columns named 'i' and 's', and a global 'i' exists too: 1:i must use the global,
# whereas i:s and index1:index2 must be read as column ranges.
dt = data.table(index1=1:10, index2=10:1, index3=1, s=4, i=24)
i = 2L
test(1557.6, dt[, paste0("index", 1:i), with=FALSE], dt[, index1:index2, with=FALSE])
test(1557.7, dt[, paste0("index", 1:i), with=FALSE], dt[, 1:2, with=FALSE])
test(1557.8, dt[, 5:4, with=FALSE], dt[, i:s, with=FALSE])
test(1557.9, dt[, .SD, .SDcols=paste0("index", 1:i)], dt[, .SD, .SDcols=index1:index2])

# fix for #1354
test(1558.1, as.ITime(NA), setattr(NA_integer_, 'class', 'ITime'))

# Only one of the next two tests runs, depending on whether xts is attached
if (!"package:xts" %in% search()) {
    # #1347, xts issue from Joshua
    x = as.Date(1:5, origin="2015-01-01")
    test(1559.4, last(x), tail(x, 1L))
} else {
    test(1559.5, last(.xts(1:3,1:3)), .xts(1:3, 1:3)[3, ])
}

# fix for #1352: merge() with a named 'by' vector
dt1 <- data.table(a = 1:5, b = 6:10, c = 11:15)
dt2 <- data.table(a = 3:6, b = 8:11, d = 1L)
by_cols <- c(x = "a", y = "b")
test(1560,
     merge(dt1, dt2, by = by_cols, sort = FALSE),
     dt1[dt2, nomatch = 0L, on = unname(by_cols)])

# FR #1353: rowid() and rowidv()
DT <- data.table(x = c(20, 10, 10, 30, 30, 20), y = c("a", "a", "a", "b", "b", "b"), z = 1:6)

test(1561.1, rowid(DT$x), c(1L, 1L, 2L, 1L, 2L, 2L))
test(1561.2, rowidv(DT, cols = "x"), c(1L, 1L, 2L, 1L, 2L, 2L))
test(1561.3, rowid(DT$x, prefix = "group"), paste0("group", c(1L, 1L, 2L, 1L, 2L, 2L)))
test(1561.4, rowid(DT$x, DT$y), c(1L, 1L, 2L, 1L, 2L, 1L))
test(1561.5, rowidv(DT, cols = c("x", "y")), c(1L, 1L, 2L, 1L, 2L, 1L))
# convenient usage with dcast
test(1561.6,
     dcast(DT, x ~ rowid(x, prefix = "group"), value.var = "z"),
     data.table(x = c(10, 20, 30), group1 = c(2L, 1L, 4L), group2 = c(3L, 6L, 5L), key = "x"))

# Fix for #1346: melt with patterns() in 'measure'
DT <- data.table(id = 1:3, g1 = 4:6, g2 = 7:9)
test(1562,
     melt(DT, measure = patterns("^g[12]"), variable.factor = FALSE),
     data.table(id = 1:3, variable = rep(c("g1", "g2"), each = 3L), value = 4:9))

# Test 1563 was added with the melt tests above; it is the fix for #1359.

# fix for #1341: truelength of a .SD result, and := on it
dt = data.table(a = 1:10)
test(1564.1, truelength(dt[, .SD]), 100L)
test(1564.2, truelength(dt[a==5, .SD]), 100L)
test(1564.3, dt[a==5, .SD][, b := 1L], data.table(a = 5L, b = 1L))

# Fix for #1251, DT[, .N, by=a] and DT[, .(.N), by=a] uses GForce now
dt = data.table(a = sample(3, 20, TRUE), b = 1:10)
optim = getOption("datatable.optimize") # save old optim value
options(datatable.optimize = Inf)
ans1 = dt[, .N, by = a]
ans2 = capture.output(dt[, .N, by = a, verbose = TRUE])
# make sure GForce optimisation works
test(1565.1, any(grepl("GForce optimized j to", ans2)), TRUE)
# make sure result is right
options(datatable.optimize = 1L)
test(1565.2, ans1, dt[, .N, by = a])
# restore optim level
options(datatable.optimize = optim)

# Fix for #1212: j = c and j = get("c") on a list column give equal results,
# but get("c") must not return the column at the same address
set.seed(123)
dt = data.table(a = c("abc", "def", "ghi"), b = runif(3))[, c:=list(list(data.table(d=runif(1), e=runif(1))))]
test(1566.1, dt[, c], dt[, get("c")])
test(1566.2, dt[, .(c=c)], dt[, .(c=get("c"))])
test(1566.3, address(dt$c) == address(dt[, get("c")]), FALSE)

# Fix for #1207: grouping a zero-row table that has a list column
d1 = data.table(a = character(), b = list())
test(1567.1, d1[, b, by=a], d1)
test(1567.2, d1[, b, keyby=a], data.table(d1, key = "a"))

# Fix for #1334: keyby on I() of ordered factors
dt = data.table(
  x = ordered(rep(1:3, each = 5)),
  y = ordered(rep(c("B", "A", "C"), 5), levels = c("B", "A", "C")),
  z = 1:15
)
test(1568,
     dt[, sum(z), keyby=.(I(x), I(y))],
     data.table(I = I(ordered(rep(1:3, each = 3))),
                I.1 = I(ordered(rep(c("B", "A", "C"), 3), levels = c("B", "A", "C"))),
                V1 = c(5L, 7L, 3L, 17L, 8L, 15L, 13L, 25L, 27L),
                key = c("I", "I.1")))

# Test 1569 is written under melt above.

# fix for #1378, merge resets class: Y carries an extra "custom" class,
# the merged result is expected to have the plain class of X
X <- data.table(a = 1:3, b = 4:6)
Y <- data.table(a = 1L, c = 5L)
setattr(Y, 'class', c("custom", "data.table", "data.frame"))
test(1570.1, class(merge(X, Y, all = TRUE, by = "a")), class(X))
test(1570.2, class(merge(Y, X, all = TRUE, by = "a")), class(X))

# #1379, tstrsplit gains names argument
X <- data.table(a = c("ABC", "DEFG"))
test(1571,
     names(tstrsplit(X$a, "", fixed = TRUE, give.names = TRUE)),
     paste0("V", 1:4))

# fix for #1367, quote="" argument in use. Using embedded quotes in the example below reads the
# first two columns as one. I couldn't find a way to avoid introducing quote argument.
test(1572,
     fread('"abcd efgh." ijkl.\tmnop "qrst uvwx."\t45\n', quote = ""),
     setDT(read.table(text = '"abcd efgh." ijkl.\tmnop "qrst uvwx."\t45\n',
                      sep = "\t", stringsAsFactors = FALSE, quote = "")))

# Fix for #1384, fread with empty new line, initial checks failed due to extra spaces.
# The leading spaces inside the string literal are part of the test input.
test(1573, fread('a,b
       1,2
       '), data.table(a = 1L, b = 2L))

# Fix for #1375: on= with a double value in i, and with partly named on= vectors
X <- data.table(a = 1:3, b = 4:6, c = c("foo", "bar", "baz"))
test(1574.1, X[.(5), on="b"], X[2])

X <- data.table(A = 1:3, b = 4:6, c = c("foo", "bar", "baz"))
Y <- data.table(A = 2:4, B = 5:7)
test(1574.2, X[Y, on=c("A",b="B")], X[Y, on=c(A="A", b="B")])
test(1574.3, X[Y, on=c(b="B", "A")], X[Y, on=c(b="B", A="A")])

# fix for #1376: on= naming a column that is missing from x or from i is an error
X <- data.table(a = 1:3, b = 4:6, c = c("foo", "bar", "baz"))
Y <- data.table(A = 2:4, B = 5:7)
test(1575.1, X[Y, on=c(A="a")], error = "not found in x")
test(1575.2, X[Y, on=c(a="a")], error = "not found in i")

##########################


# TO DO: Add test for fixed bug #5519 - dcast returned an error when a package imported data.table, but this didn't happen when the package "depends" on data.table. This is fixed (commit 1263 v1.9.3), but not sure how to add a test.

# TO DO: test and highlight in docs that negatives are fine and fast in forderv (ref R wish #15644)
# TO DO: tests of freading classes like Date and the verbose messages there.
# TO DO: Test mid read bump of logical T/F to character, collapse back to T and F.

# TO DO: add examples of multiple LHS (name and position) and multiple RHS to example(":=")
# TO DO: tests on double in ad hoc by
# TO DO: test on -i that retain key e.g.  DT[-4] and DT[-4,sum(v),by=b] should both retain key
#        test on out of bound i subsets e.g. 6:10 when DT has 7 rows, and mixed negative and positive i integer is error.
#  test that ordered subsets when i is unkeyed now retain x's key (using is.sorted(f__))

# TO DO: add FAQ that  eval() is evaled in calling frame so don't need a, then update SO question of 14 March. See the test using variable name same as column name. Actually, is that true?  Need "..J".
# TO DO: why did SO answer using eval twice in j need .SD in lapply(f,eval,.SD) on 19 Apr
# TO DO: change all 1 to 1L internally (done in data.table.R, other .R to do)

# TO DO: check the "j is named list could be inefficient" message from verbose that Chris N showed recently to 15 May
# TO DO: !make sure explicitly that unnamed lists are being executed by dogroups!
# TO DO: Add to warning about a previous copy that class<-, levels<- can also copy whole vector.  *Any* fun<- form basically.
# TO DO: use looped := vs set test in example(":=") or example(setnames) to test overhead in [.data.table is tested to stay low in future.

# TO DO: add tests on smaller examples with NAs for 'frankv', even though can't compare to base::rank.
## See test-* for more tests

##########################
# End of run: restore options, then either stop() with the failed test numbers
# or print the one-line success summary.
options(warn = 0)
options(oldbwb) # set at top of this file
plat = paste0("endian=", .Platform$endian, ", sizeof(long double)==", .Machine$sizeof.longdouble)
if (nfail > 0) {
    # plural suffixes for the message
    s1 = if (nfail > 1) "s" else ""
    s2 = if (nfail > 1) "s: " else " "
    cat("\r")
    # important to stop() here, so that 'R CMD check' fails
    stop(nfail, " error", s1, " out of ", ntest, " (lastID=", lastnum, ", ", plat,
         ") in inst/tests/tests.Rraw on ", date(),
         ". Search tests.Rraw for test number", s2, paste(whichfail, collapse=", "), ".")
}
cat("\rAll ", ntest, " tests (lastID=", lastnum, ") in inst/tests/tests.Rraw completed ok in ",
    timetaken(started.at), " on ", date(), " (", plat, ")\n", sep="")
# Reporting lastnum rather than ntest makes it easier to check user has the latest version, assuming
# each release or patch has extra tests.
# date() is included so we can tell when CRAN checks were run (in particular if they have been rerun since
# an update to Rdevel itself; data.table doesn't have any other dependency) since there appears to be no other
# way to see the timestamp that CRAN checks were run. Some CRAN machines lag by several days.