From 0e67524fb2c86a175f8ad099fb22e5e1a3b96664 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 24 Jan 2020 11:55:14 +0000 Subject: [PATCH 01/21] Non-atomic types listified in rbindlist Any non-allowed column not listed in the typeorder in src/init.c are now appropriately detected in src/rbindlist.c. In these cases, the column type becomes a list, with each element wrapped in a list. --- src/init.c | 2 +- src/rbindlist.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/init.c b/src/init.c index aed2da3db..f1a433462 100644 --- a/src/init.c +++ b/src/init.c @@ -221,7 +221,7 @@ R_ExternalMethodDef externalMethods[] = { }; static void setSizes() { - for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=0; } + for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=7; } // only these types are currently allowed as column types : sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 0; sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 1; diff --git a/src/rbindlist.c b/src/rbindlist.c index f39b63f0a..1231a121e 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -292,8 +292,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } SEXP thisCol = VECTOR_ELT(li, w); int thisType = TYPEOF(thisCol); - // Use >= for #546 -- TYPEORDER=0 for both LGLSXP and EXPRSXP (but also NULL) - if (TYPEORDER(thisType)>=TYPEORDER(maxType) && !isNull(thisCol)) maxType=thisType; + if (TYPEORDER(thisType)>TYPEORDER(VECSXP) && !isNull(thisCol)) maxType=VECSXP; + else if (TYPEORDER(thisType)>TYPEORDER(maxType) && !isNull(thisCol)) maxType=thisType; if (isFactor(thisCol)) { if (isNull(getAttrib(thisCol,R_LevelsSymbol))) error(_("Column %d of item %d has type 'factor' but has no levels; i.e. malformed."), w+1, i+1); factor = true; @@ -514,7 +514,16 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow); // writeNA is integer64 aware and writes INT64_MIN } else { - if ((TYPEOF(target)==VECSXP || TYPEOF(target)==EXPRSXP) && TYPEOF(thisCol)!=TYPEOF(target)) { + if ((TYPEOF(target)==VECSXP) && TYPEOF(thisCol)>TYPEOF(target)) { + // Exotic non-atomic types need each element to be wrapped in a list, e.g. expression vectors #546 + SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; + for(int r=0; r Date: Fri, 24 Jan 2020 12:03:48 +0000 Subject: [PATCH 02/21] Fixed test, and added more extensive tests --- inst/tests/tests.Rraw | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6d17a076..fbc13ab91 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16736,7 +16736,23 @@ test(2128.5, DT[, .SD, .SDcols=function(x) NA], error='conditions were not met # expression columns in rbindlist, #546 A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time()))) B = data.table(c1 = 3, c2 = 'qwe', c3 = expression(as.character(Sys.time()+5))) -test(2129, rbind(A,B)$c3, expression(as.character(Sys.time()), as.character(Sys.time()+5))) +DT = data.table(c1 = c(1, 3), c2 = c("asd", "qwe"), + c3 = list(expression(as.character(Sys.time())), expression(as.character(Sys.time() + 5)))) +test(2129.1, rbind(A,B), DT) + +A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time()))) +B = data.table(c1 = 3:5, c2 = c('qwe', "foo", "bar"), + c3 = expression(1+1, print("test"), as.character(Sys.time()+5))) +DT = data.table(c1 = c(1, 3:5), c2 = c("asd", "qwe", "foo", "bar"), + c3 = list(expression(as.character(Sys.time())), expression(1+1), expression(print("test")), + expression(as.character(Sys.time()+5)))) +test(2129.2, rbind(A,B), DT) + +A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time()))) +B = data.table(c1 = 3:5, c2 = c('qwe', "foo", "bar"), c3 = 4:6) +DT = data.table(c1 = c(1, 3:5), c2 = c("asd", "qwe", "foo", "bar"), + c3 = list(expression(as.character(Sys.time())), 4L, 5L, 6L)) +test(2129.3, rbind(A,B), DT) # print dims in list-columns, #3671 DT = data.table( From 79e6ed252ae97d468f6ea39ed49a6c1f4895440f Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 24 Jan 2020 12:05:46 +0000 Subject: [PATCH 03/21] Updated NEWS item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2d607e9ba..315bad2de 100644 --- a/NEWS.md +++ b/NEWS.md @@ -69,7 +69,7 @@ unit = "s") 8. Compiler support for OpenMP is now detected during installation, which allows data.table to compile from source (in single threaded mode) on macOS which, frustratingly, does not include OpenMP support by default, [#2161](https://github.com/Rdatatable/data.table/issues/2161), unlike Windows and Linux. A helpful message is emitted during installation from source, and on package startup as before. Many thanks to @jimhester for the PR. This was typically a problem just after release to CRAN in the few days before macOS binaries (which do support OpenMP) are made available by CRAN. -9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report. +9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report and @sritchie73 for the PR. 10. The dimensions of objects in a `list` column are now displayed, [#3671](https://github.com/Rdatatable/data.table/issues/3671). Thanks to @randomgambit for the request, and Tyson Barrett for the PR. From b647ebe0afa0f262b090b3a1637af0c11f128f8c Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 24 Jan 2020 12:22:46 +0000 Subject: [PATCH 04/21] Added comment to future proof typeorder --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index f1a433462..a02cbdeb9 100644 --- a/src/init.c +++ b/src/init.c @@ -221,7 +221,7 @@ R_ExternalMethodDef externalMethods[] = { }; static void setSizes() { - for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=7; } + for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=7; } // typeorder init value must be > listed types below // only these types are currently allowed as column types : sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 0; sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 1; From d44129f5a36aa80caf407160e68a3fc686dde4fa Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Fri, 24 Jan 2020 14:13:35 +0000 Subject: [PATCH 05/21] Missing PROTECT added. --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 1231a121e..277f80a51 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -519,7 +519,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; for(int r=0; r Date: Fri, 24 Jan 2020 17:37:36 +0000 Subject: [PATCH 06/21] All exotic types sould now be listified --- src/rbindlist.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 277f80a51..d35a26310 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -514,18 +514,33 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow); // writeNA is integer64 aware and writes INT64_MIN } else { - if ((TYPEOF(target)==VECSXP) && TYPEOF(thisCol)>TYPEOF(target)) { - // Exotic non-atomic types need each element to be wrapped in a list, e.g. expression vectors #546 + if ((TYPEOF(target)==VECSXP) && (isVectorAtomic(thisCol) || TYPEOF(thisCol)==LISTSXP)) { + // do an as.list() on the atomic column; #3528 + // pairlists (LISTSXP) can also be coerced to lists using coerceVector + thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); nprotect++; + } else if ((TYPEOF(target)==VECSXP) && TYPEOF(thisCol)==EXPRSXP) { + // For EXPRSXP each element to be wrapped in a list, e.g. expression vectors #546 SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; for(int r=0; r 1, in which case the other columns in data.table + // will have been recycled. We therefore in turn have to recycle the list elements + // to match the number of rows. + SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; + for(int r=0; r Date: Fri, 24 Jan 2020 18:04:18 +0000 Subject: [PATCH 07/21] Added missing comment to UNPROTECT --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index d35a26310..de6aa448d 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -552,6 +552,6 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } } } - UNPROTECT(nprotect); // ans, coercedForFactor, thisCol + UNPROTECT(nprotect); // ans, coercedForFactor, thisCol, thisElement return(ans); } From a7ae6db9a9a2225586e0f88c5170e3eb89adb84c Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 25 Jan 2020 10:46:20 +0000 Subject: [PATCH 08/21] Added tests --- inst/tests/tests.Rraw | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fbc13ab91..ad1cc5c9c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16733,7 +16733,7 @@ test(2128.3, DT[, .SD, .SDcols=function(x) x==1], error='conditions were not met test(2128.4, DT[, .SD, .SDcols=function(x) 2L], error='conditions were not met for: [a, b, c]') test(2128.5, DT[, .SD, .SDcols=function(x) NA], error='conditions were not met for: [a, b, c]') -# expression columns in rbindlist, #546 +# Test non-atomic columns in rbindlist, e.g. expressions, #546 A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time()))) B = data.table(c1 = 3, c2 = 'qwe', c3 = expression(as.character(Sys.time()+5))) DT = data.table(c1 = c(1, 3), c2 = c("asd", "qwe"), @@ -16754,7 +16754,14 @@ DT = data.table(c1 = c(1, 3:5), c2 = c("asd", "qwe", "foo", "bar"), c3 = list(expression(as.character(Sys.time())), 4L, 5L, 6L)) test(2129.3, rbind(A,B), DT) -# print dims in list-columns, #3671 +A = data.table(c1 = 1, c2 = quote(1+1)) # length(quote(1+1)) = 3, so A[,.N] = 3 with c1 recycled +B = data.table(c1 = 2, c2 = 'asd') +test(2129.4, rbind(A, B)$c2, list(quote(1+1), quote(1+1), quote(1+1), 'asd')) # list(quote(1+1)) is recycled + +A = data.table(c1 = 1:3, c2 = pairlist(A=1, B=list(1, 2:3), C=pairlist(C1=1, C2=3:4))) +test(2129.5, rbind(A,B)$c2, c(as.list(A$c2), 'asd')) # top level pairlist becomes list + +# print dims in list-colrmns, #3671 DT = data.table( x = 1:2, y = list(data.table(x=1, y=1), From 2bae80cf4e2ba7ae5ea9863848f8796f94b945f8 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 27 Jan 2020 12:56:57 +0000 Subject: [PATCH 09/21] Abstracted list coercion into separate function I.e. plan to use in Casmatrix in #4144 --- src/data.table.h | 1 + src/rbindlist.c | 29 ++--------------------------- src/utils.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index f2687f52e..a5008bf58 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -233,6 +233,7 @@ bool islocked(SEXP x); SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); +SEXP coerceAsList(SEXP x, int len); // types.c char *end(char *start); diff --git a/src/rbindlist.c b/src/rbindlist.c index de6aa448d..45dd451b6 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -514,33 +514,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow); // writeNA is integer64 aware and writes INT64_MIN } else { - if ((TYPEOF(target)==VECSXP) && (isVectorAtomic(thisCol) || TYPEOF(thisCol)==LISTSXP)) { - // do an as.list() on the atomic column; #3528 - // pairlists (LISTSXP) can also be coerced to lists using coerceVector - thisCol = PROTECT(coerceVector(thisCol, TYPEOF(target))); nprotect++; - } else if ((TYPEOF(target)==VECSXP) && TYPEOF(thisCol)==EXPRSXP) { - // For EXPRSXP each element to be wrapped in a list, e.g. expression vectors #546 - SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; - for(int r=0; r 1, in which case the other columns in data.table - // will have been recycled. We therefore in turn have to recycle the list elements - // to match the number of rows. - SEXP thisColList = PROTECT(allocVector(VECSXP, length(thisCol))); nprotect++; - for(int r=0; r 1, in which case the other columns in data.table + // will have been recycled. We therefore in turn have to recycle the list elements + // to match the number of rows. + coerced = PROTECT(allocVector(VECSXP, len)); nprotect++; + for(int i=0; i Date: Mon, 27 Jan 2020 16:06:24 +0000 Subject: [PATCH 10/21] bugfix abstracted function --- src/utils.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/utils.c b/src/utils.c index 3822b3b9d..a624a3d43 100644 --- a/src/utils.c +++ b/src/utils.c @@ -365,10 +365,10 @@ SEXP coerceAsList(SEXP x, int len) { // do an as.list() on the atomic column; #3528 // pairlists (LISTSXP) can also be coerced to lists using coerceVector coerced = PROTECT(coerceVector(x, VECSXP)); nprotect++; - } else if (TYPEOF(thisCol)==EXPRSXP) { + } else if (TYPEOF(x)==EXPRSXP) { // For EXPRSXP each element to be wrapped in a list, e.g. expression vectors #546 - coerced = PROTECT(allocVector(VECSXP, len)); nprotect++; - for(int i=0; i 1, in which case the other columns in data.table // will have been recycled. We therefore in turn have to recycle the list elements // to match the number of rows. + if (len < 1) // len used here instead of length(x) because length(x) does not reflect length of target vector data.table + error("Internal Error: len provided to C function coerceAsList must be > 0\n"); // # nocov coerced = PROTECT(allocVector(VECSXP, len)); nprotect++; - for(int i=0; i Date: Mon, 27 Jan 2020 16:06:45 +0000 Subject: [PATCH 11/21] Add test and allow integer64 when maxType = VECSXP --- inst/tests/tests.Rraw | 7 +++++++ src/rbindlist.c | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ad1cc5c9c..0f9f2f1d3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16761,6 +16761,13 @@ test(2129.4, rbind(A, B)$c2, list(quote(1+1), quote(1+1), quote(1+1), 'asd')) # A = data.table(c1 = 1:3, c2 = pairlist(A=1, B=list(1, 2:3), C=pairlist(C1=1, C2=3:4))) test(2129.5, rbind(A,B)$c2, c(as.list(A$c2), 'asd')) # top level pairlist becomes list +if (test_bit64) { + A = data.table(A=list(1, 2:3, 4)) + B = data.table(A=as.integer64(5:7)) + target = data.table(A=list(1, 2:3, 4, as.integer64(5), as.integer64(6), as.integer64(7))) + test(2129.6, rbind(A,B), target) # list of atomic vectors should preserve class +} + # print dims in list-colrmns, #3671 DT = data.table( x = 1:2, diff --git a/src/rbindlist.c b/src/rbindlist.c index 45dd451b6..1bedb0c42 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -322,7 +322,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (!foundName) { static char buff[12]; sprintf(buff,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) - if (int64 && maxType!=REALSXP) + if (int64 && maxType!=REALSXP && maxType!=VECSXP) error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list @@ -517,6 +517,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (TYPEOF(target)==VECSXP) { thisCol = PROTECT(coerceAsList(thisCol, thisnrow)); nprotect++; } + // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) const char *ret = memrecycle(target, R_NilValue, ansloc, thisnrow, thisCol, 0, -1, idcol+j+1, foundName); if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); From 5f6543a29d084c270f728c26f006ec9d8956bc96 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 27 Jan 2020 17:02:05 +0000 Subject: [PATCH 12/21] Class propogates to list elements --- src/utils.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index a624a3d43..69806ba0f 100644 --- a/src/utils.c +++ b/src/utils.c @@ -361,8 +361,14 @@ SEXP coerceAsList(SEXP x, int len) { int nprotect = 0; if (TYPEOF(x)==VECSXP) { return(x); - } else if (isVectorAtomic(x) || TYPEOF(x)==LISTSXP) { + } else if (isVectorAtomic(x)) { // do an as.list() on the atomic column; #3528 + coerced = PROTECT(coerceVector(x, VECSXP)); nprotect++; + // propogate any class attributes onto each list element: + for(int i=0; i Date: Mon, 27 Jan 2020 17:07:45 +0000 Subject: [PATCH 13/21] No error on class mismatch when maxType==VECSXP --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 1bedb0c42..d5e4410ca 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -309,7 +309,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } if (firsti==-1) { firsti=i; firstw=w; firstCol=thisCol; } else { - if (!factor && !int64) { + if (!factor && !int64 && maxType != VECSXP) { if (!R_compute_identical(PROTECT(getAttrib(thisCol, R_ClassSymbol)), PROTECT(getAttrib(firstCol, R_ClassSymbol)), 0)) { From 49fd930b6c86e228dfb65e8c2ff972dacc246d3f Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 27 Jan 2020 17:34:59 +0000 Subject: [PATCH 14/21] Patch to handle memrecycle behaviour --- src/rbindlist.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index d5e4410ca..186ffdf0f 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -514,7 +514,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow); // writeNA is integer64 aware and writes INT64_MIN } else { - if (TYPEOF(target)==VECSXP) { + if (maxType==VECSXP) { thisCol = PROTECT(coerceAsList(thisCol, thisnrow)); nprotect++; } @@ -523,6 +523,10 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn + + // memrecycle incorrectly adds integer64 class to whole list if any element has class integer64 + if (maxType == VECSXP && INHERITS(target, char_integer64)) + setAttrib(target, R_ClassSymbol, R_NilValue); } ansloc += thisnrow; } From 2e26e9b86d59fe4602ce5bb3e31078c448abfe5e Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 27 Jan 2020 23:20:15 +0000 Subject: [PATCH 15/21] Bugfix class addition to list column. copyMostAttrib statement after target allocation responsible, not memrecycle as initially debugging suggested. copyMostAttrib now skipped if target column is VECSXP. --- src/rbindlist.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index 186ffdf0f..ed563a663 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -326,7 +326,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - if (!factor) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. + if (!factor & maxType != VECSXP) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1); @@ -523,10 +523,6 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn - - // memrecycle incorrectly adds integer64 class to whole list if any element has class integer64 - if (maxType == VECSXP && INHERITS(target, char_integer64)) - setAttrib(target, R_ClassSymbol, R_NilValue); } ansloc += thisnrow; } From 10baf706de228ecae03114ed7c7b422c0025e2c8 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 27 Jan 2020 23:44:25 +0000 Subject: [PATCH 16/21] & should be && --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index ed563a663..c6e612a6d 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -326,7 +326,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) error(_("Internal error: column %d of result is determined to be integer64 but maxType=='%s' != REALSXP"), j+1, type2char(maxType)); // # nocov SEXP target; SET_VECTOR_ELT(ans, idcol+j, target=allocVector(maxType, nrow)); // does not initialize logical & numerics, but does initialize character and list - if (!factor & maxType != VECSXP) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. + if (!factor && maxType != VECSXP) copyMostAttrib(firstCol, target); // all but names,dim and dimnames; mainly for class. And if so, we want a copy here, not keepattr's SET_ATTRIB. if (factor && anyNotStringOrFactor) { // in future warn, or use list column instead ... warning(_("Column %d contains a factor but not all items for the column are character or factor"), idcol+j+1); From e2f3b651e51282406bc617fcf36468d6b5aae204 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Tue, 28 Jan 2020 09:52:04 +0000 Subject: [PATCH 17/21] Reverted no longer relevant comment change --- src/rbindlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rbindlist.c b/src/rbindlist.c index c6e612a6d..ab9e7f29d 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -528,6 +528,6 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } } } - UNPROTECT(nprotect); // ans, coercedForFactor, thisCol, thisElement + UNPROTECT(nprotect); // ans, coercedForFactor, thisCol return(ans); } From e0d2f465c7a8faaf7656c24d1423d48e89772104 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Tue, 28 Jan 2020 12:57:46 +0000 Subject: [PATCH 18/21] Generalised vector case, uses memrecycle --- src/utils.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/utils.c b/src/utils.c index 69806ba0f..55aa59eb6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -361,23 +361,25 @@ SEXP coerceAsList(SEXP x, int len) { int nprotect = 0; if (TYPEOF(x)==VECSXP) { return(x); - } else if (isVectorAtomic(x)) { - // do an as.list() on the atomic column; #3528 - coerced = PROTECT(coerceVector(x, VECSXP)); nprotect++; - // propogate any class attributes onto each list element: - for(int i=0; i 0 should be impossible because TYPEOF(thisElement) == TYPEOF(x) } } else if (!isVector(x)) { // Anything not a vector we can assign directly through SET_VECTOR_ELT @@ -396,6 +398,6 @@ SEXP coerceAsList(SEXP x, int len) { error("Internal error: cannot coerce type %s to list\n", type2char(TYPEOF(x))); // # nocov } - UNPROTECT(nprotect); + UNPROTECT(nprotect); // coerced, thisElement return(coerced); } From 60dddd1280eb0a95b21a6d7c367a96fb63172246 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sat, 1 Feb 2020 11:17:56 +0000 Subject: [PATCH 19/21] Better PROTECT stack management --- src/data.table.h | 2 +- src/rbindlist.c | 6 ++++-- src/utils.c | 16 ++++++++-------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index a5008bf58..741371167 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -233,7 +233,7 @@ bool islocked(SEXP x); SEXP islockedR(SEXP x); bool need2utf8(SEXP x); SEXP coerceUtf8IfNeeded(SEXP x); -SEXP coerceAsList(SEXP x, int len); +SEXP coerceAsList(SEXP x, int len, int *nprotect); // types.c char *end(char *start); diff --git a/src/rbindlist.c b/src/rbindlist.c index ab9e7f29d..ad1bb1099 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -514,8 +514,9 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (w==-1 || !length(thisCol=VECTOR_ELT(li, w))) { // !length for zeroCol warning above; #1871 writeNA(target, ansloc, thisnrow); // writeNA is integer64 aware and writes INT64_MIN } else { + int cnprotect = 0; if (maxType==VECSXP) { - thisCol = PROTECT(coerceAsList(thisCol, thisnrow)); nprotect++; + thisCol = coerceAsList(thisCol, thisnrow, &cnprotect); // cnprotect incremented by 0 or 1 } // else coerces if needed within memrecycle; with a no-alloc direct coerce from 1.12.4 (PR #3909) @@ -523,11 +524,12 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 // TODO: but maxType should handle that and this should never warn + UNPROTECT(cnprotect); // pop coerced 'thisCol' from PROTECT stack - not need now copied into 'target'. } ansloc += thisnrow; } } } - UNPROTECT(nprotect); // ans, coercedForFactor, thisCol + UNPROTECT(nprotect); // ans, coercedForFactor return(ans); } diff --git a/src/utils.c b/src/utils.c index 55aa59eb6..80cf0b6df 100644 --- a/src/utils.c +++ b/src/utils.c @@ -356,20 +356,19 @@ SEXP coerceUtf8IfNeeded(SEXP x) { return(ans); } -SEXP coerceAsList(SEXP x, int len) { +SEXP coerceAsList(SEXP x, int len, int *nprotect) { SEXP coerced; - int nprotect = 0; if (TYPEOF(x)==VECSXP) { return(x); } if (TYPEOF(x) == LISTSXP) { - coerced = PROTECT(coerceVector(x, VECSXP)); nprotect++; // top level pairlist converted to regular list + coerced = PROTECT(coerceVector(x, VECSXP)); (*nprotect)++; // top level pairlist converted to regular list } else if (isVector(x)) { // Allocate VECSXP container, - coerced = PROTECT(allocVector(VECSXP, length(x))); nprotect++; + coerced = PROTECT(allocVector(VECSXP, length(x))); (*nprotect)++; for(int i=0; i 0\n"); // # nocov - coerced = PROTECT(allocVector(VECSXP, len)); nprotect++; + coerced = PROTECT(allocVector(VECSXP, len)); (*nprotect)++; for(int i=0; i Date: Mon, 18 May 2020 14:01:55 +0100 Subject: [PATCH 20/21] Unit test for expression column with length > 1 --- inst/tests/tests.Rraw | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0f9f2f1d3..29b95ad18 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16748,18 +16748,27 @@ DT = data.table(c1 = c(1, 3:5), c2 = c("asd", "qwe", "foo", "bar"), expression(as.character(Sys.time()+5)))) test(2129.2, rbind(A,B), DT) +A = data.table(c1 = 1:2, c2 = c('asd', 'xzy'), c3 = expression(as.character(Sys.time()), 1+2)) +B = data.table(c1 = 3:5, c2 = c('qwe', "foo", "bar"), + c3 = expression(1+1, print("test"), as.character(Sys.time()+5))) +DT = data.table(c1 = c(1:2, 3:5), c2 = c("asd", "xzy", "qwe", "foo", "bar"), + c3 = list(expression(as.character(Sys.time())), expression(1+2), + expression(1+1), expression(print("test")), + expression(as.character(Sys.time()+5)))) +test(2129.3, rbind(A,B), DT) + A = data.table(c1 = 1, c2 = 'asd', c3 = expression(as.character(Sys.time()))) B = data.table(c1 = 3:5, c2 = c('qwe', "foo", "bar"), c3 = 4:6) DT = data.table(c1 = c(1, 3:5), c2 = c("asd", "qwe", "foo", "bar"), c3 = list(expression(as.character(Sys.time())), 4L, 5L, 6L)) -test(2129.3, rbind(A,B), DT) +test(2129.4, rbind(A,B), DT) A = data.table(c1 = 1, c2 = quote(1+1)) # length(quote(1+1)) = 3, so A[,.N] = 3 with c1 recycled B = data.table(c1 = 2, c2 = 'asd') -test(2129.4, rbind(A, B)$c2, list(quote(1+1), quote(1+1), quote(1+1), 'asd')) # list(quote(1+1)) is recycled +test(2129.5, rbind(A, B)$c2, list(quote(1+1), quote(1+1), quote(1+1), 'asd')) # list(quote(1+1)) is recycled A = data.table(c1 = 1:3, c2 = pairlist(A=1, B=list(1, 2:3), C=pairlist(C1=1, C2=3:4))) -test(2129.5, rbind(A,B)$c2, c(as.list(A$c2), 'asd')) # top level pairlist becomes list +test(2129.6, rbind(A,B)$c2, c(as.list(A$c2), 'asd')) # top level pairlist becomes list if (test_bit64) { A = data.table(A=list(1, 2:3, 4)) From 00c6d859b158737f130d3f5a1da416693358e14d Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 18 May 2020 14:18:15 +0100 Subject: [PATCH 21/21] bugfix test number increment --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 29b95ad18..f65ebc8f9 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16774,7 +16774,7 @@ if (test_bit64) { A = data.table(A=list(1, 2:3, 4)) B = data.table(A=as.integer64(5:7)) target = data.table(A=list(1, 2:3, 4, as.integer64(5), as.integer64(6), as.integer64(7))) - test(2129.6, rbind(A,B), target) # list of atomic vectors should preserve class + test(2129.7, rbind(A,B), target) # list of atomic vectors should preserve class } # print dims in list-colrmns, #3671