diff --git a/NEWS.md b/NEWS.md index 2d607e9ba..f0184a815 100644 --- a/NEWS.md +++ b/NEWS.md @@ -87,6 +87,8 @@ unit = "s") 6. `all.equal(DT, y)` no longer errors when `y` is not a data.table, [#4042](https://github.com/Rdatatable/data.table/issues/4042). Thanks to @d-sci for reporting and the PR. +7. `rbindlist()` now coerces raw to logical, rather than logical to raw, when the two types are combined, [#4172](https://github.com/Rdatatable/data.table/issues/4172). This makes it consistent with base R's coercion rules, e.g. `c()`. Thanks to @sritchie73 for reporting and fixing. + ## NOTES 1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6d17a076..42458c516 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16770,7 +16770,13 @@ test(2132.2, fifelse(TRUE, 1, s2), error = "S4 class objects (except nanot test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see https://github.com/Rdatatable/data.table/issues/4131.") rm(s1, s2, class2132) +# Check rbindlist coercion rules for raw match base R (e.g. 
using c()) #4172 +DT1 = data.table(a=as.raw(1), b=as.raw(2), c=as.raw(3), d=as.raw(4), e=as.raw(5), f=as.raw(6), g=as.raw(7), h=as.raw(8)) +DT2 = data.table(a=as.raw(1), b=TRUE, c=1L, d=1.5, e=complex(real=3, imaginary=1), f="a", g=list(3:5), h=expression(1+1)) +DT3 = setDT(lapply(names(DT1), function(j) c(DT1[[j]], DT2[[j]]))) +setnames(DT3, names(DT1)) +test(2133, rbind(DT1, DT2), DT3, warning="Column 2 of item 1: 2 (type 'raw') at RHS position 1 taken as TRUE when assigning to type 'logical' (column 2 named 'b')") # warning expected due to loss of precision when coercing raw to logical ######################## # Add new tests here # -######################## +######################## \ No newline at end of file diff --git a/src/init.c b/src/init.c index aed2da3db..d89c6cb92 100644 --- a/src/init.c +++ b/src/init.c @@ -223,8 +223,8 @@ R_ExternalMethodDef externalMethods[] = { static void setSizes() { for (int i=0; i<100; ++i) { sizes[i]=0; typeorder[i]=0; } // only these types are currently allowed as column types : - sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 0; - sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 1; + sizes[RAWSXP] = sizeof(Rbyte); typeorder[RAWSXP] = 0; + sizes[LGLSXP] = sizeof(int); typeorder[LGLSXP] = 1; sizes[INTSXP] = sizeof(int); typeorder[INTSXP] = 2; // integer and factor sizes[REALSXP] = sizeof(double); typeorder[REALSXP] = 3; // numeric and integer64 sizes[CPLXSXP] = sizeof(Rcomplex); typeorder[CPLXSXP] = 4; diff --git a/src/rbindlist.c b/src/rbindlist.c index f39b63f0a..b5c1e4ed3 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -271,7 +271,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) SEXP coercedForFactor = NULL; for(int j=0; j= for #546 -- TYPEORDER=0 for both LGLSXP and EXPRSXP (but also NULL) - if (TYPEORDER(thisType)>=TYPEORDER(maxType) && !isNull(thisCol)) maxType=thisType; + // Use >= for #546 -- TYPEORDER=0 for both RAWSXP and EXPRSXP. 
For NULL, keep maxType as -1 + if (!isNull(thisCol) && (maxType == -1 || TYPEORDER(thisType)>=TYPEORDER(maxType))) maxType=thisType; if (isFactor(thisCol)) { if (isNull(getAttrib(thisCol,R_LevelsSymbol))) error(_("Column %d of item %d has type 'factor' but has no levels; i.e. malformed."), w+1, i+1); factor = true; @@ -320,6 +320,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } } + if (maxType == -1) maxType=LGLSXP; // no non-NULL column was found for this result column (e.g. col X is NULL in both lists), so it is filled with NA_logical_; #1871, test 2002.03 if (!foundName) { static char buff[12]; sprintf(buff,"V%d",j+1), SET_STRING_ELT(ansNames, idcol+j, mkChar(buff)); foundName=buff; } if (factor) maxType=INTSXP; // if any items are factors then a factor is created (could be an option) if (int64 && maxType!=REALSXP)