diff --git a/NEWS.md b/NEWS.md index 74edb9961..f17c8205a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -597,6 +597,8 @@ 14. The options `datatable.print.class` and `datatable.print.keys` are now `TRUE` by default. They have been available since v1.9.8 (Nov 2016) and v1.11.0 (May 2018) respectively. +15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463). + # data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1) diff --git a/src/assign.c b/src/assign.c index f48f71e73..61f38a554 100644 --- a/src/assign.c +++ b/src/assign.c @@ -684,6 +684,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) #define MSGSIZE 1000 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects +const char *targetDesc(const int colnum, const char *colname) { + static char str[501]; // #5463 + snprintf(str, 500, colnum==0 ? _("target vector") : _("column %d named '%s'"), colnum, colname); + return str; +} + const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname) // like memcpy but recycles single-item source // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer() @@ -707,8 +713,6 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (colname==NULL) error(_("Internal error: memrecycle has received NULL colname")); // # nocov *memrecycle_message = '\0'; - static char targetDesc[501]; // from 1.14.1 coerceAs reuses memrecycle for a target vector, PR#4491 - snprintf(targetDesc, 500, colnum==0 ? _("target vector") : _("column %d named '%s'"), colnum, colname); int protecti=0; const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); const bool sourceIsI64=isReal(source) && INHERITS(source, char_integer64); @@ -730,7 +734,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel) { - error(_("Assigning factor numbers to %s. But %d is outside the level range [1,%d]"), targetDesc, val, nlevel); + error(_("Assigning factor numbers to %s. But %d is outside the level range [1,%d]"), targetDesc(colnum, colname), val, nlevel); } } } else { @@ -738,7 +742,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel)) { - error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc, val, nlevel); + error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } } @@ -830,19 +834,19 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } } } else if (isString(source) && !isString(target) && !isNewList(target)) { - warning(_("Coercing 'character' RHS to '%s' to match the type of %s."), targetIsI64?"integer64":type2char(TYPEOF(target)), targetDesc); + warning(_("Coercing 'character' RHS to '%s' to match the type of %s."), targetIsI64?"integer64":type2char(TYPEOF(target)), targetDesc(colnum, colname)); // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion' // and also because 'character' to integer/double coercion is often a user mistake (e.g. wrong target column, or wrong // variable on RHS) which they are more likely to appreciate than find inconvenient source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if (isNewList(source) && !isNewList(target)) { if (targetIsI64) { - error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of %s."), targetDesc); + error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of %s."), targetDesc(colnum, colname)); // because R's coerceVector doesn't know about integer64 } // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3)) // relied on by NNS, simstudy and table.express; tests 1294.* - warning(_("Coercing 'list' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc); + warning(_("Coercing 'list' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc(colnum, colname)); source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { if (GetVerbose()>=3) { @@ -850,27 +854,27 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' %s.\n"), sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), targetIsI64 ? "integer64" : type2char(TYPEOF(target)), - targetDesc); + targetDesc(colnum, colname)); } // The following checks are up front here, otherwise we'd need them twice in the two branches // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. // The idea is to do these range checks without calling coerceVector() (which allocates) -#define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \ - const STYPE *sd = (const STYPE *)RFUN(source); \ - for (int i=0; i0 && slen==len && soff==0; // mc=memcpy; only if types match and not for single items (a single assign faster than these non-const memcpy calls)