Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add gforce dynamic support #6636

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 5 additions & 22 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1890,10 +1890,11 @@ replace_dot_alias = function(e) {
assign(".N", len__, thisEnv) # For #334
#fix for #1683
if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
ans = gforce(thisEnv, jsub, o__, f__, len__, irows, # irows needed for #971
.Call(CsubsetVector, groups, grpcols), # just a list() subset to make C level neater; doesn't copy column contents
lhs) # for now this just prevents := with new feature first/last n>1; in future see TODO below
gi = if (length(o__)) o__[f__] else f__
g = lapply(grpcols, function(i) .Call(CsubsetVector, groups[[i]], gi)) # use CsubsetVector instead of [ to preserve attributes #5567

# returns all rows instead of one per group
nrow_funs = c("gshift")
.is_nrows = function(q) {
Expand All @@ -1904,33 +1905,15 @@ replace_dot_alias = function(e) {
q[[1L]] %chin% nrow_funs
}
}

# adding ghead/gtail(n) support for n > 1 #5060 #523
q3 = 0
if (!is.symbol(jsub)) {
headTail_arg = function(q) {
if (length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
(q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
else 0
}
if (jsub %iscall% "list"){
q3 = max(sapply(jsub, headTail_arg))
} else if (length(jsub)==3L) {
q3 = headTail_arg(jsub)
}
}
if (q3 > 0) {
grplens = pmin.int(q3, len__)
g = lapply(g, rep.int, times=grplens)
} else if (.is_nrows(jsub)) {
if (.is_nrows(jsub)) {
g = lapply(g, rep.int, times=len__)
# unpack list of lists for nrows functions
zip_items = function(ll) do.call(mapply, c(list(FUN = c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE))
if (all(vapply_1b(ans, is.list))) {
ans = lapply(ans, zip_items)
}
}
ans = c(g, ans)
} else {
ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, showProgress)
}
Expand Down Expand Up @@ -3087,7 +3070,7 @@ gshift = function(x, n=1L, fill=NA, type=c("lag", "lead", "shift", "cyclic")) {
stopifnot(is.numeric(n))
.Call(Cgshift, x, as.integer(n), fill, type)
}
gforce = function(env, jsub, o, f, l, rows) .Call(Cgforce, env, jsub, o, f, l, rows)
gforce = function(env, jsub, o, f, l, rows, grpcols, lhs) .Call(Cgforce, env, jsub, o, f, l, rows, grpcols, lhs)

# GForce needs to evaluate all arguments not present in the data.table before calling C part #5547
# Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list
Expand Down
6 changes: 3 additions & 3 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18119,9 +18119,9 @@ test(2233.25, copy(DT)[a!=4, v := head(b, 3L), a, verbose=TRUE], copy(DT)[a!=4,
DT = data.table(a=1:3,b=(1:9)/10)
test(2233.26, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:3)/10, v2=(7:9)/10), output="GForce optimized j to")
test(2233.27, DT[, c("v1","v2") := .(head(b,3L), tail(b,3L)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:9)/10, v2=(1:9)/10), output="GForce optimized j to")
test(2233.28, DT[, c("v1","v2") := .(head(b,3L), tail(b,2L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v2'.")
test(2233.29, DT[, c("v1","v2") := .(head(b,2L), tail(b,3L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v1'.")
test(2233.30, DT[, c("v1","v2") := .(head(b,2L), tail(b,2L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v1'.")
test(2233.28, DT[, c("v1","v2") := .(head(b,3L), tail(b,2L)), a], error="Supplied 6 items.*9")
test(2233.29, DT[, c("v1","v2") := .(head(b,2L), tail(b,3L)), a], error="Supplied 6 items.*9")
test(2233.30, DT[, c("v1","v2") := .(head(b,2L), tail(b,2L)), a], error="Supplied 6 items.*9.")
test(2233.31, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], DT[, c("v1","v2") := .(base::min(b), base::max(b)), a ], output="GForce optimized j to")
test(2233.32, DT[, c("v1","v2") := .(head(b,3L), tail(b,3L)), a, verbose=TRUE], DT[, c("v1","v2") := .(utils::head(b,3L), utils::tail(b,3L)), a], output="GForce optimized j to")

Expand Down
2 changes: 1 addition & 1 deletion src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
names = getAttrib(dt,R_NamesSymbol);
// names may be NULL when null.data.table() passes list() to alloccol for example.
// So, careful to use length() on names, not LENGTH().
if (length(names)!=l) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
if (length(names)!=l && length(names)>0) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
if (!selfrefok(dt,verbose))
return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
// added (n>l) ? ... for #970, see test 1481.
Expand Down
3 changes: 2 additions & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ extern SEXP sym_index;
extern SEXP sym_BY;
extern SEXP sym_starts, char_starts;
extern SEXP sym_maxgrpn;
extern SEXP sym_gforce_dynamic;
extern SEXP sym_anyna;
extern SEXP sym_anyinfnan;
extern SEXP sym_anynotascii;
Expand Down Expand Up @@ -302,7 +303,7 @@ SEXP expandAltRep(SEXP);
SEXP fmelt(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP fcast(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP issorted(SEXP, SEXP);
SEXP gforce(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP gforce(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
SEXP gsum(SEXP, SEXP);
SEXP gmean(SEXP, SEXP);
SEXP gmin(SEXP, SEXP);
Expand Down
13 changes: 11 additions & 2 deletions src/dogroups.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
warning(_("Item %d of j's result for group %d is zero length. This will be filled with %d NAs to match the longest column in this result. Later groups may have a similar problem but only the first is reported to save filling the warning buffer."), j+1, i+1, maxn);
NullWarnDone = TRUE;
}
writeNA(target, thisansloc, maxn, false);
writeNA(target, thisansloc, maxn, true);
} else {
// thislen>0
if (TYPEOF(source) != TYPEOF(target))
Expand All @@ -438,7 +438,16 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
source = PROTECT(copyAsPlain(source));
copied = true;
}
memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "");
if (TRUELENGTH(source)==LENGTH(source)) {
// first() and last() set truelength to mark that it is a true vector; see comments at the end of last.R and test 2240.81
// a true vector is not recycled when length-1 and is padded with NA to match the length of the longest result
memrecycle(target, R_NilValue, thisansloc, thislen, source, 0, -1, 0, ""); // just using memrecycle to copy contents
writeNA(target, thisansloc+thislen, maxn-thislen, true); // pad with NA
} else {
if (thislen>1 && thislen!=maxn && grpn>0) // grpn>0 for grouping empty tables; test 1986
error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn);
memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "");
}
if (copied) UNPROTECT(1);
}
}
Expand Down
Loading
Loading