From b4057af4bdcae0c84a36593dc3aa3b4588d35227 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jul 2019 04:16:03 +0800 Subject: [PATCH] gsum, gmean, gfirst, glast, g[ for complex vectors (#3692) --- inst/tests/tests.Rraw | 38 +++++--- src/coalesce.c | 11 ++- src/gsumm.c | 211 +++++++++++++++++++++++++++++++++++------- 3 files changed, 214 insertions(+), 46 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 299de72fc..6fb289dd5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13602,7 +13602,6 @@ test(1967.78, x[1:5, sum(v), by = list(i5 = 1:5 %% 2L), verbose = TRUE], # gforce integer overflow coerce to double DT = data.table(A=1:5, B=-3i, C=2147483647L) -test(1968.1, DT[, sum(B), by=A%%2L], error="Type 'complex' not supported by GForce sum (gsum). Either add the") test(1968.2, storage.mode(DT$C), "integer") test(1968.3, DT[, sum(C), by=A%%2L], data.table(A=c(1L,0L), V1=c(6442450941, 4294967294)), warning="sum.*integer column.*more than type 'integer' can hold.*coerced to 'numeric'") @@ -14257,16 +14256,16 @@ if (test_bit64) { DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6))) test(2019, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows } -DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i)) -test(2020.01, DT[, min(v), by=id], error="'complex' not supported by GForce min") -test(2020.02, DT[, max(v), by=id], error="'complex' not supported by GForce max") -test(2020.03, DT[, median(v), by=id], error="'complex' not supported by GForce median") -test(2020.04, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head") -test(2020.05, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail") -test(2020.06, DT[, v[1], by=id], error="'complex' not supported by GForce subset") -test(2020.07, DT[, sd(v), by=id], error="'complex' not supported by GForce sd") -test(2020.08, DT[, var(v), by=id], error="'complex' not supported by GForce var") -test(2020.09, DT[, prod(v), by=id], error="'complex' not supported by GForce prod") +DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2)) +test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min") +test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce max") +test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median") +test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head") +test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce tail") +test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce subset") +test(2020.07, DT[, sd(v), by=id], error="'raw' not supported by GForce sd") +test(2020.08, DT[, var(v), by=id], error="'raw' not supported by GForce var") +test(2020.09, DT[, prod(v), by=id], error="'raw' not supported by GForce prod") DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA)) test(2020.10, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs @@ -15298,6 +15297,23 @@ test(2065.6, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum], c(1.8791864549242+0i, 3.17903639358309+0i, -4.18868631527035+0i)) test(2065.7, DT[1L, z_sum := 1i][1L, z_sum], 1i) +# GForce for complex columns, part of #3690 +DT = data.table(id=c(1L,1L,2L), v=c(1i, 2i, 3i)) +test(2066.01, DT[, min(v), by=id], error="'complex' has no well-defined min") +test(2066.02, DT[, max(v), by=id], error="'complex' has no well-defined max") +test(2066.03, DT[, head(v, 1), by=id], data.table(id=1:2, V1=c(1, 3)*1i)) +test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i)) +test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) +## former test 1968.1 +DT = data.table(A=1:5, B=-3i, C=2147483647L) +test(2066.06, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -6i), V2=-3i)) +test(2066.07, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(-6i, -3i), V2=-3i)) +DT[4, B:=NA] +test(2066.08, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, NA), V2=c(-3i, NA))) +test(2066.09, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(NA, -3i), V2=c(NA, -3i))) +test(2066.10, DT[, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -3i), V2=-3i)) +test(2066.11, DT[2:4, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=0:1, V1=c(-3i, -3i), V2=-3i)) + ################################### # Add new tests above this line # diff --git a/src/coalesce.c b/src/coalesce.c index d1cb2bc0a..e57d63eb4 100644 --- a/src/coalesce.c +++ b/src/coalesce.c @@ -7,6 +7,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { error("Internal error in coalesce.c: argument 'inplaceArg' must be TRUE or FALSE"); // # nocov const bool inplace = LOGICAL(inplaceArg)[0]; const bool verbose = GetVerbose(); + int nprotect = 0; if (length(x)==0) return R_NilValue; SEXP first; // the first vector (it might be the first argument, or it might be the first column of a data.table|frame int off = 1; // when x has been pointed to the list of replacement candidates, is the first candidate in position 0 or 1 in the list @@ -29,21 +30,23 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { if (factor) { if (!isFactor(item)) error("Item 1 is a factor but item %d is not a factor. When factors are involved, all items must be factor.", i+2); - if (!R_compute_identical(getAttrib(first, R_LevelsSymbol), getAttrib(item, R_LevelsSymbol), 0)) + if (!R_compute_identical(PROTECT(getAttrib(first, R_LevelsSymbol)), PROTECT(getAttrib(item, R_LevelsSymbol)), 0)) error("Item %d is a factor but its levels are not identical to the first item's levels.", i+2); + UNPROTECT(2); } else { if (isFactor(item)) error("Item %d is a factor but item 1 is not a factor. When factors are involved, all items must be factor.", i+2); } if (TYPEOF(first) != TYPEOF(item)) error("Item %d is type %s but the first item is type %s. Please coerce before coalescing.", i+2, type2char(TYPEOF(item)), type2char(TYPEOF(first))); - if (!R_compute_identical(getAttrib(first, R_ClassSymbol), getAttrib(item, R_ClassSymbol), 0)) + if (!R_compute_identical(PROTECT(getAttrib(first, R_ClassSymbol)), PROTECT(getAttrib(item, R_ClassSymbol)), 0)) error("Item %d has a different class than item 1.", i+2); + UNPROTECT(2); if (length(item)!=1 && length(item)!=nrow) error("Item %d is length %d but the first item is length %d. Only singletons are recycled.", i+2, length(item), nrow); } if (!inplace) { - first = PROTECT(duplicate(first)); + first = PROTECT(duplicate(first)); nprotect++; if (verbose) Rprintf("coalesce copied first item (inplace=FALSE)\n"); } void **valP = (void **)R_alloc(nval, sizeof(void *)); @@ -140,7 +143,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { default: error("Unsupported type: %s", type2char(TYPEOF(first))); // e.g. raw is tested } - if (!inplace) UNPROTECT(1); + UNPROTECT(nprotect); return first; } diff --git a/src/gsumm.c b/src/gsumm.c index 6263b33b6..69839804a 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -38,7 +38,8 @@ static int nbit(int n) } SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { - // double started = wallclock(); + double started = wallclock(); + const bool verbose = GetVerbose(); if (TYPEOF(env) != ENVSXP) error("env is not an environment"); // The type of jsub is pretty flexbile in R, so leave checking to eval() below. if (!isInteger(o)) error("o is not an integer vector"); @@ -94,7 +95,8 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { int *elem = grp + fp[g]-1; for (int j=0; ji /= grpsize[i]; + xd->r /= grpsize[i]; + xd++; + } + } break; default : error("Internal error: gsum returned type '%s'. typeof(x) is '%s'", type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov } @@ -464,7 +545,7 @@ SEXP gmean(SEXP x, SEXP narm) const int n = (irowslen == -1) ? length(x) : irowslen; if (nrow != n) error("nrow [%d] != length(x) [%d] in gsum", nrow, n); - long double *s = calloc(ngrp, sizeof(long double)); + long double *s = calloc(ngrp, sizeof(long double)), *si=NULL; // s = sum; si = sum imaginary just for complex if (!s) error("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE", ngrp, sizeof(long double)); int *c = calloc(ngrp, sizeof(int)); @@ -491,23 +572,51 @@ SEXP gmean(SEXP x, SEXP narm) c[thisgrp]++; } } break; + case CPLXSXP: { + const Rcomplex *xd = COMPLEX(x); + si = calloc(ngrp, sizeof(long double)); + if (!si) error("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE", ngrp, sizeof(long double)); + for (int i=0; i DBL_MAX) ansd[i] = R_PosInf; - else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf; - else ansd[i] = (double)s[i]; + switch(TYPEOF(x)) { + case LGLSXP: case INTSXP: case REALSXP: { + ans = PROTECT(allocVector(REALSXP, ngrp)); + double *ansd = REAL(ans); + for (int i=0; iDBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); + } + } break; + case CPLXSXP: { + ans = PROTECT(allocVector(CPLXSXP, ngrp)); + Rcomplex *ansd = COMPLEX(ans); + for (int i=0; iDBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); + ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]); + } + } break; + default: + error("Internal error: unsupported type at the end of gmean"); // # nocov } - free(s); free(c); + free(s); free(si); free(c); copyMostAttrib(x, ans); - UNPROTECT(1); // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC); + UNPROTECT(1); return(ans); } @@ -618,6 +727,9 @@ SEXP gmin(SEXP x, SEXP narm) } } break; + case CPLXSXP: + error("Type 'complex' has no well-defined min"); + break; default: error("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } @@ -761,6 +873,9 @@ SEXP gmax(SEXP x, SEXP narm) } } break; + case CPLXSXP: + error("Type 'complex' has no well-defined max"); + break; default: error("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } @@ -868,6 +983,17 @@ SEXP glast(SEXP x) { } } break; + case CPLXSXP: { + const Rcomplex *dx = COMPLEX(x); + ans = PROTECT(allocVector(CPLXSXP, ngrp)); + Rcomplex *dans = COMPLEX(ans); + for (i=0; i grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; } + k = ff[i]+val-2; + if (isunsorted) k = oo[k]-1; + k = (irowslen == -1) ? k : irows[k]-1; + dans[i] = dx[k]; + } + } break; case STRSXP: ans = PROTECT(allocVector(STRSXP, ngrp)); for (i=0; i