Skip to content

Commit

Permalink
between int64 support and num-to-int coerce, #3517
Browse files Browse the repository at this point in the history
  • Loading branch information
jangorecki committed May 1, 2019
1 parent 1259b66 commit aa175aa
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 13 deletions.
13 changes: 11 additions & 2 deletions R/between.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ between <- function(x,lower,upper,incbounds=TRUE) {
if (is.logical(lower)) lower = as.integer(lower) # typically NA (which is logical type)
if (is.logical(upper)) upper = as.integer(upper) # typically NA (which is logical type)
is.px = function(x) inherits(x, "POSIXct")
is.i64 = function(x) inherits(x, "integer64")
# POSIX special handling to auto coerce character
if (is.px(x) && !is.null(tz<-attr(x, "tzone", TRUE)) && nzchar(tz) &&
(is.character(lower) || is.character(upper))) {
Expand All @@ -23,7 +24,7 @@ between <- function(x,lower,upper,incbounds=TRUE) {
}
stopifnot(is.px(x), is.px(lower), is.px(upper)) # nocov # internal
}
# POSIX check time zone match
# POSIX check timezone match
if (is.px(x) && is.px(lower) && is.px(upper)) {
tz_match = function(x, y, z) { # NULL match "", else all identical
((is.null(x) || !nzchar(x)) && (is.null(y) || !nzchar(y)) && (is.null(z) || !nzchar(z))) ||
Expand All @@ -33,7 +34,15 @@ between <- function(x,lower,upper,incbounds=TRUE) {
stop("'between' function arguments have mismatched timezone attribute, align all arguments to same timezone")
}
}
is.supported = function(x) (is.numeric(x) && !inherits(x, "integer64")) || is.px(x)
# int64
if (is.i64(x)) {
if (!requireNamespace("bit64", quietly=TRUE)) stop("trying to use integer64 class when 'bit64' package is not installed")
if (!is.i64(lower) && is.numeric(lower)) lower = bit64::as.integer64(lower)
if (!is.i64(upper) && is.numeric(upper)) upper = bit64::as.integer64(upper)
} else if (is.i64(lower) || is.i64(upper)) {
stop("'lower' and/or 'upper' are integer64 class while 'x' argument is not, align classes before passing to 'between'")
}
is.supported = function(x) is.numeric(x) || is.px(x)
if (is.supported(x) && is.supported(lower) && is.supported(upper)) {
# faster parallelised version for int/double.
# Cbetween supports length(lower)==1 (recycled) and (from v1.12.0) length(lower)==length(x).
Expand Down
14 changes: 14 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14181,6 +14181,20 @@ test(2032.18, between(x, dn, up), error="mismatched timezone attribute")
X = data.table(a = 1:5, b = 6:10, c = c(5:1))
test(2032.19, X[c %between% list(a, b)], X[c %between% .(a, b)])

# between num to int coercion #3517
old = options("datatable.verbose"=TRUE)
#TODO
options(old)

# between int64 support
if (test_bit64) {
as.i64 = bit64::as.integer64
test(2032.51, between(1:10, as.i64(3), as.i64(6)), error="are integer64 class while 'x' argument is not")
test(2032.52, between(1:10, 3, as.i64(6)), error="are integer64 class while 'x' argument is not")
old = options("datatable.verbose"=TRUE)
#TODO
options(old)
}


###################################
Expand Down
73 changes: 62 additions & 11 deletions src/between.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include "data.table.h"

#define NA_INTEGER64 LLONG_MIN
#define MAX_INTEGER64 LLONG_MAX

SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {

R_len_t nx = length(x), nl = length(lower), nu = length(upper);
Expand All @@ -18,15 +21,32 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {

int nprotect = 0;
bool integer=true;
if (isReal(x) || isReal(lower) || isReal(upper)) {
if (inherits(x,"integer64") || inherits(lower,"integer64") || inherits(upper,"integer64")) {
error("Internal error: one or more of x, lower and upper is type integer64 but this should have been caught by between() at R level."); // # nocov
bool integer64=false;
if (isInteger(x) && // #3517 coerce to num to int when possible
(isInteger(lower) || (!isInteger(lower) && !isReallyReal(lower))) &&
(isInteger(upper) || (!isInteger(upper) && !isReallyReal(upper)))) {
if (!isInteger(lower)) {
lower = PROTECT(coerceVector(lower, INTSXP)); nprotect++;
}
if (!isInteger(upper)) {
upper = PROTECT(coerceVector(upper, INTSXP)); nprotect++;
}
} else if (inherits(x,"integer64")) {
if (!inherits(lower,"integer64") || !inherits(upper,"integer64"))
error("Internal error in between: 'x' is integer64 while 'lower' and/or 'upper' are not, should have been catched by now"); // # nocov
integer=false;
lower = PROTECT(coerceVector(lower, REALSXP)); // these coerces will convert NA appropriately
upper = PROTECT(coerceVector(upper, REALSXP));
x = PROTECT(coerceVector(x, REALSXP));
nprotect += 3;
integer64=true;
} else if (isReal(x) || isReal(lower) || isReal(upper)) {
integer=false;
if (!isReal(x)) {
x = PROTECT(coerceVector(x, REALSXP)); nprotect++;
}
if (!isReal(lower)) {
lower = PROTECT(coerceVector(lower, REALSXP)); nprotect++; // these coerces will convert NA appropriately
}
if (!isReal(upper)) {
upper = PROTECT(coerceVector(upper, REALSXP)); nprotect++;
}
}
// TODO: sweep through lower and upper ensuring lower<=upper (inc bounds) and no lower>upper or lower==INT_MAX

Expand All @@ -52,9 +72,9 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
if (verbose) Rprintf("between parallel processing of integer with recycling took %8.3fs\n", omp_get_wtime()-tic);
}
else {
const int xMask = recycleX ? 0 : INT_MAX;
const int lowMask = recycleLow ? 0 : INT_MAX;
const int uppMask = recycleUpp ? 0 : INT_MAX;
const int64_t xMask = recycleX ? 0 : INT_MAX;
const int64_t lowMask = recycleLow ? 0 : INT_MAX;
const int64_t uppMask = recycleUpp ? 0 : INT_MAX;
if (verbose) tic = omp_get_wtime();
#pragma omp parallel for num_threads(getDTthreads())
for (int i=0; i<longest; i++) {
Expand All @@ -66,7 +86,7 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
}
if (verbose) Rprintf("between parallel processing of integer took %8.3fs\n", omp_get_wtime()-tic);
}
} else {
} else if (!integer64) {
// type real
const double *lp = REAL(lower);
const double *up = REAL(upper);
Expand Down Expand Up @@ -108,6 +128,37 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP bounds) {
}
if (verbose) Rprintf("between parallel processing of double took %8.3fs\n", omp_get_wtime()-tic);
}
} else {
// type integer64
const int64_t *lp = (int64_t *)REAL(lower);
const int64_t *up = (int64_t *)REAL(upper);
const int64_t *xp = (int64_t *)REAL(x);
if (!recycleX && recycleLow && recycleUpp) {
const int64_t l = lp[0] + open; // +open as for int32 branch
const int64_t u = up[0]==NA_INTEGER64 ? MAX_INTEGER64 : up[0] - open;
if (verbose) tic = omp_get_wtime();
#pragma omp parallel for num_threads(getDTthreads())
for (int i=0; i<longest; i++) {
int64_t elem = xp[i];
ansp[i] = elem==NA_INTEGER64 ? NA_LOGICAL : (l<=elem && elem<=u);
}
if (verbose) Rprintf("between parallel processing of intege64r with recycling took %8.3fs\n", omp_get_wtime()-tic);
}
else {
const int xMask = recycleX ? 0 : INT_MAX;
const int lowMask = recycleLow ? 0 : INT_MAX;
const int uppMask = recycleUpp ? 0 : INT_MAX;
if (verbose) tic = omp_get_wtime();
#pragma omp parallel for num_threads(getDTthreads())
for (int i=0; i<longest; i++) {
int64_t elem = xp[i & xMask];
int64_t l = lp[i & lowMask];
int64_t u = up[i & uppMask];
u = u==NA_INTEGER64 ? MAX_INTEGER64 : u-open;
ansp[i] = elem==NA_INTEGER64 ? NA_LOGICAL : (l<=elem && elem<=u);
}
if (verbose) Rprintf("between parallel processing of integer64 took %8.3fs\n", omp_get_wtime()-tic);
}
}
UNPROTECT(nprotect);
return(ans);
Expand Down

0 comments on commit aa175aa

Please sign in to comment.