Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New function psum/pprod #4188

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export(chmatch, "%chin%", chorder, chgroup)
export(rbindlist)
export(fifelse)
export(fcase)
export(psum)
export(pprod)
export(fread)
export(fwrite)
export(foverlaps)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ unit = "s")

14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR.

15. `psum(..., na.rm=FALSE)` and `pprod(..., na.rm=FALSE)` implemented in C by Morgan Jacob, [#3467](https://github.com/Rdatatable/data.table/issues/3467), are inspired by `base::pmin` and `base::pmax`. These new functions work only for integer and double type and do not recycle vectors. Please see `?psum` for more details.

## BUG FIXES

1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
Expand Down
2 changes: 2 additions & 0 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)

fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L])
psum = function(..., na.rm=FALSE) .Call(CpsumR, na.rm, list(...))
pprod = function(..., na.rm=FALSE) .Call(CpprodR, na.rm, list(...))

colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups)
coerceFill = function(x) .Call(CcoerceFillR, x)
Expand Down
35 changes: 35 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -16846,3 +16846,38 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
A = data.table(A=as.complex(rep(NA, 5)))
test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))

# psum / pprod, #3467
x = c(1, 3, NA, 5)
y = c(2, NA, 4, 1)
z = c(3, 4, 4, 1)
# psum
test(2139.001, psum(x, y, z, na.rm = FALSE), c(6, NA, NA, 7))
test(2139.002, psum(x, y, z, na.rm = TRUE), c(6, 7, 8, 7))
test(2139.003, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 7L))
test(2139.004, psum(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 7L, 8L, 7L))
test(2139.005, psum(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.")
test(2139.006, psum(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.007, psum(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.008, psum(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.")
test(2139.009, psum(1:10, 1:10, 21:30), 1:10 + 1:10 + 21:30)
test(2139.010, psum(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.")
test(2139.011, psum(x, na.rm = FALSE), error = "Please supply at least 2 arguments. (1 argument supplied)")
test(2139.012, psum(as.integer(x), y, z, na.rm = TRUE), c(6, 7, 8, 7))
test(2139.013, psum(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(3, 3, 4, 6, NA))
test(2139.014, psum(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 7))
# pprod
test(2139.015, pprod(x, y, z, na.rm = FALSE), c(6, NA, NA, 5))
test(2139.016, pprod(x, y, z, na.rm = TRUE), c(6, 12, 16, 5))
test(2139.017, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = FALSE), c(6L, NA_integer_, NA_integer_, 5L))
test(2139.018, pprod(as.integer(x), as.integer(y), as.integer(z), na.rm = TRUE), c(6L, 12L, 16L, 5L))
test(2139.019, pprod(as.raw(z), y, na.rm = TRUE), error = "Argument 1 is of type raw. Only integer and double types are supported.")
test(2139.020, pprod(x, y, 1:2, na.rm = FALSE), error = "Argument 3 is of length 2 but argument 1 is of length 4. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.021, pprod(1:10, 1:5, na.rm = FALSE), error = "Argument 2 is of length 5 but argument 1 is of length 10. If you wish to 'recycle' your argument, please use rep() to make this intent clear to the readers of your code.")
test(2139.022, pprod(x, as.raw(z), y, na.rm = TRUE), error = "Argument 2 is of type raw. Only integer and double types are supported.")
test(2139.023, pprod(1:10, 1:10, 21:30), 1:10 * 1:10 * 21:30)
test(2139.024, pprod(x, y, z, na.rm = NA), error = "Argument 'na.rm' must be TRUE or FALSE and length 1.")
test(2139.025, pprod(x, na.rm = FALSE), error = "Please supply at least 2 arguments. (1 argument supplied)")
test(2139.026, pprod(as.integer(x), y, z, na.rm = TRUE), c(6, 12, 16, 5))
test(2139.027, pprod(c(1,3,NA,5,NA), c(2,NA,4,1,NA), na.rm = TRUE), c(2, 3, 4, 5, NA))
test(2139.028, pprod(x, y, as.integer(z), na.rm = FALSE), c(6, NA, NA, 5))
33 changes: 33 additions & 0 deletions man/psum.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
\name{psum}
\alias{psum}
\alias{pprod}
\title{ Sum and Product}
\description{
Similar to \code{pmin} and \code{pmax} but for sum and product. Only works for integer and double. These functions do not recycle vectors.
}
\usage{
psum(..., na.rm=FALSE)
pprod(..., na.rm=FALSE)
}
\arguments{
\item{...}{ Numeric arguments of type integer or double.}
\item{na.rm}{ A logical indicating whether missing values should be removed. Default value is \code{FALSE}.}
}
\value{
Return the sum or product of all numeric argument. The value returned will be of the type of the highest argument types (integer < double)
}
\examples{
x = c(1, 3, NA, 5)
y = c(2, NA, 4, 1)
z = c(3, 4, 4, 1)

# Example 1: psum
psum(x, y, z, na.rm = FALSE)
psum(x, y, z, na.rm = TRUE)

# Example 2: pprod
pprod(x, y, z, na.rm = FALSE)
pprod(x, y, z, na.rm = TRUE)

}
\keyword{ data }
2 changes: 2 additions & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,5 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k);
//fifelse.c
SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
SEXP fcaseR(SEXP na, SEXP rho, SEXP args);
SEXP psumR(SEXP na, SEXP args);
SEXP pprodR(SEXP na, SEXP args);
166 changes: 166 additions & 0 deletions src/fifelse.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,169 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) {
UNPROTECT(nprotect);
return ans;
}

SEXP psumR(SEXP na, SEXP args) {
if (!IS_TRUE_OR_FALSE(na)) {
error(_("Argument 'na.rm' must be TRUE or FALSE and length 1."));
}
const int n=length(args);
if (n <= 1) {
error(_("Please supply at least 2 arguments. (%d argument supplied)"), n);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why error, instead of just returning x itself?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it odd to provide 1 argument. A sum operation needs at least 2 arguments. Does it make sense to you to only provide 1 argument?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, it's odd, but edge case like this can come up in dynamic scripting, where e.g. user is running

DT[ , do.call(psum, .SD), .SDcols = is.numeric]

and DT has a variable number of numeric columns depending on some other conditions in the script.

This way, the user can use the same code to cover the normal case when there are several normal columns, as well as the case when there only ends up being one such column.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok will change that later. Tks

}
SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]);
const int64_t len0 = xlength(SEXPPTR_RO(args)[0]);
if (anstype != INTSXP && anstype != REALSXP) {
error(_("Argument 1 is of type %s. Only integer and double types are supported."), type2char(anstype));
}
for (int i = 1; i < n; ++i) {
SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]);
int64_t len1 = xlength(SEXPPTR_RO(args)[i]);
if (type != INTSXP && type != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."),
i+1, type2char(type));
}
if (len1 != len0) {
error(_("Argument %d is of length %"PRId64" but argument 1 is of length %"PRId64". "
"If you wish to 'recycle' your argument, please use rep() to make this intent "
"clear to the readers of your code."), i+1, len1, len0);
}
if (type > anstype) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should use typeorder for this, the ordering of the SEXPTYPEs doesn't necessarily reflect the coercion order. I guess it's fine for just int/double but later we may add other types

anstype = type;
}
}
int nprotect = 0;
SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++;
if (anstype != TYPEOF(ans)) {
ans = coerceVector(ans, anstype);
}
const bool narm = LOGICAL(na)[0];
switch(anstype) {
case INTSXP: {
int *restrict pans =INTEGER(ans);
SEXP int_a = R_NilValue;
PROTECT_INDEX Iaint;
PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++;
for (int i = 1; i < n; ++i) {
REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint);
int *pa = INTEGER(int_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] + pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] + pa[j]);
}
}
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
SEXP dbl_a = R_NilValue;
PROTECT_INDEX Iadbl;
PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++;
for (int i = 1; i < n; ++i) {
if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) {
REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl);
} else {
REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl);
}
double *pa = REAL(dbl_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] + pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] + pa[j]);
}
}
}
} break;
}
UNPROTECT(nprotect);
return ans;
}

SEXP pprodR(SEXP na, SEXP args) {
if (!IS_TRUE_OR_FALSE(na)) {
error(_("Argument 'na.rm' must be TRUE or FALSE and length 1."));
}
const int n=length(args);
if (n <= 1) {
error(_("Please supply at least 2 arguments. %d argument supplied."), n);
}
SEXPTYPE anstype = TYPEOF(SEXPPTR_RO(args)[0]);
const int64_t len0 = xlength(SEXPPTR_RO(args)[0]);
if (anstype != INTSXP && anstype != REALSXP) {
error(_("Argument 1 is of type %s. Only integer and double types are supported."), type2char(anstype));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for translation, better to use %d here as well, then only one message to translate instead of 2

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry Michael, I did not understand what you mean.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

each unique item wrapped in _() will generate a message to be translated

one with Argument 1, another with Argument %d -- twice as much work for our translation team

vs using Argument %d for both, and hard-coding 1 to fill that in the former case; looks same to user, very similar in code, and easier for translators

}
for (int i = 1; i < n; ++i) {
SEXPTYPE type = TYPEOF(SEXPPTR_RO(args)[i]);
int64_t len1 = xlength(SEXPPTR_RO(args)[i]);
if (type != INTSXP && type != REALSXP) {
error(_("Argument %d is of type %s. Only integer and double types are supported."),
i+1, type2char(type));
}
if (len1 != len0) {
error(_("Argument %d is of length %"PRId64" but argument 1 is of length %"PRId64". "
"If you wish to 'recycle' your argument, please use rep() to make this intent "
"clear to the readers of your code."), i+1, len1, len0);
}
if (type > anstype) {
anstype = type;
}
}
int nprotect = 0;
SEXP ans = PROTECT(duplicate(SEXPPTR_RO(args)[0])); nprotect++;
if (anstype != TYPEOF(ans)) {
ans = coerceVector(ans, anstype);
}
const bool narm = LOGICAL(na)[0];
switch(anstype) {
case INTSXP: {
int *restrict pans =INTEGER(ans);
SEXP int_a = R_NilValue;
PROTECT_INDEX Iaint;
PROTECT_WITH_INDEX(int_a, &Iaint); nprotect++;
for (int i = 1; i < n; ++i) {
REPROTECT(int_a = duplicate(SEXPPTR_RO(args)[i]), Iaint);
int *pa = INTEGER(int_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = pans[j] == NA_INTEGER ? pa[j] : (pa[j]==NA_INTEGER ? pans[j] : (pans[j] * pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (pans[j] == NA_INTEGER || pa[j] == NA_INTEGER) ? NA_INTEGER : (pans[j] * pa[j]);
}
}
}
} break;
case REALSXP: {
double *restrict pans = REAL(ans);
SEXP dbl_a = R_NilValue;
PROTECT_INDEX Iadbl;
PROTECT_WITH_INDEX(dbl_a, &Iadbl); nprotect++;
for (int i = 1; i < n; ++i) {
if (TYPEOF(SEXPPTR_RO(args)[i]) != anstype) {
REPROTECT(dbl_a = coerceVector(duplicate(SEXPPTR_RO(args)[i]), anstype), Iadbl);
} else {
REPROTECT(dbl_a = duplicate(SEXPPTR_RO(args)[i]), Iadbl);
}
double *pa = REAL(dbl_a);
if (narm) {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = ISNAN(pans[j]) ? pa[j] : (ISNAN(pa[j]) ? pans[j] : (pans[j] * pa[j]));
}
} else {
for (int64_t j = 0; j < len0; ++j) {
pans[j] = (ISNAN(pans[j]) || ISNAN(pa[j])) ? NA_REAL : (pans[j] * pa[j]);
}
}
}
} break;
}
UNPROTECT(nprotect);
return ans;
}
4 changes: 4 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ SEXP chmatchdup_R();
SEXP chin_R();
SEXP fifelseR();
SEXP fcaseR();
SEXP psumR();
SEXP pprodR();
SEXP freadR();
SEXP fwriteR();
SEXP reorder();
Expand Down Expand Up @@ -205,6 +207,8 @@ R_CallMethodDef callMethods[] = {
{"Ccoalesce", (DL_FUNC) &coalesce, -1},
{"CfifelseR", (DL_FUNC) &fifelseR, -1},
{"CfcaseR", (DL_FUNC) &fcaseR, -1},
{"CpsumR", (DL_FUNC) &psumR, -1},
{"CpprodR", (DL_FUNC) &pprodR, -1},
{"C_lock", (DL_FUNC) &lock, -1}, // _ for these 3 to avoid Clock as in time
{"C_unlock", (DL_FUNC) &unlock, -1},
{"C_islocked", (DL_FUNC) &islockedR, -1},
Expand Down