Add support for complex vectors to CJ, part of #3690 (#3691)

Rdatatable · Jul 16, 2019 · d3e146d · d3e146d
1 parent 54cb9f4
commit d3e146d
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 10 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -94,7 +94,7 @@
 
 16. `as.data.table` now unpacks columns in a `data.frame` which are themselves a `data.frame`. This need arises when parsing JSON, a corollary in [#3369](https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752). `data.table` does not allow columns to be objects which themselves have columns (such as `matrix` and `data.frame`), unlike `data.frame` which does. Bug fix 19 in v1.12.2 (see below) added a helpful error (rather than segfault) to detect such invalid `data.table`, and promised that `as.data.table()` would unpack these columns in the next release (i.e. this release) so that the invalid `data.table` is not created in the first place.
 
-17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit, and as in many `data.table` operations, factors benefit more than character.
+17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit (including newly supported complex, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690)), and as in many `data.table` operations, factors benefit more than character.
 
     ```R
     # default 4 threads on a laptop with 16GB RAM and 8 logical CPU

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
@@ -2820,23 +2820,29 @@ xx <- factor(letters[1:2], ordered=TRUE)
 yy <- sample(2L)
 yy_sort = base::sort.int(yy)
 old = options(datatable.CJ.names=FALSE)
-test(996.1, CJ(xx, yy), setkey(data.table(rep(xx, each=2L), rep(yy_sort, 2L))))
-test(996.2, CJ(a = xx, yy), setkey(data.table(a = rep(xx, each=2L), V2 = rep(yy_sort, 2L))))
+test(996.01, CJ(xx, yy), setkey(data.table(rep(xx, each=2L), rep(yy_sort, 2L))))
+test(996.02, CJ(a = xx, yy), setkey(data.table(a = rep(xx, each=2L), V2 = rep(yy_sort, 2L))))
 options(datatable.CJ.names=TRUE)
-test(996.3, CJ(xx, yy), setkey(data.table(xx = rep(xx, each=2L), yy = rep(yy_sort, 2L))))
+test(996.03, CJ(xx, yy), setkey(data.table(xx = rep(xx, each=2L), yy = rep(yy_sort, 2L))))
 options(old)
 
 # #3597 -- CJ properly informs sorted can't apply to list input
-test(996.4, CJ(list(1:2, 3L)), error = "non-atomic, which can't be sorted")
-test(996.5, CJ(list(1:2, 3), 4:6, sorted = FALSE),
+test(996.04, CJ(list(1:2, 3L)), error = "non-atomic, which can't be sorted")
+test(996.05, CJ(list(1:2, 3), 4:6, sorted = FALSE),
      data.table(V1 = list(1:2, 1:2, 1:2, 3, 3, 3), V2 = rep(4:6, 2L)))
-test(996.6, CJ(4:6, list(1:2, 3), sorted = FALSE),
+test(996.06, CJ(4:6, list(1:2, 3), sorted = FALSE),
      data.table(V1 = rep(4:6, each = 2L), V2 = rep(list(1:2, 3), 3L)))
-test(996.7, CJ(1:2, list(1:2, 3), 4:5, sorted = FALSE),
+test(996.07, CJ(1:2, list(1:2, 3), 4:5, sorted = FALSE),
      data.table(V1 = rep(1:2, each = 4L), V2 = rep(rep(list(1:2, 3), each = 2L), 2L), V3 = rep(4:5, 4L)))
 
-test(996.8, CJ(expression(1)), error = "element 1 is non-atomic")
-test(996.9, CJ(expression(2), 3, sorted = FALSE), error = "Type 'expression' not supported")
+test(996.08, CJ(expression(1)), error = "element 1 is non-atomic")
+test(996.09, CJ(expression(2), 3, sorted = FALSE), error = "Type 'expression' not supported")
+## complex input support (sorting complex is not yet implemented, so these tests use sorted = FALSE)
+test(996.10, CJ(z = 0:1 + (0:1)*1i, b = 1:3, sorted = FALSE),
+     data.table(z = rep(0:1, each=3L) + rep(0:1, each=3L)*1i, b = rep(1:3, 2)))
+test(996.11, CJ(b = 1:3, z = 0:1 + (0:1)*1i, sorted = FALSE),
+     data.table(b = rep(1:3, each = 2L), z = rep(0:1, 3) + rep(0:1, 3)*1i))
+
 
 # Test that CJ orders NA consistently with setkey, and with its historical behaviour, now that it no longer uses setkey.
 # NA must always come first in data.table throughout, since binary search relies on that internally.

diff --git a/man/J.Rd b/man/J.Rd
@@ -56,6 +56,9 @@ x = c(1, 1, 2)
 y = c(4, 6, 4)
 CJ(x, y)              # output columns are automatically named 'x' and 'y'
 CJ(x, y, unique=TRUE) # unique(x) and unique(y) are computed automatically
+
+z = 0:1 + (0:1)*1i
+CJ(x, z, sorted = FALSE) # support for sorting complex is not yet implemented
 }
 \keyword{ data }
 

diff --git a/src/cj.c b/src/cj.c
@@ -47,6 +47,20 @@ SEXP cj(SEXP base_list) {
         memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(double));
       }
     } break;
+    case CPLXSXP: {
+      const Rcomplex *restrict sourceP = COMPLEX(source);
+      Rcomplex *restrict targetP = COMPLEX(target);
+      #pragma omp parallel for num_threads(getDTthreads())
+      for (int i=0; i<thislen; ++i) {
+        const Rcomplex item = sourceP[i];
+        const int end=(i+1)*eachrep;
+        for (int j=i*eachrep; j<end; ++j) targetP[j] = item;
+      }
+      #pragma omp parallel for num_threads(getDTthreads())
+      for (int i=1; i<ncopy; ++i) {
+        memcpy(targetP + i*blocklen, targetP, blocklen*sizeof(Rcomplex));
+      }
+    } break;
     case STRSXP: {
       const SEXP *sourceP = STRING_PTR(source);
       int start = 0;