Skip to content

Commit 3c876fc

Browse files
committed
faster binning
1 parent 4cbad31 commit 3c876fc

36 files changed

+9356
-9494
lines changed

src/transforms/bin.js

Lines changed: 73 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import {bin as binner, extent, thresholdFreedmanDiaconis, thresholdScott, thresholdSturges, utcTickInterval} from "d3";
1+
import {bisect, extent, thresholdFreedmanDiaconis, thresholdScott, thresholdSturges, ticks, utcTickInterval} from "d3";
22
import {
33
valueof,
4-
range,
54
identity,
65
maybeColumn,
76
maybeInterval,
@@ -11,7 +10,8 @@ import {
1110
mid,
1211
labelof,
1312
isTemporal,
14-
isIterable
13+
isIterable,
14+
map
1515
} from "../options.js";
1616
import {coerceDate, coerceNumber} from "../scales.js";
1717
import {basic} from "./basic.js";
@@ -74,7 +74,7 @@ function binn(
7474
gx, // optionally group on x (exclusive with bx and gy)
7575
gy, // optionally group on y (exclusive with by and gx)
7676
{
77-
data: reduceData = reduceIdentity,
77+
data: reduceData = reduceIdentity, // TODO avoid materializing when unused?
7878
filter = reduceCount, // return only non-empty bins by default
7979
sort,
8080
reverse,
@@ -87,7 +87,7 @@ function binn(
8787

8888
// Compute the outputs.
8989
outputs = maybeOutputs(outputs, inputs);
90-
reduceData = maybeReduce(reduceData, identity);
90+
reduceData = reduceData == null ? undefined : maybeReduce(reduceData, identity);
9191
sort = sort == null ? undefined : maybeOutput("sort", sort, inputs);
9292
filter = filter == null ? undefined : maybeEvaluator("filter", filter, inputs);
9393

@@ -147,12 +147,11 @@ function binn(
147147
const GZ = Z && setGZ([]);
148148
const GF = F && setGF([]);
149149
const GS = S && setGS([]);
150-
const BX = bx ? bx(data) : [[, , (I) => I]];
151-
const BY = by ? by(data) : [[, , (I) => I]];
152150
const BX1 = bx && setBX1([]);
153151
const BX2 = bx && setBX2([]);
154152
const BY1 = by && setBY1([]);
155153
const BY2 = by && setBY2([]);
154+
const bin = Bin(bx?.(data), by?.(data), cumulative > 0 ? bin1cp : cumulative < 0 ? bin1cn : bin1);
156155
let i = 0;
157156
for (const o of outputs) o.initialize(data);
158157
if (sort) sort.initialize(data);
@@ -164,23 +163,18 @@ function binn(
164163
if (filter) filter.scope("facet", facet);
165164
for (const [f, I] of maybeGroup(facet, G)) {
166165
for (const [k, g] of maybeGroup(I, K)) {
167-
for (const [x1, x2, fx] of BX) {
168-
const bb = fx(g);
169-
for (const [y1, y2, fy] of BY) {
170-
const extent = {x1, x2, y1, y2};
171-
const b = fy(bb);
172-
if (filter && !filter.reduce(b, extent)) continue;
173-
groupFacet.push(i++);
174-
groupData.push(reduceData.reduce(b, data, extent));
175-
if (K) GK.push(k);
176-
if (Z) GZ.push(G === Z ? f : Z[b[0]]);
177-
if (F) GF.push(G === F ? f : F[b[0]]);
178-
if (S) GS.push(G === S ? f : S[b[0]]);
179-
if (BX1) BX1.push(x1), BX2.push(x2);
180-
if (BY1) BY1.push(y1), BY2.push(y2);
181-
for (const o of outputs) o.reduce(b, extent);
182-
if (sort) sort.reduce(b);
183-
}
166+
for (const [b, extent] of bin(g)) {
167+
if (filter && !filter.reduce(b, extent)) continue;
168+
groupFacet.push(i++);
169+
groupData.push(reduceData.reduce(b, data, extent));
170+
if (K) GK.push(k);
171+
if (Z) GZ.push(G === Z ? f : Z[b[0]]);
172+
if (F) GF.push(G === F ? f : F[b[0]]);
173+
if (S) GS.push(G === S ? f : S[b[0]]);
174+
if (BX1) BX1.push(extent.x1), BX2.push(extent.x2);
175+
if (BY1) BY1.push(extent.y1), BY2.push(extent.y2);
176+
for (const o of outputs) o.reduce(b, extent);
177+
if (sort) sort.reduce(b);
184178
}
185179
}
186180
}
@@ -222,12 +216,12 @@ function maybeBinValueTuple(options) {
222216

223217
function maybeBin(options) {
224218
if (options == null) return;
225-
const {value, cumulative, domain = extent, thresholds} = options;
219+
const {value, domain = extent, thresholds} = options;
226220
const bin = (data) => {
227-
let V = valueof(data, value, Array); // d3.bin prefers Array input
228-
const bin = binner().value((i) => V[i]);
221+
let V = valueof(data, value);
222+
let T; // bin thresholds
229223
if (isTemporal(V) || isTimeThresholds(thresholds)) {
230-
V = V.map(coerceDate);
224+
V = map(V, coerceDate, Float64Array);
231225
let [min, max] = typeof domain === "function" ? domain(V) : domain;
232226
let t = typeof thresholds === "function" && !isInterval(thresholds) ? thresholds(V, min, max) : thresholds;
233227
if (typeof t === "number") t = utcTickInterval(min, max, t);
@@ -238,25 +232,26 @@ function maybeBin(options) {
238232
}
239233
t = t.range(min, max);
240234
}
241-
bin.thresholds(t).domain([min, max]);
235+
T = t;
242236
} else {
243-
V = V.map(coerceNumber);
244-
let d = domain;
245-
let t = thresholds;
246-
if (isInterval(t)) {
247-
let [min, max] = typeof d === "function" ? d(V) : d;
248-
if (d === extent) {
237+
V = map(V, coerceNumber, Float64Array); // TODO deduplicate with code above
238+
let [min, max] = typeof domain === "function" ? domain(V) : domain;
239+
let t = typeof thresholds === "function" && !isInterval(thresholds) ? thresholds(V, min, max) : thresholds;
240+
if (typeof t === "number") t = ticks(min, max, t);
241+
else if (isInterval(t)) {
242+
if (domain === extent) {
249243
min = t.floor(min);
250244
max = t.offset(t.floor(max));
251-
d = [min, max];
252245
}
253246
t = t.range(min, max);
254247
}
255-
bin.thresholds(t).domain(d);
248+
T = t;
256249
}
257-
let bins = bin(range(data)).map(binset);
258-
if (cumulative) bins = (cumulative < 0 ? bins.reverse() : bins).map(bincumset);
259-
return bins.map(binfilter);
250+
const extents = [];
251+
for (let i = 0; i <= T.length; ++i) extents.push([T[i - 1], T[i]]);
252+
T = T.map(coerceNumber); // for faster bisection
253+
extents.bin = (i) => bisect(T, V[i]); // TODO test for null? respect domain?
254+
return extents;
260255
};
261256
bin.label = labelof(value);
262257
return bin;
@@ -305,38 +300,50 @@ function isInterval(t) {
305300
return t ? typeof t.range === "function" : false;
306301
}
307302

308-
function binset(bin) {
309-
return [bin, new Set(bin)];
310-
}
311-
312-
function bincumset([bin], j, bins) {
313-
return [
314-
bin,
315-
{
316-
get size() {
317-
for (let k = 0; k <= j; ++k) {
318-
if (bins[k][1].size) {
319-
return 1; // a non-empty value
303+
// Builds a generator function that takes an iterable of indexes I and yields
// [indexes, extent] pairs. EX and EY are optional arrays of [lower, upper]
// extents for x and y; `bin` is the binning strategy, called as bin(E, I),
// whose result is read by position in parallel with E.
// - EX and EY both given: yields one pair per (x extent, y extent) combination,
//   with extent {x1, y1, x2, y2}.
// - only EX given: yields one pair per x extent, with extent {x1, x2}.
// - otherwise: uses EY and yields one pair per y extent, with extent {y1, y2}.
function Bin(EX, EY, bin) {
304+
return EX && EY
305+
? function* (I) {
306+
const X = bin(EX, I); // first bin on x
307+
for (const [ix, [x1, x2]] of EX.entries()) {
308+
const Y = bin(EY, X[ix]); // then bin on y
309+
for (const [iy, [y1, y2]] of EY.entries()) {
310+
yield [Y[iy], {x1, y1, x2, y2}];
320311
}
321312
}
322-
return 0;
323-
},
324-
has(i) {
325-
for (let k = 0; k <= j; ++k) {
326-
if (bins[k][1].has(i)) {
327-
return true;
328-
}
313+
}
314+
: EX
315+
? function* (I) {
316+
const X = bin(EX, I); // bin on x only
317+
for (const [i, [x1, x2]] of EX.entries()) {
318+
yield [X[i], {x1, x2}];
329319
}
330-
return false;
331320
}
332-
}
333-
];
321+
: function* (I) {
322+
const Y = bin(EY, I); // bin on y only
323+
for (const [i, [y1, y2]] of EY.entries()) {
324+
yield [Y[i], {y1, y2}];
325+
}
326+
};
327+
}
328+
329+
// non-cumulative distribution
330+
// Distributes each index of I into exactly one bucket: the one at the
// position returned by E.bin(index). Returns one bucket (array of indexes)
// per entry of E.
function bin1(E, I) {
  const buckets = E.map(() => []);
  for (const index of I) {
    const position = E.bin(index);
    buckets[position].push(index);
  }
  return buckets;
}
335335

336-
function binfilter([{x0, x1}, set]) {
337-
return [x0, x1, set.size ? (I) => I.filter(set.has, set) : binempty];
336+
// cumulative distribution
337+
// Bins the indexes of I cumulatively: each index is placed in the bin at
// position E.bin(i) and in every later bin, so the bin at position k holds
// every index whose own position is <= k. Returns one array of indexes per
// entry of E.
function bin1cp(E, I) {
  const B = E.map(() => []);
  const n = B.length;
  for (const i of I) {
    // Push the datum index i (not the bin position j), so downstream reducers
    // read the right data.
    for (let j = E.bin(i); j < n; ++j) B[j].push(i);
  }
  return B;
}
339343

340-
function binempty() {
341-
return new Uint32Array(0);
344+
// complementary cumulative distribution
345+
// Bins the indexes of I in complementary cumulative fashion: each index is
// placed in the bin at position E.bin(i) and in every earlier bin, so the bin
// at position k holds every index whose own position is >= k. Returns one
// array of indexes per entry of E.
function bin1cn(E, I) {
  const B = E.map(() => []);
  for (const i of I) {
    // Push the datum index i (not the bin position j), so downstream reducers
    // read the right data.
    for (let j = E.bin(i); j >= 0; --j) B[j].push(i);
  }
  return B;
}

0 commit comments

Comments
 (0)