Skip to content

Commit 2d20123

Browse files
authored
Merge pull request #1201 from plotly/date-bin-shift
Date bin shift
2 parents e91c3bc + 0050c1e commit 2d20123

File tree

5 files changed

+217
-136
lines changed

5 files changed

+217
-136
lines changed

src/plots/cartesian/axes.js

Lines changed: 171 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -486,18 +486,19 @@ axes.expand = function(ax, data, options) {
486486
};
487487

488488
axes.autoBin = function(data, ax, nbins, is2d) {
489-
var datamin = Lib.aggNums(Math.min, null, data),
490-
datamax = Lib.aggNums(Math.max, null, data);
489+
var dataMin = Lib.aggNums(Math.min, null, data),
490+
dataMax = Lib.aggNums(Math.max, null, data);
491+
491492
if(ax.type === 'category') {
492493
return {
493-
start: datamin - 0.5,
494-
end: datamax + 0.5,
494+
start: dataMin - 0.5,
495+
end: dataMax + 0.5,
495496
size: 1
496497
};
497498
}
498499

499500
var size0;
500-
if(nbins) size0 = ((datamax - datamin) / nbins);
501+
if(nbins) size0 = ((dataMax - dataMin) / nbins);
501502
else {
502503
// totally auto: scale off std deviation so the highest bin is
503504
// somewhat taller than the total number of bins, but don't let
@@ -506,102 +507,190 @@ axes.autoBin = function(data, ax, nbins, is2d) {
506507
var distinctData = Lib.distinctVals(data),
507508
msexp = Math.pow(10, Math.floor(
508509
Math.log(distinctData.minDiff) / Math.LN10)),
509-
// TODO: there are some date cases where this will fail...
510510
minSize = msexp * Lib.roundUp(
511511
distinctData.minDiff / msexp, [0.9, 1.9, 4.9, 9.9], true);
512512
size0 = Math.max(minSize, 2 * Lib.stdev(data) /
513513
Math.pow(data.length, is2d ? 0.25 : 0.4));
514514
}
515515

516516
// piggyback off autotick code to make "nice" bin sizes
517-
var dummyax;
517+
var dummyAx;
518518
if(ax.type === 'log') {
519-
dummyax = {
519+
dummyAx = {
520520
type: 'linear',
521-
range: [datamin, datamax],
521+
range: [dataMin, dataMax],
522522
r2l: Number
523523
};
524524
}
525525
else {
526-
dummyax = {
526+
dummyAx = {
527527
type: ax.type,
528528
// conversion below would be ax.c2r but that's only different from l2r
529529
// for log, and this is the only place (so far?) we would want c2r.
530-
range: [datamin, datamax].map(ax.l2r),
530+
range: [dataMin, dataMax].map(ax.l2r),
531531
r2l: ax.r2l
532532
};
533533
}
534534

535-
axes.autoTicks(dummyax, size0);
536-
var binstart = axes.tickIncrement(
537-
axes.tickFirst(dummyax), dummyax.dtick, 'reverse'),
538-
binend;
539-
540-
function nearEdge(v) {
541-
// is a value within 1% of a bin edge?
542-
return (1 + (v - binstart) * 100 / dummyax.dtick) % 100 < 2;
543-
}
535+
axes.autoTicks(dummyAx, size0);
536+
var binStart = axes.tickIncrement(
537+
axes.tickFirst(dummyAx), dummyAx.dtick, 'reverse'),
538+
binEnd;
544539

545540
// check for too many data points right at the edges of bins
546541
// (>50% within 1% of bin edges) or all data points integral
547542
// and offset the bins accordingly
548-
if(typeof dummyax.dtick === 'number') {
549-
var edgecount = 0,
550-
midcount = 0,
551-
intcount = 0,
552-
blankcount = 0;
553-
for(var i = 0; i < data.length; i++) {
554-
if(data[i] % 1 === 0) intcount++;
555-
else if(!isNumeric(data[i])) blankcount++;
556-
557-
if(nearEdge(data[i])) edgecount++;
558-
if(nearEdge(data[i] + dummyax.dtick / 2)) midcount++;
559-
}
560-
var datacount = data.length - blankcount;
561-
562-
if(intcount === datacount && ax.type !== 'date') {
563-
// all integers: if bin size is <1, it's because
564-
// that was specifically requested (large nbins)
565-
// so respect that... but center the bins containing
566-
// integers on those integers
567-
if(dummyax.dtick < 1) {
568-
binstart = datamin - 0.5 * dummyax.dtick;
569-
}
570-
// otherwise start half an integer down regardless of
571-
// the bin size, just enough to clear up endpoint
572-
// ambiguity about which integers are in which bins.
573-
else binstart -= 0.5;
574-
}
575-
else if(midcount < datacount * 0.1) {
576-
if(edgecount > datacount * 0.3 ||
577-
nearEdge(datamin) || nearEdge(datamax)) {
578-
// lots of points at the edge, not many in the middle
579-
// shift half a bin
580-
var binshift = dummyax.dtick / 2;
581-
binstart += (binstart + binshift < datamin) ? binshift : -binshift;
582-
}
583-
}
543+
if(typeof dummyAx.dtick === 'number') {
544+
binStart = autoShiftNumericBins(binStart, data, dummyAx, dataMin, dataMax);
584545

585-
var bincount = 1 + Math.floor((datamax - binstart) / dummyax.dtick);
586-
binend = binstart + bincount * dummyax.dtick;
546+
var bincount = 1 + Math.floor((dataMax - binStart) / dummyAx.dtick);
547+
binEnd = binStart + bincount * dummyAx.dtick;
587548
}
588549
else {
550+
// month ticks - should be the only nonlinear kind we have at this point.
551+
// dtick (as supplied by axes.autoTicks) only has nonlinear values on
552+
// date and log axes, but even if you display a histogram on a log axis
553+
// we bin it on a linear axis (which one could argue against, but that's
554+
// a separate issue)
555+
if(dummyAx.dtick.charAt(0) === 'M') {
556+
binStart = autoShiftMonthBins(binStart, data, dummyAx.dtick, dataMin);
557+
}
558+
589559
// calculate the endpoint for nonlinear ticks - you have to
590560
// just increment until you're done
591-
binend = binstart;
592-
while(binend <= datamax) {
593-
binend = axes.tickIncrement(binend, dummyax.dtick);
561+
binEnd = binStart;
562+
while(binEnd <= dataMax) {
563+
binEnd = axes.tickIncrement(binEnd, dummyAx.dtick);
594564
}
595565
}
596566

597567
return {
598-
start: ax.c2r(binstart),
599-
end: ax.c2r(binend),
600-
size: dummyax.dtick
568+
start: ax.c2r(binStart),
569+
end: ax.c2r(binEnd),
570+
size: dummyAx.dtick
601571
};
602572
};
603573

604574

575+
/**
 * Shift the start of constant-size numeric bins so that data points do not
 * pile up on (or right next to) bin edges.
 *
 * Two cases are handled:
 *  - every non-blank value is an integer (and the axis is not a date axis):
 *    bins are offset so integers fall inside bins rather than on edges;
 *  - many values sit within 1% of a bin edge and few sit near bin centers:
 *    bins are shifted by half a bin.
 *
 * @param {number} binStart - proposed left edge of the first bin
 * @param {Array} data - the values being binned; entries failing `isNumeric`
 *     are treated as blanks and ignored
 * @param {object} ax - axis-like object; only `ax.dtick` (the numeric bin
 *     size) and `ax.type` are read here
 * @param {number} dataMin - smallest data value
 * @param {number} dataMax - largest data value
 * @returns {number} the (possibly shifted) left edge of the first bin
 */
function autoShiftNumericBins(binStart, data, ax, dataMin, dataMax) {
    var edgecount = 0,
        midcount = 0,
        intcount = 0,
        blankCount = 0;

    function nearEdge(v) {
        // is a value within 1% of a bin edge?
        return (1 + (v - binStart) * 100 / ax.dtick) % 100 < 2;
    }

    for(var i = 0; i < data.length; i++) {
        var v = data[i];

        // Blanks must be detected BEFORE the integer test: values such as
        // null, '' or false coerce to 0, so `v % 1 === 0` would count them
        // as integers and nearEdge would treat them as a real 0.
        if(!isNumeric(v)) {
            blankCount++;
            continue;
        }

        if(v % 1 === 0) intcount++;

        if(nearEdge(v)) edgecount++;
        if(nearEdge(v + ax.dtick / 2)) midcount++;
    }
    var dataCount = data.length - blankCount;

    if(intcount === dataCount && ax.type !== 'date') {
        // all integers: if bin size is <1, it's because
        // that was specifically requested (large nbins)
        // so respect that... but center the bins containing
        // integers on those integers
        if(ax.dtick < 1) {
            binStart = dataMin - 0.5 * ax.dtick;
        }
        // otherwise start half an integer down regardless of
        // the bin size, just enough to clear up endpoint
        // ambiguity about which integers are in which bins.
        else binStart -= 0.5;
    }
    else if(midcount < dataCount * 0.1) {
        if(edgecount > dataCount * 0.3 ||
                nearEdge(dataMin) || nearEdge(dataMax)) {
            // lots of points at the edge, not many in the middle
            // shift half a bin - upward if that still leaves the first
            // bin starting below the data, downward otherwise
            var binshift = ax.dtick / 2;
            binStart += (binStart + binshift < dataMin) ? binshift : -binshift;
        }
    }
    return binStart;
}
619+
620+
621+
/**
 * Shift the start of month-based bins ('M<n>' dtick) when most of the data
 * falls exactly on the start of a year, month or day, so that those values
 * land near the middle of a bin instead of on a bin edge.
 *
 * @param {number} binStart - proposed start of the first bin, as a
 *     millisecond timestamp (it is fed to `new Date` / `axes.tickIncrement`)
 * @param {Array} data - values being binned (millisecond timestamps);
 *     entries failing `isNumeric` are treated as blanks and ignored
 * @param {string} dtick - bin size of the form 'M<n>' (n months); the
 *     leading character is skipped and the remainder parsed as a number
 * @param {number} dataMin - smallest data value
 * @returns {number} the (possibly shifted) start of the first bin
 */
function autoShiftMonthBins(binStart, data, dtick, dataMin) {
    var exactYears = 0,
        exactMonths = 0,
        exactDays = 0,
        blankCount = 0,
        dataCount,
        di,
        d,
        year,
        month;

    // UTC midnight of the given date, in ms. Date.UTC is deliberately NOT
    // used: it remaps years 0-99 to 1900-1999, so dates in the first century
    // would never compare equal to their own year/month/day start.
    function utcMidnight(y, m, day) {
        var probe = new Date(0);
        return probe.setUTCFullYear(y, m, day);
    }

    for(var i = 0; i < data.length; i++) {
        di = data[i];
        if(!isNumeric(di)) {
            blankCount++;
            continue;
        }
        d = new Date(di);
        year = d.getUTCFullYear();
        if(di === utcMidnight(year, 0, 1)) {
            exactYears++;
        }
        else {
            month = d.getUTCMonth();
            if(di === utcMidnight(year, month, 1)) {
                exactMonths++;
            }
            else if(di === utcMidnight(year, month, d.getUTCDate())) {
                exactDays++;
            }
        }
    }

    dataCount = data.length - blankCount;

    // include bigger exact dates in the smaller ones
    // (an exact year is also an exact month, which is also an exact day)
    exactMonths += exactYears;
    exactDays += exactMonths;

    // number of data points that needs to be an exact value
    // to shift that increment to (near) the bin center
    var threshold = 0.8 * dataCount;

    if(exactDays > threshold) {
        var numMonths = Number(dtick.slice(1));

        if((exactYears > threshold) && (numMonths % 12 === 0)) {
            // The exact middle of a non-leap-year is 1.5 days into July
            // so if we start the bins here, all but leap years will
            // get hover-labeled as exact years.
            binStart = axes.tickIncrement(binStart, 'M6', 'reverse') + ONEDAY * 1.5;
        }
        else if(exactMonths > threshold) {
            // Months are not as clean, but if we shift half the *longest*
            // month (31/2 days) then 31-day months will get labeled exactly
            // and shorter months will get labeled with the correct month
            // but shifted 12-36 hours into it.
            binStart = axes.tickIncrement(binStart, 'M1', 'reverse') + ONEDAY * 15.5;
        }
        else {
            // Shifting half a day is exact, but since these are month bins it
            // will always give a somewhat odd-looking label, until we do something
            // smarter like showing the bin boundaries (or the bounds of the actual
            // data in each bin)
            binStart -= ONEDAY / 2;
        }

        // if the backward shift left a whole empty bin before the data,
        // drop that bin by starting one increment later
        var nextBinStart = axes.tickIncrement(binStart, dtick);

        if(nextBinStart <= dataMin) return nextBinStart;
    }
    return binStart;
}
693+
605694
// ----------------------------------------------------
606695
// Ticks and grids
607696
// ----------------------------------------------------
@@ -919,6 +1008,7 @@ function autoTickRound(ax) {
9191008
// for pure powers of 10
9201009
// numeric ticks always have constant differences, other datetime ticks
9211010
// can all be calculated as constant number of milliseconds
1011+
var THREEDAYS = 3 * ONEDAY;
9221012
axes.tickIncrement = function(x, dtick, axrev) {
9231013
var axSign = axrev ? -1 : 1;
9241014

@@ -930,10 +1020,23 @@ axes.tickIncrement = function(x, dtick, axrev) {
9301020

9311021
// Dates: months (or years)
9321022
if(tType === 'M') {
933-
var y = new Date(x);
934-
// is this browser consistent? setUTCMonth edits a date but
935-
// returns that date's milliseconds
936-
return y.setUTCMonth(y.getUTCMonth() + dtSigned);
1023+
/*
1024+
* set(UTC)Month does not (and CANNOT) always preserve day, since
1025+
* months have different lengths. The worst example of this is:
1026+
* d = new Date(1970,0,31); d.setMonth(1) -> Feb 31 turns into Mar 3
1027+
*
1028+
* But we want to be able to iterate over the last day of each month,
1029+
* regardless of what its number is.
1030+
* So shift 3 days forward, THEN set the new month, then unshift:
1031+
* 1/31 -> 2/28 (or 29) -> 3/31 -> 4/30 -> ...
1032+
*
1033+
* Note that odd behavior still exists if you start from the 26th-28th:
1034+
* 1/28 -> 2/28 -> 3/31
1035+
* but at least you can't shift any dates into the wrong month,
1036+
* and ticks on these days incrementing by month would be very unusual
1037+
*/
1038+
var y = new Date(x + THREEDAYS);
1039+
return y.setUTCMonth(y.getUTCMonth() + dtSigned) - THREEDAYS;
9371040
}
9381041

9391042
// Log scales: Linear, Digits
-25 Bytes
Loading

test/image/mocks/date_histogram.json

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,66 +9,23 @@
99
"2012-03-01 00:00:00",
1010
"2012-02-01 00:00:00"
1111
],
12-
"name": "trace 0",
13-
"autobinx": false,
12+
"autobinx": true,
1413
"nbinsx": 3,
15-
"xbins": {
16-
"start": "2011-12-16",
17-
"end": "2012-03-16",
18-
"size": "M1"
19-
},
20-
"autobiny": true,
21-
"xaxis": "x",
22-
"yaxis": "y",
2314
"showlegend": false,
2415
"type": "histogram"
2516
}
2617
],
2718
"layout": {
28-
"title": "Click to enter Plot title",
29-
"font": {
30-
"family": "\"Open sans\", verdana, arial, sans-serif",
31-
"size": 12,
32-
"color": "#444"
33-
},
34-
"showlegend": true,
3519
"width": 600,
3620
"height": 400,
3721
"xaxis": {
38-
"title": "month",
39-
"showgrid": false,
40-
"zeroline": false,
41-
"showline": false,
42-
"ticks": "",
43-
"showticklabels": true,
44-
"tickcolor": "rgb(127,127,127)",
45-
"gridcolor": "rgb(255,255,255)"
22+
"title": "month"
4623
},
4724
"yaxis": {
4825
"title": "count",
49-
"showgrid": true,
50-
"zeroline": true,
51-
"showline": false,
52-
"ticks": "",
53-
"tickcolor": "rgb(127,127,127)",
5426
"gridcolor": "rgb(255,255,255)"
5527
},
56-
"legend": {
57-
"x": 100,
58-
"y": 0.5,
59-
"traceorder": "reversed",
60-
"font": {
61-
"family": "",
62-
"size": 0,
63-
"color": ""
64-
},
65-
"bgcolor": "#fff",
66-
"bordercolor": "transparent",
67-
"borderwidth": 0
68-
},
6928
"plot_bgcolor": "rgb(229,229,229)",
70-
"barmode": "stack",
71-
"bargap": 0.2,
72-
"bargroupgap": 0
29+
"bargap": 0.2
7330
}
7431
}

0 commit comments

Comments
 (0)