Skip to content

Date bin shift #1201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 28, 2016
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 167 additions & 68 deletions src/plots/cartesian/axes.js
Original file line number Diff line number Diff line change
Expand Up @@ -486,18 +486,19 @@ axes.expand = function(ax, data, options) {
};

axes.autoBin = function(data, ax, nbins, is2d) {
var datamin = Lib.aggNums(Math.min, null, data),
datamax = Lib.aggNums(Math.max, null, data);
var dataMin = Lib.aggNums(Math.min, null, data),
dataMax = Lib.aggNums(Math.max, null, data);

if(ax.type === 'category') {
return {
start: datamin - 0.5,
end: datamax + 0.5,
start: dataMin - 0.5,
end: dataMax + 0.5,
size: 1
};
}

var size0;
if(nbins) size0 = ((datamax - datamin) / nbins);
if(nbins) size0 = ((dataMax - dataMin) / nbins);
else {
// totally auto: scale off std deviation so the highest bin is
// somewhat taller than the total number of bins, but don't let
Expand All @@ -506,102 +507,186 @@ axes.autoBin = function(data, ax, nbins, is2d) {
var distinctData = Lib.distinctVals(data),
msexp = Math.pow(10, Math.floor(
Math.log(distinctData.minDiff) / Math.LN10)),
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed because I have no idea anymore what this was about. We'll wait for a bug report...

// TODO: there are some date cases where this will fail...
minSize = msexp * Lib.roundUp(
distinctData.minDiff / msexp, [0.9, 1.9, 4.9, 9.9], true);
size0 = Math.max(minSize, 2 * Lib.stdev(data) /
Math.pow(data.length, is2d ? 0.25 : 0.4));
}

// piggyback off autotick code to make "nice" bin sizes
var dummyax;
var dummyAx;
if(ax.type === 'log') {
dummyax = {
dummyAx = {
type: 'linear',
range: [datamin, datamax],
range: [dataMin, dataMax],
r2l: Number
};
}
else {
dummyax = {
dummyAx = {
type: ax.type,
// conversion below would be ax.c2r but that's only different from l2r
// for log, and this is the only place (so far?) we would want c2r.
range: [datamin, datamax].map(ax.l2r),
range: [dataMin, dataMax].map(ax.l2r),
r2l: ax.r2l
};
}

axes.autoTicks(dummyax, size0);
var binstart = axes.tickIncrement(
axes.tickFirst(dummyax), dummyax.dtick, 'reverse'),
binend;

function nearEdge(v) {
// is a value within 1% of a bin edge?
return (1 + (v - binstart) * 100 / dummyax.dtick) % 100 < 2;
}
axes.autoTicks(dummyAx, size0);
var binStart = axes.tickIncrement(
axes.tickFirst(dummyAx), dummyAx.dtick, 'reverse'),
binEnd;

// check for too many data points right at the edges of bins
// (>50% within 1% of bin edges) or all data points integral
// and offset the bins accordingly
if(typeof dummyax.dtick === 'number') {
var edgecount = 0,
midcount = 0,
intcount = 0,
blankcount = 0;
for(var i = 0; i < data.length; i++) {
if(data[i] % 1 === 0) intcount++;
else if(!isNumeric(data[i])) blankcount++;

if(nearEdge(data[i])) edgecount++;
if(nearEdge(data[i] + dummyax.dtick / 2)) midcount++;
}
var datacount = data.length - blankcount;

if(intcount === datacount && ax.type !== 'date') {
// all integers: if bin size is <1, it's because
// that was specifically requested (large nbins)
// so respect that... but center the bins containing
// integers on those integers
if(dummyax.dtick < 1) {
binstart = datamin - 0.5 * dummyax.dtick;
}
// otherwise start half an integer down regardless of
// the bin size, just enough to clear up endpoint
// ambiguity about which integers are in which bins.
else binstart -= 0.5;
}
else if(midcount < datacount * 0.1) {
if(edgecount > datacount * 0.3 ||
nearEdge(datamin) || nearEdge(datamax)) {
// lots of points at the edge, not many in the middle
// shift half a bin
var binshift = dummyax.dtick / 2;
binstart += (binstart + binshift < datamin) ? binshift : -binshift;
}
}
if(typeof dummyAx.dtick === 'number') {
binStart = autoShiftNumericBins(binStart, data, dummyAx, dataMin, dataMax);

var bincount = 1 + Math.floor((datamax - binstart) / dummyax.dtick);
binend = binstart + bincount * dummyax.dtick;
var bincount = 1 + Math.floor((dataMax - binStart) / dummyAx.dtick);
binEnd = binStart + bincount * dummyAx.dtick;
}
else {
// month ticks - should be the only nonlinear kind we have at this point.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... after traces/histogram/clean_bins.js that is, correct?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only relevant to auto-binning, where nonlinear ticks are dtick values... dtick only has special meaning on date axes (where 'M<n>' is n-months) and on log axes, but when you display a histogram on log axes we still bin on a linear scale.

if(dummyAx.dtick.charAt(0) === 'M') {
binStart = autoShiftMonthBins(binStart, data, dummyAx.dtick, dataMin);
}

// calculate the endpoint for nonlinear ticks - you have to
// just increment until you're done
binend = binstart;
while(binend <= datamax) {
binend = axes.tickIncrement(binend, dummyax.dtick);
binEnd = binStart;
while(binEnd <= dataMax) {
binEnd = axes.tickIncrement(binEnd, dummyAx.dtick);
}
}

return {
start: ax.c2r(binstart),
end: ax.c2r(binend),
size: dummyax.dtick
start: ax.c2r(binStart),
end: ax.c2r(binEnd),
size: dummyAx.dtick
};
};


function autoShiftNumericBins(binStart, data, ax, dataMin, dataMax) {
var edgecount = 0,
midcount = 0,
intcount = 0,
blankCount = 0;

function nearEdge(v) {
// is a value within 1% of a bin edge?
return (1 + (v - binStart) * 100 / ax.dtick) % 100 < 2;
}

for(var i = 0; i < data.length; i++) {
if(data[i] % 1 === 0) intcount++;
else if(!isNumeric(data[i])) blankCount++;

if(nearEdge(data[i])) edgecount++;
if(nearEdge(data[i] + ax.dtick / 2)) midcount++;
}
var dataCount = data.length - blankCount;

if(intcount === dataCount && ax.type !== 'date') {
// all integers: if bin size is <1, it's because
// that was specifically requested (large nbins)
// so respect that... but center the bins containing
// integers on those integers
if(ax.dtick < 1) {
binStart = dataMin - 0.5 * ax.dtick;
}
// otherwise start half an integer down regardless of
// the bin size, just enough to clear up endpoint
// ambiguity about which integers are in which bins.
else binStart -= 0.5;
}
else if(midcount < dataCount * 0.1) {
if(edgecount > dataCount * 0.3 ||
nearEdge(dataMin) || nearEdge(dataMax)) {
// lots of points at the edge, not many in the middle
// shift half a bin
var binshift = ax.dtick / 2;
binStart += (binStart + binshift < dataMin) ? binshift : -binshift;
}
}
return binStart;
}


function autoShiftMonthBins(binStart, data, dtick, dataMin) {
var exactYears = 0,
exactMonths = 0,
exactDays = 0,
blankCount = 0,
dataCount,
di,
d,
year,
month;

for(var i = 0; i < data.length; i++) {
di = data[i];
if(!isNumeric(di)) {
blankCount ++;
continue;
}
d = new Date(di),
year = d.getUTCFullYear();
if(di === Date.UTC(year, 0, 1)) {
exactYears ++;
}
else {
month = d.getUTCMonth();
if(di === Date.UTC(year, month, 1)) {
exactMonths ++;
}
else if(di === Date.UTC(year, month, d.getUTCDate())) {
exactDays ++;
}
}
}

dataCount = data.length - blankCount;

// include bigger exact dates in the smaller ones
exactMonths += exactYears;
exactDays += exactMonths;

// unmber of data points that needs to be an exact value
// to shift that increment to (near) the bin center
var threshold = 0.8 * dataCount;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non- ⛔ It would be nice to put these auto-algo threshold fractions in a constants.js file somewhere. It's not obvious where that file should be though.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, I'll leave this as is for now but I'm happy to change it if you tell me where to put them. I don't have a good feel for what would make this easy to grok.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No action required in this PR.


if(exactDays > threshold) {
var numMonths = Number(dtick.substr(1));

if((exactYears > threshold) && (numMonths % 12 === 0)) {
// The exact middle of a non-leap-year is 1.5 days into July
// so if we start the bins here, all but leap years will
// get hover-labeled as exact years.
binStart = axes.tickIncrement(binStart, 'M6', 'reverse') + ONEDAY * 1.5;
}
else if(exactMonths > threshold) {
// Months are not as clean, but if we shift half the *longest*
// month (31/2 days) then 31-day months will get labeled exactly
// and shorter months will get labeled with the correct month
// but shifted 12-36 hours into it.
binStart = axes.tickIncrement(binStart, 'M1', 'reverse') + ONEDAY * 15.5;
}
else {
// Shifting half a day is exact, but since these are month bins it
// will always give a somewhat odd-looking label, until we do something
// smarter like showing the bin boundaries (or the bounds of the actual
// data in each bin)
binStart -= ONEDAY / 2;
}
var nextBinStart = axes.tickIncrement(binStart, dtick);

if(nextBinStart <= dataMin) return nextBinStart;
}
return binStart;
}

// ----------------------------------------------------
// Ticks and grids
// ----------------------------------------------------
Expand Down Expand Up @@ -919,6 +1004,7 @@ function autoTickRound(ax) {
// for pure powers of 10
// numeric ticks always have constant differences, other datetime ticks
// can all be calculated as constant number of milliseconds
var THREEDAYS = 3 * ONEDAY;
axes.tickIncrement = function(x, dtick, axrev) {
var axSign = axrev ? -1 : 1;

Expand All @@ -930,10 +1016,23 @@ axes.tickIncrement = function(x, dtick, axrev) {

// Dates: months (or years)
if(tType === 'M') {
var y = new Date(x);
// is this browser consistent? setUTCMonth edits a date but
// returns that date's milliseconds
return y.setUTCMonth(y.getUTCMonth() + dtSigned);
/*
* set(UTC)Month does not (and CANNOT) always preserve day, since
* months have different lengths. The worst example of this is:
* d = new Date(1970,0,31); d.setMonth(1) -> Feb 31 turns into Mar 3
*
* But we want to be able to iterate over the last day of each month,
* regardless of what its number is.
* So shift 3 days forward, THEN set the new month, then unshift:
* 1/31 -> 2/28 (or 29) -> 3/31 -> 4/30 -> ...
*
* Note that odd behavior still exists if you start from the 26th-28th:
* 1/28 -> 2/28 -> 3/31
* but at least you can't shift any dates into the wrong month,
* and ticks on these days incrementing by month would be very unusual
*/
var y = new Date(x + THREEDAYS);
return y.setUTCMonth(y.getUTCMonth() + dtSigned) - THREEDAYS;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@etpinard FYI - this is a slightly breaking change... if anyone explicitly asked for n-month ticks starting on the 26th, 27th, or 28th of a month and the ticks continue past a February, subsequent ticks will be shifted to the last few days of subsequent months. I think it's fairly unlikely this has ever been done, and even if it has it was probably in order to put ticks near the end of the month rather than on that specific date.

Anyway I think the advantages far outweigh this risk.

  • month ticks will never accidentally shift into the wrong month, even if, as a result of this change, they will sometimes change dates (which they did before anyway)
  • you can now put ticks in the last couple of days of the month and have them stay there.

It's probably not clear from this comment why I did this in this PR: I needed it for month bins with exactDay data, where the first bin should start 12 hours before the beginning of each month.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anyway I think the advantages far outweigh this risk.

👍

}

// Log scales: Linear, Digits
Expand Down
Binary file modified test/image/baselines/date_histogram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
49 changes: 3 additions & 46 deletions test/image/mocks/date_histogram.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,66 +9,23 @@
"2012-03-01 00:00:00",
"2012-02-01 00:00:00"
],
"name": "trace 0",
"autobinx": false,
"autobinx": true,
"nbinsx": 3,
"xbins": {
"start": "2011-12-16",
"end": "2012-03-16",
"size": "M1"
},
"autobiny": true,
"xaxis": "x",
"yaxis": "y",
"showlegend": false,
"type": "histogram"
}
],
"layout": {
"title": "Click to enter Plot title",
"font": {
"family": "\"Open sans\", verdana, arial, sans-serif",
"size": 12,
"color": "#444"
},
"showlegend": true,
"width": 600,
"height": 400,
"xaxis": {
"title": "month",
"showgrid": false,
"zeroline": false,
"showline": false,
"ticks": "",
"showticklabels": true,
"tickcolor": "rgb(127,127,127)",
"gridcolor": "rgb(255,255,255)"
"title": "month"
},
"yaxis": {
"title": "count",
"showgrid": true,
"zeroline": true,
"showline": false,
"ticks": "",
"tickcolor": "rgb(127,127,127)",
"gridcolor": "rgb(255,255,255)"
},
"legend": {
"x": 100,
"y": 0.5,
"traceorder": "reversed",
"font": {
"family": "",
"size": 0,
"color": ""
},
"bgcolor": "#fff",
"bordercolor": "transparent",
"borderwidth": 0
},
"plot_bgcolor": "rgb(229,229,229)",
"barmode": "stack",
"bargap": 0.2,
"bargroupgap": 0
"bargap": 0.2
}
}
10 changes: 3 additions & 7 deletions test/jasmine/tests/histogram2d_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,11 @@ describe('Test histogram2d', function() {

// TODO: even though the binning is done on non-uniform bins,
// the display makes them linear (using only y0 and dy)
// when we sort out https://github.com/plotly/plotly.js/issues/1151
// lets also make it display the bins with nonuniform size,
// and ensure we don't generate an extra bin on the end (see
// first row of z below)
expect(out.y0).toBe('1969-07-02 14:24');
expect(out.dy).toBe(365.2 * oneDay);
// Can we also make it display the bins with nonuniform size?
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed from the scope of this PR because it turned out to be more of a pain than I thought, and it's a really minor effect visually.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ref #360

expect(out.y0).toBe('1970-01-01 03:00');
expect(out.dy).toBe(365.25 * oneDay);

expect(out.z).toEqual([
[0, 0, 0, 0],
[2, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 0, 0],
Expand Down
Loading