-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Date bin shift #1201
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Date bin shift #1201
Changes from 5 commits
f6b902c
1b9b122
11f6830
8628661
dcd4be1
0050c1e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -486,18 +486,19 @@ axes.expand = function(ax, data, options) { | |
}; | ||
|
||
axes.autoBin = function(data, ax, nbins, is2d) { | ||
var datamin = Lib.aggNums(Math.min, null, data), | ||
datamax = Lib.aggNums(Math.max, null, data); | ||
var dataMin = Lib.aggNums(Math.min, null, data), | ||
dataMax = Lib.aggNums(Math.max, null, data); | ||
|
||
if(ax.type === 'category') { | ||
return { | ||
start: datamin - 0.5, | ||
end: datamax + 0.5, | ||
start: dataMin - 0.5, | ||
end: dataMax + 0.5, | ||
size: 1 | ||
}; | ||
} | ||
|
||
var size0; | ||
if(nbins) size0 = ((datamax - datamin) / nbins); | ||
if(nbins) size0 = ((dataMax - dataMin) / nbins); | ||
else { | ||
// totally auto: scale off std deviation so the highest bin is | ||
// somewhat taller than the total number of bins, but don't let | ||
|
@@ -506,102 +507,186 @@ axes.autoBin = function(data, ax, nbins, is2d) { | |
var distinctData = Lib.distinctVals(data), | ||
msexp = Math.pow(10, Math.floor( | ||
Math.log(distinctData.minDiff) / Math.LN10)), | ||
// TODO: there are some date cases where this will fail... | ||
minSize = msexp * Lib.roundUp( | ||
distinctData.minDiff / msexp, [0.9, 1.9, 4.9, 9.9], true); | ||
size0 = Math.max(minSize, 2 * Lib.stdev(data) / | ||
Math.pow(data.length, is2d ? 0.25 : 0.4)); | ||
} | ||
|
||
// piggyback off autotick code to make "nice" bin sizes | ||
var dummyax; | ||
var dummyAx; | ||
if(ax.type === 'log') { | ||
dummyax = { | ||
dummyAx = { | ||
type: 'linear', | ||
range: [datamin, datamax], | ||
range: [dataMin, dataMax], | ||
r2l: Number | ||
}; | ||
} | ||
else { | ||
dummyax = { | ||
dummyAx = { | ||
type: ax.type, | ||
// conversion below would be ax.c2r but that's only different from l2r | ||
// for log, and this is the only place (so far?) we would want c2r. | ||
range: [datamin, datamax].map(ax.l2r), | ||
range: [dataMin, dataMax].map(ax.l2r), | ||
r2l: ax.r2l | ||
}; | ||
} | ||
|
||
axes.autoTicks(dummyax, size0); | ||
var binstart = axes.tickIncrement( | ||
axes.tickFirst(dummyax), dummyax.dtick, 'reverse'), | ||
binend; | ||
|
||
function nearEdge(v) { | ||
// is a value within 1% of a bin edge? | ||
return (1 + (v - binstart) * 100 / dummyax.dtick) % 100 < 2; | ||
} | ||
axes.autoTicks(dummyAx, size0); | ||
var binStart = axes.tickIncrement( | ||
axes.tickFirst(dummyAx), dummyAx.dtick, 'reverse'), | ||
binEnd; | ||
|
||
// check for too many data points right at the edges of bins | ||
// (>50% within 1% of bin edges) or all data points integral | ||
// and offset the bins accordingly | ||
if(typeof dummyax.dtick === 'number') { | ||
var edgecount = 0, | ||
midcount = 0, | ||
intcount = 0, | ||
blankcount = 0; | ||
for(var i = 0; i < data.length; i++) { | ||
if(data[i] % 1 === 0) intcount++; | ||
else if(!isNumeric(data[i])) blankcount++; | ||
|
||
if(nearEdge(data[i])) edgecount++; | ||
if(nearEdge(data[i] + dummyax.dtick / 2)) midcount++; | ||
} | ||
var datacount = data.length - blankcount; | ||
|
||
if(intcount === datacount && ax.type !== 'date') { | ||
// all integers: if bin size is <1, it's because | ||
// that was specifically requested (large nbins) | ||
// so respect that... but center the bins containing | ||
// integers on those integers | ||
if(dummyax.dtick < 1) { | ||
binstart = datamin - 0.5 * dummyax.dtick; | ||
} | ||
// otherwise start half an integer down regardless of | ||
// the bin size, just enough to clear up endpoint | ||
// ambiguity about which integers are in which bins. | ||
else binstart -= 0.5; | ||
} | ||
else if(midcount < datacount * 0.1) { | ||
if(edgecount > datacount * 0.3 || | ||
nearEdge(datamin) || nearEdge(datamax)) { | ||
// lots of points at the edge, not many in the middle | ||
// shift half a bin | ||
var binshift = dummyax.dtick / 2; | ||
binstart += (binstart + binshift < datamin) ? binshift : -binshift; | ||
} | ||
} | ||
if(typeof dummyAx.dtick === 'number') { | ||
binStart = autoShiftNumericBins(binStart, data, dummyAx, dataMin, dataMax); | ||
|
||
var bincount = 1 + Math.floor((datamax - binstart) / dummyax.dtick); | ||
binend = binstart + bincount * dummyax.dtick; | ||
var bincount = 1 + Math.floor((dataMax - binStart) / dummyAx.dtick); | ||
binEnd = binStart + bincount * dummyAx.dtick; | ||
} | ||
else { | ||
// month ticks - should be the only nonlinear kind we have at this point. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ... after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is only relevant to auto-binning, where nonlinear ticks are |
||
if(dummyAx.dtick.charAt(0) === 'M') { | ||
binStart = autoShiftMonthBins(binStart, data, dummyAx.dtick, dataMin); | ||
} | ||
|
||
// calculate the endpoint for nonlinear ticks - you have to | ||
// just increment until you're done | ||
binend = binstart; | ||
while(binend <= datamax) { | ||
binend = axes.tickIncrement(binend, dummyax.dtick); | ||
binEnd = binStart; | ||
while(binEnd <= dataMax) { | ||
binEnd = axes.tickIncrement(binEnd, dummyAx.dtick); | ||
} | ||
} | ||
|
||
return { | ||
start: ax.c2r(binstart), | ||
end: ax.c2r(binend), | ||
size: dummyax.dtick | ||
start: ax.c2r(binStart), | ||
end: ax.c2r(binEnd), | ||
size: dummyAx.dtick | ||
}; | ||
}; | ||
|
||
|
||
function autoShiftNumericBins(binStart, data, ax, dataMin, dataMax) { | ||
var edgecount = 0, | ||
midcount = 0, | ||
intcount = 0, | ||
blankCount = 0; | ||
|
||
function nearEdge(v) { | ||
// is a value within 1% of a bin edge? | ||
return (1 + (v - binStart) * 100 / ax.dtick) % 100 < 2; | ||
} | ||
|
||
for(var i = 0; i < data.length; i++) { | ||
if(data[i] % 1 === 0) intcount++; | ||
else if(!isNumeric(data[i])) blankCount++; | ||
|
||
if(nearEdge(data[i])) edgecount++; | ||
if(nearEdge(data[i] + ax.dtick / 2)) midcount++; | ||
} | ||
var dataCount = data.length - blankCount; | ||
|
||
if(intcount === dataCount && ax.type !== 'date') { | ||
// all integers: if bin size is <1, it's because | ||
// that was specifically requested (large nbins) | ||
// so respect that... but center the bins containing | ||
// integers on those integers | ||
if(ax.dtick < 1) { | ||
binStart = dataMin - 0.5 * ax.dtick; | ||
} | ||
// otherwise start half an integer down regardless of | ||
// the bin size, just enough to clear up endpoint | ||
// ambiguity about which integers are in which bins. | ||
else binStart -= 0.5; | ||
} | ||
else if(midcount < dataCount * 0.1) { | ||
if(edgecount > dataCount * 0.3 || | ||
nearEdge(dataMin) || nearEdge(dataMax)) { | ||
// lots of points at the edge, not many in the middle | ||
// shift half a bin | ||
var binshift = ax.dtick / 2; | ||
binStart += (binStart + binshift < dataMin) ? binshift : -binshift; | ||
} | ||
} | ||
return binStart; | ||
} | ||
|
||
|
||
function autoShiftMonthBins(binStart, data, dtick, dataMin) { | ||
var exactYears = 0, | ||
exactMonths = 0, | ||
exactDays = 0, | ||
blankCount = 0, | ||
dataCount, | ||
di, | ||
d, | ||
year, | ||
month; | ||
|
||
for(var i = 0; i < data.length; i++) { | ||
di = data[i]; | ||
if(!isNumeric(di)) { | ||
blankCount ++; | ||
continue; | ||
} | ||
d = new Date(di), | ||
year = d.getUTCFullYear(); | ||
if(di === Date.UTC(year, 0, 1)) { | ||
exactYears ++; | ||
} | ||
else { | ||
month = d.getUTCMonth(); | ||
if(di === Date.UTC(year, month, 1)) { | ||
exactMonths ++; | ||
} | ||
else if(di === Date.UTC(year, month, d.getUTCDate())) { | ||
exactDays ++; | ||
} | ||
} | ||
} | ||
|
||
dataCount = data.length - blankCount; | ||
|
||
// include bigger exact dates in the smaller ones | ||
exactMonths += exactYears; | ||
exactDays += exactMonths; | ||
|
||
// unmber of data points that needs to be an exact value | ||
// to shift that increment to (near) the bin center | ||
var threshold = 0.8 * dataCount; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. non- ⛔ It would be nice to put these auto-algo threshold fractions in a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alright, I'll leave this as is for now but I'm happy to change it if you tell me where to put them. I don't have a good feel for what would make this easy to grok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No action required in this PR. |
||
|
||
if(exactDays > threshold) { | ||
var numMonths = Number(dtick.substr(1)); | ||
|
||
if((exactYears > threshold) && (numMonths % 12 === 0)) { | ||
// The exact middle of a non-leap-year is 1.5 days into July | ||
// so if we start the bins here, all but leap years will | ||
// get hover-labeled as exact years. | ||
binStart = axes.tickIncrement(binStart, 'M6', 'reverse') + ONEDAY * 1.5; | ||
} | ||
else if(exactMonths > threshold) { | ||
// Months are not as clean, but if we shift half the *longest* | ||
// month (31/2 days) then 31-day months will get labeled exactly | ||
// and shorter months will get labeled with the correct month | ||
// but shifted 12-36 hours into it. | ||
binStart = axes.tickIncrement(binStart, 'M1', 'reverse') + ONEDAY * 15.5; | ||
} | ||
else { | ||
// Shifting half a day is exact, but since these are month bins it | ||
// will always give a somewhat odd-looking label, until we do something | ||
// smarter like showing the bin boundaries (or the bounds of the actual | ||
// data in each bin) | ||
binStart -= ONEDAY / 2; | ||
} | ||
var nextBinStart = axes.tickIncrement(binStart, dtick); | ||
|
||
if(nextBinStart <= dataMin) return nextBinStart; | ||
} | ||
return binStart; | ||
} | ||
|
||
// ---------------------------------------------------- | ||
// Ticks and grids | ||
// ---------------------------------------------------- | ||
|
@@ -919,6 +1004,7 @@ function autoTickRound(ax) { | |
// for pure powers of 10 | ||
// numeric ticks always have constant differences, other datetime ticks | ||
// can all be calculated as constant number of milliseconds | ||
var THREEDAYS = 3 * ONEDAY; | ||
axes.tickIncrement = function(x, dtick, axrev) { | ||
var axSign = axrev ? -1 : 1; | ||
|
||
|
@@ -930,10 +1016,23 @@ axes.tickIncrement = function(x, dtick, axrev) { | |
|
||
// Dates: months (or years) | ||
if(tType === 'M') { | ||
var y = new Date(x); | ||
// is this browser consistent? setUTCMonth edits a date but | ||
// returns that date's milliseconds | ||
return y.setUTCMonth(y.getUTCMonth() + dtSigned); | ||
/* | ||
* set(UTC)Month does not (and CANNOT) always preserve day, since | ||
* months have different lengths. The worst example of this is: | ||
* d = new Date(1970,0,31); d.setMonth(1) -> Feb 31 turns into Mar 3 | ||
* | ||
* But we want to be able to iterate over the last day of each month, | ||
* regardless of what its number is. | ||
* So shift 3 days forward, THEN set the new month, then unshift: | ||
* 1/31 -> 2/28 (or 29) -> 3/31 -> 4/30 -> ... | ||
* | ||
* Note that odd behavior still exists if you start from the 26th-28th: | ||
* 1/28 -> 2/28 -> 3/31 | ||
* but at least you can't shift any dates into the wrong month, | ||
* and ticks on these days incrementing by month would be very unusual | ||
*/ | ||
var y = new Date(x + THREEDAYS); | ||
return y.setUTCMonth(y.getUTCMonth() + dtSigned) - THREEDAYS; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @etpinard FYI - this is a slightly breaking change... if anyone explicitly asked for n-month ticks starting on the 26th, 27th, or 28th of a month and the ticks continue past a February, subsequent ticks will be shifted to the last few days of subsequent months. I think it's fairly unlikely this has ever been done, and even if it has it was probably in order to put ticks near the end of the month rather than on that specific date. Anyway I think the advantages far outweigh this risk.
It's probably not clear from this comment why I did this in this PR: I needed it for month bins with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
👍 |
||
} | ||
|
||
// Log scales: Linear, Digits | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,15 +91,11 @@ describe('Test histogram2d', function() { | |
|
||
// TODO: even though the binning is done on non-uniform bins, | ||
// the display makes them linear (using only y0 and dy) | ||
// when we sort out https://github.com/plotly/plotly.js/issues/1151 | ||
// lets also make it display the bins with nonuniform size, | ||
// and ensure we don't generate an extra bin on the end (see | ||
// first row of z below) | ||
expect(out.y0).toBe('1969-07-02 14:24'); | ||
expect(out.dy).toBe(365.2 * oneDay); | ||
// Can we also make it display the bins with nonuniform size? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed from the scope of this PR because it turned out to be more of a pain than I thought, and it's a really minor effect visually. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ref #360 |
||
expect(out.y0).toBe('1970-01-01 03:00'); | ||
expect(out.dy).toBe(365.25 * oneDay); | ||
|
||
expect(out.z).toEqual([ | ||
[0, 0, 0, 0], | ||
[2, 0, 0, 0], | ||
[0, 1, 0, 0], | ||
[0, 0, 0, 0], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed because I have no idea anymore what this was about. We'll wait for a bug report...