Merged · Changes from 14 commits
16 changes: 16 additions & 0 deletions README.md
@@ -1022,6 +1022,22 @@ Draws a mesh for the cell boundaries of the Voronoi tessellation of the points gi

If a **z** channel is specified, the input points are grouped by *z*, and separate Voronoi tessellations are constructed for each group.

### Density

[<img src="./img/density-contours.png" width="325" height="204" alt="A scatterplot showing the relationship between the idle duration and eruption duration for Old Faithful">](https://observablehq.com/@observablehq/plot-density)

[Source](./src/marks/density.js) · [Examples](https://observablehq.com/@observablehq/plot-density) · Draws regions of a two-dimensional point distribution in which the estimated density of points per unit of screen area exceeds successive threshold values.

#### Plot.density(*data*, *options*)

Draws a contour region for each density level: the area in which the density of points given by the **x** and **y** channels, optionally weighted by the **weight** channel, exceeds that level. The **thresholds** option, which defaults to 20, indicates the approximate number of levels that will be computed at even intervals between 0 and the maximum density.

If a **z**, **stroke**, or **fill** channel is specified, the input points are grouped by series, and a separate set of contours is generated for each series.

If **stroke** or **fill** is specified as *density*, a color channel is returned with values representing the density normalized between 0 and 1.

If either the **x** or **y** channel is not specified, the corresponding position is controlled by the **frameAnchor** option.
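For example, the following sketch overlays density contours, colored by density, on a scatterplot; the *faithful* dataset and its *waiting* and *eruptions* column names are illustrative assumptions, not part of this PR:

```js
Plot.plot({
  marks: [
    // Hypothetical Old Faithful data; stroke: "density" colors each
    // contour by its normalized density value, as described above.
    Plot.density(faithful, {x: "waiting", y: "eruptions", stroke: "density"}),
    Plot.dot(faithful, {x: "waiting", y: "eruptions"})
  ]
})
```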

### Dot

[<img src="./img/dot.png" width="320" height="198" alt="a scatterplot">](https://observablehq.com/@observablehq/plot-dot)
Binary file added img/density-contours.png
2 changes: 1 addition & 1 deletion package.json
@@ -50,7 +50,7 @@
"vite": "2"
},
"dependencies": {
"d3": "^7.3.0",
"d3": "^7.4.5",
"interval-tree-1d": "1",
"isoformat": "0.2"
},
114 changes: 92 additions & 22 deletions src/marks/density.js
@@ -1,7 +1,8 @@
import {contourDensity, create, geoPath} from "d3";
import {constant, maybeTuple} from "../options.js";
import {constant, maybeTuple, maybeZ, valueof} from "../options.js";
import {Mark} from "../plot.js";
import {applyFrameAnchor, applyIndirectStyles, applyTransform} from "../style.js";
import {applyFrameAnchor, applyGroupedChannelStyles, applyDirectStyles, applyIndirectStyles, applyTransform, groupZ} from "../style.js";
import {initializer} from "../transforms/basic.js";

const defaults = {
ariaLabel: "density",
@@ -11,44 +12,113 @@ const defaults = {
};

export class Density extends Mark {
constructor(data, options = {}) {
const {x, y, bandwidth = 20, thresholds = 20} = options;
constructor(data, {x, y, z, weight, stroke, fill, bandwidth = 20, thresholds = 20, ...options} = {}) {
let f, s;
if (fill === "density") { fill = undefined; f = true; }
if (stroke === "density") { stroke = undefined; s = true; }
super(
data,
[
{name: "x", value: x, scale: "x", optional: true},
{name: "y", value: y, scale: "y", optional: true}
{name: "y", value: y, scale: "y", optional: true},
{name: "weight", value: weight, optional: true},
{name: "z", value: maybeZ({z, fill, stroke}), optional: true}
],
options,
densityInitializer({...options, fill, stroke}, +bandwidth, +thresholds, f, s),
defaults
);
this.bandwidth = +bandwidth;
this.thresholds = +thresholds;
this.z = z;
this.path = geoPath();
}
filter(index) {
return index;
}
render(index, scales, channels, dimensions) {
const {x: X, y: Y} = channels;
const {bandwidth, thresholds} = this;
const [cx, cy] = applyFrameAnchor(this, dimensions);
const {width, height} = dimensions;
const {contours} = channels;
const {path} = this;
return create("svg:g")
.call(applyIndirectStyles, this, scales, dimensions)
.call(applyTransform, this, scales)
.call(g => g.selectAll("path")
.data(contourDensity()
.x(X ? i => X[i] : constant(cx))
.y(Y ? i => Y[i] : constant(cy))
.size([width, height])
.bandwidth(bandwidth)
.thresholds(thresholds)
(index))
.call(g => g.selectAll()
.data(Array.from(index, i => [i]))
> **Member:** If we use `applyChannelStyles` instead of `applyGroupedChannelStyles`, we won’t need to do this. (I’ll make this change shortly.)

.enter()
.append("path")
.attr("d", geoPath()))
.node();
.call(applyDirectStyles, this)
.call(applyGroupedChannelStyles, this, channels)
.attr("d", ([i]) => path(contours[i])))
.node();
}
}

export function density(data, {x, y, ...options} = {}) {
([x, y] = maybeTuple(x, y));
return new Density(data, {...options, x, y});
}

function densityInitializer(options, bandwidth, thresholds, f, s) {
return initializer(options, function(data, facets, channels, scales, dimensions) {
const X = channels.x?.scale ? valueof(channels.x.value, scales[channels.x.scale]) : channels.x?.value;
const Y = channels.y?.scale ? valueof(channels.y.value, scales[channels.y.scale]) : channels.y?.value;
const W = channels.weight?.value;
const Z = channels.z?.value;
const {z} = this;
const [cx, cy] = applyFrameAnchor(options, dimensions);
> **Member:** `applyFrameAnchor` takes `this` as the first argument, not `options`. (I’ll fix this.)

const {width, height} = dimensions;
const newFacets = [];
const contours = [];
const newChannels = Object.entries(channels).filter(([key]) => key !== "x" && key !== "y" && key !== "weight").map(([key, d]) => [key, {...d, value: []}]);
if (f) newChannels.push(["fill", {value: [], scale: "color"}]);
if (s) newChannels.push(["stroke", {value: [], scale: "color"}]);
let max = 0, maxn = 0;
const density = contourDensity()
.x(X ? i => X[i] : constant(cx))
.y(Y ? i => Y[i] : constant(cy))
.weight(W ? i => W[i] : 1)
.size([width, height])
.bandwidth(bandwidth)
.thresholds(thresholds);

// First pass: seek the maximum density across all facets and series; memoize for performance.
> **Member:** “Memoizing” refers to a caching technique where, given a function that computes a consistent return value for its given arguments, the return values are cached by their arguments so as to avoid invoking the function repeatedly for the same arguments. This isn’t really memoizing here; there’s not a function that represents the computational task. I haven’t quite followed what this code is doing yet, but this is probably something like “cache the initial set of contours to avoid recomputing them in the second pass”. I need to read this code more thoroughly (I’m making an edit pass now), but I’ll add more comments here.
>
> **Contributor (author):** Yes, cache the contours and reuse those that ended up with the same threshold selection.

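For reference, memoization in the strict sense looks something like the following generic sketch (not code from this PR; the `memoize` helper is hypothetical):

```js
// Generic memoization sketch: cache a function's return values by
// argument so repeated calls with the same argument reuse the cache.
function memoize(f) {
  const cache = new Map();
  return (arg) => {
    if (!cache.has(arg)) cache.set(arg, f(arg));
    return cache.get(arg);
  };
}
```

The code below instead caches the first-pass contour sets directly, alongside their top threshold value, and reuses a set in the second pass only when its thresholds already match the final selection (that is, when its top value equals the maximum).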
const memo = [];
thresholds = [];
for (const [facetIndex, facet] of facets.entries()) {
newFacets.push([]);
for (const index of Z ? groupZ(facet, Z, z) : [facet]) {
const c = density(index);
const d = c[c.length - 1];
if (d) {
if (d.value > max) {
max = d.value;
maxn = c.length;
thresholds = c.map(d => d.value);
}
memo.push({facetIndex, index, c, top: d.value});
}
}
}

// Second pass: generate contours with the thresholds derived above
density.thresholds(thresholds);
for (const {facetIndex, index, c: memoc, top} of memo) {
const c = top < max ? density(index) : memoc;
> **Member:** I’m not sure how much this caching is helping; I think most commonly, only one series (or facet-series) will be cacheable as having the max threshold. All the others will need to be recomputed. I guess that’s still helpful in the common case of only a single series and single facet, but I suspect we could make this significantly faster if d3.contourDensity had a way to compute and store the blurred grid, and then compute individual contours for a particular threshold afterward. That way, we wouldn’t have to recompute the underlying grid when computing new contours, and we could cache individual contours rather than sets.

for (const contour of c) {
newFacets[facetIndex].push(contours.length);
contours.push(contour);
for (const [key, {value}] of newChannels) {
value.push(
(f && key === "fill") || (s && key === "stroke") ? contour.value
: channels[key].value[index[0]]
);
}
}
}

channels = {contours: {value: contours}, ...Object.fromEntries(newChannels)};
// normalize colors to a thresholds scale
const m = max * (maxn + 1) / maxn;
if (f) channels.fill.value = channels.fill.value.map(v => v / m);
if (s) channels.stroke.value = channels.stroke.value.map(v => v / m);
> **Member:** I’m thinking maybe we don’t do this, and just leave the units as “points per square pixel” or “value per square pixel” or perhaps “points per 1,000 square pixels” as a more human-readable value. Even though the value is arbitrary and resolution-dependent, it’s still probably more useful than 0 = 0, 1 = maximum observed density because it allows you to have a consistent scale across charts.
>
> **Contributor (author):** I guess it depends if you want to compare distributions or actual quantities, but I’m OK with reverting this. If needed, we could add the normalization as an option, but I’m not convinced it’s that useful; I was just trying to make sense of the values. 100 square pixels is probably a better choice for a unit than 1,000, as one can picture a 10x10 square hosting 1 or a few points.

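To make the normalization above concrete, a small arithmetic sketch with hypothetical values: the top contour’s value equals the maximum observed density, so dividing by `m = max * (maxn + 1) / maxn` maps it to `maxn / (maxn + 1)` rather than exactly 1, leaving headroom above the highest threshold.

```js
// Hypothetical values illustrating the normalization above.
const max = 0.042;                 // maximum observed density across series
const maxn = 20;                   // number of contours in that series
const m = max * (maxn + 1) / maxn; // 0.0441
console.log(max / m);              // 0.9523… = maxn / (maxn + 1)
```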

return {data, facets: newFacets, channels};
});
}