Skip to content

Commit

Permalink
Contiguous storage for path segments
Browse files Browse the repository at this point in the history
This is part of the larger multisampled path rendering work, under stroke rework (#303). It refactors the GPU pipeline so that the path segments available to fine rasterization are stored as a contiguous slice rather than a linked list as before.

Numerous parts of the pipeline are refactored. In the old pipeline, path segment decoding generated cubic line segments and also estimated a (somewhat imprecise) bounding box, and the combination of flattening those cubics and tiling was in a separate stage (path_coarse) quite a bit later in the pipeline. In the new pipeline, path decoding is fused with flattening, generating a `LineSoup` structure (line segments associated with paths, otherwise unordered) and computing the bbox as a side effect, and tiling is spread over multiple stages later in the pipeline.

The first tiling stage (path_count) counts the number of tiles that will be generated. Then coarse rasterization allocates contiguous slices based on those counts. The second stage does a scattered write of the resulting tiles. Both of these stages rely on indirect dispatch, as the number of lines and the number of segments (respectively) are not known at encode time.

These changes only make sense for filled paths, thus they rely on stroke expansion being done earlier, currently on the CPU.
  • Loading branch information
raphlinus committed Oct 6, 2023
1 parent 34483eb commit 7220ce6
Show file tree
Hide file tree
Showing 20 changed files with 756 additions and 746 deletions.
11 changes: 11 additions & 0 deletions crates/encoding/src/clip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,14 @@ pub struct Clip {
pub struct ClipBbox {
pub bbox: [f32; 4],
}

impl ClipBic {
    /// Creates a `ClipBic` from its two counters.
    pub fn new(a: u32, b: u32) -> Self {
        ClipBic { a, b }
    }

    /// Combines `self` (left operand) with `other` (right operand).
    ///
    /// The overlap `m = min(self.b, other.a)` is cancelled from both sides:
    /// the result is `(self.a + other.a - m, self.b + other.b - m)`.
    ///
    /// The operation is order-sensitive; swapping the operands generally
    /// yields a different result.
    pub fn combine(self, other: ClipBic) -> Self {
        let m = self.b.min(other.a);
        // `m` never exceeds `other.a` or `self.b`, so subtract it first.
        // Adding first could overflow `u32` in the intermediate sum even
        // though the final value is representable.
        ClipBic::new(self.a + (other.a - m), (self.b - m) + other.b)
    }
}
35 changes: 31 additions & 4 deletions crates/encoding/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

use crate::SegmentCount;

use super::{
BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Layout, Path,
PathBbox, PathMonoid, PathSegment, Tile,
BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Layout, LineSoup,
Path, PathBbox, PathMonoid, PathSegment, Tile,
};
use bytemuck::{Pod, Zeroable};
use std::mem;
Expand All @@ -29,8 +31,24 @@ pub struct BumpAllocators {
pub binning: u32,
pub ptcl: u32,
pub tile: u32,
pub seg_counts: u32,
pub segments: u32,
pub blend: u32,
pub lines: u32,
}

/// Storage of indirect dispatch size values.
///
/// The original plan was to reuse BumpAllocators, but the WebGPU compatible
/// usage list rules forbid that being used as indirect counts while also
/// bound as writable.
///
/// The struct is `#[repr(C)]` and consists of four `u32` fields, so its
/// in-memory size is 16 bytes.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
pub struct IndirectCount {
/// Dispatch size along X (presumably a workgroup count — confirm against
/// the shader that writes it).
pub count_x: u32,
/// Dispatch size along Y (same assumption as `count_x`).
pub count_y: u32,
/// Dispatch size along Z (same assumption as `count_x`).
pub count_z: u32,
/// Padding word; presumably unused and only rounds the struct up to
/// 16 bytes — TODO confirm.
pub pad0: u32,
}

/// Uniform render configuration data used by all GPU stages.
Expand Down Expand Up @@ -114,7 +132,7 @@ pub struct WorkgroupCounts {
pub path_scan1: WorkgroupSize,
pub path_scan: WorkgroupSize,
pub bbox_clear: WorkgroupSize,
pub path_seg: WorkgroupSize,
pub flatten: WorkgroupSize,
pub draw_reduce: WorkgroupSize,
pub draw_leaf: WorkgroupSize,
pub clip_reduce: WorkgroupSize,
Expand Down Expand Up @@ -159,7 +177,7 @@ impl WorkgroupCounts {
path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
path_scan: (path_tag_wgs, 1, 1),
bbox_clear: (draw_object_wgs, 1, 1),
path_seg: (path_coarse_wgs, 1, 1),
flatten: (path_coarse_wgs, 1, 1),
draw_reduce: (draw_object_wgs, 1, 1),
draw_leaf: (draw_object_wgs, 1, 1),
clip_reduce: (clip_reduce_wgs, 1, 1),
Expand Down Expand Up @@ -248,11 +266,14 @@ pub struct BufferSizes {
pub clip_bboxes: BufferSize<ClipBbox>,
pub draw_bboxes: BufferSize<DrawBbox>,
pub bump_alloc: BufferSize<BumpAllocators>,
pub indirect_count: BufferSize<IndirectCount>,
pub bin_headers: BufferSize<BinHeader>,
pub paths: BufferSize<Path>,
// Bump allocated buffers
pub lines: BufferSize<LineSoup>,
pub bin_data: BufferSize<u32>,
pub tiles: BufferSize<Tile>,
pub seg_counts: BufferSize<SegmentCount>,
pub segments: BufferSize<PathSegment>,
pub ptcl: BufferSize<u32>,
}
Expand Down Expand Up @@ -284,6 +305,7 @@ impl BufferSizes {
let clip_bboxes = BufferSize::new(n_clips);
let draw_bboxes = BufferSize::new(n_paths);
let bump_alloc = BufferSize::new(1);
let indirect_count = BufferSize::new(1);
let bin_headers = BufferSize::new(draw_object_wgs * 256);
let n_paths_aligned = align_up(n_paths, 256);
let paths = BufferSize::new(n_paths_aligned);
Expand All @@ -293,6 +315,8 @@ impl BufferSizes {
// reasonable heuristics.
let bin_data = BufferSize::new(1 << 18);
let tiles = BufferSize::new(1 << 21);
let lines = BufferSize::new(1 << 21);
let seg_counts = BufferSize::new(1 << 21);
let segments = BufferSize::new(1 << 21);
let ptcl = BufferSize::new(1 << 23);
Self {
Expand All @@ -311,10 +335,13 @@ impl BufferSizes {
clip_bboxes,
draw_bboxes,
bump_alloc,
indirect_count,
lines,
bin_headers,
paths,
bin_data,
tiles,
seg_counts,
segments,
ptcl,
}
Expand Down
7 changes: 4 additions & 3 deletions crates/encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ mod resolve;
pub use binning::BinHeader;
pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
pub use config::{
BufferSize, BufferSizes, BumpAllocators, ConfigUniform, RenderConfig, WorkgroupCounts,
WorkgroupSize,
BufferSize, BufferSizes, BumpAllocators, ConfigUniform, IndirectCount, RenderConfig,
WorkgroupCounts, WorkgroupSize,
};
pub use draw::{
DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,
Expand All @@ -35,7 +35,8 @@ pub use encoding::{Encoding, StreamOffsets};
pub use math::Transform;
pub use monoid::Monoid;
pub use path::{
Cubic, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag, Tile,
Cubic, LineSoup, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType,
PathTag, SegmentCount, Tile,
};
pub use resolve::{resolve_solid_paths_only, Layout};

Expand Down
26 changes: 24 additions & 2 deletions crates/encoding/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,36 @@ use peniko::kurbo::Shape;

use super::Monoid;

/// Line segment (after flattening, before tiling).
///
/// Each record associates one line with a path; the records are otherwise
/// unordered.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct LineSoup {
/// Index of the path this line is associated with.
pub path_ix: u32,
/// Padding word; presumably keeps the layout in step with the shader-side
/// struct — TODO confirm.
pub _padding: u32,
/// First endpoint of the line (presumably `[x, y]` — confirm).
pub p0: [f32; 2],
/// Second endpoint of the line (presumably `[x, y]` — confirm).
pub p1: [f32; 2],
}

/// Segment count record.
///
/// Pairs a line index with a packed pair of per-segment counters (see the
/// note on `counts`). Presumably one record is produced per tile segment by
/// the counting stage — TODO confirm against the shaders.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct SegmentCount {
/// Index of a line (presumably into the `LineSoup` buffer — confirm).
pub line_ix: u32,
// This could more accurately be modeled as:
// segment_within_line: u16,
// segment_within_slice: u16,
// However, here we mirror the way it's written in WGSL
// NOTE(review): which half of the word holds which counter is not visible
// here — check the WGSL definition before unpacking.
pub counts: u32,
}

/// Path segment.
///
/// NOTE(review): this listing comes from a diff rendered without +/- markers.
/// `next` and `_padding` are both shown; given that this change replaces the
/// linked list of segments with contiguous storage, `next` is presumably the
/// removed link field and `_padding` its same-sized replacement — confirm
/// against the actual file.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct PathSegment {
/// Segment origin (presumably the `[x, y]` start point — confirm).
pub origin: [f32; 2],
/// Segment delta (presumably the offset from `origin` to the end point —
/// confirm).
pub delta: [f32; 2],
/// Meaning not evident from this listing — TODO document from the shader.
pub y_edge: f32,
pub next: u32,
pub _padding: u32,
}

/// Path segment type.
Expand Down Expand Up @@ -193,7 +215,7 @@ pub struct PathBbox {
#[repr(C)]
pub struct Path {
/// Bounding box in tiles.
pub bbox: [f32; 4],
pub bbox: [u32; 4],
/// Offset (in u32s) to tile rectangle.
pub tiles: u32,
_padding: [u32; 3],
Expand Down
59 changes: 29 additions & 30 deletions shader/coarse.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var<storage> info_bin_data: array<u32>;
var<storage> paths: array<Path>;

@group(0) @binding(6)
var<storage> tiles: array<Tile>;
var<storage, read_write> tiles: array<Tile>;

@group(0) @binding(7)
var<storage, read_write> bump: BumpAllocators;
Expand Down Expand Up @@ -82,31 +82,30 @@ fn alloc_cmd(size: u32) {
}
}

// NOTE(review): this listing is a diff rendered without +/- markers, so the
// previous two-argument `write_path` and the new three-argument version are
// interleaved below. The comments added here describe the three-argument
// version only; the CmdStroke lines in the final `else` branch presumably
// belong to the removed version — confirm against the actual file.
fn write_path(tile: Tile, linewidth: f32) -> bool {
// TODO: take flags
alloc_cmd(3u);
if linewidth < 0.0 {
let even_odd = linewidth < -1.0;
if tile.segments != 0u {
let fill = CmdFill(tile.segments, tile.backdrop);
ptcl[cmd_offset] = CMD_FILL;
let segments_and_rule = select(fill.tile << 1u, (fill.tile << 1u) | 1u, even_odd);
ptcl[cmd_offset + 1u] = segments_and_rule;
ptcl[cmd_offset + 2u] = u32(fill.backdrop);
cmd_offset += 3u;
} else {
if even_odd && (abs(tile.backdrop) & 1) == 0 {
return false;
}
ptcl[cmd_offset] = CMD_SOLID;
cmd_offset += 1u;
}
// Emits the draw command for `tile` into `ptcl` and returns whether anything
// was written: `false` only for an empty tile that the even-odd rule leaves
// uncovered, `true` otherwise.
fn write_path(tile: Tile, tile_ix: u32, linewidth: f32) -> bool {
// A linewidth below -1.0 selects the even-odd fill rule.
let even_odd = linewidth < -1.0;
// We overload the "segments" field to store both count (written by
// path_count stage) and segment allocation (used by path_tiling and
// fine).
let n_segs = tile.segments;
if n_segs != 0u {
// Reserve `n_segs` consecutive entries from the segments bump counter;
// the value returned by atomicAdd is the start of this tile's range.
var seg_ix = atomicAdd(&bump.segments, n_segs);
// Store the bitwise complement of the start index back into the tile
// (presumably so it can be told apart from a plain count — confirm).
tiles[tile_ix].segments = ~seg_ix;
alloc_cmd(4u);
ptcl[cmd_offset] = CMD_FILL;
// Segment count in the upper bits, even-odd flag in bit 0.
let size_and_rule = (n_segs << 1u) | u32(even_odd);
let fill = CmdFill(size_and_rule, seg_ix, tile.backdrop);
ptcl[cmd_offset + 1u] = fill.size_and_rule;
ptcl[cmd_offset + 2u] = fill.seg_data;
ptcl[cmd_offset + 3u] = u32(fill.backdrop);
cmd_offset += 4u;
} else {
let stroke = CmdStroke(tile.segments, 0.5 * linewidth);
ptcl[cmd_offset] = CMD_STROKE;
ptcl[cmd_offset + 1u] = stroke.tile;
ptcl[cmd_offset + 2u] = bitcast<u32>(stroke.half_width);
cmd_offset += 3u;
// No segments in this tile: under even-odd, an even backdrop means the
// tile is uncovered, so emit nothing.
if even_odd && (abs(tile.backdrop) & 1) == 0 {
return false;
}
alloc_cmd(1u);
ptcl[cmd_offset] = CMD_SOLID;
cmd_offset += 1u;
}
return true;
}
Expand Down Expand Up @@ -352,15 +351,15 @@ fn main(
// DRAWTAG_FILL_COLOR
case 0x44u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let rgba_color = scene[dd];
write_color(CmdColor(rgba_color));
}
}
// DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let index = scene[dd];
let info_offset = di + 1u;
write_grad(CMD_LIN_GRAD, index, info_offset);
Expand All @@ -369,7 +368,7 @@ fn main(
// DRAWTAG_FILL_RAD_GRADIENT
case 0x29cu: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let index = scene[dd];
let info_offset = di + 1u;
write_grad(CMD_RAD_GRAD, index, info_offset);
Expand All @@ -378,7 +377,7 @@ fn main(
// DRAWTAG_FILL_IMAGE
case 0x248u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
write_image(di + 1u);
}
}
Expand All @@ -396,7 +395,7 @@ fn main(
// DRAWTAG_END_CLIP
case 0x21u: {
clip_depth -= 1u;
write_path(tile, -1.0);
write_path(tile, tile_ix, -1.0);
let blend = scene[dd];
let alpha = bitcast<f32>(scene[dd + 1u]);
write_end_clip(CmdEndClip(blend, alpha));
Expand Down
Loading

0 comments on commit 7220ce6

Please sign in to comment.