diff --git a/.vscode/settings.json b/.vscode/settings.json index e4341b2a74ac..15ecf8b28a32 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -24,9 +24,12 @@ "cSpell.words": [ "andreas", "bbox", + "bindgroup", "emath", "framebuffer", "hoverable", + "ilog", + "jumpflooding", "Keypoint", "memoffset", "nyud", @@ -37,6 +40,7 @@ "texcoords", "Tonemapper", "tonemapping", + "voronoi", "vram", "Wgsl" ], diff --git a/crates/re_renderer/examples/framework.rs b/crates/re_renderer/examples/framework.rs index b90a9291eb73..2d0964434c67 100644 --- a/crates/re_renderer/examples/framework.rs +++ b/crates/re_renderer/examples/framework.rs @@ -39,11 +39,13 @@ pub trait Example { fn on_keyboard_input(&mut self, input: winit::event::KeyboardInput); } +#[allow(dead_code)] pub struct SplitView { pub target_location: glam::Vec2, pub resolution_in_pixel: [u32; 2], } +#[allow(dead_code)] pub fn split_resolution( resolution: [u32; 2], num_rows: usize, @@ -69,6 +71,7 @@ pub fn split_resolution( pub struct Time { start_time: Instant, last_draw_time: Instant, + pub last_frame_duration: instant::Duration, } impl Time { @@ -120,7 +123,7 @@ impl Application { .await .context("failed to find an appropriate adapter")?; - let hardware_tier = HardwareTier::Web; + let hardware_tier = HardwareTier::default(); hardware_tier.check_downlevel_capabilities(&adapter.get_downlevel_capabilities())?; let (device, queue) = adapter .request_device( @@ -173,6 +176,7 @@ impl Application { time: Time { start_time: Instant::now(), last_draw_time: Instant::now(), + last_frame_duration: instant::Duration::from_secs(0), }, example, @@ -303,6 +307,7 @@ impl Application { let current_time = Instant::now(); let time_passed = current_time - self.time.last_draw_time; self.time.last_draw_time = current_time; + self.time.last_frame_duration = time_passed; // TODO(andreas): Display a median over n frames and while we're on it also stddev thereof. // Do it only every second. 
@@ -329,6 +334,21 @@ impl Application { } } +#[allow(dead_code)] +pub fn load_rerun_mesh(re_ctx: &mut RenderContext) -> Vec { + let reader = std::io::Cursor::new(include_bytes!("rerun.obj.zip")); + let mut zip = zip::ZipArchive::new(reader).unwrap(); + let mut zipped_obj = zip.by_name("rerun.obj").unwrap(); + let mut obj_data = Vec::new(); + std::io::Read::read_to_end(&mut zipped_obj, &mut obj_data).unwrap(); + re_renderer::importer::obj::load_obj_from_buffer( + &obj_data, + re_renderer::resource_managers::ResourceLifeTime::LongLived, + re_ctx, + ) + .unwrap() +} + async fn run(event_loop: EventLoop<()>, window: Window) { let app = Application::::new(event_loop, window).await.unwrap(); app.run(); diff --git a/crates/re_renderer/examples/multiview.rs b/crates/re_renderer/examples/multiview.rs index ada770308be4..0c48dd2b456c 100644 --- a/crates/re_renderer/examples/multiview.rs +++ b/crates/re_renderer/examples/multiview.rs @@ -1,4 +1,4 @@ -use std::{f32::consts::TAU, io::Read}; +use std::f32::consts::TAU; use ecolor::Hsva; use framework::Example; @@ -12,7 +12,6 @@ use re_renderer::{ GenericSkyboxDrawData, LineDrawData, LineStripFlags, MeshDrawData, MeshInstance, TestTriangleDrawData, }, - resource_managers::ResourceLifeTime, view_builder::{OrthographicCameraMode, Projection, TargetConfiguration, ViewBuilder}, Color32, LineStripSeriesBuilder, PointCloudBuilder, RenderContext, Rgba, Size, }; @@ -62,6 +61,7 @@ fn build_mesh_instances( *p, ) * model_mesh_instances.world_from_mesh, additive_tint: *c, + ..Default::default() }, ) }) @@ -206,19 +206,7 @@ impl Example for Multiview { .map(|_| random_color(&mut rnd)) .collect_vec(); - let model_mesh_instances = { - let reader = std::io::Cursor::new(include_bytes!("assets/rerun.obj.zip")); - let mut zip = zip::ZipArchive::new(reader).unwrap(); - let mut zipped_obj = zip.by_name("rerun.obj").unwrap(); - let mut obj_data = Vec::new(); - zipped_obj.read_to_end(&mut obj_data).unwrap(); - 
re_renderer::importer::obj::load_obj_from_buffer( - &obj_data, - ResourceLifeTime::LongLived, - re_ctx, - ) - .unwrap() - }; + let model_mesh_instances = crate::framework::load_rerun_mesh(re_ctx); let mesh_instance_positions_and_colors = lorenz_points(10.0) .iter() diff --git a/crates/re_renderer/examples/outlines.rs b/crates/re_renderer/examples/outlines.rs new file mode 100644 index 000000000000..cb39eed95df4 --- /dev/null +++ b/crates/re_renderer/examples/outlines.rs @@ -0,0 +1,154 @@ +use itertools::Itertools; +use re_renderer::{ + renderer::{MeshInstance, OutlineConfig, OutlineMaskPreference}, + view_builder::{Projection, TargetConfiguration, ViewBuilder}, +}; +use winit::event::{ElementState, VirtualKeyCode}; + +mod framework; + +struct Outlines { + is_paused: bool, + seconds_since_startup: f32, + model_mesh_instances: Vec, +} + +struct MeshProperties { + outline_mask: OutlineMaskPreference, + position: glam::Vec3, + rotation: glam::Quat, +} + +impl framework::Example for Outlines { + fn title() -> &'static str { + "Outlines" + } + + fn new(re_ctx: &mut re_renderer::RenderContext) -> Self { + Outlines { + is_paused: false, + seconds_since_startup: 0.0, + model_mesh_instances: crate::framework::load_rerun_mesh(re_ctx), + } + } + + fn draw( + &mut self, + re_ctx: &mut re_renderer::RenderContext, + resolution: [u32; 2], + time: &framework::Time, + pixels_from_point: f32, + ) -> Vec { + let mut view_builder = ViewBuilder::default(); + + if !self.is_paused { + self.seconds_since_startup += time.last_frame_duration.as_secs_f32(); + } + let seconds_since_startup = self.seconds_since_startup; + // TODO(#1426): unify camera logic between examples. 
+ let camera_position = glam::vec3(1.0, 3.5, 7.0); + + view_builder + .setup_view( + re_ctx, + TargetConfiguration { + name: "OutlinesDemo".into(), + resolution_in_pixel: resolution, + view_from_world: macaw::IsoTransform::look_at_rh( + camera_position, + glam::Vec3::ZERO, + glam::Vec3::Y, + ) + .unwrap(), + projection_from_view: Projection::Perspective { + vertical_fov: 70.0 * std::f32::consts::TAU / 360.0, + near_plane_distance: 0.01, + }, + pixels_from_point, + outline_config: Some(OutlineConfig { + outline_radius_pixel: (seconds_since_startup * 2.0).sin().abs() * 10.0 + + 2.0, + color_layer_a: re_renderer::Rgba::from_rgb(1.0, 0.6, 0.0), + color_layer_b: re_renderer::Rgba::from_rgba_unmultiplied( + 0.25, 0.3, 1.0, 0.5, + ), + }), + ..Default::default() + }, + ) + .unwrap(); + + let outline_mask_large_mesh = match ((seconds_since_startup * 0.5) as u64) % 5 { + 0 => OutlineMaskPreference::None, + 1 => Some([1, 0]), // Same as the y spinning mesh. + 2 => Some([2, 0]), // Different than both meshes, outline A. + 3 => Some([0, 1]), // Same as the x spinning mesh. + 4 => Some([0, 2]), // Different than both meshes, outline B. 
+ _ => unreachable!(), + }; + + let mesh_properties = [ + MeshProperties { + outline_mask: outline_mask_large_mesh, + position: glam::Vec3::ZERO, + rotation: glam::Quat::IDENTITY, + }, + MeshProperties { + outline_mask: Some([1, 0]), + position: glam::vec3(2.0, 0.0, -3.0), + rotation: glam::Quat::from_rotation_y(seconds_since_startup), + }, + MeshProperties { + outline_mask: Some([0, 1]), + position: glam::vec3(-2.0, 1.0, 3.0), + rotation: glam::Quat::from_rotation_x(seconds_since_startup), + }, + ]; + + let instances = mesh_properties + .into_iter() + .flat_map(|props| { + self.model_mesh_instances + .iter() + .map(move |instance| MeshInstance { + gpu_mesh: instance.gpu_mesh.clone(), + mesh: None, + world_from_mesh: glam::Affine3A::from_rotation_translation( + props.rotation, + props.position, + ) * instance.world_from_mesh, + outline_mask: props.outline_mask, + ..Default::default() + }) + }) + .collect_vec(); + + view_builder.queue_draw(&re_renderer::renderer::GenericSkyboxDrawData::new(re_ctx)); + view_builder + .queue_draw(&re_renderer::renderer::MeshDrawData::new(re_ctx, &instances).unwrap()); + + let command_buffer = view_builder + .draw(re_ctx, ecolor::Rgba::TRANSPARENT) + .unwrap(); + + vec![framework::ViewDrawResult { + view_builder, + command_buffer, + target_location: glam::Vec2::ZERO, + }] + } + + fn on_keyboard_input(&mut self, input: winit::event::KeyboardInput) { + #[allow(clippy::single_match)] + match (input.state, input.virtual_keycode) { + (ElementState::Pressed, Some(VirtualKeyCode::Space)) => { + self.is_paused ^= true; + } + _ => {} + } + } +} + +fn main() { + framework::start::(); +} diff --git a/crates/re_renderer/shader/composite.wgsl b/crates/re_renderer/shader/composite.wgsl index 96313383d665..22ce9acabc55 100644 --- a/crates/re_renderer/shader/composite.wgsl +++ b/crates/re_renderer/shader/composite.wgsl @@ -1,17 +1,13 @@ #import <./types.wgsl> #import <./utils/srgb.wgsl> #import <./global_bindings.wgsl> - -struct VertexOutput { - 
@builtin(position) position: Vec4, - @location(0) texcoord: Vec2, -}; +#import <./screen_triangle_vertex.wgsl> @group(1) @binding(0) var input_texture: texture_2d; @fragment -fn main(in: VertexOutput) -> @location(0) Vec4 { +fn main(in: FragmentInput) -> @location(0) Vec4 { // Note that we can't use a simple textureLoad using @builtin(position) here despite the lack of filtering. // The issue is that positions provided by @builtin(position) are not dependent on the set viewport, // but are about the location of the texel in the target texture. diff --git a/crates/re_renderer/shader/generic_skybox.wgsl b/crates/re_renderer/shader/generic_skybox.wgsl index a8584abfd19a..b94e714ff5a6 100644 --- a/crates/re_renderer/shader/generic_skybox.wgsl +++ b/crates/re_renderer/shader/generic_skybox.wgsl @@ -2,11 +2,7 @@ #import <./global_bindings.wgsl> #import <./utils/srgb.wgsl> #import <./utils/camera.wgsl> - -struct VertexOutput { - @builtin(position) position: Vec4, - @location(0) texcoord: Vec2, -}; +#import <./screen_triangle_vertex.wgsl> fn skybox_dark_srgb(dir: Vec3) -> Vec3 { let rgb = dir * 0.5 + Vec3(0.5); @@ -19,7 +15,7 @@ fn skybox_light_srgb(dir: Vec3) -> Vec3 { } @fragment -fn main(in: VertexOutput) -> @location(0) Vec4 { +fn main(in: FragmentInput) -> @location(0) Vec4 { let camera_dir = camera_ray_direction_from_screenuv(in.texcoord); // Messing with direction a bit so it looks like in our old three-d based renderer (for easier comparison) let rgb = skybox_dark_srgb(camera_dir); // TODO(andreas): Allow switching to skybox_light diff --git a/crates/re_renderer/shader/instanced_mesh.wgsl b/crates/re_renderer/shader/instanced_mesh.wgsl index bb5cc5e1cde6..cc7a43be12bb 100644 --- a/crates/re_renderer/shader/instanced_mesh.wgsl +++ b/crates/re_renderer/shader/instanced_mesh.wgsl @@ -18,6 +18,9 @@ struct VertexOut { @location(0) texcoord: Vec2, @location(1) normal_world_space: Vec3, @location(2) additive_tint_rgb: Vec3, + + @location(3) @interpolate(flat) + 
outline_mask: UVec2, }; @vertex @@ -38,12 +41,13 @@ fn vs_main(in_vertex: VertexIn, in_instance: InstanceIn) -> VertexOut { out.texcoord = in_vertex.texcoord; out.normal_world_space = world_normal; out.additive_tint_rgb = linear_from_srgb(in_instance.additive_tint_srgb.rgb); + out.outline_mask = in_instance.outline_mask; return out; } @fragment -fn fs_main(in: VertexOut) -> @location(0) Vec4 { +fn fs_main_shaded(in: VertexOut) -> @location(0) Vec4 { let albedo = textureSample(albedo_texture, trilinear_sampler, in.texcoord).rgb * material.albedo_factor.rgb + in.additive_tint_rgb; @@ -56,3 +60,8 @@ fn fs_main(in: VertexOut) -> @location(0) Vec4 { return Vec4(radiance, 1.0); } + +@fragment +fn fs_main_outline_mask(in: VertexOut) -> @location(0) UVec2 { + return in.outline_mask; +} diff --git a/crates/re_renderer/shader/mesh_vertex.wgsl b/crates/re_renderer/shader/mesh_vertex.wgsl index 4d3ee3df3ecf..904a6cf77ebb 100644 --- a/crates/re_renderer/shader/mesh_vertex.wgsl +++ b/crates/re_renderer/shader/mesh_vertex.wgsl @@ -16,4 +16,5 @@ struct InstanceIn { @location(7) world_from_mesh_normal_row_1: Vec3, @location(8) world_from_mesh_normal_row_2: Vec3, @location(9) additive_tint_srgb: Vec4, + @location(10) outline_mask: UVec2, }; diff --git a/crates/re_renderer/shader/outlines/jumpflooding_init.wgsl b/crates/re_renderer/shader/outlines/jumpflooding_init.wgsl new file mode 100644 index 000000000000..4ef9567e6f9c --- /dev/null +++ b/crates/re_renderer/shader/outlines/jumpflooding_init.wgsl @@ -0,0 +1,79 @@ +#import + +@group(0) @binding(0) +var mask_texture: texture_2d; + +fn has_edge(closest_center_sample: UVec2, sample_coord: IVec2) -> Vec2 { + let mask_neighbor = textureLoad(mask_texture, sample_coord, 0).xy; + return Vec2(closest_center_sample != mask_neighbor); +} + +// Determine *where* in texture coordinates the closest edge to the center is. +// For a more accurate version refer to `jumpflooding_init_msaa.wgsl`. +// This is a simplified version that works on WebGL. 
+@fragment +fn main(in: FragmentInput) -> @location(0) Vec4 { + let resolution = textureDimensions(mask_texture).xy; + let center_coord = IVec2(Vec2(resolution) * in.texcoord); + + let mask_center = textureLoad(mask_texture, center_coord, 0).xy; + + var edge_pos_a_and_b = Vec4(0.0); + var num_edges_a_and_b = Vec2(0.0); + + // A lot of this code is repetitive, but wgsl/naga doesn't know yet how to do static indexing from unrolled loops. + + // Sample closest neighbors top/bottom/left/right + { // right + let edge = has_edge(mask_center, center_coord + IVec2(1, 0)); + let edge_pos = Vec2(1.0, 0.5); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom + let edge = has_edge(mask_center, center_coord + IVec2(0, 1)); + let edge_pos = Vec2(0.5, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // left + let edge = has_edge(mask_center, center_coord + IVec2(-1, 0)); + let edge_pos = Vec2(0.0, 0.5); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // top + let edge = has_edge(mask_center, center_coord + IVec2(0, -1)); + let edge_pos = Vec2(0.5, 0.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + + // Sample closest neighbors diagonally. 
+ { // top-right + let edge = has_edge(mask_center, center_coord + IVec2(1, -1)); + let edge_pos = Vec2(1.0, 0.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom-right + let edge = has_edge(mask_center, center_coord + IVec2(1, 1)); + let edge_pos = Vec2(1.0, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom-left + let edge = has_edge(mask_center, center_coord + IVec2(-1, 1)); + let edge_pos = Vec2(0.0, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // top-left + let edge = has_edge(mask_center, center_coord + IVec2(-1, -1)); + let edge_pos = Vec2(0.0, 0.0); + //edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; // multiplied by zero, optimize out + num_edges_a_and_b += edge; + } + + return compute_pixel_coords(center_coord, edge_pos_a_and_b, num_edges_a_and_b); +} diff --git a/crates/re_renderer/shader/outlines/jumpflooding_init_msaa.wgsl b/crates/re_renderer/shader/outlines/jumpflooding_init_msaa.wgsl new file mode 100644 index 000000000000..af440fd6a09a --- /dev/null +++ b/crates/re_renderer/shader/outlines/jumpflooding_init_msaa.wgsl @@ -0,0 +1,128 @@ +#import + +@group(0) @binding(0) +var mask_texture: texture_multisampled_2d; + +fn has_edge(closest_center_sample: UVec2, sample_coord: IVec2, sample_idx: i32) -> Vec2 { + let mask_neighbor = textureLoad(mask_texture, sample_coord, sample_idx).xy; + return Vec2(closest_center_sample != mask_neighbor); +} + + +// Determine *where* in texture coordinates (with sub-pixel accuracy!) the closest edge to the center is. +// +// In Ben Golus article on line rendering (https://bgolus.medium.com/the-quest-for-very-wide-outlines-ba82ed442cd9), +// anti-aliasing was achieved by a kind of sobel filter on an already resolved target. +// In our case however, we have a number of different masks, identified by an index per-pixel. 
+// Therefore, there is no straight-forward way to resolve this MSAA texture! +// Resolving accurate sub-pixel edges requires us to look at the sub-samples of the MSAA mask directly. +// +// There's a bunch of ways on how to go about this and it's not exactly clear where the trade-offs between quality & performance are. +// But I found that by using our knowledge of the sampling pattern +// we can detect the closest edges to each sample, and therefore get a pretty good result with *relatively* few texture fetches. +// +// We do so by checking particular edges, summing up their sub-sample positions and dividing by the number of edges. +// +// +// About the sampling pattern: +// Vulkan: https://registry.khronos.org/vulkan/specs/1.3-khr-extensions/html/chap25.html#primsrast-multisampling +// Metal: https://developer.apple.com/documentation/metal/mtldevice/2866120-getdefaultsamplepositions +// DX12 does *not* specify the sampling pattern. However DX11 does, again the same for 4 samples: +// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels +// +// (0, 0) _____________ +// | 0 | +// | 1 | +// | 2 | +// | 3 | +// ‾‾‾‾‾‾‾‾‾‾‾‾(1, 1) +// +// var subsample_positions: array = array( +// Vec2(0.375, 0.125), +// Vec2(0.875, 0.375), +// Vec2(0.125, 0.625), +// Vec2(0.625, 0.875) +// ); +// +// Note that the algorithm should still produce _some_ edges if this is not the case! +@fragment +fn main(in: FragmentInput) -> @location(0) Vec4 { + let resolution = textureDimensions(mask_texture).xy; + let center_coord = IVec2(Vec2(resolution) * in.texcoord); + + //let num_samples = textureNumSamples(mask_texture); + // TODO(andreas): Should we assert somehow on textureNumSamples here? 
+ + let mask_top_left = textureLoad(mask_texture, center_coord, 0).xy; + let mask_right_top = textureLoad(mask_texture, center_coord, 1).xy; + let mask_left_bottom = textureLoad(mask_texture, center_coord, 2).xy; + let mask_bottom_right = textureLoad(mask_texture, center_coord, 3).xy; + + var edge_pos_a_and_b = Vec4(0.0); + var num_edges_a_and_b = Vec2(0.0); + + // Internal samples across the center point + // Tried weighting this higher, didn't make a difference in quality since we almost always have only a single edge. + { + let edge = Vec2(mask_top_left != mask_bottom_right) + Vec2(mask_right_top != mask_left_bottom); + num_edges_a_and_b += edge; + edge_pos_a_and_b += edge.xxyy * 0.5; + } + + // A lot of this code is repetitive, but wgsl/naga doesn't know yet how to do static indexing from unrolled loops. + + // Sample closest neighbors top/bottom/left/right + { // right + let edge = has_edge(mask_right_top, center_coord + IVec2(1, 0), 2); + let edge_pos = Vec2(1.0, 0.5); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom + let edge = has_edge(mask_bottom_right, center_coord + IVec2(0, 1), 0); + let edge_pos = Vec2(0.5, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // left + let edge = has_edge(mask_left_bottom, center_coord + IVec2(-1, 0), 1); + let edge_pos = Vec2(0.0, 0.5); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // top + let edge = has_edge(mask_top_left, center_coord + IVec2(0, -1), 3); + let edge_pos = Vec2(0.5, 0.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + + // Sample closest neighbors diagonally. + // This is not strictly necessary, but empirically the result looks a lot better! 
+ { // top-right + let edge = has_edge(mask_right_top, center_coord + IVec2(1, -1), 2); + let edge_pos = Vec2(1.0, 0.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom-right + let edge = has_edge(mask_bottom_right, center_coord + IVec2(1, 1), 0); + let edge_pos = Vec2(1.0, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // bottom-left + let edge = has_edge(mask_left_bottom, center_coord + IVec2(-1, 1), 1); + let edge_pos = Vec2(0.0, 1.0); + edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; + num_edges_a_and_b += edge; + } + { // top-left + let edge = has_edge(mask_top_left, center_coord + IVec2(-1, -1), 3); + let edge_pos = Vec2(0.0, 0.0); + //edge_pos_a_and_b += Vec4(edge_pos, edge_pos) * edge.xxyy; // multiplied by zero, optimize out + num_edges_a_and_b += edge; + } + + return compute_pixel_coords(center_coord, edge_pos_a_and_b, num_edges_a_and_b); +} diff --git a/crates/re_renderer/shader/outlines/jumpflooding_init_shared.wgsl b/crates/re_renderer/shader/outlines/jumpflooding_init_shared.wgsl new file mode 100644 index 000000000000..cf6fdd3da36e --- /dev/null +++ b/crates/re_renderer/shader/outlines/jumpflooding_init_shared.wgsl @@ -0,0 +1,26 @@ +#import <../types.wgsl> +#import <../screen_triangle_vertex.wgsl> + +fn compute_pixel_coords(center_coord: IVec2, unnormalized_edge_pos_a_and_b: Vec4, num_edges_a_and_b: Vec2) -> Vec4 { + // Normalize edges and get range from [0, 1] to [-0.5, 0.5]. + let edge_pos_a_and_b = unnormalized_edge_pos_a_and_b / num_edges_a_and_b.xxyy - Vec4(0.5); + + // We're outputting pixel coordinates (0-res) instead of texture coordinates (0-1). + // This way we don't need to correct for aspect ratio when comparing distances in the jumpflooding steps. + // When computing the actual outlines themselves we're also interested in pixel distances, not texcoord distances. 
+ + var pixel_coord_a: Vec2; + if num_edges_a_and_b.x == 0.0 { + pixel_coord_a = Vec2(inf()); + } else { + pixel_coord_a = Vec2(center_coord) + edge_pos_a_and_b.xy; + } + var pixel_coord_b: Vec2; + if num_edges_a_and_b.y == 0.0 { + pixel_coord_b = Vec2(inf()); + } else { + pixel_coord_b = Vec2(center_coord) + edge_pos_a_and_b.zw; + } + + return Vec4(pixel_coord_a, pixel_coord_b); +} diff --git a/crates/re_renderer/shader/outlines/jumpflooding_step.wgsl b/crates/re_renderer/shader/outlines/jumpflooding_step.wgsl new file mode 100644 index 000000000000..296c8315d92c --- /dev/null +++ b/crates/re_renderer/shader/outlines/jumpflooding_step.wgsl @@ -0,0 +1,51 @@ +#import <../types.wgsl> +#import <../screen_triangle_vertex.wgsl> + +@group(0) @binding(0) +var voronoi_texture: texture_2d; +@group(0) @binding(1) +var voronoi_sampler: sampler; + +struct FrameUniformBuffer { + step_width: i32, + // There is actually more padding here. We're only putting this to satisfy lack of + // wgt::DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED + padding: IVec3, +}; +@group(0) @binding(2) +var uniforms: FrameUniformBuffer; + + +@fragment +fn main(in: FragmentInput) -> @location(0) Vec4 { + let resolution = Vec2(textureDimensions(voronoi_texture).xy); + let pixel_step = Vec2(f32(uniforms.step_width), f32(uniforms.step_width)) / resolution; + let pixel_coordinates = resolution * in.texcoord; + + var closest_positions_a = Vec2(-inf()); + var closest_distance_sq_a = inf(); + var closest_positions_b = Vec2(-inf()); + var closest_distance_sq_b = inf(); + + for (var y: i32 = -1; y <= 1; y += 1) { + for (var x: i32 = -1; x <= 1; x += 1) { + let texcoord = in.texcoord + Vec2(f32(x), f32(y)) * pixel_step; + let positions_a_and_b = textureSampleLevel(voronoi_texture, voronoi_sampler, texcoord, 0.0); + let to_positions_a_and_b = positions_a_and_b - pixel_coordinates.xyxy; + + let distance_sq_a = dot(to_positions_a_and_b.xy, to_positions_a_and_b.xy); + if closest_distance_sq_a > distance_sq_a { 
+ closest_distance_sq_a = distance_sq_a; + closest_positions_a = positions_a_and_b.xy; + } + + let distance_sq_b = dot(to_positions_a_and_b.zw, to_positions_a_and_b.zw); + if closest_distance_sq_b > distance_sq_b { + closest_distance_sq_b = distance_sq_b; + closest_positions_b = positions_a_and_b.zw; + } + } + } + + return Vec4(closest_positions_a, closest_positions_b); +} diff --git a/crates/re_renderer/shader/outlines/outlines_from_voronoi.wgsl b/crates/re_renderer/shader/outlines/outlines_from_voronoi.wgsl new file mode 100644 index 000000000000..10a6b5164779 --- /dev/null +++ b/crates/re_renderer/shader/outlines/outlines_from_voronoi.wgsl @@ -0,0 +1,41 @@ +#import <../types.wgsl> +#import <../global_bindings.wgsl> +#import <../screen_triangle_vertex.wgsl> + +@group(1) @binding(0) +var voronoi_texture: texture_2d; + +struct OutlineConfigUniformBuffer { + color_layer_a: Vec4, + color_layer_b: Vec4, + outline_radius_pixel: f32, +}; +@group(1) @binding(1) +var uniforms: OutlineConfigUniformBuffer; + +@fragment +fn main(in: FragmentInput) -> @location(0) Vec4 { + let resolution = Vec2(textureDimensions(voronoi_texture).xy); + let pixel_coordinates = resolution * in.texcoord; + let closest_positions = textureSample(voronoi_texture, nearest_sampler, in.texcoord); + let to_closest_a_and_b = (closest_positions - pixel_coordinates.xyxy); + let distance_pixel_a = length(to_closest_a_and_b.xy); + let distance_pixel_b = length(to_closest_a_and_b.zw); + + let sharpness = 1.0; // Fun to play around with, but not exposed yet. + let outline_a = saturate((uniforms.outline_radius_pixel - distance_pixel_a) * sharpness); + let outline_b = saturate((uniforms.outline_radius_pixel - distance_pixel_b) * sharpness); + + let color_a = outline_a * uniforms.color_layer_a; + let color_b = outline_b * uniforms.color_layer_b; + + // Blend B over A. + let color = color_a * (1.0 - color_b.a) + color_b; + return color; + + // Show only the outline. Useful for debugging. 
+ //return Vec4(color.rgb, 1.0); + + // Show the raw voronoi texture. Useful for debugging. + //return Vec4(closest_positions.xy / resolution, 0.0, 1.0); +} diff --git a/crates/re_renderer/shader/screen_triangle.wgsl b/crates/re_renderer/shader/screen_triangle.wgsl index ed387a11593c..293c7c0d7ea9 100644 --- a/crates/re_renderer/shader/screen_triangle.wgsl +++ b/crates/re_renderer/shader/screen_triangle.wgsl @@ -1,12 +1,5 @@ #import <./types.wgsl> - -struct VertexOutput { - // Mark output position as invariant so it's safe to use it with depth test Equal. - // Without @invariant, different usages in different render pipelines might optimize differently, - // causing slightly different results. - @invariant @builtin(position) position: Vec4, - @location(0) texcoord: Vec2, -}; +#import <./screen_triangle_vertex.wgsl> var positions: array = array( Vec2(-1.0, -3.0), diff --git a/crates/re_renderer/shader/screen_triangle_vertex.wgsl b/crates/re_renderer/shader/screen_triangle_vertex.wgsl new file mode 100644 index 000000000000..224da3317d4b --- /dev/null +++ b/crates/re_renderer/shader/screen_triangle_vertex.wgsl @@ -0,0 +1,16 @@ +#import <./types.wgsl> + +struct VertexOutput { + // Mark output position as invariant so it's safe to use it with depth test Equal. + // Without @invariant, different usages in different render pipelines might optimize differently, + // causing slightly different results. + @invariant @builtin(position) position: Vec4, + @location(0) texcoord: Vec2, +}; + +// Workaround for https://github.com/gfx-rs/naga/issues/2252 +// Naga emits invariant flag on fragment input, but some implementations don't allow this. 
+// Therefore we drop position here (we could still pass it in if needed if we drop the invariant flag) +struct FragmentInput { + @location(0) texcoord: Vec2, +}; diff --git a/crates/re_renderer/shader/types.wgsl b/crates/re_renderer/shader/types.wgsl index ed9251e549b5..842170f1a34f 100644 --- a/crates/re_renderer/shader/types.wgsl +++ b/crates/re_renderer/shader/types.wgsl @@ -25,3 +25,7 @@ const Z = Vec3(0.0, 0.0, 1.0); const ZERO = Vec4(0.0, 0.0, 0.0, 0.0); const ONE = Vec4(1.0, 1.0, 1.0, 1.0); + +fn inf() -> f32 { + return 1.0 / 0.0; +} diff --git a/crates/re_renderer/shader/utils/camera.wgsl b/crates/re_renderer/shader/utils/camera.wgsl index 4be97aad966b..dc8415f31b18 100644 --- a/crates/re_renderer/shader/utils/camera.wgsl +++ b/crates/re_renderer/shader/utils/camera.wgsl @@ -1,8 +1,4 @@ -// TODO(andreas): global_bindings are imported implicitly - -fn inf() -> f32 { - return 1.0 / 0.0; -} +#import <../global_bindings.wgsl> // True if the camera is orthographic fn is_camera_orthographic() -> bool { diff --git a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs index 758a5a4fc810..8d1ee77d43bb 100644 --- a/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs +++ b/crates/re_renderer/src/allocator/cpu_write_gpu_read_belt.rs @@ -40,7 +40,12 @@ where #[inline(always)] fn as_slice(&mut self) -> &mut [T] { // TODO(andreas): Is this access slow given that it internally goes through a trait interface? Should we keep the pointer around? - &mut bytemuck::cast_slice_mut(&mut self.write_view)[self.unwritten_element_range.clone()] + // `write_view` may have padding at the end that isn't a multiple of T's size. + // Bytemuck gets unhappy about that, so cast the correct range. 
+ bytemuck::cast_slice_mut( + &mut self.write_view[self.unwritten_element_range.start * std::mem::size_of::() + ..self.unwritten_element_range.end * std::mem::size_of::()], + ) } /// Pushes a slice of elements into the buffer. diff --git a/crates/re_renderer/src/allocator/uniform_buffer_fill.rs b/crates/re_renderer/src/allocator/uniform_buffer_fill.rs index f63cb69fd4ce..272f82141764 100644 --- a/crates/re_renderer/src/allocator/uniform_buffer_fill.rs +++ b/crates/re_renderer/src/allocator/uniform_buffer_fill.rs @@ -84,7 +84,7 @@ pub fn create_and_fill_uniform_buffer_batch( /// See [`create_and_fill_uniform_buffer`]. pub fn create_and_fill_uniform_buffer( - ctx: &mut RenderContext, + ctx: &RenderContext, label: DebugLabel, content: T, ) -> BindGroupEntry { diff --git a/crates/re_renderer/src/config.rs b/crates/re_renderer/src/config.rs index 713f868ef667..6477573c3a19 100644 --- a/crates/re_renderer/src/config.rs +++ b/crates/re_renderer/src/config.rs @@ -7,11 +7,24 @@ pub enum HardwareTier { /// Maintains strict WebGL capability. Web, // Run natively with Vulkan/Metal but don't demand anything that isn't widely available. - //Native, + Native, // Run natively with Vulkan/Metal and require additional features. //HighEnd } +impl Default for HardwareTier { + fn default() -> Self { + #[cfg(target_arch = "wasm32")] + { + Self::Web + } + #[cfg(not(target_arch = "wasm32"))] + { + Self::Native + } + } +} + impl HardwareTier { /// Wgpu limits required by the given hardware tier. pub fn limits(self) -> wgpu::Limits { diff --git a/crates/re_renderer/src/importer/gltf.rs b/crates/re_renderer/src/importer/gltf.rs index d0bdc298e96b..edec8d9ebe69 100644 --- a/crates/re_renderer/src/importer/gltf.rs +++ b/crates/re_renderer/src/importer/gltf.rs @@ -12,7 +12,7 @@ use crate::{ GpuMeshHandle, GpuTexture2DHandle, ResourceLifeTime, Texture2DCreationDesc, TextureManager2D, }, - Color32, RenderContext, + RenderContext, }; /// Loads both gltf and glb into the mesh & texture manager. 
@@ -289,7 +289,7 @@ fn gather_instances_recursive( gpu_mesh: gpu_mesh.clone(), mesh: Some(mesh.clone()), world_from_mesh: transform, - additive_tint: Color32::TRANSPARENT, + ..Default::default() }); } } diff --git a/crates/re_renderer/src/importer/obj.rs b/crates/re_renderer/src/importer/obj.rs index f06908683d70..cbf7cc31964f 100644 --- a/crates/re_renderer/src/importer/obj.rs +++ b/crates/re_renderer/src/importer/obj.rs @@ -7,7 +7,7 @@ use crate::{ mesh::{mesh_vertices::MeshVertexData, Material, Mesh}, renderer::MeshInstance, resource_managers::ResourceLifeTime, - Color32, RenderContext, + RenderContext, }; /// Load a [Wavefront .obj file](https://en.wikipedia.org/wiki/Wavefront_.obj_file) @@ -73,8 +73,7 @@ pub fn load_obj_from_buffer( MeshInstance { gpu_mesh, mesh: Some(Arc::new(mesh)), - world_from_mesh: glam::Affine3A::IDENTITY, - additive_tint: Color32::TRANSPARENT, + ..Default::default() } }) .collect()) diff --git a/crates/re_renderer/src/renderer/compositor.rs b/crates/re_renderer/src/renderer/compositor.rs index 88e8d1ca341c..5cdbfb25d00a 100644 --- a/crates/re_renderer/src/renderer/compositor.rs +++ b/crates/re_renderer/src/renderer/compositor.rs @@ -8,7 +8,10 @@ use crate::{ }, }; -use super::{DrawData, DrawPhase, FileResolver, FileSystem, RenderContext, Renderer}; +use super::{ + screen_triangle_vertex_shader, DrawData, DrawPhase, FileResolver, FileSystem, RenderContext, + Renderer, +}; use smallvec::smallvec; @@ -77,6 +80,7 @@ impl Renderer for Compositor { }, ); + let vertex_handle = screen_triangle_vertex_shader(pools, device, resolver); let render_pipeline = pools.render_pipelines.get_or_create( device, &RenderPipelineDesc { @@ -90,14 +94,7 @@ impl Renderer for Compositor { &pools.bind_group_layouts, ), vertex_entrypoint: "main".into(), - vertex_handle: pools.shader_modules.get_or_create( - device, - resolver, - &ShaderModuleDesc { - label: "screen_triangle (vertex)".into(), - source: include_file!("../../shader/screen_triangle.wgsl"), - }, - ), + 
vertex_handle, fragment_entrypoint: "main".into(), fragment_handle: pools.shader_modules.get_or_create( device, diff --git a/crates/re_renderer/src/renderer/generic_skybox.rs b/crates/re_renderer/src/renderer/generic_skybox.rs index 34f4ba8e9fc7..d2bfd9d347db 100644 --- a/crates/re_renderer/src/renderer/generic_skybox.rs +++ b/crates/re_renderer/src/renderer/generic_skybox.rs @@ -3,6 +3,7 @@ use smallvec::smallvec; use crate::{ context::SharedRendererData, include_file, + renderer::screen_triangle_vertex_shader, view_builder::ViewBuilder, wgpu_resources::{ GpuRenderPipelineHandle, PipelineLayoutDesc, RenderPipelineDesc, ShaderModuleDesc, @@ -51,6 +52,7 @@ impl Renderer for GenericSkybox { ) -> Self { crate::profile_function!(); + let vertex_handle = screen_triangle_vertex_shader(pools, device, resolver); let render_pipeline = pools.render_pipelines.get_or_create( device, &RenderPipelineDesc { @@ -65,14 +67,7 @@ impl Renderer for GenericSkybox { ), vertex_entrypoint: "main".into(), - vertex_handle: pools.shader_modules.get_or_create( - device, - resolver, - &ShaderModuleDesc { - label: "screen_triangle (vertex)".into(), - source: include_file!("../../shader/screen_triangle.wgsl"), - }, - ), + vertex_handle, fragment_entrypoint: "main".into(), fragment_handle: pools.shader_modules.get_or_create( device, diff --git a/crates/re_renderer/src/renderer/mesh_renderer.rs b/crates/re_renderer/src/renderer/mesh_renderer.rs index 7bbd955d5542..8c88f15c0ccc 100644 --- a/crates/re_renderer/src/renderer/mesh_renderer.rs +++ b/crates/re_renderer/src/renderer/mesh_renderer.rs @@ -11,6 +11,7 @@ use smallvec::smallvec; use crate::{ include_file, mesh::{gpu_data::MaterialUniformBuffer, mesh_vertices, GpuMesh, Mesh}, + renderer::OutlineMaskProcessor, resource_managers::GpuMeshHandle, view_builder::ViewBuilder, wgpu_resources::{ @@ -21,8 +22,8 @@ use crate::{ }; use super::{ - DrawData, DrawPhase, FileResolver, FileSystem, RenderContext, Renderer, SharedRendererData, - 
WgpuResourcePools, + DrawData, DrawPhase, FileResolver, FileSystem, OutlineMaskPreference, RenderContext, Renderer, + SharedRendererData, WgpuResourcePools, }; mod gpu_data { @@ -33,7 +34,7 @@ mod gpu_data { /// Element in the gpu residing instance buffer. /// /// Keep in sync with `mesh_vertex.wgsl` - #[repr(C, packed)] + #[repr(C)] #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] pub struct InstanceData { // Don't use aligned glam types because they enforce alignment. @@ -47,6 +48,8 @@ mod gpu_data { pub world_from_mesh_normal_row_2: [f32; 3], pub additive_tint: Color32, + // Need only the first two bytes, but we want to keep everything aligned to at least 4 bytes. + pub outline_mask: [u8; 4], } impl InstanceData { @@ -69,6 +72,9 @@ mod gpu_data { wgpu::VertexFormat::Float32x3, // Tint color wgpu::VertexFormat::Unorm8x4, + // Outline mask. + // This adds a tiny bit of overhead to all instances during non-outline pass, but the alternative is having yet another vertex buffer. + wgpu::VertexFormat::Uint8x2, ] .into_iter(), ), @@ -81,6 +87,9 @@ mod gpu_data { struct MeshBatch { mesh: GpuMesh, count: u32, + /// Number of meshes out of `count` which have outlines. + /// We put all instances with outlines at the start of the instance buffer range. + count_with_outlines: u32, } #[derive(Clone)] @@ -109,6 +118,21 @@ pub struct MeshInstance { /// Per-instance (as opposed to per-material/mesh!) tint color that is added to the albedo texture. /// Alpha channel is currently unused. pub additive_tint: Color32, + + /// Optional outline mask setting for this instance. 
+ pub outline_mask: OutlineMaskPreference, +} + +impl Default for MeshInstance { + fn default() -> Self { + Self { + gpu_mesh: GpuMeshHandle::Invalid, + mesh: None, + world_from_mesh: macaw::Affine3A::IDENTITY, + additive_tint: Color32::TRANSPARENT, + outline_mask: None, + } + } } impl MeshDrawData { @@ -149,89 +173,95 @@ impl MeshDrawData { }, ); - let mut mesh_runs = Vec::new(); + let mut batches = Vec::new(); { let mut instance_buffer_staging = ctx - .queue - .write_buffer_with( - &instance_buffer, - 0, - instance_buffer_size.try_into().unwrap(), - ) - .unwrap(); // Fails only if mapping is bigger than buffer size. - let instance_buffer_staging: &mut [gpu_data::InstanceData] = - bytemuck::cast_slice_mut(&mut instance_buffer_staging); + .cpu_write_gpu_read_belt + .lock() + .allocate::( + &ctx.device, + &ctx.gpu_resources.buffers, + instances.len(), + ); + let mesh_manager = ctx.mesh_manager.read(); let mut num_processed_instances = 0; + // TODO(#1530) This grouping doesn't seem to do its job correctly. We're not actually batching correctly right now in all cases. for (mesh, instances) in &instances.iter().group_by(|instance| &instance.gpu_mesh) { let mut count = 0; - for (instance, gpu_instance) in instances.zip( - instance_buffer_staging - .iter_mut() - .skip(num_processed_instances), - ) { + let mut count_with_outlines = 0; + + // Put all instances with outlines at the start of the instance buffer range. 
+ let instances = instances + .sorted_by(|a, b| a.outline_mask.is_none().cmp(&b.outline_mask.is_none())); + + for instance in instances { count += 1; + count_with_outlines += instance.outline_mask.is_some() as u32; let world_from_mesh_mat3 = instance.world_from_mesh.matrix3; - gpu_instance.world_from_mesh_row_0 = world_from_mesh_mat3 - .row(0) - .extend(instance.world_from_mesh.translation.x) - .to_array(); - gpu_instance.world_from_mesh_row_1 = world_from_mesh_mat3 - .row(1) - .extend(instance.world_from_mesh.translation.y) - .to_array(); - gpu_instance.world_from_mesh_row_2 = world_from_mesh_mat3 - .row(2) - .extend(instance.world_from_mesh.translation.z) - .to_array(); - let world_from_mesh_normal = instance.world_from_mesh.matrix3.inverse().transpose(); - gpu_instance.world_from_mesh_normal_row_0 = - world_from_mesh_normal.row(0).to_array(); - gpu_instance.world_from_mesh_normal_row_1 = - world_from_mesh_normal.row(1).to_array(); - gpu_instance.world_from_mesh_normal_row_2 = - world_from_mesh_normal.row(2).to_array(); - - gpu_instance.additive_tint = instance.additive_tint; + instance_buffer_staging.push(gpu_data::InstanceData { + world_from_mesh_row_0: world_from_mesh_mat3 + .row(0) + .extend(instance.world_from_mesh.translation.x) + .to_array(), + world_from_mesh_row_1: world_from_mesh_mat3 + .row(1) + .extend(instance.world_from_mesh.translation.y) + .to_array(), + world_from_mesh_row_2: world_from_mesh_mat3 + .row(2) + .extend(instance.world_from_mesh.translation.z) + .to_array(), + world_from_mesh_normal_row_0: world_from_mesh_normal.row(0).to_array(), + world_from_mesh_normal_row_1: world_from_mesh_normal.row(1).to_array(), + world_from_mesh_normal_row_2: world_from_mesh_normal.row(2).to_array(), + additive_tint: instance.additive_tint, + outline_mask: instance + .outline_mask + .map_or([0, 0, 0, 0], |mask| [mask[0], mask[1], 0, 0]), + }); } num_processed_instances += count; - mesh_runs.push((mesh, count as u32)); + + // We resolve the meshes here 
already, so the actual draw call doesn't need to know about the MeshManager. + let mesh = mesh_manager.get(mesh)?; + batches.push(MeshBatch { + mesh: mesh.clone(), + count: count as _, + count_with_outlines, + }); } assert_eq!(num_processed_instances, instances.len()); + instance_buffer_staging.copy_to_buffer( + ctx.active_frame.encoder.lock().get(), + &instance_buffer, + 0, + ); } - // We resolve the meshes here already, so the actual draw call doesn't need to know about the MeshManager. - let batches: Result, _> = mesh_runs - .into_iter() - .map(|(mesh_handle, count)| { - ctx.mesh_manager - .read() - .get(mesh_handle) - .map(|mesh| MeshBatch { - mesh: mesh.clone(), - count, - }) - }) - .collect(); - Ok(MeshDrawData { - batches: batches?, + batches, instance_buffer: Some(instance_buffer), }) } } pub struct MeshRenderer { - render_pipeline: GpuRenderPipelineHandle, + render_pipeline_shaded: GpuRenderPipelineHandle, + render_pipeline_outline_mask: GpuRenderPipelineHandle, pub bind_group_layout: GpuBindGroupLayoutHandle, } impl Renderer for MeshRenderer { type RendererDrawData = MeshDrawData; + fn participated_phases() -> &'static [DrawPhase] { + &[DrawPhase::Opaque, DrawPhase::OutlineMask] + } + fn create_renderer( shared_data: &SharedRendererData, pools: &mut WgpuResourcePools, @@ -288,27 +318,29 @@ impl Renderer for MeshRenderer { }, ); - let render_pipeline = pools.render_pipelines.get_or_create( + let primitive = wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + cull_mode: None, //Some(wgpu::Face::Back), // TODO(andreas): Need to specify from outside if mesh is CW or CCW? + ..Default::default() + }; + // Put instance vertex buffer on slot 0 since it doesn't change for several draws. 
+ let vertex_buffers: smallvec::SmallVec<[_; 4]> = + std::iter::once(gpu_data::InstanceData::vertex_buffer_layout()) + .chain(mesh_vertices::vertex_buffer_layouts()) + .collect(); + + let render_pipeline_shaded = pools.render_pipelines.get_or_create( device, &RenderPipelineDesc { - label: "mesh renderer".into(), + label: "mesh renderer - shaded".into(), pipeline_layout, vertex_entrypoint: "vs_main".into(), vertex_handle: shader_module, - fragment_entrypoint: "fs_main".into(), + fragment_entrypoint: "fs_main_shaded".into(), fragment_handle: shader_module, - - // Put instance vertex buffer on slot 0 since it doesn't change for several draws. - vertex_buffers: std::iter::once(gpu_data::InstanceData::vertex_buffer_layout()) - .chain(mesh_vertices::vertex_buffer_layouts()) - .collect(), - + vertex_buffers: vertex_buffers.clone(), render_targets: smallvec![Some(ViewBuilder::MAIN_TARGET_COLOR_FORMAT.into())], - primitive: wgpu::PrimitiveState { - topology: wgpu::PrimitiveTopology::TriangleList, - cull_mode: None, //Some(wgpu::Face::Back), // TODO(andreas): Need to specify from outside if mesh is CW or CCW? 
- ..Default::default() - }, + primitive, depth_stencil: ViewBuilder::MAIN_TARGET_DEFAULT_DEPTH_STATE, multisample: ViewBuilder::MAIN_TARGET_DEFAULT_MSAA_STATE, }, @@ -316,8 +348,30 @@ impl Renderer for MeshRenderer { &pools.shader_modules, ); + let render_pipeline_outline_mask = pools.render_pipelines.get_or_create( + device, + &RenderPipelineDesc { + label: "mesh renderer - outline mask".into(), + pipeline_layout, + vertex_entrypoint: "vs_main".into(), + vertex_handle: shader_module, + fragment_entrypoint: "fs_main_outline_mask".into(), + fragment_handle: shader_module, + vertex_buffers, + render_targets: smallvec![Some(OutlineMaskProcessor::MASK_FORMAT.into())], + primitive, + depth_stencil: OutlineMaskProcessor::MASK_DEPTH_STATE, + multisample: OutlineMaskProcessor::get_mask_default_msaa_state( + shared_data.config.hardware_tier, + ), + }, + &pools.pipeline_layouts, + &pools.shader_modules, + ); + MeshRenderer { - render_pipeline, + render_pipeline_shaded, + render_pipeline_outline_mask, bind_group_layout, } } @@ -325,7 +379,7 @@ impl Renderer for MeshRenderer { fn draw<'a>( &self, pools: &'a WgpuResourcePools, - _phase: DrawPhase, + phase: DrawPhase, pass: &mut wgpu::RenderPass<'a>, draw_data: &'a Self::RendererDrawData, ) -> anyhow::Result<()> { @@ -335,13 +389,24 @@ impl Renderer for MeshRenderer { return Ok(()); // Instance buffer was empty. 
}; - let pipeline = pools.render_pipelines.get_resource(self.render_pipeline)?; + let pipeline_handle = if phase == DrawPhase::OutlineMask { + self.render_pipeline_outline_mask + } else { + self.render_pipeline_shaded + }; + let pipeline = pools.render_pipelines.get_resource(pipeline_handle)?; + pass.set_pipeline(pipeline); pass.set_vertex_buffer(0, instance_buffer.slice(..)); let mut instance_start_index = 0; for mesh_batch in &draw_data.batches { + if phase == DrawPhase::OutlineMask && mesh_batch.count_with_outlines == 0 { + instance_start_index += mesh_batch.count; + continue; + } + let vertex_buffer_combined = &mesh_batch.mesh.vertex_buffer_combined; let index_buffer = &mesh_batch.mesh.index_buffer; @@ -358,17 +423,22 @@ impl Renderer for MeshRenderer { wgpu::IndexFormat::Uint32, ); - let instance_range = instance_start_index..(instance_start_index + mesh_batch.count); + let num_meshes_to_draw = if phase == DrawPhase::OutlineMask { + mesh_batch.count_with_outlines + } else { + mesh_batch.count + }; + let instance_range = instance_start_index..(instance_start_index + num_meshes_to_draw); for material in &mesh_batch.mesh.materials { - debug_assert!(mesh_batch.count > 0); + debug_assert!(num_meshes_to_draw > 0); pass.set_bind_group(1, &material.bind_group, &[]); - pass.draw_indexed(material.index_range.clone(), 0, instance_range.clone()); } - instance_start_index = instance_range.end; + // Advance instance start index with *total* number of instances in this batch. 
+ instance_start_index += mesh_batch.count; } Ok(()) diff --git a/crates/re_renderer/src/renderer/mod.rs b/crates/re_renderer/src/renderer/mod.rs index dbe7d37c82b7..26857a052cdc 100644 --- a/crates/re_renderer/src/renderer/mod.rs +++ b/crates/re_renderer/src/renderer/mod.rs @@ -27,6 +27,10 @@ pub use mesh_renderer::{MeshDrawData, MeshInstance}; pub mod compositor; +mod outlines; +pub(crate) use outlines::OutlineMaskProcessor; +pub use outlines::{OutlineConfig, OutlineMaskPreference}; + use crate::{ context::{RenderContext, SharedRendererData}, wgpu_resources::WgpuResourcePools, @@ -77,6 +81,7 @@ pub trait Renderer { /// /// Currently we do not support sorting *within* a rendering phase! /// See [#702](https://github.com/rerun-io/rerun/issues/702) +/// Within a phase `DrawData` are drawn in the order they are submitted in. #[derive(Clone, Copy, Hash, PartialEq, Eq, Debug)] pub enum DrawPhase { /// Opaque objects, performing reads/writes to the depth buffer. @@ -87,6 +92,25 @@ pub enum DrawPhase { /// Background, rendering where depth wasn't written. Background, + /// Render mask for things that should get outlines. + OutlineMask, + /// Drawn when compositing with the main target. Compositing, } + +/// Gets or creates a vertex shader module for drawing a screen filling triangle. +fn screen_triangle_vertex_shader( + pools: &mut WgpuResourcePools, + device: &wgpu::Device, + resolver: &mut FileResolver, +) -> crate::wgpu_resources::GpuShaderModuleHandle { + pools.shader_modules.get_or_create( + device, + resolver, + &crate::wgpu_resources::ShaderModuleDesc { + label: "screen_triangle (vertex)".into(), + source: crate::include_file!("../../shader/screen_triangle.wgsl"), + }, + ) +} diff --git a/crates/re_renderer/src/renderer/outlines.rs b/crates/re_renderer/src/renderer/outlines.rs new file mode 100644 index 000000000000..8823aae36ff9 --- /dev/null +++ b/crates/re_renderer/src/renderer/outlines.rs @@ -0,0 +1,694 @@ +//! Outlines as postprocessing effect. +//! +//! 
This module provides the [`OutlineMaskProcessor`] which handles render passes around outlines +//! and [`OutlineCompositor`] which handles compositing the outlines into the final image. +//! +//! There are two channels (in shader code referred to as A and B) that are handled simultaneously. +//! For configuring the look of the outline refer to [`OutlineConfig`]. +//! For setting outlines for an individual primitive from another [`Renderer`]/[`DrawData`], +//! check for [`OutlineMaskPreference`] settings on that primitive. +//! +//! How it works: +//! ============= +//! The basic approach follows closely @bgolus' [blog post](https://bgolus.medium.com/the-quest-for-very-wide-outlines-ba82ed442cd9) +//! on jump-flooding based outlines. +//! +//! Quick recap & overview: +//! * Render scene into a mask texture +//! * Extract a contour from the mask texture, for each contour pixel write the position in the (to-be) voronoi texture. +//! * in our case we extract all pixels at which the mask changes (details below) +//! * Jump-flooding iterations: For each pixel in the voronoi texture, +//! sample the current pixel and an 8-neighborhood at a certain, for each pass decreasing, distance and write out the closest position seen so far. +//! * This is repeated for `log2(outline_width)` iterations. +//! * During composition, extract an outline by checking the distance to the closest contour using the voronoi texture +//! +//! What makes our implementation (a little bit) special: +//! ----------------------------------------------------- +//! In short: We have more complex outline relationships but do so without additional passes! +//! +//! * Different objects may have outlines between each other +//! * This is achieved by making the mask texture a 2 channel texture, where each channel is a different 8bit object id. +//! * object ids are arbitrary and only for the purpose of distinguishing between outlines +//! 
* Since we now no longer can resolve anti-aliasing in a straightforward manner (can't blend object ids!), +//! * This implies a custom resolve during contour extraction! +//! * It seems to force our hand towards outlines that extend inwards: +//! * For each channel A & B we only get a single voronoi texture (fused into one 4 channel texture), +//! meaning that we only have a single unsigned distance to the closest contour. +//! If we don't want to ignore objects drawn upon each other, we need to compute the distance to any contour (== pixel where object id changes). +//! * It might be possible to mask out inner outlines during composition, but it's not clear what the exact masking rules are for this. +//! * We use two channels (A and B) for outlines, so that we can have two independent outlines (even for the same object if desired) +//! * We do this in a single pass by using a 2 channel texture on the mask (object id A, object id B) and +//! a 4 channel texture on the voronoi texture (xy coordinates for A, xy coordinates for B) +//! +//! More details can be found in the respective shader code. +//! + +use super::{screen_triangle_vertex_shader, DrawData, DrawPhase, Renderer}; +use crate::{ + allocator::{create_and_fill_uniform_buffer, create_and_fill_uniform_buffer_batch}, + config::HardwareTier, + context::SharedRendererData, + include_file, + view_builder::ViewBuilder, + wgpu_resources::{ + BindGroupDesc, BindGroupEntry, BindGroupLayoutDesc, GpuBindGroup, GpuBindGroupLayoutHandle, + GpuRenderPipelineHandle, GpuTexture, GpuTextureHandle, PipelineLayoutDesc, PoolError, + RenderPipelineDesc, SamplerDesc, ShaderModuleDesc, WgpuResourcePools, + }, + DebugLabel, FileResolver, FileSystem, RenderContext, +}; + +use smallvec::smallvec; + +/// What outline (if any) should be drawn. +/// +/// Outlines have two channels (referred to as A and B). +/// Each channel can distinguish up to 255 different objects, each getting their own outline. 
+/// +/// Object index 0 is special: It is the default background of each outline channel, thus rendering with it +/// is a form of "active no outline", effectively subtracting from the outline channel. +pub type OutlineMaskPreference = Option<[u8; 2]>; + +#[derive(Clone, Debug)] +pub struct OutlineConfig { + /// Outline radius for both layers in pixels. Fractional pixels are valid. + /// + /// Could do different radius for both layers if the need arises, but for now this simplifies things. + pub outline_radius_pixel: f32, + + /// Premultiplied RGBA color for the first outline layer. + pub color_layer_a: crate::Rgba, + /// Premultiplied RGBA color for the second outline layer. + pub color_layer_b: crate::Rgba, +} + +// TODO(andreas): Is this a sort of DrawPhase implementor? Need a system for this. +pub struct OutlineMaskProcessor { + label: DebugLabel, + + mask_texture: GpuTexture, + mask_depth: GpuTexture, + voronoi_textures: [GpuTexture; 2], + + bind_group_jumpflooding_init: GpuBindGroup, + bind_group_jumpflooding_steps: Vec, + bind_group_draw_outlines: GpuBindGroup, + + render_pipeline_jumpflooding_init: GpuRenderPipelineHandle, + render_pipeline_jumpflooding_step: GpuRenderPipelineHandle, +} + +mod gpu_data { + use crate::wgpu_buffer_types; + + /// Keep in sync with `jumpflooding_step.wgsl` + #[repr(C, align(256))] + #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] + pub struct JumpfloodingStepUniformBuffer { + pub step_width: wgpu_buffer_types::U32RowPadded, + /// All this padding hurts. `step_width` be a PushConstant but they are not widely supported enough! 
+ pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 1], + } + + /// Keep in sync with `outlines_from_voronoi.wgsl` + #[repr(C, align(256))] + #[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)] + pub struct OutlineConfigUniformBuffer { + pub color_layer_a: wgpu_buffer_types::Vec4, + pub color_layer_b: wgpu_buffer_types::Vec4, + pub outline_radius_pixel: wgpu_buffer_types::F32RowPadded, + pub end_padding: [wgpu_buffer_types::PaddingRow; 16 - 3], + } +} + +impl OutlineMaskProcessor { + /// Format of the outline mask target. + /// + /// Two channels with each 256 object ids. + pub const MASK_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rg8Uint; + pub const MASK_DEPTH_FORMAT: wgpu::TextureFormat = ViewBuilder::MAIN_TARGET_DEPTH_FORMAT; + pub const MASK_DEPTH_STATE: Option = + ViewBuilder::MAIN_TARGET_DEFAULT_DEPTH_STATE; + + /// Holds two pairs of pixel coordinates (one for each layer). + const VORONOI_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; + + /// Default MSAA state for the outline mask target. + pub fn get_mask_default_msaa_state(tier: HardwareTier) -> wgpu::MultisampleState { + wgpu::MultisampleState { + count: Self::get_mask_sample_count(tier), + mask: !0, + alpha_to_coverage_enabled: false, + } + } + + /// Number of MSAA samples used for the outline mask target. + pub fn get_mask_sample_count(tier: HardwareTier) -> u32 { + match tier { + HardwareTier::Web => 1, + // The MSAA shader variant deals with *exactly* 4 samples. + // See `jumpflooding_step_msaa.wgsl`. 
+ HardwareTier::Native => 4, + } + } + + pub fn new( + ctx: &mut RenderContext, + config: &OutlineConfig, + view_name: &DebugLabel, + resolution_in_pixel: [u32; 2], + ) -> Self { + crate::profile_function!(); + let instance_label = view_name.clone().push_str(" - OutlineMaskProcessor"); + + // ------------- Textures ------------- + let texture_pool = &ctx.gpu_resources.textures; + + let mask_sample_count = + Self::get_mask_sample_count(ctx.shared_renderer_data.config.hardware_tier); + let mask_texture_desc = crate::wgpu_resources::TextureDesc { + label: instance_label.clone().push_str("::mask_texture"), + size: wgpu::Extent3d { + width: resolution_in_pixel[0], + height: resolution_in_pixel[1], + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: mask_sample_count, + dimension: wgpu::TextureDimension::D2, + format: Self::MASK_FORMAT, + usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::RENDER_ATTACHMENT, + }; + let mask_texture = texture_pool.alloc(&ctx.device, &mask_texture_desc); + + // We have a fresh depth buffer here that we need because: + // * We want outlines visible even if there's an object in front, so don't re-use previous + // * Overdraw IDs correctly + // * TODO(andreas): Make overdrawn outlines more transparent by comparing depth + let mask_depth = texture_pool.alloc( + &ctx.device, + &crate::wgpu_resources::TextureDesc { + label: instance_label.clone().push_str("::mask_depth"), + format: Self::MASK_DEPTH_FORMAT, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + ..mask_texture_desc + }, + ); + + let voronoi_texture_desc = crate::wgpu_resources::TextureDesc { + label: instance_label.clone().push_str("::distance_texture"), + sample_count: 1, + format: Self::VORONOI_FORMAT, + ..mask_texture_desc + }; + let voronoi_textures = [ + texture_pool.alloc(&ctx.device, &voronoi_texture_desc.with_label_push("0")), + texture_pool.alloc(&ctx.device, &voronoi_texture_desc.with_label_push("1")), + ]; + + // ------------- Bind Groups 
------------- + + let (bind_group_jumpflooding_init, bind_group_layout_jumpflooding_init) = + Self::create_bind_group_jumpflooding_init(ctx, &instance_label, &mask_texture); + let (bind_group_jumpflooding_steps, bind_group_layout_jumpflooding_step) = + Self::create_bind_groups_for_jumpflooding_steps( + config, + ctx, + &instance_label, + &voronoi_textures, + ); + + // Create a bind group for the final compositor pass - it will read the last voronoi texture + let bind_group_draw_outlines = { + let mut renderers = ctx.renderers.write(); + let compositor_renderer = renderers.get_or_create::<_, OutlineCompositor>( + &ctx.shared_renderer_data, + &mut ctx.gpu_resources, + &ctx.device, + &mut ctx.resolver, + ); + + // Point to the last written voronoi texture + // We start writing to voronoi_textures[0] and then do `num_steps` ping-pong rendering. + // Therefore, the last texture is voronoi_textures[num_steps % 2] + compositor_renderer.create_bind_group( + ctx, + voronoi_textures[bind_group_jumpflooding_steps.len() % 2].handle, + config, + ) + }; + + // ------------- Render Pipelines ------------- + + let screen_triangle_vertex_shader = + screen_triangle_vertex_shader(&mut ctx.gpu_resources, &ctx.device, &mut ctx.resolver); + let jumpflooding_init_desc = RenderPipelineDesc { + label: "OutlineMaskProcessor::jumpflooding_init".into(), + pipeline_layout: ctx.gpu_resources.pipeline_layouts.get_or_create( + &ctx.device, + &PipelineLayoutDesc { + label: "OutlineMaskProcessor::jumpflooding_init".into(), + entries: vec![bind_group_layout_jumpflooding_init], + }, + &ctx.gpu_resources.bind_group_layouts, + ), + vertex_entrypoint: "main".into(), + vertex_handle: screen_triangle_vertex_shader, + fragment_entrypoint: "main".into(), + fragment_handle: ctx.gpu_resources.shader_modules.get_or_create( + &ctx.device, + &mut ctx.resolver, + &ShaderModuleDesc { + label: "jumpflooding_init".into(), + source: if mask_sample_count == 1 { + 
include_file!("../../shader/outlines/jumpflooding_init.wgsl") + } else { + include_file!("../../shader/outlines/jumpflooding_init_msaa.wgsl") + }, + }, + ), + vertex_buffers: smallvec![], + render_targets: smallvec![Some(Self::VORONOI_FORMAT.into())], + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + }; + let render_pipeline_jumpflooding_init = ctx.gpu_resources.render_pipelines.get_or_create( + &ctx.device, + &jumpflooding_init_desc, + &ctx.gpu_resources.pipeline_layouts, + &ctx.gpu_resources.shader_modules, + ); + let render_pipeline_jumpflooding_step = ctx.gpu_resources.render_pipelines.get_or_create( + &ctx.device, + &RenderPipelineDesc { + label: "OutlineMaskProcessor::jumpflooding_step".into(), + pipeline_layout: ctx.gpu_resources.pipeline_layouts.get_or_create( + &ctx.device, + &PipelineLayoutDesc { + label: "OutlineMaskProcessor::jumpflooding_step".into(), + entries: vec![bind_group_layout_jumpflooding_step], + }, + &ctx.gpu_resources.bind_group_layouts, + ), + fragment_handle: ctx.gpu_resources.shader_modules.get_or_create( + &ctx.device, + &mut ctx.resolver, + &ShaderModuleDesc { + label: "jumpflooding_step".into(), + source: include_file!("../../shader/outlines/jumpflooding_step.wgsl"), + }, + ), + ..jumpflooding_init_desc + }, + &ctx.gpu_resources.pipeline_layouts, + &ctx.gpu_resources.shader_modules, + ); + + Self { + label: instance_label, + mask_texture, + mask_depth, + voronoi_textures, + bind_group_jumpflooding_init, + bind_group_jumpflooding_steps, + bind_group_draw_outlines, + render_pipeline_jumpflooding_init, + render_pipeline_jumpflooding_step, + } + } + + pub fn start_mask_render_pass<'a>( + &'a self, + encoder: &'a mut wgpu::CommandEncoder, + ) -> wgpu::RenderPass<'a> { + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: self.label.clone().push_str(" - mask pass").get(), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: 
&self.mask_texture.default_view, + resolve_target: None, // We're going to do a manual resolve. + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT), + store: true, + }, + })], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &self.mask_depth.default_view, + depth_ops: Some(wgpu::Operations { + load: ViewBuilder::DEFAULT_DEPTH_CLEAR, + store: false, + }), + stencil_ops: None, + }), + }) + } + + pub fn compute_outlines( + self, + pools: &WgpuResourcePools, + encoder: &mut wgpu::CommandEncoder, + ) -> Result { + let pipelines = &pools.render_pipelines; + + let ops = wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT), // Clear is the closest to "don't care" + store: true, + }; + + // Initialize the jump flooding into voronoi texture 0 by looking at the mask texture. + { + let mut jumpflooding_init = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: self.label.clone().push_str(" - jumpflooding_init").get(), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &self.voronoi_textures[0].default_view, + resolve_target: None, + ops, + })], + depth_stencil_attachment: None, + }); + + let render_pipeline_init = + pipelines.get_resource(self.render_pipeline_jumpflooding_init)?; + jumpflooding_init.set_bind_group(0, &self.bind_group_jumpflooding_init, &[]); + jumpflooding_init.set_pipeline(render_pipeline_init); + jumpflooding_init.draw(0..3, 0..1); + } + + // Perform jump flooding. 
+ let render_pipeline_step = + pipelines.get_resource(self.render_pipeline_jumpflooding_step)?; + for (i, bind_group) in self.bind_group_jumpflooding_steps.into_iter().enumerate() { + let mut jumpflooding_step = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: self + .label + .clone() + .push_str(&format!(" - jumpflooding_step {i}")) + .get(), + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + // Start with texture 1 since the init step wrote to texture 0 + view: &self.voronoi_textures[(i + 1) % 2].default_view, + resolve_target: None, + ops, + })], + depth_stencil_attachment: None, + }); + + jumpflooding_step.set_pipeline(render_pipeline_step); + jumpflooding_step.set_bind_group(0, &bind_group, &[]); + jumpflooding_step.draw(0..3, 0..1); + } + + Ok(OutlineCompositingDrawData { + bind_group: self.bind_group_draw_outlines, + }) + } + + fn create_bind_group_jumpflooding_init( + ctx: &mut RenderContext, + instance_label: &DebugLabel, + mask_texture: &GpuTexture, + ) -> (GpuBindGroup, GpuBindGroupLayoutHandle) { + let bind_group_layout_jumpflooding_init = + ctx.gpu_resources.bind_group_layouts.get_or_create( + &ctx.device, + &BindGroupLayoutDesc { + label: "OutlineMaskProcessor::bind_group_layout_jumpflooding_init".into(), + entries: vec![wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Uint, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: mask_texture.texture.sample_count() > 1, + }, + count: None, + }], + }, + ); + ( + ctx.gpu_resources.bind_groups.alloc( + &ctx.device, + &ctx.gpu_resources, + &BindGroupDesc { + label: instance_label.clone().push_str("::jumpflooding_init"), + entries: smallvec![BindGroupEntry::DefaultTextureView(mask_texture.handle)], + layout: bind_group_layout_jumpflooding_init, + }, + ), + bind_group_layout_jumpflooding_init, + ) + } + + fn create_bind_groups_for_jumpflooding_steps( + 
config: &OutlineConfig, + ctx: &mut RenderContext, + instance_label: &DebugLabel, + voronoi_textures: &[GpuTexture; 2], + ) -> (Vec, GpuBindGroupLayoutHandle) { + let bind_group_layout_jumpflooding_step = + ctx.gpu_resources.bind_group_layouts.get_or_create( + &ctx.device, + &BindGroupLayoutDesc { + label: "OutlineMaskProcessor::bind_group_layout_jumpflooding_step".into(), + entries: vec![ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering), + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 2, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + // Dynamic offset would make sense here since we cycle through a bunch of these. + // But we need at least two bind groups anyways since we're ping-ponging between two textures, + // which would make this needlessly complicated. 
+ has_dynamic_offset: false, + min_binding_size: std::num::NonZeroU64::new(std::mem::size_of::< + gpu_data::JumpfloodingStepUniformBuffer, + >( + ) + as _), + }, + count: None, + }, + ], + }, + ); + + let max_step_width = + (config.outline_radius_pixel.max(1.0).ceil() as u32).next_power_of_two(); + let num_steps = max_step_width.ilog2() + 1; + let uniform_buffer_jumpflooding_steps_bindings = create_and_fill_uniform_buffer_batch( + ctx, + "jumpflooding uniformbuffer".into(), + (0..num_steps) + .into_iter() + .map(|step| gpu_data::JumpfloodingStepUniformBuffer { + step_width: (max_step_width >> step).into(), + end_padding: Default::default(), + }), + ); + let sampler = ctx.gpu_resources.samplers.get_or_create( + &ctx.device, + &SamplerDesc { + label: "nearest_clamp".into(), + address_mode_u: wgpu::AddressMode::ClampToEdge, + address_mode_v: wgpu::AddressMode::ClampToEdge, + address_mode_w: wgpu::AddressMode::ClampToEdge, + ..Default::default() + }, + ); + let uniform_buffer_jumpflooding_steps = uniform_buffer_jumpflooding_steps_bindings + .into_iter() + .enumerate() + .map(|(i, uniform_buffer_binding)| { + ctx.gpu_resources.bind_groups.alloc( + &ctx.device, + &ctx.gpu_resources, + &BindGroupDesc { + label: instance_label + .clone() + .push_str(&format!("::jumpflooding_steps[{i}]")), + entries: smallvec![ + BindGroupEntry::DefaultTextureView(voronoi_textures[i % 2].handle), + BindGroupEntry::Sampler(sampler), + uniform_buffer_binding + ], + layout: bind_group_layout_jumpflooding_step, + }, + ) + }) + .collect(); + + ( + uniform_buffer_jumpflooding_steps, + bind_group_layout_jumpflooding_step, + ) + } +} + +pub struct OutlineCompositor { + render_pipeline: GpuRenderPipelineHandle, + bind_group_layout: GpuBindGroupLayoutHandle, +} + +#[derive(Clone)] +pub struct OutlineCompositingDrawData { + bind_group: GpuBindGroup, +} + +impl DrawData for OutlineCompositingDrawData { + type Renderer = OutlineCompositor; +} + +impl OutlineCompositor { + fn create_bind_group( + &self, 
+ ctx: &RenderContext, + final_voronoi_texture: GpuTextureHandle, + config: &OutlineConfig, + ) -> GpuBindGroup { + let uniform_buffer_binding = create_and_fill_uniform_buffer( + ctx, + "OutlineCompositingDrawData".into(), + gpu_data::OutlineConfigUniformBuffer { + color_layer_a: config.color_layer_a.into(), + color_layer_b: config.color_layer_b.into(), + outline_radius_pixel: config.outline_radius_pixel.into(), + end_padding: Default::default(), + }, + ); + + ctx.gpu_resources.bind_groups.alloc( + &ctx.device, + &ctx.gpu_resources, + &BindGroupDesc { + label: "OutlineCompositingDrawData".into(), + entries: smallvec![ + BindGroupEntry::DefaultTextureView(final_voronoi_texture), + uniform_buffer_binding + ], + layout: self.bind_group_layout, + }, + ) + } +} + +impl Renderer for OutlineCompositor { + type RendererDrawData = OutlineCompositingDrawData; + + fn participated_phases() -> &'static [DrawPhase] { + &[DrawPhase::Compositing] + } + + fn create_renderer( + shared_data: &SharedRendererData, + pools: &mut WgpuResourcePools, + device: &wgpu::Device, + resolver: &mut FileResolver, + ) -> Self { + let bind_group_layout = pools.bind_group_layouts.get_or_create( + device, + &BindGroupLayoutDesc { + label: "OutlineCompositor::bind_group_layout".into(), + entries: vec![ + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: std::num::NonZeroU64::new(std::mem::size_of::< + gpu_data::OutlineConfigUniformBuffer, + >( + ) + as _), + }, + count: None, + }, + ], + }, + ); + let vertex_handle = screen_triangle_vertex_shader(pools, device, 
resolver); + let render_pipeline = pools.render_pipelines.get_or_create( + device, + &RenderPipelineDesc { + label: "OutlineCompositor".into(), + pipeline_layout: pools.pipeline_layouts.get_or_create( + device, + &PipelineLayoutDesc { + label: "OutlineCompositor".into(), + entries: vec![shared_data.global_bindings.layout, bind_group_layout], + }, + &pools.bind_group_layouts, + ), + vertex_entrypoint: "main".into(), + vertex_handle, + fragment_entrypoint: "main".into(), + fragment_handle: pools.shader_modules.get_or_create( + device, + resolver, + &ShaderModuleDesc { + label: "outlines_from_voronoi".into(), + source: include_file!("../../shader/outlines/outlines_from_voronoi.wgsl"), + }, + ), + vertex_buffers: smallvec![], + render_targets: smallvec![Some(wgpu::ColorTargetState { + format: shared_data.config.output_format_color, + blend: Some(wgpu::BlendState::PREMULTIPLIED_ALPHA_BLENDING), + write_mask: wgpu::ColorWrites::all() + })], + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + }, + &pools.pipeline_layouts, + &pools.shader_modules, + ); + + OutlineCompositor { + render_pipeline, + bind_group_layout, + } + } + + fn draw<'a>( + &self, + pools: &'a WgpuResourcePools, + _phase: DrawPhase, + pass: &mut wgpu::RenderPass<'a>, + draw_data: &'a OutlineCompositingDrawData, + ) -> anyhow::Result<()> { + let pipeline = pools.render_pipelines.get_resource(self.render_pipeline)?; + + pass.set_pipeline(pipeline); + pass.set_bind_group(1, &draw_data.bind_group, &[]); + pass.draw(0..3, 0..1); + + Ok(()) + } +} diff --git a/crates/re_renderer/src/resource_managers/resource_manager.rs b/crates/re_renderer/src/resource_managers/resource_manager.rs index 8db5453f7e67..f84e95145f22 100644 --- a/crates/re_renderer/src/resource_managers/resource_manager.rs +++ b/crates/re_renderer/src/resource_managers/resource_manager.rs @@ -21,6 +21,9 @@ pub enum ResourceHandle { /// Querying it during any other frame will 
fail. valid_frame_index: u64, }, + + /// No handle, causes error on resolve. + Invalid, } #[derive(thiserror::Error, Debug, PartialEq, Eq)] @@ -136,6 +139,7 @@ where } }) } + ResourceHandle::Invalid => Err(ResourceManagerError::NullHandle), } } diff --git a/crates/re_renderer/src/view_builder.rs b/crates/re_renderer/src/view_builder.rs index 27e05bbfbe66..8c771632275a 100644 --- a/crates/re_renderer/src/view_builder.rs +++ b/crates/re_renderer/src/view_builder.rs @@ -6,7 +6,10 @@ use crate::{ allocator::create_and_fill_uniform_buffer, context::RenderContext, global_bindings::FrameUniformBuffer, - renderer::{compositor::CompositorDrawData, DrawData, DrawPhase, Renderer}, + renderer::{ + compositor::CompositorDrawData, DrawData, DrawPhase, OutlineConfig, OutlineMaskProcessor, + Renderer, + }, wgpu_resources::{GpuBindGroup, GpuTexture, TextureDesc}, DebugLabel, Rgba, Size, }; @@ -34,6 +37,9 @@ pub struct ViewBuilder { /// Result of [`ViewBuilder::setup_view`] - needs to be `Option` sine some of the fields don't have a default. setup: Option, queued_draws: Vec, + + // TODO(andreas): Consider making "render processors" a "thing" by establishing a form of hardcoded/limited-flexibility render-graph + outline_mask_processor: Option, } struct ViewTargetSetup { @@ -141,6 +147,8 @@ pub struct TargetConfiguration { /// How [`Size::AUTO`] is interpreted. pub auto_size_config: AutoSizeConfig, + + pub outline_config: Option, } impl Default for TargetConfiguration { @@ -155,6 +163,7 @@ impl Default for TargetConfiguration { }, pixels_from_point: 1.0, auto_size_config: Default::default(), + outline_config: None, } } } @@ -196,6 +205,11 @@ impl ViewBuilder { alpha_to_coverage_enabled: false, }; + /// Default value for clearing depth buffer to infinity. + /// + /// 0.0 == far since we're using reverse-z. + pub const DEFAULT_DEPTH_CLEAR: wgpu::LoadOp = wgpu::LoadOp::Clear(0.0); + /// Default depth state for enabled depth write & read. 
pub const MAIN_TARGET_DEFAULT_DEPTH_STATE: Option = Some(wgpu::DepthStencilState { @@ -407,6 +421,15 @@ impl ViewBuilder { frame_uniform_buffer, ); + self.outline_mask_processor = config.outline_config.map(|outline_config| { + OutlineMaskProcessor::new( + ctx, + &outline_config, + &config.name, + config.resolution_in_pixel, + ) + }); + self.setup = Some(ViewTargetSetup { name: config.name, bind_group_0, @@ -481,10 +504,10 @@ impl ViewBuilder { }); { - crate::profile_scope!("view builder main target pass"); + crate::profile_scope!("main target pass"); let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: DebugLabel::from(format!("{:?} - main pass", setup.name)).get(), + label: setup.name.clone().push_str(" - main pass").get(), color_attachments: &[Some(wgpu::RenderPassColorAttachment { view: &setup.main_target_msaa.default_view, resolve_target: Some(&setup.main_target_resolved.default_view), @@ -503,9 +526,7 @@ impl ViewBuilder { depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { view: &setup.depth_buffer.default_view, depth_ops: Some(wgpu::Operations { - load: wgpu::LoadOp::Clear(0.0), // 0.0 == far since we're using reverse-z - // Don't care about depth results afterwards. - // This can have be much better perf, especially on tiler gpus. 
+ load: Self::DEFAULT_DEPTH_CLEAR, store: false, }), stencil_ops: None, @@ -519,6 +540,19 @@ impl ViewBuilder { } } + if let Some(outline_mask_processor) = self.outline_mask_processor.take() { + crate::profile_scope!("outlines"); + { + crate::profile_scope!("outline mask pass"); + let mut pass = outline_mask_processor.start_mask_render_pass(&mut encoder); + pass.set_bind_group(0, &setup.bind_group_0, &[]); + self.draw_phase(ctx, DrawPhase::OutlineMask, &mut pass); + } + self.queue_draw( + &outline_mask_processor.compute_outlines(&ctx.gpu_resources, &mut encoder)?, + ); + } + Ok(encoder.finish()) } diff --git a/crates/re_renderer/src/wgpu_resources/texture_pool.rs b/crates/re_renderer/src/wgpu_resources/texture_pool.rs index 3e6382fe4990..0c5ebe43dec9 100644 --- a/crates/re_renderer/src/wgpu_resources/texture_pool.rs +++ b/crates/re_renderer/src/wgpu_resources/texture_pool.rs @@ -49,9 +49,17 @@ pub struct TextureDesc { impl TextureDesc { /// Copies the desc but changes the label. pub fn with_label(&self, label: DebugLabel) -> Self { - let mut new = self.clone(); - new.label = label; - new + Self { + label, + ..self.clone() + } + } + + /// Copies the desc but adds a string to the label. 
+ pub fn with_label_push(&self, append_this: &str) -> Self { + let mut copy = self.clone(); + copy.label = copy.label.push_str(append_this); + copy } } diff --git a/crates/re_renderer/src/workspace_shaders.rs b/crates/re_renderer/src/workspace_shaders.rs index c15de19e70f1..3db56b0f8d19 100644 --- a/crates/re_renderer/src/workspace_shaders.rs +++ b/crates/re_renderer/src/workspace_shaders.rs @@ -61,6 +61,36 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/outlines/jumpflooding_init.wgsl"); + let content = include_str!("../shader/outlines/jumpflooding_init.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + + { + let virtpath = Path::new("shader/outlines/jumpflooding_init_msaa.wgsl"); + let content = include_str!("../shader/outlines/jumpflooding_init_msaa.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + + { + let virtpath = Path::new("shader/outlines/jumpflooding_init_shared.wgsl"); + let content = include_str!("../shader/outlines/jumpflooding_init_shared.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + + { + let virtpath = Path::new("shader/outlines/jumpflooding_step.wgsl"); + let content = include_str!("../shader/outlines/jumpflooding_step.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + + { + let virtpath = Path::new("shader/outlines/outlines_from_voronoi.wgsl"); + let content = include_str!("../shader/outlines/outlines_from_voronoi.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = Path::new("shader/point_cloud.wgsl"); let content = include_str!("../shader/point_cloud.wgsl").into(); @@ -79,6 +109,12 @@ pub fn init() { fs.create_file(virtpath, content).unwrap(); } + { + let virtpath = Path::new("shader/screen_triangle_vertex.wgsl"); + let content = include_str!("../shader/screen_triangle_vertex.wgsl").into(); + fs.create_file(virtpath, content).unwrap(); + } + { let virtpath = 
Path::new("shader/test_triangle.wgsl"); let content = include_str!("../shader/test_triangle.wgsl").into(); diff --git a/crates/re_viewer/src/lib.rs b/crates/re_viewer/src/lib.rs index 2af407f42115..d9942fe1928c 100644 --- a/crates/re_viewer/src/lib.rs +++ b/crates/re_viewer/src/lib.rs @@ -117,7 +117,7 @@ impl AppEnvironment { const APPLICATION_NAME: &str = "Rerun Viewer"; pub(crate) fn hardware_tier() -> re_renderer::config::HardwareTier { - re_renderer::config::HardwareTier::Web + re_renderer::config::HardwareTier::default() } pub(crate) fn wgpu_options() -> egui_wgpu::WgpuConfiguration { diff --git a/crates/re_viewer/src/misc/mesh_loader.rs b/crates/re_viewer/src/misc/mesh_loader.rs index b700b939a68d..dfa3be855ddf 100644 --- a/crates/re_viewer/src/misc/mesh_loader.rs +++ b/crates/re_viewer/src/misc/mesh_loader.rs @@ -157,9 +157,7 @@ impl LoadedMesh { }, ResourceLifeTime::LongLived, )?, - mesh: None, // Don't need to keep cpu-mesh data around, we already have everything we wanted from it (the bounding box) - world_from_mesh: Default::default(), - additive_tint: egui::Color32::TRANSPARENT, + ..Default::default() }]; Ok(Self { diff --git a/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs b/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs index d87b49828dd3..a9abfbe7beb6 100644 --- a/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs +++ b/crates/re_viewer/src/ui/view_spatial/scene/primitives.rs @@ -136,9 +136,9 @@ impl SceneSpatialPrimitives { .iter() .map(move |mesh_instance| MeshInstance { gpu_mesh: mesh_instance.gpu_mesh.clone(), - mesh: None, // Don't care. 
world_from_mesh: base_transform * mesh_instance.world_from_mesh, additive_tint: mesh.additive_tint, + ..Default::default() }) }) .collect() diff --git a/crates/re_viewer/src/ui/view_spatial/ui_2d.rs b/crates/re_viewer/src/ui/view_spatial/ui_2d.rs index d0bc2c3f5c0e..952f1c61d29b 100644 --- a/crates/re_viewer/src/ui/view_spatial/ui_2d.rs +++ b/crates/re_viewer/src/ui/view_spatial/ui_2d.rs @@ -490,6 +490,7 @@ fn setup_target_config( }, pixels_from_point: pixels_from_points, auto_size_config, + ..Default::default() } }) } diff --git a/crates/re_viewer/src/ui/view_spatial/ui_3d.rs b/crates/re_viewer/src/ui/view_spatial/ui_3d.rs index 94fed1a3e9e4..4a4dcfe25f54 100644 --- a/crates/re_viewer/src/ui/view_spatial/ui_3d.rs +++ b/crates/re_viewer/src/ui/view_spatial/ui_3d.rs @@ -528,6 +528,8 @@ fn paint_view( pixels_from_point, auto_size_config, + + ..Default::default() }; let Ok(callback) = create_scene_paint_callback(