Skip to content

Commit

Permalink
[mtl] dynamic depth bias, callback coalescence, and active command bu…
Browse files Browse the repository at this point in the history
…ffer stats
  • Loading branch information
kvark committed Jul 13, 2018
1 parent 5667537 commit 3e95044
Show file tree
Hide file tree
Showing 10 changed files with 144 additions and 109 deletions.
12 changes: 8 additions & 4 deletions src/backend/dx11/src/conv.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use hal::format::{Format};
use hal::pso::{
BlendDesc, BlendOp, BlendState, ColorBlendDesc, Comparison, DepthStencilDesc,
BlendDesc, BlendOp, BlendState, ColorBlendDesc, Comparison, DepthBias, DepthStencilDesc,
DepthTest, Face, Factor, PolygonMode, Rasterizer, Rect, StencilFace, StencilOp, StencilTest,
Viewport, Stage, State, StencilValue, FrontFace,
};
Expand Down Expand Up @@ -271,16 +271,20 @@ fn map_cull_mode(mode: Face) -> D3D11_CULL_MODE {
}

pub(crate) fn map_rasterizer_desc(desc: &Rasterizer) -> D3D11_RASTERIZER_DESC {
let bias = match desc.depth_bias { //TODO: support dynamic depth bias
Some(State::Static(db)) => db,
Some(_) | None => DepthBias::default(),
};
D3D11_RASTERIZER_DESC {
FillMode: map_fill_mode(desc.polygon_mode),
CullMode: map_cull_mode(desc.cull_face),
FrontCounterClockwise: match desc.front_face {
FrontFace::Clockwise => FALSE,
FrontFace::CounterClockwise => TRUE,
},
DepthBias: desc.depth_bias.map_or(0, |bias| bias.const_factor as INT),
DepthBiasClamp: desc.depth_bias.map_or(0.0, |bias| bias.clamp),
SlopeScaledDepthBias: desc.depth_bias.map_or(0.0, |bias| bias.slope_factor),
DepthBias: bias.const_factor as INT,
DepthBiasClamp: bias.clamp,
SlopeScaledDepthBias: bias.slope_factor,
DepthClipEnable: !desc.depth_clamping as _,
// TODO:
ScissorEnable: TRUE,
Expand Down
11 changes: 8 additions & 3 deletions src/backend/dx12/src/conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ pub fn map_rasterizer(rasterizer: &pso::Rasterizer) -> D3D12_RASTERIZER_DESC {
use hal::pso::PolygonMode::*;
use hal::pso::FrontFace::*;

let bias = match rasterizer.depth_bias { //TODO: support dynamic depth bias
Some(pso::State::Static(db)) => db,
Some(_) | None => pso::DepthBias::default(),
};

D3D12_RASTERIZER_DESC {
FillMode: match rasterizer.polygon_mode {
Point => {
Expand All @@ -172,9 +177,9 @@ pub fn map_rasterizer(rasterizer: &pso::Rasterizer) -> D3D12_RASTERIZER_DESC {
Clockwise => FALSE,
CounterClockwise => TRUE,
},
DepthBias: rasterizer.depth_bias.map_or(0, |bias| bias.const_factor as INT),
DepthBiasClamp: rasterizer.depth_bias.map_or(0.0, |bias| bias.clamp),
SlopeScaledDepthBias: rasterizer.depth_bias.map_or(0.0, |bias| bias.slope_factor),
DepthBias: bias.const_factor as INT,
DepthBiasClamp: bias.clamp,
SlopeScaledDepthBias: bias.slope_factor,
DepthClipEnable: !rasterizer.depth_clamping as _,
MultisampleEnable: FALSE, // TODO: currently not supported
ForcedSampleCount: 0, // TODO: currently not supported
Expand Down
6 changes: 3 additions & 3 deletions src/backend/gl/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use hal::pso;
use gl;
use smallvec::SmallVec;

pub fn bind_polygon_mode(gl: &gl::Gl, mode: pso::PolygonMode, bias: Option<pso::DepthBias>) {
pub fn bind_polygon_mode(gl: &gl::Gl, mode: pso::PolygonMode, bias: Option<pso::State<pso::DepthBias>>) {
use hal::pso::PolygonMode::*;

let (gl_draw, gl_offset) = match mode {
Expand All @@ -20,11 +20,11 @@ pub fn bind_polygon_mode(gl: &gl::Gl, mode: pso::PolygonMode, bias: Option<pso::
unsafe { gl.PolygonMode(gl::FRONT_AND_BACK, gl_draw) };

match bias {
Some(bias) => unsafe {
Some(pso::State::Static(bias)) => unsafe {
gl.Enable(gl_offset);
gl.PolygonOffset(bias.slope_factor as _, bias.const_factor as _);
},
None => unsafe {
_ => unsafe {
gl.Disable(gl_offset)
},
}
Expand Down
189 changes: 103 additions & 86 deletions src/backend/metal/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ const WORD_ALIGNMENT: u64 = WORD_SIZE as _;
/// with clear operations set up to implement our `clear_image`
/// Note: currently doesn't work, needs a repro case for Apple
const CLEAR_IMAGE_ARRAY: bool = false;
/// Number of frames to average when reporting the frame wait times.
const FRAME_WAIT_REPORT_WINDOW: usize = 0;
/// Number of frames to average when reporting the performance counters.
const COUNTERS_REPORT_WINDOW: usize = 0;

pub struct QueueInner {
raw: metal::CommandQueue,
Expand All @@ -61,13 +61,17 @@ impl QueueInner {
device: &metal::DeviceRef,
pool_size: Option<usize>,
) -> Self {
QueueInner {
raw: match pool_size {
Some(count) => device.new_command_queue_with_max_command_buffer_count(count as u64),
None => device.new_command_queue(),
match pool_size {
Some(count) => QueueInner {
raw: device.new_command_queue_with_max_command_buffer_count(count as u64),
reserve: 0 .. count,
debug_retain_references: false,
},
None => QueueInner {
raw: device.new_command_queue(),
reserve: 0 .. 64,
debug_retain_references: true,
},
reserve: 0 .. pool_size.unwrap_or(64),
debug_retain_references: false,
}
}

Expand Down Expand Up @@ -159,6 +163,7 @@ struct State {
resources_cs: StageResources,
index_buffer: Option<IndexBuffer<BufferPtr>>,
rasterizer_state: Option<native::RasterizerState>,
depth_bias: pso::DepthBias,
stencil: native::StencilState<pso::StencilValue>,
push_constants: Vec<u32>,
vertex_buffers: Vec<Option<(BufferPtr, u64)>>,
Expand Down Expand Up @@ -201,9 +206,7 @@ impl State {
None
};
let com_depth_bias = if aspects.contains(Aspects::DEPTH) {
Some(soft::RenderCommand::SetDepthBias(
self.rasterizer_state.as_ref().map(|r| r.depth_bias).unwrap_or_default()
))
Some(soft::RenderCommand::SetDepthBias(self.depth_bias))
} else {
None
};
Expand Down Expand Up @@ -412,14 +415,7 @@ impl State {
}

fn set_depth_bias<'a>(&mut self, depth_bias: &pso::DepthBias) -> soft::RenderCommand<&'a soft::Own> {
if let Some(ref mut r) = self.rasterizer_state {
r.depth_bias = *depth_bias;
} else {
self.rasterizer_state = Some(native::RasterizerState {
depth_bias: *depth_bias,
..Default::default()
});
}
self.depth_bias = *depth_bias;
soft::RenderCommand::SetDepthBias(*depth_bias)
}

Expand Down Expand Up @@ -1024,8 +1020,6 @@ fn exec_render<'a>(encoder: &metal::RenderCommandEncoderRef, command: soft::Rend
encoder.set_front_facing_winding(rs.front_winding);
encoder.set_cull_mode(rs.cull_mode);
encoder.set_depth_clip_mode(rs.depth_clip);
let db = rs.depth_bias;
encoder.set_depth_bias(db.const_factor, db.slope_factor, db.clamp);
}
}
Cmd::Draw { primitive_type, vertices, instances } => {
Expand Down Expand Up @@ -1236,24 +1230,33 @@ fn record_commands(command_buf: &metal::CommandBufferRef, passes: &[soft::Pass])
}

#[derive(Default)]
struct FrameWaitReport {
duration: time::Duration,
count: usize,
struct PerformanceCounters {
active_command_buffer_count: usize,
temporary_command_buffer_count: usize,
frame_wait_duration: time::Duration,
frame_wait_count: usize,
frame: usize,
}


pub struct CommandQueue {
shared: Arc<Shared>,
frame_wait: Option<FrameWaitReport>,
retained_buffers: Vec<metal::Buffer>,
retained_textures: Vec<metal::Texture>,
perf_counters: Option<PerformanceCounters>,
}

unsafe impl Send for CommandQueue {}
unsafe impl Sync for CommandQueue {}

impl CommandQueue {
pub(crate) fn new(shared: Arc<Shared>) -> Self {
CommandQueue {
shared,
frame_wait: if FRAME_WAIT_REPORT_WINDOW != 0 {
Some(FrameWaitReport::default())
retained_buffers: Vec::new(),
retained_textures: Vec::new(),
perf_counters: if COUNTERS_REPORT_WINDOW != 0 {
Some(PerformanceCounters::default())
} else {
None
},
Expand All @@ -1273,9 +1276,9 @@ impl CommandQueue {
if let Some(swap_image) = sem.image_ready.lock().unwrap().take() {
let start = time::Instant::now();
swap_image.wait_until_ready();
if let Some(ref mut wait) = self.frame_wait {
wait.count += 1;
wait.duration += start.elapsed();
if let Some(ref mut counters) = self.perf_counters {
counters.frame_wait_count += 1;
counters.frame_wait_duration += start.elapsed();
}
}
}
Expand All @@ -1295,23 +1298,6 @@ impl RawCommandQueue<Backend> for CommandQueue {

self.wait(submit.wait_semaphores.iter().map(|&(s, _)| s));

let system_semaphores = submit.signal_semaphores
.into_iter()
.filter_map(|semaphore| {
semaphore.system.clone()
})
.collect::<Vec<_>>();
let signal_block = if !system_semaphores.is_empty() {
//Note: careful with those `ConcreteBlock::copy()` calls!
Some(ConcreteBlock::new(move |_cb: *mut ()| -> () {
for semaphore in &system_semaphores {
semaphore.signal();
}
}).copy())
} else {
None
};

let queue = self.shared.queue.lock().unwrap();
let (mut num_immediate, mut num_deferred) = (0, 0);

Expand All @@ -1323,52 +1309,72 @@ impl RawCommandQueue<Backend> for CommandQueue {
ref mut retained_textures,
} = *inner;

let temp_cmd_buffer;
let command_buffer: &metal::CommandBufferRef = match *sink {
match *sink {
Some(CommandSink::Immediate { ref cmd_buffer, ref token, .. }) => {
num_immediate += 1;
trace!("\timmediate {:?}", token);
// schedule the retained buffers to release after the commands are done
if !retained_buffers.is_empty() || !retained_textures.is_empty() {
let free_buffers = mem::replace(retained_buffers, Vec::new());
let free_textures = mem::replace(retained_textures, Vec::new());
let release_block = ConcreteBlock::new(move |_cb: *mut ()| -> () {
// move and auto-release
let _ = free_buffers;
let _ = free_textures;
}).copy();
msg_send![*cmd_buffer, addCompletedHandler: release_block.deref() as *const _];
}
cmd_buffer
self.retained_buffers.extend(retained_buffers.drain(..));
self.retained_textures.extend(retained_textures.drain(..));
cmd_buffer.commit();
}
Some(CommandSink::Deferred { ref passes, .. }) => {
num_deferred += 1;
trace!("\tdeferred with {} passes", passes.len());
temp_cmd_buffer = queue.spawn_temp();
temp_cmd_buffer.set_label("deferred");
record_commands(&*temp_cmd_buffer, passes);
&*temp_cmd_buffer
if let Some(ref mut counters) = self.perf_counters {
counters.temporary_command_buffer_count += 1;;
}
let cmd_buffer = queue.spawn_temp();
cmd_buffer.set_label("deferred");
record_commands(&*cmd_buffer, passes);
cmd_buffer.commit();
}
_ => panic!("Command buffer not recorded for submission")
};
if let Some(ref signal_block) = signal_block {
msg_send![command_buffer, addCompletedHandler: signal_block.deref() as *const _];
}
command_buffer.commit();
}

debug!("\t{} immediate, {} deferred command buffers", num_immediate, num_deferred);

if let Some(ref fence) = fence {
let command_buffer = queue.spawn_temp();
command_buffer.set_label("fence");
let fence = Arc::clone(fence);
let fence_block = ConcreteBlock::new(move |_cb: *mut ()| -> () {
*fence.mutex.lock().unwrap() = true;
fence.condvar.notify_all();
const BLOCK_BUCKET: usize = 4;
let system_semaphores = submit.signal_semaphores
.into_iter()
.filter_map(|semaphore| {
semaphore.system.clone()
})
.collect::<SmallVec<[_; BLOCK_BUCKET]>>();

// Note: completion handlers can stall the GPU, so we only make one
// when strictly required, and collect the retained resources otherwise.
if fence.is_some() || !system_semaphores.is_empty() {
let moved_fence = fence.map(Arc::clone);
let free_buffers = self.retained_buffers
.drain(..)
.collect::<SmallVec<[_; BLOCK_BUCKET]>>();
let free_textures = self.retained_textures
.drain(..)
.collect::<SmallVec<[_; BLOCK_BUCKET]>>();

let block = ConcreteBlock::new(move |_cb: *mut ()| -> () {
// release the fence
if let Some(ref f) = moved_fence {
*f.mutex.lock().unwrap() = true;
f.condvar.notify_all();
}
// signal the semaphores
for semaphore in &system_semaphores {
semaphore.signal();
}
// free all the manually retained resources
let _ = free_buffers;
let _ = free_textures;
}).copy();
msg_send![command_buffer, addCompletedHandler: fence_block.deref() as *const _];
command_buffer.commit();

if let Some(ref mut counters) = self.perf_counters {
counters.temporary_command_buffer_count += 1;;
}
let cmd_buffer = queue.spawn_temp();
cmd_buffer.set_label("signal");
msg_send![cmd_buffer, addCompletedHandler: block.deref() as *const _];
cmd_buffer.commit();
}
}

Expand All @@ -1393,15 +1399,21 @@ impl RawCommandQueue<Backend> for CommandQueue {

command_buffer.commit();

if let Some(ref mut wait) = self.frame_wait {
wait.frame += 1;
if wait.frame >= FRAME_WAIT_REPORT_WINDOW {
let time = wait.duration / wait.frame as u32;
println!("Frame wait time={}ms count={}",
if let Some(ref mut counters) = self.perf_counters {
counters.active_command_buffer_count += queue.reserve.start;
counters.frame += 1;
if counters.frame >= COUNTERS_REPORT_WINDOW {
let time = counters.frame_wait_duration / counters.frame as u32;
println!("Performance counters:");
println!("\tActive command buffers: {} plus {} temporaries",
counters.active_command_buffer_count / counters.frame,
counters.temporary_command_buffer_count / counters.frame,
);
println!("\tFrame wait time:{}ms over {} requests",
time.as_secs() as u32 * 1000 + time.subsec_millis(),
wait.count as f32 / wait.frame as f32,
counters.frame_wait_count as f32 / counters.frame as f32,
);
*wait = FrameWaitReport::default();
*counters = PerformanceCounters::default();
}
}

Expand Down Expand Up @@ -1452,7 +1464,8 @@ impl pool::RawCommandPool<Backend> for CommandPool {
resources_cs: StageResources::new(),
index_buffer: None,
rasterizer_state: None,
stencil: native::StencilState::<pso::StencilValue> {
depth_bias: pso::DepthBias::default(),
stencil: native::StencilState {
front_reference: 0,
back_reference: 0,
front_read_mask: !0,
Expand Down Expand Up @@ -2583,6 +2596,10 @@ impl com::RawCommandBuffer<Backend> for CommandBuffer {
self.state.stencil.back_reference,
));
}
if let pso::State::Static(value) = pipeline.depth_bias {
self.state.depth_bias = value;
pre.issue(soft::RenderCommand::SetDepthBias(value));
}

if let Some(ref vp) = pipeline.baked_states.viewport {
pre.issue(self.state.set_viewport(vp, &self.shared.disabilities));
Expand Down
Loading

0 comments on commit 3e95044

Please sign in to comment.