-
Notifications
You must be signed in to change notification settings - Fork 544
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Re-expose a smart DOWNLOAD memory type on Metal #3515
Comments
I played around it, and it appears to be more painful than it seems. Our CPU-visible memory has an associated buffer to it, but Metal's I also tried to refactor it to have dedicated allocation for these non-coherent buffers, but then mapping memory becomes complicated, just like in DX11. At this point, I'm not convinced it's worth it. My WIP patch for the records: diff --git a/src/backend/gl/src/command.rs b/src/backend/gl/src/command.rs
index 3a7b47e0a..de213928b 100644
--- a/src/backend/gl/src/command.rs
+++ b/src/backend/gl/src/command.rs
@@ -688,7 +688,7 @@ impl command::CommandBuffer<Backend> for CommandBuffer {
T: IntoIterator,
T::Item: Borrow<memory::Barrier<'a, Backend>>,
{
- // TODO
+ //TODO
}
unsafe fn fill_buffer(&mut self, _buffer: &n::Buffer, _range: buffer::SubRange, _data: u32) {
diff --git a/src/backend/metal/src/command.rs b/src/backend/metal/src/command.rs
index 4db9a098a..05fcb8532 100644
--- a/src/backend/metal/src/command.rs
+++ b/src/backend/metal/src/command.rs
@@ -2035,6 +2035,9 @@ where
);
}
}
+ Cmd::InvalidateBuffer(buffer) => {
+ encoder.synchronize_resource(buffer.as_native());
+ }
}
}
@@ -2675,11 +2678,27 @@ impl com::CommandBuffer<Backend> for CommandBuffer {
&mut self,
_stages: Range<pso::PipelineStage>,
_dependencies: memory::Dependencies,
- _barriers: T,
+ barriers: T,
) where
T: IntoIterator,
T::Item: Borrow<memory::Barrier<'a, Backend>>,
{
+ for barrier in barriers {
+ match *barrier.borrow() {
+ // take care of memory invalidations
+ memory::Barrier::Buffer {
+ states, target, range: _, families: _,
+ } => {
+ if let native::Buffer::Bound { ref raw, non_coherent: true, .. } = *target {
+ if states.end.contains(buffer::Access::HOST_READ) {
+ let command = soft::BlitCommand::InvalidateBuffer(AsNative::from(&**raw));
+ self.inner.borrow_mut().sink().blit_commands(iter::once(command));
+ }
+ }
+ }
+ _ => {}
+ }
+ }
}
unsafe fn fill_buffer(&mut self, buffer: &native::Buffer, sub: buffer::SubRange, data: u32) {
diff --git a/src/backend/metal/src/device.rs b/src/backend/metal/src/device.rs
index 160076ced..895608346 100644
--- a/src/backend/metal/src/device.rs
+++ b/src/backend/metal/src/device.rs
@@ -25,10 +25,7 @@ use metal::{
MTLPrimitiveType, MTLResourceOptions, MTLSamplerMipFilter, MTLStorageMode, MTLTextureType,
MTLVertexStepFunction,
};
-use objc::{
- rc::autoreleasepool,
- runtime::{Object, BOOL, NO},
-};
+use objc::runtime::{Object, BOOL, NO};
use parking_lot::Mutex;
use spirv_cross::{msl, spirv, ErrorCode as SpirvErrorCode};
@@ -170,10 +167,10 @@ bitflags! {
const SHARED = 1<<1;
// = `DEVICE_LOCAL | CPU_VISIBLE`
const MANAGED_UPLOAD = 1<<2;
- // = `DEVICE_LOCAL | CPU_VISIBLE | CACHED`
+ // = `DEVICE_LOCAL | CPU_VISIBLE | CPU_CACHED`
// Memory range invalidation is implemented to stall the whole pipeline.
// It's inefficient, therefore we aren't going to expose this type.
- //const MANAGED_DOWNLOAD = 1<<3;
+ const MANAGED_DOWNLOAD = 1<<3;
}
}
@@ -183,7 +180,7 @@ impl MemoryTypes {
Self::PRIVATE => (MTLStorageMode::Private, MTLCPUCacheMode::DefaultCache),
Self::SHARED => (MTLStorageMode::Shared, MTLCPUCacheMode::DefaultCache),
Self::MANAGED_UPLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::WriteCombined),
- //Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
+ Self::MANAGED_DOWNLOAD => (MTLStorageMode::Managed, MTLCPUCacheMode::DefaultCache),
_ => unreachable!(),
}
}
@@ -216,7 +213,11 @@ impl PhysicalDevice {
properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE,
heap_index: 1,
},
- // MANAGED_DOWNLOAD (removed)
+ adapter::MemoryType {
+ // MANAGED_DOWNLOAD
+ properties: Properties::DEVICE_LOCAL | Properties::CPU_VISIBLE | Properties::CPU_CACHED,
+ heap_index: 1,
+ },
]
} else {
vec![
@@ -562,6 +563,7 @@ impl Device {
fn _is_heap_coherent(&self, heap: &n::MemoryHeap) -> bool {
match *heap {
n::MemoryHeap::Private => false,
+ n::MemoryHeap::PublicNonCoherent => false,
n::MemoryHeap::Public(memory_type, _) => self.memory_types[memory_type.0]
.properties
.contains(Properties::COHERENT),
@@ -1917,48 +1919,11 @@ impl hal::device::Device<Backend> for Device {
Ok(())
}
- unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, iter: I) -> Result<(), d::OutOfMemory>
+ unsafe fn invalidate_mapped_memory_ranges<'a, I>(&self, _iter: I) -> Result<(), d::OutOfMemory>
where
I: IntoIterator,
I::Item: Borrow<(&'a n::Memory, memory::Segment)>,
{
- let mut num_syncs = 0;
- debug!("invalidate_mapped_memory_ranges");
-
- // temporary command buffer to copy the contents from
- // the given buffers into the allocated CPU-visible buffers
- // Note: using a separate internal queue in order to avoid a stall
- let cmd_buffer = self.invalidation_queue.spawn_temp();
- autoreleasepool(|| {
- let encoder = cmd_buffer.new_blit_command_encoder();
-
- for item in iter {
- let (memory, ref segment) = *item.borrow();
- let range = memory.resolve(segment);
- debug!("\trange {:?}", range);
-
- match memory.heap {
- n::MemoryHeap::Native(_) => unimplemented!(),
- n::MemoryHeap::Public(mt, ref cpu_buffer)
- if 1 << mt.0 != MemoryTypes::SHARED.bits() as usize =>
- {
- num_syncs += 1;
- encoder.synchronize_resource(cpu_buffer);
- }
- n::MemoryHeap::Public(..) => continue,
- n::MemoryHeap::Private => panic!("Can't map private memory!"),
- };
- }
- encoder.end_encoding();
- });
-
- if num_syncs != 0 {
- debug!("\twaiting...");
- cmd_buffer.set_label("invalidate_mapped_memory_ranges");
- cmd_buffer.commit();
- cmd_buffer.wait_until_completed();
- }
-
Ok(())
}
@@ -2397,6 +2362,8 @@ impl hal::device::Device<Backend> for Device {
n::MemoryHeap::Native(heap_raw)
} else if storage == MTLStorageMode::Private {
n::MemoryHeap::Private
+ } else if storage == MTLStorageMode::Managed && cache == MTLCPUCacheMode::Default {
+ n::MemoryHeap::PublicNonCoherent
} else {
let options = conv::resource_options_from_storage_and_cache(storage, cache);
let cpu_buffer = device.new_buffer(size, options);
@@ -2480,20 +2447,39 @@ impl hal::device::Device<Backend> for Device {
};
debug!("bind_buffer_memory of size {} at offset {}", size, offset);
*buffer = match memory.heap {
- n::MemoryHeap::Native(ref heap) => {
- let options = conv::resource_options_from_storage_and_cache(
- heap.storage_mode(),
- heap.cpu_cache_mode(),
- );
- let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
- // TODO: disable hazard tracking?
- self.shared.device.lock().new_buffer(size, options)
- });
+ n::MemoryHeap::Private => {
+ //TODO: check for aliasing
+ let options = MTLResourceOptions::StorageModePrivate
+ | MTLResourceOptions::CPUCacheModeDefaultCache;
+ let raw = self.shared.device.lock().new_buffer(size, options);
raw.set_label(name);
n::Buffer::Bound {
raw,
options,
- range: 0..size, //TODO?
+ range: 0..size,
+ non_coherent: false,
+ }
+ }
+ n::MemoryHeap::PublicNonCoherent => {
+ debug!("\tmapped to public non-coherent heap");
+ let (storage, cache) = MemoryTypes::describe(mt.0);
+ let options = conv::resource_options_from_storage_and_cache(storage, cache);
+ if offset == 0x0 && size == cpu_buffer.length() {
+ cpu_buffer.set_label(name);
+ } else if self.shared.private_caps.supports_debug_markers {
+ cpu_buffer.add_debug_marker(
+ name,
+ NSRange {
+ location: offset,
+ length: size,
+ },
+ );
+ }
+ n::Buffer::Bound {
+ raw: cpu_buffer.clone(),
+ options,
+ range: offset..offset + size,
+ non_coherent: false,
}
}
n::MemoryHeap::Public(mt, ref cpu_buffer) => {
@@ -2518,18 +2504,24 @@ impl hal::device::Device<Backend> for Device {
raw: cpu_buffer.clone(),
options,
range: offset..offset + size,
+ non_coherent: false,
}
}
- n::MemoryHeap::Private => {
- //TODO: check for aliasing
- let options = MTLResourceOptions::StorageModePrivate
- | MTLResourceOptions::CPUCacheModeDefaultCache;
- let raw = self.shared.device.lock().new_buffer(size, options);
+ n::MemoryHeap::Native(ref heap) => {
+ let options = conv::resource_options_from_storage_and_cache(
+ heap.storage_mode(),
+ heap.cpu_cache_mode(),
+ );
+ let raw = heap.new_buffer(size, options).unwrap_or_else(|| {
+ // TODO: disable hazard tracking?
+ self.shared.device.lock().new_buffer(size, options)
+ });
raw.set_label(name);
n::Buffer::Bound {
raw,
options,
- range: 0..size,
+ range: 0..size, //TODO?
+ non_coherent: false,
}
}
};
@@ -2558,6 +2550,7 @@ impl hal::device::Device<Backend> for Device {
ref raw,
ref range,
options,
+ non_coherent: _,
} => (raw, range, options),
n::Buffer::Unbound { .. } => panic!("Unexpected Buffer::Unbound"),
};
diff --git a/src/backend/metal/src/native.rs b/src/backend/metal/src/native.rs
index c25e8cabe..948da57e2 100644
--- a/src/backend/metal/src/native.rs
+++ b/src/backend/metal/src/native.rs
@@ -412,6 +412,7 @@ pub enum Buffer {
raw: metal::Buffer,
range: Range<u64>,
options: metal::MTLResourceOptions,
+ non_coherent: bool,
},
}
@@ -918,6 +919,7 @@ unsafe impl Sync for Memory {}
#[derive(Debug)]
pub(crate) enum MemoryHeap {
Private,
+ PublicNonCoherent,
Public(MemoryTypeId, metal::Buffer),
Native(metal::Heap),
}
diff --git a/src/backend/metal/src/soft.rs b/src/backend/metal/src/soft.rs
index 278df1f27..0b1438f88 100644
--- a/src/backend/metal/src/soft.rs
+++ b/src/backend/metal/src/soft.rs
@@ -148,6 +148,7 @@ pub enum BlitCommand {
dst: BufferPtr,
region: hal::command::BufferImageCopy,
},
+ InvalidateBuffer(BufferPtr),
}
#[derive(Clone, Debug)] |
The main problem with the old DOWNLOAD type, which got removed in #3514, was that invalidating it resulted in a pipeline stall, since we need to submit that to a queue as a command. Perhaps, instead we can do the following:
HOST_READ
access would be implemented assynchronize_resource
I'm not 100% sure that this is a valid implementation, but it sounds like it could be one, and it wouldn't include any stalls.
The text was updated successfully, but these errors were encountered: