From c68dfdaf79215129b8735a67a53eccda9c2190e3 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 18:28:30 -0800 Subject: [PATCH 01/12] Create 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 docs/src/rfcs/002-global-buffer.md diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md new file mode 100644 index 0000000000..50f59a0067 --- /dev/null +++ b/docs/src/rfcs/002-global-buffer.md @@ -0,0 +1,20 @@ + +# Summary + +State the problem that this RFC is trying to address clearly but briefly + +# Explanation + +Give examples, elaborate on the proposal + +# Drawbacks + +List potential issues that one would want to have discussed in the RFC comment section + +# Alternatives + +A list of potential alternatives, though sometimes they can arise from the comments as well. + +# Prior art + +Usually this will involve looking at current shading languages out there to see if we can either borrow concepts from them, or if we can improve upon existing concepts. From 2dda6b42626ce2c051f3778f5057066bb8b25928 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 20:00:15 -0800 Subject: [PATCH 02/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index 50f59a0067..046a992630 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -3,10 +3,101 @@ State the problem that this RFC is trying to address clearly but briefly +This proposal aims to define a core interface for storage buffers aka "buffer blocks", specifically for compute shaders but potentially applicable for other modes. Accessing mutable global memory from a compute shader is typically unsafe, and may require the use of barriers to ensure correctness. This proposal exposes only a minimal public interface, and a private interface that is intended to be built upon with higher level abstractions. + # Explanation Give examples, elaborate on the proposal +## Global Storage + +For the purposes of simplicity and consistency, the term "global" will be used both for gl_GlobalInvocationID (and friends) and storage buffers using the StorageBuffer storage class, as well as potentially OpenCL global memory using the CrossWorkgroup storage class. + +Currently, the StorageBuffer struct in spirv-std is used for both mutable and immutable buffers. It has load and store methods, for T: Copy. Instead, Global and GlobalMut structs will be included. This will require an additional attribute, "global", which may convert to either StorageBuffer or CrossWorkgroup (for kernel mode). + + // spirv-std/src/lib.rs + + #[allow(unused_attributes)] + #[spirv(global)] + pub struct Global<'a, T> { + x: &'a T + } + +Here a "new" method is provide for convenience and privacy: + + impl<'a, T> Global<'a, T> { + pub(crate) fn new(x: &'a T) -> Self { + Self { x } + } + } + +Global implments the same "load" function common to spirv-std, for Copy types: + + impl Global<'a, T> { + pub fn load(&self) -> T { + *self.x + } + } + +In addition, Global also acts like a slice, when it wraps a slice. Note that we can't use Index / AsRef / Deref here because those traits return a &T, not a T. Note that "get_unchecked" is unsafe, and potentially accesses out of bounds data. + + impl<'a, T> Global<'a, [T]> { + pub fn get>(&self, index: I) -> Global<'a, >::Output> { + self.x.get(index) + .map(|x| Global::new(x)) + } + pub unsafe fn get_unchecked(&self, index: I) -> Self::Output { + Global::new(self.x.get_unchecked(index)) + } + } + +Similar to Global, GlobalMut will also be introduced: + + #[allow(unused_attributes)] + #[spirv(global)] + pub struct GlobalMut<'a, T> { + x: &'a mut T + } + + impl<'a, T> GlobalMut<'a, T> { + pub(crate) fn new(x: &'a mut T) -> Self { + Self { x } + } + } + + impl GlobalMut<'a, T> { + pub fn load(&self) -> T { + *self.x + } + pub fn store(&mut self, x: T) { + *self.x = T; + } + } + + impl<'a, T> GlobalMut<'a, [T]> { + pub fn get>(&self, index: I) -> Option>::Output>> { + self.x.get(index) + .map(|x| Global::new(x)) + } + pub unsafe fn get_unchecked(&self, index: I) -> Global<'a, >::Output { + Global::new(self.x.get_unchecked(index)) + } + pub fn get_mut>(&mut self, index: I) -> Option>::Output>> { + self.x.get(index) + .map(|x| GlobalMut::new(x)) + } + pub unsafe fn get_unchecked_mut(&mut self, index: I) -> GlobalMut<'a, >::Output> { + GlobalMut::new(self.x.get_unchecked(index)) + } + } + +The intent with Global and GlobalMut, is that they are returned from higher level abstractions. SPIR-V has a requirement that with several pointer operations, including Load, Store, and AccessChain, the storage class of the pointer matches the storage class of the variable. That means if we have a buffer, ie a *{StorageBuffer} RuntimeArray, we can't return a plain reference or plain slice, ie *{Function} T, to the user to perform the derefence. There are potentially alternatives, but the simplest approach is to just maintain the wrapped Global / GlobalMut struct until the store or load. This is the current design of spirv-std. + +Global and GlobalMut are essentially either &(mut)T or &(mut)\[T\]. They are only to be provided to user code when they are not aliased. Safe abstractions should ensure that this is the case, and then yield these objects. + +Due to the nature of GlobalMut, it is critical that the compiler prevents GlobalMut from being passed in as a parameter, since it marks a safe to access reference or slice. Global, since it is immutable, is not problematic, but might as well be banned as well. Further, the spirv attribute, at least for "storage_buffer" and "cross_workgroup", etc, should be limited only to within the crate. + + # Drawbacks List potential issues that one would want to have discussed in the RFC comment section From 8f34f68bf683fc2c231aad9f8c8ec639bc164876 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 20:52:03 -0800 Subject: [PATCH 03/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index 046a992630..bd1e41ab40 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -97,6 +97,78 @@ Global and GlobalMut are essentially either &(mut)T or &(mut)\[T\]. They are onl Due to the nature of GlobalMut, it is critical that the compiler prevents GlobalMut from being passed in as a parameter, since it marks a safe to access reference or slice. Global, since it is immutable, is not problematic, but might as well be banned as well. Further, the spirv attribute, at least for "storage_buffer" and "cross_workgroup", etc, should be limited only to within the crate. +## GlobalBlock + +Per the SPIRV-V spec, "buffer blocks" must have a variable with StorageBuffer storage class point to a "block" decorated struct. On certain platforms, the block decoration is ignored, on others the shader will not access the buffer at all. It may be possible to just have one Block struct, but it may be helpful to have one specific to the Global -> StorageBuffer / CrossWorkgroup classes. It is my understanding that Rust requires fields of public structs to be public, even though this was relaxed at one point. This struct is hidden from the docs because it is merely meant to be a placeholder, for the block decoration. The "block" attribute will be added to the compiler. For convenience, this struct will impl Deref and DerefMut. This allows it to be treated as if it is the interior type, since it isn't meant to do anything but be a block decorated wrapper. + + #[doc(hidden)] + #[allow(unused_attributes)] + #[spirv(block)] + #[repr(transparent)] + pub struct GlobalBlock(B); + + use core::ops::{Deref, DerefMut}; + + impl Deref for GlobalBlock { + type Target = B; + fn deref(&self) -> &B { + &*self.0 + } + } + + impl DerefMut for GlobalBlock { + fn deref_mut(&mut self) -> &B { + &mut *self.0 + } + } + +As noted above, SPIR-V requires that storage classes match. In Rust, it's pretty common to borrow an object, especially when calling a method on it. However, this may produce a *Function pointer to it, which may cause problems. Thus, this GlobalBlock is specific to the "Global" storage class, in case hard coding is necessary. + +## AsSlice and AsMutSlice + +These traits allow us to be generic over types that can be borrowed as slices. This will be used to acquire a slice from either a slice or an array in GlobalBuffer(Mut): + + pub(crate) trait AsSlice { + type Item; + fn as_slice(&self) -> &[Self::Item]; + } + + pub(crate) trait AsMutSlice: AsSlice { + fn as_mut_slice(&mut self) -> &[Self::Item]; + } + + impl AsSlice for [T] { + type Item = T; + fn as_slice(&self) -> &[Self::Item] { + self + } + } + + impl AsMutSlice for [T] { + fn as_slice(&mut self) -> &[Self::Item] { + self + } + } + + impl AsSlice for [T; N] { + type Item = T; + fn as_slice(&self) -> &[Self::Item] { + self.as_ref() + } + } + + impl AsSlice for [T; N] { + fn as_slice(&self) -> &[Self::Item] { + self.as_mut() + } + } + +These traits may potentially be private to a "global_buffer" module. Note that the implementation for arrays requires the unstable feature "min_const_generics", but this is set to be stable in 1.50 (currently 1.48). The AsRef trait does not specify T, ie it's a generic parameter to the trait not an associated type, which makes it more cumbersone to use. + + + + + # Drawbacks From be7c3cded92046b46a19a6e06fb45e3cbc2a00e3 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 22:43:33 -0800 Subject: [PATCH 04/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 132 +++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index bd1e41ab40..2cd2608b28 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -165,10 +165,142 @@ These traits allow us to be generic over types that can be borrowed as slices. T These traits may potentially be private to a "global_buffer" module. Note that the implementation for arrays requires the unstable feature "min_const_generics", but this is set to be stable in 1.50 (currently 1.48). The AsRef trait does not specify T, ie it's a generic parameter to the trait not an associated type, which makes it more cumbersone to use. +## GlobalBuffer +The GlobalBuffer and GlobalBufferMut structs will be used as entry parameters. They have a reference to a GlobalBlock\, where B may be a slice aka RuntimeArray, an array, or some arbitrary type (like an iterface block of glam Mat or Vec types). For Copy types, GlobalBuffer implements "load", returning a copy of the data. For arrays and slices, it implements "as_slice", which returns a Global<\[T]>. This is safe because it is immutable: + + #[allow(unused_attributes)] + #[spirv(global)] + pub struct GlobalBuffer<'a, B> { + x: &'a GlobalBlock + } + + impl<'a, B: Copy> GlobalBuffer<'a, B> { + pub fn load(&self) -> B { + *self.x + } + } + + impl<'a, T, B: AsSlice> GlobalBuffer<'a, B> { + pub fn as_slice(&self) -> Global<'a, [T]> { + Global::new(self.x.as_slice()) + } + } + +MutableBuffers are more restricted. There will be no public interface, instead "as_unsafe_slice" and "as_unsafe_mut_slice", private to spirv-std, which are unsafe because multiple invocations could both read and write to it. Potentially, a safe high-level abstraction like an iterator will partition this slice between invocations and perform any necessary barriers. + + #[allow(unused_attributes)] + #[spirv(global)] + pub struct GlobalBufferMut<'a, B> { + x: &'a mut GlobalBlock + } + + impl<'a, T, B: AsSlice> GlobalBufferMut<'a, B> { + pub(crate) unsafe fn as_unsafe_slice(&self) -> Global<'a, [T]> { + Global::new(self.x.as_slice()) + } + } + + impl<'a, T, B: AsMutSlice> GlobalBufferMut<'a, B> { + pub(crate) unsafe fn as_unsafe_mut_slice(&mut self) -> GlobalMut<'a, [T]> { + GlobalMut::new(self.x.as_mut_slice()) + } + } + +## GlobalIndex + +In order to do work in parallel, the shader must acquire the global index, ie gl_GlobalInvocationIndex. Use of this value presents several problems. First, the programmer must take care to have each invocation access different memory, or use appropriate synchronization. The index may be out of bounds, larger than the length of the buffer. Using the value conditionally may lead to non-uniform control flow, impeding optimizations or leading to undefined behavior. Lastly, both non-uniform control flow and invalid memory use may invalidate other safe abstractions. In order to provide some protection, a GlobalIndex struct will wrap the value, which allows it to be returned without allowing the value to be read. + #[derive(Clone, Copy)] + pub(crate) struct GlobalIndex(u32); + + pub(crate) fn global_index() -> GlobalIndex; +Not sure how to implement "global_index". Currently, an Input with the "GlobalInvocationID" builtin decoration, where u32x3 is a vector of u32's, will then be stored with the global id. In combination with global size, acquired the same way, the global index can be computed. However, this would have to be built in to the compiler, to emit the appropriate input variables and expose them to spirv-std. Or maybe via the asm! macro. At some point it may be useful to have a "global_xyz" function, but I'm uncertain how to handle these. Is everything just a GlobalIndex? Do we have a GlobalDim, a GlobaX, etc? I'm not sure that much can be done from a security standpoint beyond just wrapping the type, and for now just getting the global index is enough to build on. +The "get" and "get_unchecked" methods on Global and (mut equivalents for GlobalMut) are overloaded via the GetExt and GetMutExt traits for GlobalIndex, ensuring that the value is protected. The get / get_mut methods aare safe because the Global(Mut) must have been acquired via a safe method, or the "unsafe_as_slice" and "unsafe_as_mut_slice" methods, for which the caller has already taken responsibility for safety. Futher, these methods are private to spirv-std. + + pub(crate) trait GetExt { + type Output; + fn get(&self, index: I) -> Option; + unsafe fn get_unchecked(&self, index: I) -> Self::Output; + } + + pub(crate) trait GetMutExt { + type Output; + fn get_mut(&mut self, index: I) -> Option; + unsafe fn get_unchecked_mut(&mut self, index: I) -> Self::Output; + } + + + impl<'a, T> GetExt for Global<'a, [T]> { + type Output = Global<'a, T>; + fn get(&self, index: GlobalIndex) -> Option { + self.get(index.0) + } + unsafe fn get_unchecked(&self, index: GlobalIndex) -> Self::Output [ + self.get_unchecked(index.0) + } + } + + impl<'a, T> GetExt for GlobalMut<'a, [T]> { + type Output = Global<'a, T>; + fn get(&self, index: GlobalIndex) -> Option { + self.get(index.0) + } + unsafe fn get_unchecked(&self, index: GlobalIndex) -> Self::Output [ + self.get_unchecked(index.0) + } + } + + impl<'a, T> GetMutExt for GlobalMut<'a, [T]> { + type Output = GlobalMut<'a, T>; + fn get_mut(&mut self, index: GlobalIndex) -> Option { + self.get_mut(index.0) + } + unsafe fn get_unchecked_mut(&mut self, index: GlobalIndex) -> Self::Output [ + self.get_unchecked_mut(index.0) + } + } + +## Example + +As an example, suppose that a hypothetical "zip_mut_with" (see ndarray) function was added to spirv-std. + + // spirv-std/src/lib.rs + + impl<'a, T, const N: usize> BufferMut<[T; N]> { + pub fn zip_mut_with(mut self, rhs: Buffer<[T; N]>, constants: C, f: impl fn(GloablMut, Global, C)) { + let index = global_index(); + barrier(); // barrier for any previous writes to self + self.get_mut(index).zip(rhs.get(index)) + .map(|(lhs, rhs)| f(lhs, rhs, constants)); + // this method consumes self + // alternatively take self by reference, and emit a barrier here + } + } + + // scaled_add.rs + use spirv_std::{Buffer, BufferMut}; + + type T = f32; + const N: usize = 1024; + + #[allow(unused_attributes)] + #[spirv(gl_compute(local_size=64)] + pub fn scaled_add( + #[spirv(descriptor_set=1, binding=0)] x: Buffer<[T; N]>, + #[spirv(descriptor_set=1, binding=1)] mut y: BufferMut<[T; N]>, + push_constants: PushConstant + ) { + let alpha = push_constants.load(); + y.zip_mut_with(x, alpha, |(y, x, alpha)| { + let result = y.load() + alpha * x.load(); + y.store(result); + }); + } + +To be clear, this proposal neglects to include Barriers or PushConstants. Potentially, having functions like "zip_mut_with" or iterators consume their buffers, combined with the fn closure (which doesn't capture), will be secure enough to make public. The idea is that if y is consumed, it can't be used again in a subsequent operation, and thus no barriers are required. But I'm not sure how that fits into a larger ecosystem, where some functions may borrow buffers, so that subsequent operations can be performed. These problems are left for futher work. # Drawbacks From 390130fdfdb24212362f82e31935d75398dc6fda Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 22:44:23 -0800 Subject: [PATCH 05/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index 2cd2608b28..97533d8e96 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -1,14 +1,10 @@ # Summary -State the problem that this RFC is trying to address clearly but briefly - This proposal aims to define a core interface for storage buffers aka "buffer blocks", specifically for compute shaders but potentially applicable for other modes. Accessing mutable global memory from a compute shader is typically unsafe, and may require the use of barriers to ensure correctness. This proposal exposes only a minimal public interface, and a private interface that is intended to be built upon with higher level abstractions. # Explanation -Give examples, elaborate on the proposal - ## Global Storage For the purposes of simplicity and consistency, the term "global" will be used both for gl_GlobalInvocationID (and friends) and storage buffers using the StorageBuffer storage class, as well as potentially OpenCL global memory using the CrossWorkgroup storage class. From 4e233f8e4e9fba64359e23ff15cd14ef0b5228ea Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 23:14:12 -0800 Subject: [PATCH 06/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index 97533d8e96..fee9f078fd 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -302,10 +302,14 @@ To be clear, this proposal neglects to include Barriers or PushConstants. Potent List potential issues that one would want to have discussed in the RFC comment section +Not really a drawback, but it may be necessary to move some code into submodules with spirv-std, in order to maintain privacy, prevent access outside of explicit functions, and hide utility types, traits, or functions. + +This proposal neglects how StorageBuffers will work in other shaders, where access patterns are different. + # Alternatives A list of potential alternatives, though sometimes they can arise from the comments as well. # Prior art -Usually this will involve looking at current shading languages out there to see if we can either borrow concepts from them, or if we can improve upon existing concepts. +Typically in gpu code, which is often some superset of c, "buffers" are essentially just pointers and the user just indexes them freely utilizing global_id and friends. While this "works", and is fairly straigtforward as well as being typical to programming in general, it can potentially lead to various memory use errors. In particular, reading / writing out of bounds. The shader doesn't just have access to the buffers it is provided as parameters, it actually has access to the entire gpu memory space, and invalid writes will be written to other buffers used by other shaders / kernels. This is very hard to troubleshoot, because the code causing the problem may fuction correctly by itself, but "poison" other shaders that happen to be used in some sequence. Rust as a language seeks to prevent and or limit such errors to small, well scrutinized blocks of code. From b6dff2c1421c1a5ad49f9712ba5534fdc2f7f6af Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Thu, 26 Nov 2020 23:18:36 -0800 Subject: [PATCH 07/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index fee9f078fd..c470acd977 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -91,7 +91,7 @@ The intent with Global and GlobalMut, is that they are returned from higher leve Global and GlobalMut are essentially either &(mut)T or &(mut)\[T\]. They are only to be provided to user code when they are not aliased. Safe abstractions should ensure that this is the case, and then yield these objects. -Due to the nature of GlobalMut, it is critical that the compiler prevents GlobalMut from being passed in as a parameter, since it marks a safe to access reference or slice. Global, since it is immutable, is not problematic, but might as well be banned as well. Further, the spirv attribute, at least for "storage_buffer" and "cross_workgroup", etc, should be limited only to within the crate. +Due to the nature of GlobalMut, it is critical that the compiler prevents GlobalMut from being passed in as a parameter, since it marks a safe to access reference or slice. Global, since it is immutable, is not problematic, but might as well be banned as well, as it isn't meant to be used this way, and will not be portable. Further, the spirv attribute, at least for "storage_buffer" and "cross_workgroup", etc, should be limited only to within the crate. ## GlobalBlock From 534bb0827a8ec05ef5c587c319677ae98cc468c3 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Fri, 27 Nov 2020 12:25:15 -0800 Subject: [PATCH 08/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index c470acd977..a24cd0fb24 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -42,10 +42,12 @@ In addition, Global also acts like a slice, when it wraps a slice. Note that we self.x.get(index) .map(|x| Global::new(x)) } - pub unsafe fn get_unchecked(&self, index: I) -> Self::Output { + pub(crate) unsafe fn get_unchecked(&self, index: I) -> Self::Output { Global::new(self.x.get_unchecked(index)) } } + +The "get_unchecked" method could be public, as it emulates core::slice::get_unchecked. However, this would of course expose an unsafe public interface. Instead, potentially a future addition would be an Iter (like slice::Iter), which allows for sequential access without repeated bounds checks. The intent is to allow for something like a "Chunks" iterator, which could yield a Global<\[T]>, which can then be used in some user defined function per invocation. Similar to Global, GlobalMut will also be introduced: @@ -75,14 +77,14 @@ Similar to Global, GlobalMut will also be introduced: self.x.get(index) .map(|x| Global::new(x)) } - pub unsafe fn get_unchecked(&self, index: I) -> Global<'a, >::Output { + pub(crate) unsafe fn get_unchecked(&self, index: I) -> Global<'a, >::Output { Global::new(self.x.get_unchecked(index)) } pub fn get_mut>(&mut self, index: I) -> Option>::Output>> { self.x.get(index) .map(|x| GlobalMut::new(x)) } - pub unsafe fn get_unchecked_mut(&mut self, index: I) -> GlobalMut<'a, >::Output> { + pub(crate) unsafe fn get_unchecked_mut(&mut self, index: I) -> GlobalMut<'a, >::Output> { GlobalMut::new(self.x.get_unchecked(index)) } } @@ -261,16 +263,18 @@ The "get" and "get_unchecked" methods on Global and (mut equivalents for GlobalM ## Example -As an example, suppose that a hypothetical "zip_mut_with" (see ndarray) function was added to spirv-std. +As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_with](https://docs.rs/ndarray/0.13.1/ndarray/struct.ArrayBase.html#method.zip_mut_with)) function was added to spirv-std. // spirv-std/src/lib.rs impl<'a, T, const N: usize> BufferMut<[T; N]> { pub fn zip_mut_with(mut self, rhs: Buffer<[T; N]>, constants: C, f: impl fn(GloablMut, Global, C)) { let index = global_index(); - barrier(); // barrier for any previous writes to self - self.get_mut(index).zip(rhs.get(index)) - .map(|(lhs, rhs)| f(lhs, rhs, constants)); + barrier(); // barrier for any previous writes to self + unsafe { + self.as_unsafe_mut_slice().get_mut(index).zip(rhs.as_slice().get(index)) + .map(|(lhs, rhs)| f(lhs, rhs, constants)); + } // this method consumes self // alternatively take self by reference, and emit a barrier here } @@ -304,12 +308,17 @@ List potential issues that one would want to have discussed in the RFC comment s Not really a drawback, but it may be necessary to move some code into submodules with spirv-std, in order to maintain privacy, prevent access outside of explicit functions, and hide utility types, traits, or functions. -This proposal neglects how StorageBuffers will work in other shaders, where access patterns are different. +This proposal neglects how GlobalBuffer(Mut) will work in other shaders, where access patterns are different. # Alternatives -A list of potential alternatives, though sometimes they can arise from the comments as well. +A list of potential alternatives, though sometimes they can arise from the comments as well. + +Potentially, instead of GlobalBuffer and GlobalBufferMut, we might have 2 or 3 variants, for 4 or 6 structs total. This eliminates the need for the awkward "AsSlice" trait, which enables the as_slice / as_unsafe_slice methods, which in turn will potentially enable higher abstractions like iterators. That means that AsSlice potentially needs to be visable, or else traits are simply implemented twice. There are different ways to rework this for better ergonomics and stability. # Prior art Typically in gpu code, which is often some superset of c, "buffers" are essentially just pointers and the user just indexes them freely utilizing global_id and friends. While this "works", and is fairly straigtforward as well as being typical to programming in general, it can potentially lead to various memory use errors. In particular, reading / writing out of bounds. The shader doesn't just have access to the buffers it is provided as parameters, it actually has access to the entire gpu memory space, and invalid writes will be written to other buffers used by other shaders / kernels. This is very hard to troubleshoot, because the code causing the problem may fuction correctly by itself, but "poison" other shaders that happen to be used in some sequence. Rust as a language seeks to prevent and or limit such errors to small, well scrutinized blocks of code. + +As far as I know, there isn't any gpu language that hides global invocation builtins like the global id, and shaders can index into declared buffer inputs without restriction, necessary synchronization is the responsibility of the programmer. Note that in some cases, Metal for example, out of bounds writes are specified as ignored. This prevents "poisoning" as described earlier, but doesn't fully protect against security vulnerabilities, and arguably out of bounds reads or writes are bugs, which should be caught and fixed. Safe Rust promises to prevent out of bounds accesses. + From d7f9d04df954b899eb9ccf44aaf4b6b88358a6a4 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Fri, 27 Nov 2020 12:42:01 -0800 Subject: [PATCH 09/12] Update 002-global-buffer.md --- docs/src/rfcs/002-global-buffer.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/002-global-buffer.md index a24cd0fb24..7f23d8623c 100644 --- a/docs/src/rfcs/002-global-buffer.md +++ b/docs/src/rfcs/002-global-buffer.md @@ -289,8 +289,8 @@ As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_ #[allow(unused_attributes)] #[spirv(gl_compute(local_size=64)] pub fn scaled_add( - #[spirv(descriptor_set=1, binding=0)] x: Buffer<[T; N]>, - #[spirv(descriptor_set=1, binding=1)] mut y: BufferMut<[T; N]>, + #[spirv(descriptor_set=1, binding=0)] x: GlobalBuffer<[T; N]>, + #[spirv(descriptor_set=1, binding=1)] mut y: GlobalBufferMut<[T; N]>, push_constants: PushConstant ) { let alpha = push_constants.load(); @@ -304,17 +304,17 @@ To be clear, this proposal neglects to include Barriers or PushConstants. Potent # Drawbacks -List potential issues that one would want to have discussed in the RFC comment section - Not really a drawback, but it may be necessary to move some code into submodules with spirv-std, in order to maintain privacy, prevent access outside of explicit functions, and hide utility types, traits, or functions. -This proposal neglects how GlobalBuffer(Mut) will work in other shaders, where access patterns are different. +This proposal neglects how GlobalBuffer(Mut) will work in other shaders, where access patterns may be different. + +Barriers are not adressed in this proposal, but GlobalIndex and the scheme of restricting access all have to work in concert. Without some more thought into how barriers will work and how that interacts with the rest of the system, it won't be possible to allow access to GlobalBufferMut, which is of course necessary to actually use compute shaders. -# Alternatives +GlobalIndex is meant to be a simple way of performing parallel work, but restricting access to the global id means that at some point combinations of operations, combined with global size, will need to be exposed, though potentially this can be handled with iterators. -A list of potential alternatives, though sometimes they can arise from the comments as well. +# Alternatives -Potentially, instead of GlobalBuffer and GlobalBufferMut, we might have 2 or 3 variants, for 4 or 6 structs total. This eliminates the need for the awkward "AsSlice" trait, which enables the as_slice / as_unsafe_slice methods, which in turn will potentially enable higher abstractions like iterators. That means that AsSlice potentially needs to be visable, or else traits are simply implemented twice. There are different ways to rework this for better ergonomics and stability. +Potentially, instead of GlobalBuffer and GlobalBufferMut, we might have 2 or 3 variants, for 4 or 6 structs total. Those being one for arbitrary T: Copy, one for Arrays, and one for RuntimeArrays. This eliminates the need for the awkward "AsSlice" trait, which enables the as_slice / as_unsafe_slice methods, which in turn will potentially enable higher abstractions like iterators. That means that AsSlice potentially needs to be visable, or else traits are simply implemented twice. There are different ways to rework this for better ergonomics and stability. # Prior art From 893d70dd6a3b5c5d06039b621fd9289519bb71aa Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Fri, 27 Nov 2020 12:42:52 -0800 Subject: [PATCH 10/12] Rename 002-global-buffer.md to 00X-global-buffer.md --- docs/src/rfcs/{002-global-buffer.md => 00X-global-buffer.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/src/rfcs/{002-global-buffer.md => 00X-global-buffer.md} (100%) diff --git a/docs/src/rfcs/002-global-buffer.md b/docs/src/rfcs/00X-global-buffer.md similarity index 100% rename from docs/src/rfcs/002-global-buffer.md rename to docs/src/rfcs/00X-global-buffer.md From 5e769379c539b3b4e724525a7bc0e0df0058c223 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Fri, 27 Nov 2020 13:02:33 -0800 Subject: [PATCH 11/12] Update 00X-global-buffer.md --- docs/src/rfcs/00X-global-buffer.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/rfcs/00X-global-buffer.md b/docs/src/rfcs/00X-global-buffer.md index 7f23d8623c..6b5fbc71cb 100644 --- a/docs/src/rfcs/00X-global-buffer.md +++ b/docs/src/rfcs/00X-global-buffer.md @@ -267,8 +267,8 @@ As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_ // spirv-std/src/lib.rs - impl<'a, T, const N: usize> BufferMut<[T; N]> { - pub fn zip_mut_with(mut self, rhs: Buffer<[T; N]>, constants: C, f: impl fn(GloablMut, Global, C)) { + impl<'a, T, const N: usize> GlobalBufferMut<[T; N]> { + pub fn zip_mut_with(mut self, rhs: &GlobalBuffer<[T; N]>, constants: C, f: impl fn(GloablMut, Global, C)) { let index = global_index(); barrier(); // barrier for any previous writes to self unsafe { @@ -294,7 +294,7 @@ As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_ push_constants: PushConstant ) { let alpha = push_constants.load(); - y.zip_mut_with(x, alpha, |(y, x, alpha)| { + y.zip_mut_with(&x, alpha, |(y, x, alpha)| { let result = y.load() + alpha * x.load(); y.store(result); }); From 03e3753c0dd5abb8c48ddd0197bf9ae90f5ebe63 Mon Sep 17 00:00:00 2001 From: charles-r-earp Date: Sat, 28 Nov 2020 11:05:51 -0800 Subject: [PATCH 12/12] Update 00X-global-buffer.md --- docs/src/rfcs/00X-global-buffer.md | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/src/rfcs/00X-global-buffer.md b/docs/src/rfcs/00X-global-buffer.md index 6b5fbc71cb..b72162d83d 100644 --- a/docs/src/rfcs/00X-global-buffer.md +++ b/docs/src/rfcs/00X-global-buffer.md @@ -267,8 +267,8 @@ As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_ // spirv-std/src/lib.rs - impl<'a, T, const N: usize> GlobalBufferMut<[T; N]> { - pub fn zip_mut_with(mut self, rhs: &GlobalBuffer<[T; N]>, constants: C, f: impl fn(GloablMut, Global, C)) { + impl<'a, T> GlobalBufferMut<[T]> { + pub fn zip_mut_with(mut self, rhs: &GlobalBuffer<[T]>, constants: C, f: impl fn(GloablMut, Global, C)) { let index = global_index(); barrier(); // barrier for any previous writes to self unsafe { @@ -283,20 +283,22 @@ As an example, suppose that a hypothetical "zip_mut_with" (see ndarray [zip_mut_ // scaled_add.rs use spirv_std::{Buffer, BufferMut}; - type T = f32; - const N: usize = 1024; + // probably shared with host code + #[derive(Clone, Copy)] + pub struct ScaledAddConsts { + alpha: f32 + } #[allow(unused_attributes)] #[spirv(gl_compute(local_size=64)] pub fn scaled_add( - #[spirv(descriptor_set=1, binding=0)] x: GlobalBuffer<[T; N]>, - #[spirv(descriptor_set=1, binding=1)] mut y: GlobalBufferMut<[T; N]>, - push_constants: PushConstant + #[spirv(descriptor_set=1, binding=0)] x: GlobalBuffer<[f32]>, + #[spirv(descriptor_set=1, binding=1)] mut y: GlobalBufferMut<[f32]>, + push_constants: PushConstant ) { - let alpha = push_constants.load(); - y.zip_mut_with(&x, alpha, |(y, x, alpha)| { - let result = y.load() + alpha * x.load(); - y.store(result); + let c = push_constants.load(); + y.zip_mut_with(&x, c, |(y, x, c)| { + y.then(|y| y + c.alpha * x.load()); }); }