Add an example/regression test for lifetime-only kernels

juntyr · Nov 11, 2024 · 40e02ed · 40e02ed
1 parent f2a6654
commit 40e02ed
Show file tree

Hide file tree

Showing 5 changed files with 134 additions and 1 deletion.
diff --git a/Cargo.toml b/Cargo.toml
@@ -3,7 +3,8 @@ resolver = "2"
 
 members = [
     ".", "rust-cuda-derive", "rust-cuda-kernel",
-    "examples/derive", "examples/print", "examples/single-source",
+    "examples/derive", "examples/lifetime", "examples/print",
+    "examples/single-source",
 ]
 default-members = [
     ".", "rust-cuda-derive", "rust-cuda-kernel",

diff --git a/examples/lifetime/.cargo/config.toml b/examples/lifetime/.cargo/config.toml
@@ -0,0 +1,2 @@
+[target.nvptx64-nvidia-cuda]
+rustflags = ["-Zunstable-options", "-Clinker-flavor=llbc", "-Ctarget-cpu=sm_35", "-Ccodegen-units=1", "-Clink-arg=-O3"]
diff --git a/examples/lifetime/Cargo.toml b/examples/lifetime/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "lifetime"
+version = "0.1.0"
+authors = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+edition = { workspace = true }
+rust-version = { workspace = true }
+
+[lib]
+crate-type = ["cdylib", "rlib"]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[target.'cfg(target_os = "cuda")'.dependencies]
+rust-cuda = { workspace = true, features = ["kernel", "device"] }
+
+[target.'cfg(not(target_os = "cuda"))'.dependencies]
+rust-cuda = { workspace = true, features = ["kernel", "host"] }
+
+[lints]
+workspace = true
diff --git a/examples/lifetime/src/lib.rs b/examples/lifetime/src/lib.rs
@@ -0,0 +1,54 @@
+#![allow(missing_docs)] // FIXME: use expect
+#![no_std]
+#![cfg_attr(target_os = "cuda", feature(abi_ptx))]
+#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))]
+#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))]
+#![feature(cfg_version)]
+#![feature(type_alias_impl_trait)]
+#![feature(decl_macro)]
+
+extern crate alloc;
+
+#[rust_cuda::kernel::kernel(pub use link! for impl)]
+#[kernel(allow(ptx::local_memory_use))]
+pub fn kernel<'a, 'b>( // generic over lifetimes only: no type or const generics
+    a: &'a rust_cuda::kernel::param::PerThreadShallowCopy<u32>,
+    b: &'b rust_cuda::kernel::param::ShallowInteriorMutable<core::sync::atomic::AtomicU32>,
+    c: &rust_cuda::kernel::param::DeepPerThreadBorrow< // elided lifetime next to named 'a and 'b
+        Option<
+            rust_cuda::utils::adapter::RustToCudaWithPortableBitCopySemantics<
+                core::num::NonZeroU32,
+            >,
+        >,
+    >,
+) {
+    let _ = (a, c); // `a` and `c` are accepted but deliberately unused
+    b.fetch_add(1, core::sync::atomic::Ordering::Relaxed); // add 1 to `b`, relaxed ordering
+}
+
+#[cfg(target_os = "cuda")]
+mod cuda_prelude { // CUDA-target-only items: global allocator, panic and alloc-error handlers
+    use rust_cuda::device::alloc::PTXAllocator;
+
+    #[global_allocator]
+    static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator;
+
+    #[panic_handler]
+    fn panic(info: &::core::panic::PanicInfo) -> ! {
+        // Pretty-format and print the panic message,
+        // but do not allow dynamic formatting.
+        rust_cuda::device::utils::pretty_print_panic_info(info, false);
+
+        // SAFETY: no mutable data is shared with the kernel
+        unsafe { rust_cuda::device::utils::exit() }
+    }
+
+    #[alloc_error_handler]
+    #[track_caller]
+    fn alloc_error_handler(layout: ::core::alloc::Layout) -> ! {
+        rust_cuda::device::utils::pretty_print_alloc_error(layout); // report the failed layout
+
+        // SAFETY: no mutable data is shared with the kernel
+        unsafe { rust_cuda::device::utils::exit() }
+    }
+}
diff --git a/examples/lifetime/src/main.rs b/examples/lifetime/src/main.rs
@@ -0,0 +1,54 @@
+#![allow(missing_docs)] // FIXME: use expect
+
+use lifetime::{kernel, link};
+
+fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> {
+    // Link the lifetime-only-generic CUDA kernel to a local marker type
+    struct KernelPtx<'a, 'b>(core::marker::PhantomData<(&'a (), &'b ())>);
+    link! { impl kernel<'a, 'b> for KernelPtx }
+
+    // Initialize the CUDA API (must succeed before any other CUDA call below)
+    rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?;
+
+    // Get the first CUDA GPU device (index 0)
+    let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?;
+
+    // Create a CUDA context associated to this device; kept alive until `main` returns
+    let _context = rust_cuda::host::CudaDropWrapper::from(
+        rust_cuda::deps::rustacuda::context::Context::create_and_push(
+            rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST
+                | rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO,
+            device,
+        )?,
+    );
+
+    // Create a new non-blocking CUDA stream to submit kernels to
+    let mut stream =
+        rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new(
+            rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING,
+            None,
+        )?);
+
+    let mut shared = core::sync::atomic::AtomicU32::new(0); // passed as the kernel's `b` argument
+
+    // Create a new instance of the CUDA kernel and prepare the launch config
+    let mut kernel = rust_cuda::kernel::TypedPtxKernel::<kernel>::new::<KernelPtx>(None);
+    let config = rust_cuda::kernel::LaunchConfig {
+        grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), // grid size 1 in x
+        block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), // block size 4 in x
+        ptx_jit: false,
+    };
+
+    println!("shared(before)={shared:?}");
+
+    // Launch the CUDA kernel on the stream and synchronise to its completion
+    rust_cuda::host::Stream::with(&mut stream, |stream| {
+        kernel.launch3(stream, &config, &1, &mut shared, &None) // arguments map to `a`, `b`, `c`
+    })?;
+
+    std::mem::drop(kernel); // explicitly drop the kernel before the final print
+
+    println!("shared(after)={shared:?}");
+
+    Ok(())
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[target.nvptx64-nvidia-cuda]
		rustflags = ["-Zunstable-options", "-Clinker-flavor=llbc", "-Ctarget-cpu=sm_35", "-Ccodegen-units=1", "-Clink-arg=-O3"]