Skip to content

Commit

Permalink
Add an example/regression test for lifetime-only kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
juntyr committed Nov 11, 2024
1 parent f2a6654 commit 40e02ed
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ resolver = "2"

members = [
".", "rust-cuda-derive", "rust-cuda-kernel",
"examples/derive", "examples/print", "examples/single-source",
"examples/derive", "examples/lifetime", "examples/print",
"examples/single-source",
]
default-members = [
".", "rust-cuda-derive", "rust-cuda-kernel",
Expand Down
2 changes: 2 additions & 0 deletions examples/lifetime/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Extra rustc flags applied only when compiling for the `nvptx64-nvidia-cuda` target.
[target.nvptx64-nvidia-cuda]
# -Zunstable-options     : opt in to unstable compiler options
#                          (presumably required by the `llbc` linker flavor -- TODO confirm)
# -Clinker-flavor=llbc   : select the `llbc` linker flavor
# -Ctarget-cpu=sm_35     : generate code for the `sm_35` target CPU
# -Ccodegen-units=1      : compile the crate as a single codegen unit
# -Clink-arg=-O3         : forward `-O3` to the linker
rustflags = ["-Zunstable-options", "-Clinker-flavor=llbc", "-Ctarget-cpu=sm_35", "-Ccodegen-units=1", "-Clink-arg=-O3"]
22 changes: 22 additions & 0 deletions examples/lifetime/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Manifest for the `lifetime` example crate.
# Most package metadata and the lint configuration are inherited from the workspace.
[package]
name = "lifetime"
version = "0.1.0"
authors = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }

# The library is built both as a `cdylib` and as an `rlib`.
# NOTE(review): presumably the `rlib` is what the host binary links against and the
# `cdylib` is used for the device (PTX) build -- confirm against the rust-cuda build setup.
[lib]
crate-type = ["cdylib", "rlib"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

# Device build (`target_os = "cuda"`): enable the `kernel` and `device` features of rust-cuda.
[target.'cfg(target_os = "cuda")'.dependencies]
rust-cuda = { workspace = true, features = ["kernel", "device"] }

# Every other target: enable the `kernel` and `host` features of rust-cuda instead.
[target.'cfg(not(target_os = "cuda"))'.dependencies]
rust-cuda = { workspace = true, features = ["kernel", "host"] }

# Use the lint levels configured at the workspace root.
[lints]
workspace = true
54 changes: 54 additions & 0 deletions examples/lifetime/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#![allow(missing_docs)] // FIXME: use expect
#![no_std]
#![cfg_attr(target_os = "cuda", feature(abi_ptx))]
#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))]
#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))]
#![feature(cfg_version)]
#![feature(type_alias_impl_trait)]
#![feature(decl_macro)]

extern crate alloc;

// Example kernel whose only generic parameters are the lifetimes `'a` and `'b`.
//
// `a` and `c` are accepted but never read; the body's sole action is a
// `fetch_add(1, Relaxed)` on `b`.
#[rust_cuda::kernel::kernel(pub use link! for impl)]
#[kernel(allow(ptx::local_memory_use))]
pub fn kernel<'a, 'b>(
    a: &'a rust_cuda::kernel::param::PerThreadShallowCopy<u32>,
    b: &'b rust_cuda::kernel::param::ShallowInteriorMutable<core::sync::atomic::AtomicU32>,
    c: &rust_cuda::kernel::param::DeepPerThreadBorrow<
        Option<
            rust_cuda::utils::adapter::RustToCudaWithPortableBitCopySemantics<
                core::num::NonZeroU32,
            >,
        >,
    >,
) {
    use core::sync::atomic::Ordering;

    // Explicitly discard the parameters that the body does not use.
    let _ = a;
    let _ = c;

    b.fetch_add(1, Ordering::Relaxed);
}

// Runtime glue that is only compiled for the CUDA device target: the global
// allocator plus the panic and allocation-error handlers.
#[cfg(target_os = "cuda")]
mod cuda_prelude {
    use rust_cuda::device::{alloc::PTXAllocator, utils};

    // Route all heap allocations through rust-cuda's PTX allocator.
    #[global_allocator]
    static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator;

    // Print the panic message, then exit; never returns.
    #[panic_handler]
    fn panic(panic_info: &::core::panic::PanicInfo) -> ! {
        // Pretty-print the panic message; passing `false` disallows
        // dynamic formatting.
        utils::pretty_print_panic_info(panic_info, false);

        // SAFETY: no mutable data is shared with the kernel
        unsafe { utils::exit() }
    }

    // Print the layout of the failed allocation, then exit; never returns.
    #[alloc_error_handler]
    #[track_caller]
    fn alloc_error_handler(failed_layout: ::core::alloc::Layout) -> ! {
        utils::pretty_print_alloc_error(failed_layout);

        // SAFETY: no mutable data is shared with the kernel
        unsafe { utils::exit() }
    }
}
54 changes: 54 additions & 0 deletions examples/lifetime/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#![allow(missing_docs)] // FIXME: use expect

use lifetime::{kernel, link};

// Host entry point: links the example kernel, sets up a CUDA context and
// stream on device 0, launches the kernel once with a 1x4 grid/block
// configuration, and prints the shared counter before and after the launch.
//
// Any CUDA error is propagated to the caller via `?`.
fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> {
    use core::sync::atomic::AtomicU32;
    use rust_cuda::deps::rustacuda;
    use rust_cuda::host::CudaDropWrapper;

    // Link the lifetime-only-generic CUDA kernel
    struct KernelPtx<'a, 'b>(core::marker::PhantomData<(&'a (), &'b ())>);
    link! { impl kernel<'a, 'b> for KernelPtx }

    // The CUDA API has to be initialised before any other call
    rustacuda::init(rustacuda::CudaFlags::empty())?;

    // Pick the first CUDA GPU device
    let gpu = rustacuda::device::Device::get_device(0)?;

    // Create and push a CUDA context for that device; the binding keeps the
    // wrapper alive until the end of `main`
    let context_flags = rustacuda::context::ContextFlags::MAP_HOST
        | rustacuda::context::ContextFlags::SCHED_AUTO;
    let _context = CudaDropWrapper::from(rustacuda::context::Context::create_and_push(
        context_flags,
        gpu,
    )?);

    // Create the CUDA stream that the kernel will be submitted to
    let raw_stream =
        rustacuda::stream::Stream::new(rustacuda::stream::StreamFlags::NON_BLOCKING, None)?;
    let mut stream = CudaDropWrapper::from(raw_stream);

    // Counter that is handed to the kernel's `b` parameter
    let mut shared = AtomicU32::new(0);

    // Instantiate the typed kernel and describe the launch: 1 block of 4 threads
    let mut typed_kernel = rust_cuda::kernel::TypedPtxKernel::<kernel>::new::<KernelPtx>(None);
    let launch_config = rust_cuda::kernel::LaunchConfig {
        grid: rustacuda::function::GridSize::x(1),
        block: rustacuda::function::BlockSize::x(4),
        ptx_jit: false,
    };

    println!("shared(before)={shared:?}");

    // Launch the CUDA kernel on the stream and synchronise to its completion
    rust_cuda::host::Stream::with(&mut stream, |stream| {
        typed_kernel.launch3(stream, &launch_config, &1, &mut shared, &None)
    })?;

    // Release the kernel explicitly before reporting the result
    drop(typed_kernel);

    println!("shared(after)={shared:?}");

    Ok(())
}

0 comments on commit 40e02ed

Please sign in to comment.