-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add an example/regression test for lifetime-only kernels
- Loading branch information
Showing
5 changed files
with
134 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[target.nvptx64-nvidia-cuda] | ||
rustflags = ["-Zunstable-options", "-Clinker-flavor=llbc", "-Ctarget-cpu=sm_35", "-Ccodegen-units=1", "-Clink-arg=-O3"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
[package] | ||
name = "lifetime" | ||
version = "0.1.0" | ||
authors = { workspace = true } | ||
repository = { workspace = true } | ||
license = { workspace = true } | ||
edition = { workspace = true } | ||
rust-version = { workspace = true } | ||
|
||
[lib] | ||
crate-type = ["cdylib", "rlib"] | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[target.'cfg(target_os = "cuda")'.dependencies] | ||
rust-cuda = { workspace = true, features = ["kernel", "device"] } | ||
|
||
[target.'cfg(not(target_os = "cuda"))'.dependencies] | ||
rust-cuda = { workspace = true, features = ["kernel", "host"] } | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#![allow(missing_docs)] // FIXME: use expect | ||
#![no_std] | ||
#![cfg_attr(target_os = "cuda", feature(abi_ptx))] | ||
#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))] | ||
#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))] | ||
#![feature(cfg_version)] | ||
#![feature(type_alias_impl_trait)] | ||
#![feature(decl_macro)] | ||
|
||
extern crate alloc; | ||
|
||
// Example kernel whose only generic parameters are lifetimes: `'a` and `'b` | ||
// are named explicitly, while the reference type of `c` uses an elided one. | ||
// There are no type or const generics. | ||
// | ||
// `a` and `c` are accepted but never read. The only effect visible in this | ||
// body is a single relaxed atomic increment of the counter behind `b`. | ||
// | ||
// NOTE(review): the `pub use link! for impl` argument presumably makes the | ||
// attribute macro export the `link!` macro that the host binary invokes - | ||
// confirm against the rust-cuda kernel macro documentation. | ||
#[rust_cuda::kernel::kernel(pub use link! for impl)] | ||
#[kernel(allow(ptx::local_memory_use))] | ||
pub fn kernel<'a, 'b>( | ||
a: &'a rust_cuda::kernel::param::PerThreadShallowCopy<u32>, | ||
b: &'b rust_cuda::kernel::param::ShallowInteriorMutable<core::sync::atomic::AtomicU32>, | ||
c: &rust_cuda::kernel::param::DeepPerThreadBorrow< | ||
Option< | ||
rust_cuda::utils::adapter::RustToCudaWithPortableBitCopySemantics< | ||
core::num::NonZeroU32, | ||
>, | ||
>, | ||
>, | ||
) { | ||
// Bind and discard `a` and `c`; neither is used any further. | ||
let _ = (a, c); | ||
// Add 1 to the shared counter with relaxed ordering (presumably reached | ||
// through a deref of the parameter wrapper - TODO confirm). | ||
b.fetch_add(1, core::sync::atomic::Ordering::Relaxed); | ||
} | ||
|
||
// Device-only runtime support. This module is compiled only when | ||
// `target_os = "cuda"` and provides the global allocator, the panic handler | ||
// and the allocation-error handler that this `no_std` crate needs there. | ||
#[cfg(target_os = "cuda")] | ||
mod cuda_prelude { | ||
use rust_cuda::device::alloc::PTXAllocator; | ||
|
||
// Register `PTXAllocator` as the allocator backing the `alloc` crate. | ||
#[global_allocator] | ||
static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator; | ||
|
||
// Panic handler: prints the panic info, then exits; never returns. | ||
#[panic_handler] | ||
fn panic(info: &::core::panic::PanicInfo) -> ! { | ||
// pretty format and print the panic message | ||
// but don't allow dynamic formatting | ||
rust_cuda::device::utils::pretty_print_panic_info(info, false); | ||
|
||
// Safety: no mutable data is shared with the kernel | ||
// NOTE(review): the kernel in this crate does take a shared `AtomicU32` | ||
// through its `b` parameter - confirm that the precondition of `exit` | ||
// still holds for this example. | ||
unsafe { rust_cuda::device::utils::exit() } | ||
} | ||
|
||
// Allocation-error handler: prints the failed layout, then exits; never | ||
// returns. | ||
#[alloc_error_handler] | ||
#[track_caller] | ||
fn alloc_error_handler(layout: ::core::alloc::Layout) -> ! { | ||
rust_cuda::device::utils::pretty_print_alloc_error(layout); | ||
|
||
// Safety: no mutable data is shared with the kernel | ||
// NOTE(review): same caveat as in `panic` above in this module - the | ||
// kernel's `b` parameter is a shared atomic; confirm this is sound. | ||
unsafe { rust_cuda::device::utils::exit() } | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#![allow(missing_docs)] // FIXME: use expect | ||
|
||
use lifetime::{kernel, link}; | ||
|
||
// Host-side driver for the lifetime-only kernel example. | ||
// | ||
// Links the kernel, initialises CUDA (API, device 0, context, stream), | ||
// launches the kernel once and prints the shared counter before and after | ||
// the launch. Any CUDA error from the fallible steps is returned via `?`. | ||
fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { | ||
// Link the lifetime-only-generic CUDA kernel | ||
// `KernelPtx` is a zero-sized marker type; the `PhantomData` only exists to | ||
// use its two lifetime parameters. | ||
struct KernelPtx<'a, 'b>(core::marker::PhantomData<(&'a (), &'b ())>); | ||
link! { impl kernel<'a, 'b> for KernelPtx } | ||
|
||
// Initialize the CUDA API | ||
rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?; | ||
|
||
// Get the first CUDA GPU device | ||
let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?; | ||
|
||
// Create a CUDA context associated to this device | ||
// The binding is unused by name; it is kept so the wrapper is not dropped | ||
// until `main` returns. | ||
let _context = rust_cuda::host::CudaDropWrapper::from( | ||
rust_cuda::deps::rustacuda::context::Context::create_and_push( | ||
rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST | ||
| rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO, | ||
device, | ||
)?, | ||
); | ||
|
||
// Create a new CUDA stream to submit kernels to | ||
let mut stream = | ||
rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new( | ||
rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING, | ||
None, | ||
)?); | ||
|
||
// Host-side counter, starting at 0, that is passed as the kernel's `b`. | ||
let mut shared = core::sync::atomic::AtomicU32::new(0); | ||
|
||
// Create a new instance of the CUDA kernel and prepare the launch config | ||
let mut kernel = rust_cuda::kernel::TypedPtxKernel::<kernel>::new::<KernelPtx>(None); | ||
// Grid size 1 and block size 4 along x - presumably four kernel threads in | ||
// total; PTX JIT is turned off. | ||
let config = rust_cuda::kernel::LaunchConfig { | ||
grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), | ||
block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), | ||
ptx_jit: false, | ||
}; | ||
|
||
println!("shared(before)={shared:?}"); | ||
|
||
// Launch the CUDA kernel on the stream and synchronise to its completion | ||
// The three trailing arguments map, in order, to the kernel parameters | ||
// `a` (`&1`), `b` (`&mut shared`) and `c` (`&None`). | ||
rust_cuda::host::Stream::with(&mut stream, |stream| { | ||
kernel.launch3(stream, &config, &1, &mut shared, &None) | ||
})?; | ||
|
||
// Drop the kernel explicitly before reading the counter again. | ||
// NOTE(review): presumably this releases the loaded module early - confirm. | ||
std::mem::drop(kernel); | ||
|
||
// NOTE(review): if every launched thread runs the kernel body once, this | ||
// should print 4 - confirm on real hardware. | ||
println!("shared(after)={shared:?}"); | ||
|
||
Ok(()) | ||
} |