Skip to content

Commit 083650f

Browse files
authored
Unrolled build for rust-lang#135909
Rollup merge of rust-lang#135909 - Flakebi:amdgpu-kd, r=jieyouxu,workingjubilee

Export kernel descriptor for amdgpu kernels

The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol with the name of the kernel plus a `.kd` suffix. Add it to the exported symbols in the linker script, so that it can be found.

For reference, the symbol is created here in LLVM: https://github.com/llvm/llvm-project/blob/d5457e4c1619e5dbeefd49841e284cbc24d35cb4/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp#L966

I wrote [a test](Flakebi@6a9115b) for this as well; I’ll add that once the target is merged and working.

With this, all PRs to get working code for amdgpu are open (this + the target + the two patches adding addrspacecasts for alloca and global variables).

Tracking issue: rust-lang#135024

r? `@workingjubilee`
2 parents c3fe9e7 + 99ec64c commit 083650f

File tree

4 files changed

+92
-32
lines changed

4 files changed

+92
-32
lines changed

compiler/rustc_codegen_ssa/src/back/linker.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
17761776
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
17771777
tcx, symbol, cnum,
17781778
));
1779+
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
17791780
}
17801781
});
17811782

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+60-32
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
1010
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
1111
};
1212
use rustc_middle::query::LocalCrate;
13-
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
13+
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
1414
use rustc_middle::util::Providers;
1515
use rustc_session::config::{CrateType, OomStrategy};
16+
use rustc_target::callconv::Conv;
1617
use rustc_target::spec::{SanitizerSet, TlsModel};
1718
use tracing::debug;
1819

@@ -584,6 +585,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
584585
}
585586
}
586587

588+
/// Returns the calling convention and the argument ABIs of an exported symbol.
///
/// For `NonGeneric` and `Generic` symbols that are not statics, an `Instance` is built and its
/// function ABI is looked up through `tcx.fn_abi_of_instance`; the result is the ABI's `conv`
/// together with a slice of its `args`.
///
/// For every other symbol (statics, `DropGlue`, `AsyncDropGlueCtorShim`, `NoDefId`,
/// `ThreadLocalShim`) no instance is built and the function falls back to
/// `(Conv::Rust, &[])`.
///
/// Hits `bug!` if `fn_abi_of_instance` returns an error for the instance.
fn calling_convention_for_symbol<'tcx>(
589+
tcx: TyCtxt<'tcx>,
590+
symbol: ExportedSymbol<'tcx>,
591+
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
592+
// `None` means "no function instance to query"; such symbols take the fallback at the end.
let instance = match symbol {
593+
// This guarded arm must stay first: it filters out statics before the unguarded
// `NonGeneric`/`Generic` arms below would turn them into an `Instance`.
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
594+
if tcx.is_static(def_id) =>
595+
{
596+
None
597+
}
598+
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
599+
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
600+
// DropGlue always use the Rust calling convention and thus follow the target's default
601+
// symbol decoration scheme.
602+
ExportedSymbol::DropGlue(..) => None,
603+
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
604+
// target's default symbol decoration scheme.
605+
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
606+
// NoDefId always follow the target's default symbol decoration scheme.
607+
ExportedSymbol::NoDefId(..) => None,
608+
// ThreadLocalShim always follow the target's default symbol decoration scheme.
609+
ExportedSymbol::ThreadLocalShim(..) => None,
610+
};
611+
612+
// Look up the ABI for the instance (if any) in a fully monomorphized typing environment,
// passing an empty list as the second element of the query input.
instance
613+
.map(|i| {
614+
tcx.fn_abi_of_instance(
615+
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
616+
)
617+
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
618+
})
619+
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
620+
// FIXME(workingjubilee): why don't we know the convention here?
621+
// Fallback for symbols without an instance: Rust convention, no argument ABIs.
.unwrap_or((Conv::Rust, &[]))
622+
}
623+
587624
/// This is the symbol name of the given instance as seen by the linker.
588625
///
589626
/// On 32-bit Windows symbols are decorated according to their calling conventions.
@@ -592,8 +629,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
592629
symbol: ExportedSymbol<'tcx>,
593630
instantiating_crate: CrateNum,
594631
) -> String {
595-
use rustc_target::callconv::Conv;
596-
597632
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
598633

599634
// thread local will not be a function call,
@@ -617,35 +652,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
617652
_ => return undecorated,
618653
};
619654

620-
let instance = match symbol {
621-
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
622-
if tcx.is_static(def_id) =>
623-
{
624-
None
625-
}
626-
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
627-
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
628-
// DropGlue always use the Rust calling convention and thus follow the target's default
629-
// symbol decoration scheme.
630-
ExportedSymbol::DropGlue(..) => None,
631-
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
632-
// target's default symbol decoration scheme.
633-
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
634-
// NoDefId always follow the target's default symbol decoration scheme.
635-
ExportedSymbol::NoDefId(..) => None,
636-
// ThreadLocalShim always follow the target's default symbol decoration scheme.
637-
ExportedSymbol::ThreadLocalShim(..) => None,
638-
};
639-
640-
let (conv, args) = instance
641-
.map(|i| {
642-
tcx.fn_abi_of_instance(
643-
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
644-
)
645-
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
646-
})
647-
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
648-
.unwrap_or((Conv::Rust, &[]));
655+
let (conv, args) = calling_convention_for_symbol(tcx, symbol);
649656

650657
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
651658
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
@@ -677,6 +684,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
677684
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
678685
}
679686

687+
/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
688+
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
689+
/// object.
///
/// This is a no-op unless the symbol's calling convention is `Conv::GpuKernel` and the
/// session's target OS is `"amdhsa"`. Otherwise exactly one entry is appended to `symbols`:
/// the name returned by `symbol_name_for_instance_in_crate` for `symbol` and
/// `instantiating_crate`, followed by `.kd`.
690+
pub(crate) fn extend_exported_symbols<'tcx>(
691+
symbols: &mut Vec<String>,
692+
tcx: TyCtxt<'tcx>,
693+
symbol: ExportedSymbol<'tcx>,
694+
instantiating_crate: CrateNum,
695+
) {
696+
// Only the calling convention matters here; the argument ABIs are ignored.
let (conv, _) = calling_convention_for_symbol(tcx, symbol);
697+
698+
// Bail out for anything that is not a GPU kernel on an amdhsa target.
if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" {
699+
return;
700+
}
701+
702+
// The descriptor name is built from this symbol name, so both are resolved the same way.
let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
703+
704+
// Add the symbol for the kernel descriptor (with .kd suffix)
705+
symbols.push(format!("{undecorated}.kd"));
706+
}
707+
680708
fn maybe_emutls_symbol_name<'tcx>(
681709
tcx: TyCtxt<'tcx>,
682710
symbol: ExportedSymbol<'tcx>,

tests/run-make/amdgpu-kd/foo.rs

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#![allow(internal_features)]
2+
#![feature(no_core, lang_items, abi_gpu_kernel)]
3+
#![no_core]
4+
#![no_std]
5+
6+
// This is needed because of #![no_core]:
7+
#[lang = "sized"]
8+
trait Sized {}
9+
10+
// Empty `gpu-kernel` function used as the test fixture. `#[no_mangle]` keeps the symbol name
// as `kernel`, which is the name the accompanying rmake.rs looks for (as `kernel.kd`).
#[no_mangle]
11+
extern "gpu-kernel" fn kernel() {}

tests/run-make/amdgpu-kd/rmake.rs

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
2+
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
3+
// with the name of the kernel plus a .kd suffix.
4+
// Check that the produced object has the .kd symbol exported.
5+
6+
//@ needs-llvm-components: amdgpu
7+
//@ needs-rust-lld
8+
9+
use run_make_support::{llvm_readobj, rustc};
10+
11+
// Builds foo.rs for the amdgcn-amd-amdhsa target and checks that the output lists `kernel.kd`.
fn main() {
12+
// Compile foo.rs as a `cdylib` crate named `foo` for amdgcn-amd-amdhsa, targeting gfx900.
rustc()
13+
.crate_name("foo")
14+
.target("amdgcn-amd-amdhsa")
15+
.arg("-Ctarget-cpu=gfx900")
16+
.crate_type("cdylib")
17+
.input("foo.rs")
18+
.run();
19+
// Dump the symbols of the produced `foo.elf` and require that the kernel descriptor symbol
// name appears in the output. NOTE(review): presumably `foo.elf` is the cdylib output name
// on this target -- confirm against the target spec.
llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd");
20+
}

0 commit comments

Comments
 (0)