Skip to content

Commit e625a78

Browse files
jhuber6 authored and tstellar committed
[LLVM] Update CUDA ELF flags for their new ABI (#149534)
Summary: We rely on these flags to do things in the runtime and print the contents of binaries correctly. CUDA updated their ABI encoding recently and we didn't handle that. It's a new ABI entirely, so we just select on it when it shows up. Fixes: #148703 [LLVM] Fix offload and update CUDA ABI for all SM values (#159354) Summary: Turns out the new CUDA ABI now applies retroactively to all the other SMs if you upgrade to CUDA 13.0. This patch changes the scheme, keeping all the SM flags consistent but using an offset. Fixes: #159088
1 parent 3e93017 commit e625a78

File tree

6 files changed

+158
-46
lines changed

6 files changed

+158
-46
lines changed

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ enum {
362362
ELFOSABI_FENIXOS = 16, // FenixOS
363363
ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI
364364
ELFOSABI_CUDA = 51, // NVIDIA CUDA architecture.
365+
ELFOSABI_CUDA_V2 = 41, // NVIDIA CUDA architecture.
365366
ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI
366367
ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime
367368
ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
385386
ELFABIVERSION_AMDGPU_HSA_V6 = 4,
386387
};
387388

389+
// CUDA OS ABI Version identification.
390+
enum {
391+
ELFABIVERSION_CUDA_V1 = 7,
392+
ELFABIVERSION_CUDA_V2 = 8,
393+
};
394+
388395
#define ELF_RELOC(name, value) name = value,
389396

390397
// X86_64 relocations.
@@ -921,9 +928,15 @@ enum {
921928

922929
// NVPTX specific e_flags.
923930
enum : unsigned {
924-
// Processor selection mask for EF_CUDA_SM* values.
931+
// Processor selection mask for EF_CUDA_SM* values under ELFABIVERSION_CUDA_V1.
925932
EF_CUDA_SM = 0xff,
926933

934+
// Processor selection mask for EF_CUDA_SM* values under the newer ABI (ELFABIVERSION_CUDA_V2).
935+
EF_CUDA_SM_MASK = 0xff00,
936+
937+
// Bit offset of the EF_CUDA_SM* value within e_flags when EF_CUDA_SM_MASK is used.
938+
EF_CUDA_SM_OFFSET = 8,
939+
927940
// SM based processor values.
928941
EF_CUDA_SM20 = 0x14,
929942
EF_CUDA_SM21 = 0x15,
@@ -943,9 +956,15 @@ enum : unsigned {
943956
EF_CUDA_SM80 = 0x50,
944957
EF_CUDA_SM86 = 0x56,
945958
EF_CUDA_SM87 = 0x57,
959+
EF_CUDA_SM88 = 0x58,
946960
EF_CUDA_SM89 = 0x59,
947-
// The sm_90a variant uses the same machine flag.
948961
EF_CUDA_SM90 = 0x5a,
962+
EF_CUDA_SM100 = 0x64,
963+
EF_CUDA_SM101 = 0x65,
964+
EF_CUDA_SM103 = 0x67,
965+
EF_CUDA_SM110 = 0x6e,
966+
EF_CUDA_SM120 = 0x78,
967+
EF_CUDA_SM121 = 0x79,
949968

950969
// Unified texture binding is enabled.
951970
EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -954,12 +973,15 @@ enum : unsigned {
954973
// The target is using 64-bit addressing.
955974
EF_CUDA_64BIT_ADDRESS = 0x400,
956975
// Set when using the sm_90a processor.
957-
EF_CUDA_ACCELERATORS = 0x800,
976+
EF_CUDA_ACCELERATORS_V1 = 0x800,
958977
// Undocumented software feature.
959978
EF_CUDA_SW_FLAG_V2 = 0x1000,
960979

961980
// Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
962981
EF_CUDA_VIRTUAL_SM = 0xff0000,
982+
983+
// Set when using an accelerator variant like sm_100a.
984+
EF_CUDA_ACCELERATORS = 0x8,
963985
};
964986

965987
// ELF Relocation types for BPF

llvm/include/llvm/Object/ELFObjectFile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
14791479
case ELF::ELFOSABI_OPENBSD:
14801480
return Triple::OpenBSD;
14811481
case ELF::ELFOSABI_CUDA:
1482+
case ELF::ELFOSABI_CUDA_V2:
14821483
return Triple::CUDA;
14831484
case ELF::ELFOSABI_AMDGPU_HSA:
14841485
return Triple::AMDHSA;

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
620620

621621
StringRef ELFObjectFileBase::getNVPTXCPUName() const {
622622
assert(getEMachine() == ELF::EM_CUDA);
623-
unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
623+
unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
624+
? getPlatformFlags() & ELF::EF_CUDA_SM
625+
: (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
626+
ELF::EF_CUDA_SM_OFFSET;
624627

625628
switch (SM) {
626629
// Fermi architecture.
@@ -672,14 +675,39 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
672675
return "sm_86";
673676
case ELF::EF_CUDA_SM87:
674677
return "sm_87";
678+
case ELF::EF_CUDA_SM88:
679+
return "sm_88";
675680

676681
// Ada architecture.
677682
case ELF::EF_CUDA_SM89:
678683
return "sm_89";
679684

680685
// Hopper architecture.
681686
case ELF::EF_CUDA_SM90:
682-
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
687+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
688+
: "sm_90";
689+
690+
// Blackwell architecture.
691+
case ELF::EF_CUDA_SM100:
692+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
693+
: "sm_100";
694+
case ELF::EF_CUDA_SM101:
695+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_101a"
696+
: "sm_101";
697+
case ELF::EF_CUDA_SM103:
698+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
699+
: "sm_103";
700+
case ELF::EF_CUDA_SM110:
701+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
702+
: "sm_110";
703+
704+
// Blackwell architecture.
705+
case ELF::EF_CUDA_SM120:
706+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
707+
: "sm_120";
708+
case ELF::EF_CUDA_SM121:
709+
return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_121a"
710+
: "sm_121";
683711
default:
684712
llvm_unreachable("Unknown EF_CUDA_SM value");
685713
}

llvm/tools/llvm-readobj/ELFDumper.cpp

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
10831083
};
10841084

10851085
const EnumEntry<unsigned> ElfOSABI[] = {
1086-
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087-
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088-
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089-
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090-
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091-
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092-
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093-
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094-
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095-
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096-
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097-
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098-
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099-
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100-
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101-
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102-
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103-
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104-
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
1105-
};
1086+
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
1087+
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
1088+
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
1089+
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
1090+
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
1091+
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
1092+
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
1093+
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
1094+
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
1095+
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
1096+
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
1097+
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
1098+
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
1099+
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
1100+
{"AROS", "AROS", ELF::ELFOSABI_AROS},
1101+
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
1102+
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
1103+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
1104+
{"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
1105+
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
11061106

11071107
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
11081108
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
@@ -1666,16 +1666,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
16661666
};
16671667

16681668
const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
1669-
ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1670-
ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1671-
ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1672-
ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1673-
ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1674-
ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1675-
ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1676-
ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1677-
ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1678-
ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1669+
ENUM_ENT(EF_CUDA_SM20, "sm_20"),
1670+
ENUM_ENT(EF_CUDA_SM21, "sm_21"),
1671+
ENUM_ENT(EF_CUDA_SM30, "sm_30"),
1672+
ENUM_ENT(EF_CUDA_SM32, "sm_32"),
1673+
ENUM_ENT(EF_CUDA_SM35, "sm_35"),
1674+
ENUM_ENT(EF_CUDA_SM37, "sm_37"),
1675+
ENUM_ENT(EF_CUDA_SM50, "sm_50"),
1676+
ENUM_ENT(EF_CUDA_SM52, "sm_52"),
1677+
ENUM_ENT(EF_CUDA_SM53, "sm_53"),
1678+
ENUM_ENT(EF_CUDA_SM60, "sm_60"),
1679+
ENUM_ENT(EF_CUDA_SM61, "sm_61"),
1680+
ENUM_ENT(EF_CUDA_SM62, "sm_62"),
1681+
ENUM_ENT(EF_CUDA_SM70, "sm_70"),
1682+
ENUM_ENT(EF_CUDA_SM72, "sm_72"),
1683+
ENUM_ENT(EF_CUDA_SM75, "sm_75"),
1684+
ENUM_ENT(EF_CUDA_SM80, "sm_80"),
1685+
ENUM_ENT(EF_CUDA_SM86, "sm_86"),
1686+
ENUM_ENT(EF_CUDA_SM87, "sm_87"),
1687+
ENUM_ENT(EF_CUDA_SM88, "sm_88"),
1688+
ENUM_ENT(EF_CUDA_SM89, "sm_89"),
1689+
ENUM_ENT(EF_CUDA_SM90, "sm_90"),
1690+
ENUM_ENT(EF_CUDA_SM100, "sm_100"),
1691+
ENUM_ENT(EF_CUDA_SM101, "sm_101"),
1692+
ENUM_ENT(EF_CUDA_SM103, "sm_103"),
1693+
ENUM_ENT(EF_CUDA_SM110, "sm_110"),
1694+
ENUM_ENT(EF_CUDA_SM120, "sm_120"),
1695+
ENUM_ENT(EF_CUDA_SM121, "sm_121"),
1696+
ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
1697+
ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
1698+
ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
1699+
ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
1700+
ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
1701+
ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
1702+
ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
1703+
ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
1704+
ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
1705+
ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
1706+
ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
1707+
ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
1708+
ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
1709+
ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
1710+
ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
1711+
ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
1712+
ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
1713+
ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
1714+
ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
1715+
ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
1716+
ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
1717+
ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
1718+
ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
1719+
ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
1720+
ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
1721+
ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
1722+
ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
16791723
};
16801724

16811725
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3650,10 +3694,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
36503694
else if (e.e_machine == EM_XTENSA)
36513695
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
36523696
unsigned(ELF::EF_XTENSA_MACH));
3653-
else if (e.e_machine == EM_CUDA)
3697+
else if (e.e_machine == EM_CUDA) {
36543698
ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
36553699
unsigned(ELF::EF_CUDA_SM));
3656-
else if (e.e_machine == EM_AMDGPU) {
3700+
if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
3701+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
3702+
ElfFlags += "a";
3703+
else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
3704+
(e.e_flags & ELF::EF_CUDA_ACCELERATORS))
3705+
ElfFlags += "a";
3706+
} else if (e.e_machine == EM_AMDGPU) {
36573707
switch (e.e_ident[ELF::EI_ABIVERSION]) {
36583708
default:
36593709
break;

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,23 +60,30 @@ static Expected<bool>
6060
checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6161
const auto Header = ELFObj.getELFFile().getHeader();
6262
if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
63-
return createError("Only executable ELF files are supported");
63+
return createError("only executable ELF files are supported");
6464

6565
if (Header.e_machine == EM_AMDGPU) {
6666
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
67-
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
67+
return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
6868
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
6969
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
70-
return createError("Invalid AMD ABI version, must be version 5 or above");
70+
return createError("invalid AMD ABI version, must be version 5 or above");
7171
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
7272
(Header.e_flags & EF_AMDGPU_MACH) >
7373
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
74-
return createError("Unsupported AMDGPU architecture");
74+
return createError("unsupported AMDGPU architecture");
7575
} else if (Header.e_machine == EM_CUDA) {
76-
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
77-
return createError("Invalid CUDA addressing mode");
78-
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
79-
return createError("Unsupported NVPTX architecture");
76+
if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
77+
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
78+
return createError("invalid CUDA addressing mode");
79+
if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
80+
return createError("unsupported NVPTX architecture");
81+
} else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
82+
if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
83+
return createError("unsupported NVPTX architecture");
84+
} else {
85+
return createError("invalid CUDA ABI version");
86+
}
8087
}
8188

8289
return Header.e_machine == EMachine;

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
14421442
return ElfOrErr.takeError();
14431443

14441444
// Get the numeric value for the image's `sm_` value.
1445-
auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
1445+
const auto Header = ElfOrErr->getELFFile().getHeader();
1446+
unsigned SM =
1447+
Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
1448+
? Header.e_flags & ELF::EF_CUDA_SM
1449+
: (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
14461450

14471451
CUdevice Device;
14481452
CUresult Res = cuDeviceGet(&Device, DeviceId);

0 commit comments

Comments
 (0)