|
15 | 15 | #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" |
16 | 16 |
|
17 | 17 | #include "llvm/ADT/StringMap.h" |
18 | | -#include "llvm/BinaryFormat/MsgPackDocument.h" |
19 | | -#include "llvm/Object/ELFObjectFile.h" |
20 | | -#include "llvm/Object/ObjectFile.h" |
21 | | -#include "llvm/Support/AMDGPUMetadata.h" |
| 18 | +#include "llvm/Frontend/Offloading/Utility.h" |
22 | 19 |
|
23 | 20 | using namespace mlir; |
24 | 21 | using namespace mlir::ROCDL; |
25 | 22 |
|
26 | | -/// Search the ELF object and return an object containing the `amdhsa.kernels` |
27 | | -/// metadata note. Function adapted from: |
28 | | -/// llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp Also see |
29 | | -/// `amdhsa.kernels`: |
30 | | -/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3-metadata |
31 | | -template <typename ELFT> |
32 | | -static std::unique_ptr<llvm::msgpack::Document> |
33 | | -getAMDHSANote(llvm::object::ELFObjectFile<ELFT> &elfObj) { |
34 | | - using namespace llvm; |
35 | | - using namespace llvm::object; |
36 | | - using namespace llvm::ELF; |
37 | | - const ELFFile<ELFT> &elf = elfObj.getELFFile(); |
38 | | - Expected<typename ELFT::ShdrRange> secOrErr = elf.sections(); |
39 | | - if (!secOrErr) { |
40 | | - consumeError(secOrErr.takeError()); |
41 | | - return nullptr; |
42 | | - } |
43 | | - ArrayRef<typename ELFT::Shdr> sections = *secOrErr; |
44 | | - for (const typename ELFT::Shdr §ion : sections) { |
45 | | - if (section.sh_type != ELF::SHT_NOTE) |
46 | | - continue; |
47 | | - size_t align = std::max(static_cast<unsigned>(section.sh_addralign), 4u); |
48 | | - Error err = Error::success(); |
49 | | - for (const typename ELFT::Note note : elf.notes(section, err)) { |
50 | | - StringRef name = note.getName(); |
51 | | - if (name != "AMDGPU") |
52 | | - continue; |
53 | | - uint32_t type = note.getType(); |
54 | | - if (type != ELF::NT_AMDGPU_METADATA) |
55 | | - continue; |
56 | | - ArrayRef<uint8_t> desc = note.getDesc(align); |
57 | | - StringRef msgPackString = |
58 | | - StringRef(reinterpret_cast<const char *>(desc.data()), desc.size()); |
59 | | - std::unique_ptr<llvm::msgpack::Document> msgPackDoc( |
60 | | - new llvm::msgpack::Document()); |
61 | | - if (!msgPackDoc->readFromBlob(msgPackString, /*Multi=*/false)) |
62 | | - return nullptr; |
63 | | - if (msgPackDoc->getRoot().isScalar()) |
64 | | - return nullptr; |
65 | | - return msgPackDoc; |
66 | | - } |
67 | | - } |
68 | | - return nullptr; |
69 | | -} |
70 | | - |
71 | | -/// Return the `amdhsa.kernels` metadata in the ELF object or nullptr on |
72 | | -/// failure. This is a helper function that casts a generic `ObjectFile` to the |
73 | | -/// appropiate `ELFObjectFile`. |
74 | | -static std::unique_ptr<llvm::msgpack::Document> |
75 | | -getAMDHSANote(ArrayRef<char> elfData) { |
76 | | - using namespace llvm; |
77 | | - using namespace llvm::object; |
78 | | - if (elfData.empty()) |
79 | | - return nullptr; |
80 | | - MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), "buffer"); |
81 | | - Expected<std::unique_ptr<ObjectFile>> objOrErr = |
82 | | - ObjectFile::createELFObjectFile(buffer); |
83 | | - if (!objOrErr || !objOrErr.get()) { |
84 | | - // Drop the error. |
85 | | - llvm::consumeError(objOrErr.takeError()); |
86 | | - return nullptr; |
87 | | - } |
88 | | - ObjectFile &elf = *(objOrErr.get()); |
89 | | - if (auto *obj = dyn_cast<ELF32LEObjectFile>(&elf)) |
90 | | - return getAMDHSANote(*obj); |
91 | | - else if (auto *obj = dyn_cast<ELF32BEObjectFile>(&elf)) |
92 | | - return getAMDHSANote(*obj); |
93 | | - else if (auto *obj = dyn_cast<ELF64LEObjectFile>(&elf)) |
94 | | - return getAMDHSANote(*obj); |
95 | | - else if (auto *obj = dyn_cast<ELF64BEObjectFile>(&elf)) |
96 | | - return getAMDHSANote(*obj); |
97 | | - return nullptr; |
98 | | -} |
99 | | - |
100 | | -/// Utility functions for converting `llvm::msgpack::DocNode` nodes. |
101 | | -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node); |
102 | | -static Attribute convertNode(Builder &builder, |
103 | | - llvm::msgpack::MapDocNode &node) { |
104 | | - NamedAttrList attrs; |
105 | | - for (auto &[keyNode, valueNode] : node) { |
106 | | - if (!keyNode.isString()) |
107 | | - continue; |
108 | | - StringRef key = keyNode.getString(); |
109 | | - if (Attribute attr = convertNode(builder, valueNode)) { |
110 | | - key.consume_front("."); |
111 | | - key.consume_back("."); |
112 | | - attrs.append(key, attr); |
113 | | - } |
114 | | - } |
115 | | - if (attrs.empty()) |
116 | | - return nullptr; |
117 | | - return builder.getDictionaryAttr(attrs); |
118 | | -} |
119 | | - |
120 | | -static Attribute convertNode(Builder &builder, |
121 | | - llvm::msgpack::ArrayDocNode &node) { |
122 | | - // Use `DenseIntAttr` if we know all the attrs are ints. |
123 | | - if (llvm::all_of(node, [](llvm::msgpack::DocNode &n) { |
124 | | - llvm::msgpack::Type kind = n.getKind(); |
125 | | - return kind == llvm::msgpack::Type::Int || |
126 | | - kind == llvm::msgpack::Type::UInt; |
127 | | - })) { |
128 | | - SmallVector<int64_t> values; |
129 | | - for (llvm::msgpack::DocNode &n : node) { |
130 | | - llvm::msgpack::Type kind = n.getKind(); |
131 | | - if (kind == llvm::msgpack::Type::Int) |
132 | | - values.push_back(n.getInt()); |
133 | | - else if (kind == llvm::msgpack::Type::UInt) |
134 | | - values.push_back(n.getUInt()); |
135 | | - } |
136 | | - return builder.getDenseI64ArrayAttr(values); |
137 | | - } |
138 | | - // Convert the array. |
139 | | - SmallVector<Attribute> attrs; |
140 | | - for (llvm::msgpack::DocNode &n : node) { |
141 | | - if (Attribute attr = convertNode(builder, n)) |
142 | | - attrs.push_back(attr); |
143 | | - } |
144 | | - if (attrs.empty()) |
145 | | - return nullptr; |
146 | | - return builder.getArrayAttr(attrs); |
147 | | -} |
148 | | - |
149 | | -static Attribute convertNode(Builder &builder, llvm::msgpack::DocNode &node) { |
150 | | - using namespace llvm::msgpack; |
151 | | - switch (node.getKind()) { |
152 | | - case llvm::msgpack::Type::Int: |
153 | | - return builder.getI64IntegerAttr(node.getInt()); |
154 | | - case llvm::msgpack::Type::UInt: |
155 | | - return builder.getI64IntegerAttr(node.getUInt()); |
156 | | - case llvm::msgpack::Type::Boolean: |
157 | | - return builder.getI64IntegerAttr(node.getBool()); |
158 | | - case llvm::msgpack::Type::String: |
159 | | - return builder.getStringAttr(node.getString()); |
160 | | - case llvm::msgpack::Type::Array: |
161 | | - return convertNode(builder, node.getArray()); |
162 | | - case llvm::msgpack::Type::Map: |
163 | | - return convertNode(builder, node.getMap()); |
164 | | - default: |
165 | | - return nullptr; |
166 | | - } |
167 | | -} |
168 | | - |
169 | | -/// The following function should succeed for Code object V3 and above. |
170 | 23 | std::optional<DenseMap<StringAttr, NamedAttrList>> |
171 | 24 | mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder, |
172 | 25 | ArrayRef<char> elfData) { |
173 | | - using namespace llvm::msgpack; |
174 | | - std::unique_ptr<llvm::msgpack::Document> metadata = getAMDHSANote(elfData); |
175 | | - if (!metadata) |
| 26 | + uint16_t elfABIVersion; |
| 27 | + llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels; |
| 28 | + llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()), |
| 29 | + "buffer"); |
| 30 | + // Get the metadata. |
| 31 | + llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage( |
| 32 | + buffer, kernels, elfABIVersion); |
| 33 | + // Return `nullopt` if the metadata couldn't be retrieved. |
| 34 | + if (!error) { |
| 35 | + llvm::consumeError(std::move(error)); |
176 | 36 | return std::nullopt; |
| 37 | + } |
| 38 | + // Helper lambda for converting values. |
| 39 | + auto getI32Array = [&builder](const uint32_t *array) { |
| 40 | + return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]), |
| 41 | + static_cast<int32_t>(array[1]), |
| 42 | + static_cast<int32_t>(array[2])}); |
| 43 | + }; |
177 | 44 | DenseMap<StringAttr, NamedAttrList> kernelMD; |
178 | | - DocNode &rootNode = (metadata)->getRoot(); |
179 | | - // Fail if `rootNode` is not a map -it should be for AMD Obj Ver 3. |
180 | | - if (!rootNode.isMap()) |
181 | | - return std::nullopt; |
182 | | - DocNode &kernels = rootNode.getMap()["amdhsa.kernels"]; |
183 | | - // Fail if `amdhsa.kernels` is not an array. |
184 | | - if (!kernels.isArray()) |
185 | | - return std::nullopt; |
186 | | - // Convert each of the kernels. |
187 | | - for (DocNode &kernel : kernels.getArray()) { |
188 | | - if (!kernel.isMap()) |
189 | | - continue; |
190 | | - MapDocNode &kernelMap = kernel.getMap(); |
191 | | - DocNode &nameNode = kernelMap[".name"]; |
192 | | - if (!nameNode.isString()) |
193 | | - continue; |
194 | | - StringRef name = nameNode.getString(); |
195 | | - NamedAttrList attrList; |
196 | | - // Convert the kernel properties. |
197 | | - for (auto &[keyNode, valueNode] : kernelMap) { |
198 | | - if (!keyNode.isString()) |
199 | | - continue; |
200 | | - StringRef key = keyNode.getString(); |
201 | | - key.consume_front("."); |
202 | | - key.consume_back("."); |
203 | | - if (key == "name") |
204 | | - continue; |
205 | | - if (Attribute attr = convertNode(builder, valueNode)) |
206 | | - attrList.append(key, attr); |
207 | | - } |
208 | | - if (!attrList.empty()) |
209 | | - kernelMD[builder.getStringAttr(name)] = std::move(attrList); |
| 45 | + for (const auto &[name, kernel] : kernels) { |
| 46 | + NamedAttrList attrs; |
| 47 | + // Add kernel metadata. |
| 48 | + attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount)); |
| 49 | + attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount)); |
| 50 | + attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount)); |
| 51 | + attrs.append("sgpr_spill_count", |
| 52 | + builder.getI64IntegerAttr(kernel.SGPRSpillCount)); |
| 53 | + attrs.append("vgpr_spill_count", |
| 54 | + builder.getI64IntegerAttr(kernel.VGPRSpillCount)); |
| 55 | + attrs.append("wavefront_size", |
| 56 | + builder.getI64IntegerAttr(kernel.WavefrontSize)); |
| 57 | + attrs.append("max_flat_workgroup_size", |
| 58 | + builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize)); |
| 59 | + attrs.append("group_segment_fixed_size", |
| 60 | + builder.getI64IntegerAttr(kernel.GroupSegmentList)); |
| 61 | + attrs.append("private_segment_fixed_size", |
| 62 | + builder.getI64IntegerAttr(kernel.PrivateSegmentSize)); |
| 63 | + attrs.append("reqd_workgroup_size", |
| 64 | + getI32Array(kernel.RequestedWorkgroupSize)); |
| 65 | + attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint)); |
| 66 | + kernelMD[builder.getStringAttr(name)] = std::move(attrs); |
210 | 67 | } |
211 | | - return kernelMD; |
| 68 | + return std::move(kernelMD); |
212 | 69 | } |
213 | 70 |
|
214 | 71 | gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule, |
|
0 commit comments