Skip to content

Commit 7e28234

Browse files
committed
Reland "[HIP] Support compressing device binary"
Original PR: #67162. The commit was reverted due to UB detected by the sanitizer: https://lab.llvm.org/buildbot/#/builders/238/builds/5955 clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error: load of misaligned address 0xaaaae2d90e7c for type 'const uint64_t' (aka 'const unsigned long'), which requires 8 byte alignment. It was fixed by using memcpy instead of dereferencing an int* cast from an unaligned char*.
1 parent 4732b0c commit 7e28234

File tree

16 files changed

+658
-60
lines changed

16 files changed

+658
-60
lines changed

Diff for: clang/docs/ClangOffloadBundler.rst

+27
Original file line numberDiff line numberDiff line change
@@ -309,3 +309,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
309309
* Their offload kinds are the same
310310
* Their target triples are the same
311311
* Their GPUArch values are the same
312+
313+
Compression and Decompression
314+
=============================
315+
316+
``clang-offload-bundler`` provides features to compress and decompress the full
317+
bundle, leveraging inherent redundancies within the bundle entries. Use the
318+
``-compress`` command-line option to enable this compression capability.
319+
320+
The compressed offload bundle begins with a header followed by the compressed binary data:
321+
322+
- **Magic Number (4 bytes)**:
323+
This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
324+
325+
- **Version Number (16-bit unsigned int)**:
326+
This denotes the version of the compressed offload bundle format. The current version is ``1``.
327+
328+
- **Compression Method (16-bit unsigned int)**:
329+
This field indicates the compression method used. The value corresponds to either ``zlib`` or ``zstd``, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
330+
331+
- **Uncompressed Binary Size (32-bit unsigned int)**:
332+
This is the size (in bytes) of the binary data before it was compressed.
333+
334+
- **Hash (64-bit unsigned int)**:
335+
This is a 64-bit truncated MD5 hash of the uncompressed binary data. It is used for verification and caching.
336+
337+
- **Compressed Data**:
338+
The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.

Diff for: clang/include/clang/Driver/OffloadBundler.h

+37
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,23 @@
1919

2020
#include "llvm/Support/Error.h"
2121
#include "llvm/TargetParser/Triple.h"
22+
#include <llvm/Support/MemoryBuffer.h>
2223
#include <string>
2324
#include <vector>
2425

2526
namespace clang {
2627

2728
class OffloadBundlerConfig {
2829
public:
30+
OffloadBundlerConfig();
31+
2932
bool AllowNoHost = false;
3033
bool AllowMissingBundles = false;
3134
bool CheckInputArchive = false;
3235
bool PrintExternalCommands = false;
3336
bool HipOpenmpCompatible = false;
37+
bool Compress = false;
38+
bool Verbose = false;
3439

3540
unsigned BundleAlignment = 1;
3641
unsigned HostInputIndex = ~0u;
@@ -84,6 +89,38 @@ struct OffloadTargetInfo {
8489
std::string str() const;
8590
};
8691

92+
// CompressedOffloadBundle represents the format for the compressed offload
93+
// bundles.
94+
//
95+
// The format is as follows:
96+
// - Magic Number (4 bytes) - A constant "CCOB".
97+
// - Version (2 bytes)
98+
// - Compression Method (2 bytes) - Uses the values from
99+
// llvm::compression::Format.
100+
// - Uncompressed Size (4 bytes).
101+
// - Truncated MD5 Hash (8 bytes).
102+
// - Compressed Data (variable length).
103+
104+
class CompressedOffloadBundle {
105+
private:
106+
static inline const size_t MagicSize = 4;
107+
static inline const size_t VersionFieldSize = sizeof(uint16_t);
108+
static inline const size_t MethodFieldSize = sizeof(uint16_t);
109+
static inline const size_t SizeFieldSize = sizeof(uint32_t);
110+
static inline const size_t HashFieldSize = 8;
111+
static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
112+
MethodFieldSize + SizeFieldSize +
113+
HashFieldSize;
114+
static inline const llvm::StringRef MagicNumber = "CCOB";
115+
static inline const uint16_t Version = 1;
116+
117+
public:
118+
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
119+
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
120+
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
121+
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
122+
};
123+
87124
} // namespace clang
88125

89126
#endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H

Diff for: clang/include/clang/Driver/Options.td

+4
Original file line numberDiff line numberDiff line change
@@ -1183,6 +1183,10 @@ def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
11831183
def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
11841184
HelpText<"Enable sanitizer for supported offloading devices">;
11851185
def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
1186+
1187+
def offload_compress : Flag<["--"], "offload-compress">,
1188+
HelpText<"Compress offload device binaries (HIP only)">;
1189+
def no_offload_compress : Flag<["--"], "no-offload-compress">;
11861190
}
11871191

11881192
// CUDA options

0 commit comments

Comments
 (0)